1 // A version of the matcher with simplification |
1 // A version of the simple matcher with simplification |
2 // of derivatives |
2 // of derivatives |
3 // |
3 // |
4 // this keeps the regular expressions small, which |
4 // this keeps the regular expressions (often) small, which |
5 // is good for the run-time |
5 // is good for the run-time |
6 // |
6 // |
7 // call the test cases with X = {1,2} |
7 // call the test cases with X = {1,2,3,4} |
8 // |
8 // |
9 // amm re3.sc testX |
9 // amm re3.sc testX |
10 // |
10 // |
11 // or |
11 // or |
12 // |
12 // |
19 case class CHAR(c: Char) extends Rexp |
19 case class CHAR(c: Char) extends Rexp |
20 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
20 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
21 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
21 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
22 case class STAR(r: Rexp) extends Rexp |
22 case class STAR(r: Rexp) extends Rexp |
23 case class NTIMES(r: Rexp, n: Int) extends Rexp |
23 case class NTIMES(r: Rexp, n: Int) extends Rexp |
24 |
|
25 |
24 |
26 |
25 |
27 // the nullable function: tests whether the regular |
26 // the nullable function: tests whether the regular |
28 // expression can recognise the empty string |
27 // expression can recognise the empty string |
29 def nullable (r: Rexp) : Boolean = r match { |
28 def nullable (r: Rexp) : Boolean = r match { |
65 case (r1s, r2s) => SEQ(r1s, r2s) |
64 case (r1s, r2s) => SEQ(r1s, r2s) |
66 } |
65 } |
67 case r => r |
66 case r => r |
68 } |
67 } |
69 |
68 |
70 // the derivative w.r.t. a string (iterates der) |
69 // the derivative w.r.t. a string (iterates der and simp) |
71 def ders(s: List[Char], r: Rexp) : Rexp = s match { |
70 def ders(s: List[Char], r: Rexp) : Rexp = s match { |
72 case Nil => r |
71 case Nil => r |
73 case c::s => ders(s, simp(der(c, r))) |
72 case c::s => ders(s, simp(der(c, r))) |
74 } |
73 } |
75 |
74 |
79 |
78 |
80 |
79 |
81 // Test Cases |
80 // Test Cases |
82 //============ |
81 //============ |
83 |
82 |
84 // one or zero |
83 // optional is still just defined |
85 def OPT(r: Rexp) = ALT(r, ONE) |
84 def OPT(r: Rexp) = ALT(r, ONE) |
86 |
85 |
87 // evil regular expressions: (a?){n} a{n} and (a*)* b |
86 // evil regular expressions: (a?){n} a{n} and (a*)* b |
88 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) |
87 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) |
89 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
88 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
128 case NTIMES(r, _) => 1 + size(r) |
127 case NTIMES(r, _) => 1 + size(r) |
129 } |
128 } |
130 |
129 |
131 |
130 |
132 // now the size of the derivatives grows |
131 // now the size of the derivatives grows |
133 // much, much slower |
132 // much, much slower and actually maxes out |
|
133 // at size 8 |
134 |
134 |
135 size(ders("".toList, EVIL2)) // 5 |
135 size(ders("".toList, EVIL2)) // 5 |
136 size(ders("a".toList, EVIL2)) // 8 |
136 size(ders("a".toList, EVIL2)) // 8 |
137 size(ders("aa".toList, EVIL2)) // 8 |
137 size(ders("aa".toList, EVIL2)) // 8 |
138 size(ders("aaa".toList, EVIL2)) // 8 |
138 size(ders("aaa".toList, EVIL2)) // 8 |
139 size(ders("aaaa".toList, EVIL2)) // 8 |
139 size(ders("aaaa".toList, EVIL2)) // 8 |
140 size(ders("aaaaa".toList, EVIL2)) // 8 |
140 size(ders("aaaaa".toList, EVIL2)) // 8 |
|
141 |
|
142 size(ders(("a" * 20000).toList, EVIL2)) // 8 |
141 |
143 |
142 |
144 |
143 @arg(doc = "All tests.") |
145 @arg(doc = "All tests.") |
144 @main |
146 @main |
145 def all() = { test1(); test2() } |
147 def all() = { test1(); test2() } |