41 def % = STAR(s) |
41 def % = STAR(s) |
42 def ~ (r: Rexp) = SEQ(s, r) |
42 def ~ (r: Rexp) = SEQ(s, r) |
43 def ~ (r: String) = SEQ(s, r) |
43 def ~ (r: String) = SEQ(s, r) |
44 } |
44 } |
45 |
45 |
46 // (1) |
46 // examples for the implicits: |
|
47 // ALT(CHAR('a'), CHAR('b')) |
|
48 // val areg : Rexp = "a" | "b" |
|
49 |
|
50 // SEQ(CHAR('a'), CHAR('b')) |
|
51 // val sreg : Rexp = "a" ~ "b" |
|
52 |
|
53 |
|
54 // ADD YOUR CODE BELOW |
|
55 //====================== |
|
56 |
|
57 // (1) |
47 def nullable (r: Rexp) : Boolean = r match { |
58 def nullable (r: Rexp) : Boolean = r match { |
48 case ZERO => false |
59 case ZERO => false |
49 case ONE => true |
60 case ONE => true |
50 case CHAR(_) => false |
61 case CHAR(_) => false |
51 case ALTs(rs) => rs.exists(nullable) |
62 case ALTs(rs) => (for(reg <- rs) yield nullable(reg)).exists(_ == true) |
52 case SEQs(rs) => rs.forall(nullable) |
63 case SEQs(rs) => (for(reg <- rs) yield nullable(reg)).forall(_ == true) |
53 case STAR(_) => true |
64 case STAR(_) => true |
54 } |
65 } |
|
66 |
|
67 /* |
|
68 nullable(ZERO) == false |
|
69 nullable(ONE) == true |
|
70 nullable(CHAR('a')) == false |
|
71 nullable(ZERO | ONE) == true |
|
72 nullable(ZERO | CHAR('a')) == false |
|
73 nullable(ONE ~ ONE) == true |
|
74 nullable(ONE ~ CHAR('a')) == false |
|
75 nullable(STAR(ZERO)) == true |
|
76 nullable(ALTs(List(ONE, CHAR('a'), ZERO))) == true |
|
77 nullable(SEQs(List(ONE, ALTs(List(ONE, CHAR('a'), ZERO)), STAR(ZERO)))) == true |
|
78 */ |
55 |
79 |
56 // (2) |
80 // (2) |
57 def der(c: Char, r: Rexp) : Rexp = r match { |
81 def der (c: Char, r: Rexp) : Rexp = r match { |
58 case ZERO => ZERO |
82 case ZERO => ZERO |
59 case ONE => ZERO |
83 case ONE => ZERO |
60 case CHAR(d) => if (c == d) ONE else ZERO |
84 case CHAR(d) => if(c == d) ONE else ZERO |
61 case ALTs(rs) => ALTs(rs.map(der(c, _))) |
85 case ALTs(rs) => ALTs(for(reg <- rs) yield der(c, reg)) |
62 case SEQs(Nil) => ZERO |
86 case SEQs(Nil) => ZERO |
63 case SEQs(r1::rs) => |
87 case SEQs(r :: rs) => if(nullable(r)) ALT(SEQs(der(c, r) :: rs), der(c, SEQs(rs))) else SEQs(der(c, r) :: rs) |
64 if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs))) |
88 case STAR(r) => SEQ(der(c,r), STAR(r)) |
65 else SEQs(der(c, r1):: rs) |
89 } |
66 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
90 |
67 } |
91 /* |
68 |
92 der('a', ZERO | ONE) == (ZERO | ZERO) |
|
93 der('a', (CHAR('a') | ONE) ~ CHAR('a')) == ALT((ONE | ZERO) ~ CHAR('a'), SEQs(List(ONE))) |
|
94 der('a', (CHAR('a') | CHAR('a')) ~ CHAR('a')) == (ONE | ONE) ~ CHAR('a') |
|
95 der('a', STAR(CHAR('a'))) == (ONE ~ STAR(CHAR('a'))) |
|
96 der('b', STAR(CHAR('a'))) == (ZERO ~ STAR(CHAR('a'))) |
|
97 */ |
69 |
98 |
70 // (3) |
99 // (3) |
71 def denest(rs: List[Rexp]) : List[Rexp] = rs match { |
100 def denest(rs: List[Rexp]) : List[Rexp] = rs match { |
72 case Nil => Nil |
101 case Nil => Nil |
73 case ZERO::tl => denest(tl) |
102 case ZERO :: rest => denest(rest) |
74 case ALTs(rs1)::rs2 => rs1 ::: denest(rs2) |
103 case ALTs(rgs) :: rest => rgs ::: denest(rest) |
75 case r::rs => r :: denest(rs) |
104 case r :: rest => r :: denest(rest) |
76 } |
105 } |
|
106 |
|
107 /* |
|
108 denest(List(ONE, ZERO, ALTs(List(ONE, CHAR('a'))))) == List(ONE, ONE, CHAR('a')) |
|
109 denest(List(ONE ~ ONE, ZERO, ZERO | ONE)) == List(ONE ~ ONE, ZERO, ONE) |
|
110 */ |
77 |
111 |
78 // (4) |
112 // (4) |
79 def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match { |
113 def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match { |
80 case Nil => acc |
114 case Nil => acc |
81 case ZERO::rs => ZERO::Nil |
115 case ZERO :: rest => List(ZERO) |
82 case ONE::rs => flts(rs, acc) |
116 case ONE :: rest => flts(rest, acc) |
83 case SEQs(rs1)::rs => flts(rs, acc ::: rs1) |
117 case SEQs(rgs) :: rest => flts(rest, acc ::: rgs) |
84 case r::rs => flts(rs, acc :+ r) |
118 case r :: rest => flts(rest, acc ::: List(r)) |
85 } |
119 } |
|
120 |
|
121 /* |
|
122 flts(List(CHAR('a'), ZERO, ONE), Nil) == List(ZERO) |
|
123 flts(List(CHAR('a'), ONE, ONE, CHAR('b')), Nil) == List(CHAR('a'), CHAR('b')) |
|
124 flts(List(ONE ~ CHAR('a'), CHAR('b') ~ ONE), Nil) == List(ONE, CHAR('a'), CHAR('b'), ONE) |
|
125 */ |
86 |
126 |
87 // (5) |
127 // (5) |
88 def ALTs_smart(rs: List[Rexp]) : Rexp = rs match { |
128 def ALTs_smart(rs: List[Rexp]) : Rexp = rs match { |
89 case Nil => ZERO |
129 case Nil => ZERO |
90 case r::Nil => r |
130 case List(r) => r |
91 case rs => ALTs(rs) |
131 case _ => ALTs(rs) |
92 } |
132 } |
93 |
133 |
94 def SEQs_smart(rs: List[Rexp]) : Rexp = rs match { |
134 def SEQs_smart(rs: List[Rexp]) : Rexp = rs match { |
95 case Nil => ONE |
135 case Nil => ONE |
96 case ZERO::Nil => ZERO |
136 case List(r) => r |
97 case r::Nil => r |
137 case _ => SEQs(rs) |
98 case rs => SEQs(rs) |
138 } |
99 } |
139 |
100 |
140 /* |
101 // (6) |
141 SEQs_smart(Nil) == ONE |
102 |
142 SEQs_smart(List(ZERO)) == ZERO |
|
143 SEQs_smart(List(CHAR('a'))) == CHAR('a') |
|
144 SEQs_smart(List(ONE ~ ONE)) == ONE ~ ONE |
|
145 SEQs_smart(List(ONE, ONE)) == SEQs(List(ONE, ONE)) |
|
146 ALTs_smart(Nil) == ZERO |
|
147 ALTs_smart(List(ONE ~ ONE)) == ONE ~ ONE |
|
148 ALTs_smart(List(ZERO, ZERO)) == ALTs(List(ZERO, ZERO)) |
|
149 */ |
|
150 |
|
151 // (6) |
103 def simp(r: Rexp) : Rexp = r match { |
152 def simp(r: Rexp) : Rexp = r match { |
104 case ALTs(rs) => |
153 case ALTs(rs) => ALTs_smart(denest(for(reg <- rs) yield simp(reg)).distinct) |
105 ALTs_smart(denest(rs.map(simp)).distinct) |
154 case SEQs(rs) => SEQs_smart(flts(for(reg <- rs) yield simp(reg))) |
106 case SEQs(rs) => |
155 case _ => r |
107 SEQs_smart(flts(rs.map(simp))) |
156 } |
108 case r => r |
157 |
109 } |
158 /* |
110 |
159 simp(ZERO | ONE) == ONE |
111 //println("Simp tests") |
160 simp(STAR(ZERO | ONE)) == STAR(ZERO | ONE) |
112 //println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))) |
161 simp(ONE ~ (ONE ~ (ONE ~ CHAR('a')))) == CHAR('a') |
113 //println(simp(((CHAR('a') | ZERO) ~ ONE) | |
162 simp(((ONE ~ ONE) ~ ONE) ~ CHAR('a')) == CHAR('a') |
114 // (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))) |
163 simp(((ONE | ONE) ~ ONE) ~ CHAR('a')) == CHAR('a') |
115 |
164 simp(ONE ~ (ONE ~ (ONE ~ ZERO))) == ZERO |
116 |
165 simp(ALT(ONE ~ (ONE ~ (ONE ~ ZERO)), CHAR('a'))) == CHAR('a') |
117 // (7) |
166 simp(CHAR('a') | CHAR('a')) == CHAR('a') |
118 |
167 simp(CHAR('a') ~ CHAR('a')) == CHAR('a') ~ CHAR('a') |
|
168 simp(ONE | CHAR('a')) == (ONE | CHAR('a')) |
|
169 simp(ALT((CHAR('a') | ZERO) ~ ONE,((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) == CHAR('a') |
|
170 simp((ZERO | ((ZERO | ZERO) | (ZERO | ZERO))) ~ ((ONE | ZERO) | ONE ) ~ (CHAR('a'))) == ZERO |
|
171 simp(ALT(ONE | ONE, ONE | ONE)) == ONE |
|
172 simp(ALT(ZERO | CHAR('a'), CHAR('a') | ZERO)) == CHAR('a') |
|
173 simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) == ALT(ONE, CHAR('a')) |
|
174 simp(ALTs(Nil)) == ZERO |
|
175 simp(SEQs(List(CHAR('a')))) == CHAR('a') |
|
176 */ |
|
177 |
|
178 // (7) |
119 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
179 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
120 case Nil => r |
180 case Nil => r |
121 case c::s => ders(s, simp(der(c, r))) |
181 case c :: cs => ders(cs, simp(der(c, r))) |
122 } |
182 } |
123 |
183 def matcher(r: Rexp, s: String): Boolean = { |
124 // main matcher function |
184 val derivatives = ders(s.toList, r) |
125 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) |
185 nullable(derivatives) |
|
186 } |
|
187 |
|
188 /* |
|
189 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
|
190 ders("aaaaa".toList, EVIL) == SEQs(List(STAR(CHAR('a')), STAR(STAR(CHAR('a'))), CHAR('b'))) |
|
191 ders(List('b'), EVIL) == ONE |
|
192 ders("bb".toList, EVIL) == ZERO |
|
193 matcher(EVIL, "a" * 5 ++ "b") == true |
|
194 matcher(EVIL, "b") == true |
|
195 matcher(EVIL, "bb") == false |
|
196 matcher("abc", "abc") == true |
|
197 matcher(("ab" | "a") ~ (ONE | "bc"), "abc") == true |
|
198 matcher(ONE, "") == true |
|
199 matcher(ZERO, "") == false |
|
200 matcher(ONE | CHAR('a'), "") == true |
|
201 matcher(ONE | CHAR('a'), "a") == true |
|
202 */ |
126 |
203 |
127 // (8) |
204 // (8) |
128 |
|
129 def size(r: Rexp): Int = r match { |
205 def size(r: Rexp): Int = r match { |
130 case ZERO => 1 |
206 case ZERO => 1 |
131 case ONE => 1 |
207 case ONE => 1 |
132 case CHAR(_) => 1 |
208 case CHAR(_) => 1 |
133 case ALTs(rs) => 1 + rs.map(size).sum |
209 case ALTs(rs) => 1 + (for(reg <- rs) yield size(reg)).sum |
134 case SEQs(rs) => 1 + rs.map(size).sum |
210 case SEQs(rs) => 1 + (for(reg <- rs) yield size(reg)).sum |
135 case STAR(r1) => 1 + size(r1) |
211 case STAR(r) => 1 + size(r) |
136 } |
212 } |
137 |
213 |
138 |
214 /* |
139 |
215 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
140 // some testing data |
216 size(der('a', der('a', EVIL))) == 36 |
141 /* |
217 size(der('a', der('a', der('a', EVIL)))) == 83 |
142 println(matcher(("a" ~ "b") ~ "c", "abc")) // => true |
218 size(ders("aaaaaa".toList, EVIL)) == 7 |
143 println(matcher(("a" ~ "b") ~ "c", "ab")) // => false |
219 size(ders(("a" * 50).toList, EVIL)) == 7 |
|
220 */ |
|
221 |
|
222 |
|
223 // Some testing data |
|
224 //=================== |
|
225 /* |
|
226 |
|
227 simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) // => ALTs(List(ONE, CHAR(a))) |
|
228 simp(((CHAR('a') | ZERO) ~ ONE) | (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) // => CHAR(a) |
|
229 |
|
230 matcher(("a" ~ "b") ~ "c", "ab") // => false |
|
231 matcher(("a" ~ "b") ~ "c", "abc") // => true |
|
232 |
144 |
233 |
145 // the supposedly 'evil' regular expression (a*)* b |
234 // the supposedly 'evil' regular expression (a*)* b |
146 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
235 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
147 |
236 |
148 println(matcher(EVIL, "a" * 1000 ++ "b")) // => true |
237 matcher(EVIL, "a" * 1000) // => false |
149 println(matcher(EVIL, "a" * 1000)) // => false |
238 matcher(EVIL, "a" * 1000 ++ "b") // => true |
|
239 |
150 |
240 |
151 // size without simplifications |
241 // size without simplifications |
152 println(size(der('a', der('a', EVIL)))) // => 36 |
242 size(der('a', der('a', EVIL))) // => 36 |
153 println(size(der('a', der('a', der('a', EVIL))))) // => 83 |
243 size(der('a', der('a', der('a', EVIL)))) // => 83 |
154 |
244 |
155 // size with simplification |
245 // size with simplification |
156 println(simp(der('a', der('a', EVIL)))) |
246 size(simp(der('a', der('a', EVIL)))) // => 7 |
157 println(simp(der('a', der('a', der('a', EVIL))))) |
247 size(simp(der('a', der('a', der('a', EVIL))))) // => 7 |
158 |
|
159 println(size(simp(der('a', der('a', EVIL))))) // => 7 |
|
160 println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7 |
|
161 |
248 |
162 // Python needs around 30 seconds for matching 28 a's with EVIL. |
249 // Python needs around 30 seconds for matching 28 a's with EVIL. |
163 // Java 9 and later increase this to an "astonishing" 40000 a's in |
250 // Java 9 and later increase this to an "astonishing" 40000 a's in |
164 // around 30 seconds. |
251 // 30 seconds. |
165 // |
252 // |
166 // Lets see how long it takes to match strings with |
253 // Lets see how long it really takes to match strings with |
167 // 5 Million a's...it should be in the range of a |
254 // 5 Million a's...it should be in the range of a few |
168 // few seconds. |
255 // of seconds. |
169 |
256 |
170 def time_needed[T](i: Int, code: => T) = { |
257 def time_needed[T](i: Int, code: => T) = { |
171 val start = System.nanoTime() |
258 val start = System.nanoTime() |
172 for (j <- 1 to i) code |
259 for (j <- 1 to i) code |
173 val end = System.nanoTime() |
260 val end = System.nanoTime() |