|
1 import scala.language.implicitConversions |
|
2 import scala.language.reflectiveCalls |
|
3 import scala.annotation.tailrec |
|
4 |
|
5 |
|
6 abstract class Rexp |
|
7 case object ZERO extends Rexp |
|
8 case object ONE extends Rexp |
|
9 case class CHAR(c: Char) extends Rexp |
|
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
12 case class STAR(r: Rexp) extends Rexp |
|
13 |
|
14 |
|
15 |
|
16 abstract class Bit |
|
17 case object Z extends Bit |
|
18 case object S extends Bit |
|
19 |
|
20 type Bits = List[Bit] |
|
21 |
|
22 abstract class ARexp |
|
23 case object AZERO extends ARexp |
|
24 case class AONE(bs: Bits) extends ARexp |
|
25 case class ACHAR(bs: Bits, c: Char) extends ARexp |
|
26 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
|
27 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
|
28 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
|
29 |
|
30 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) |
|
31 |
|
32 abstract class Val |
|
33 case object Empty extends Val |
|
34 case class Chr(c: Char) extends Val |
|
35 case class Sequ(v1: Val, v2: Val) extends Val |
|
36 case class Left(v: Val) extends Val |
|
37 case class Right(v: Val) extends Val |
|
38 case class Stars(vs: List[Val]) extends Val |
|
39 case class Position(i: Int, v: Val) extends Val // for testing purposes |
|
40 case object Undefined extends Val // for testing purposes |
|
41 |
|
42 // some convenience for typing in regular expressions |
|
43 def charlist2rexp(s: List[Char]): Rexp = s match { |
|
44 case Nil => ONE |
|
45 case c::Nil => CHAR(c) |
|
46 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
47 } |
|
48 implicit def string2rexp(s: String) : Rexp = charlist2rexp(s.toList) |
|
49 |
|
50 implicit def RexpOps(r: Rexp) = new { |
|
51 def | (s: Rexp) = ALT(r, s) |
|
52 def % = STAR(r) |
|
53 def ~ (s: Rexp) = SEQ(r, s) |
|
54 } |
|
55 |
|
56 implicit def stringOps(s: String) = new { |
|
57 def | (r: Rexp) = ALT(s, r) |
|
58 def | (r: String) = ALT(s, r) |
|
59 def % = STAR(s) |
|
60 def ~ (r: Rexp) = SEQ(s, r) |
|
61 def ~ (r: String) = SEQ(s, r) |
|
62 } |
|
63 |
|
64 |
|
65 // nullable function: tests whether the regular |
|
66 // expression can recognise the empty string |
|
67 def nullable(r: Rexp) : Boolean = r match { |
|
68 case ZERO => false |
|
69 case ONE => true |
|
70 case CHAR(_) => false |
|
71 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
|
72 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
73 case STAR(_) => true |
|
74 } |
|
75 |
|
76 // derivative of a regular expression w.r.t. a character |
|
77 def der(c: Char, r: Rexp) : Rexp = r match { |
|
78 case ZERO => ZERO |
|
79 case ONE => ZERO |
|
80 case CHAR(d) => if (c == d) ONE else ZERO |
|
81 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
|
82 case SEQ(r1, r2) => |
|
83 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
|
84 else SEQ(der(c, r1), r2) |
|
85 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
86 } |
|
87 |
|
88 // derivative w.r.t. a string (iterates der) |
|
89 def ders(s: List[Char], r: Rexp) : Rexp = s match { |
|
90 case Nil => r |
|
91 case c::s => ders(s, der(c, r)) |
|
92 } |
|
93 |
|
94 // mkeps and injection part |
|
95 def mkeps(r: Rexp) : Val = r match { |
|
96 case ONE => Empty |
|
97 case ALT(r1, r2) => |
|
98 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
|
99 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
|
100 case STAR(r) => Stars(Nil) |
|
101 } |
|
102 |
|
103 |
|
104 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
|
105 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
|
106 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
|
107 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
|
108 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
|
109 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
|
110 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
|
111 case (CHAR(d), Empty) => Chr(c) |
|
112 } |
|
113 |
|
114 // main lexing function (produces a value) |
|
115 // - no simplification |
|
116 def lex(r: Rexp, s: List[Char]) : Val = s match { |
|
117 case Nil => if (nullable(r)) mkeps(r) |
|
118 else throw new Exception("Not matched") |
|
119 case c::cs => inj(r, c, lex(der(c, r), cs)) |
|
120 } |
|
121 |
|
122 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) |
|
123 |
|
124 |
|
125 |
|
126 // Bitcoded + Annotation |
|
127 //======================= |
|
128 |
|
129 //erase function: extracts the regx from Aregex |
|
130 def erase(r:ARexp): Rexp = r match{ |
|
131 case AZERO => ZERO |
|
132 case AONE(_) => ONE |
|
133 case ACHAR(bs, c) => CHAR(c) |
|
134 case AALTS(bs, Nil) => ZERO |
|
135 case AALTS(bs, r::Nil) => erase(r) |
|
136 case AALTS(bs, r::rs) => ALT(erase(r), erase(AALTS(bs, rs))) |
|
137 case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2)) |
|
138 case ASTAR(cs, r)=> STAR(erase(r)) |
|
139 } |
|
140 |
|
141 // translation into ARexps |
|
142 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
|
143 case AZERO => AZERO |
|
144 case AONE(cs) => AONE(bs ++ cs) |
|
145 case ACHAR(cs, c) => ACHAR(bs ++ cs, c) |
|
146 case AALTS(cs, rs) => AALTS(bs ++ cs, rs) |
|
147 case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) |
|
148 case ASTAR(cs, r) => ASTAR(bs ++ cs, r) |
|
149 } |
|
150 |
|
151 def internalise(r: Rexp) : ARexp = r match { |
|
152 case ZERO => AZERO |
|
153 case ONE => AONE(Nil) |
|
154 case CHAR(c) => ACHAR(Nil, c) |
|
155 case ALT(r1, r2) => AALT(Nil, fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))) |
|
156 case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) |
|
157 case STAR(r) => ASTAR(Nil, internalise(r)) |
|
158 } |
|
159 |
|
160 |
|
161 internalise(("a" | "ab") ~ ("b" | "")) |
|
162 |
|
163 |
|
164 |
|
165 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { |
|
166 case (ONE, bs) => (Empty, bs) |
|
167 case (CHAR(c), bs) => (Chr(c), bs) |
|
168 case (ALT(r1, r2), Z::bs) => { |
|
169 val (v, bs1) = decode_aux(r1, bs) |
|
170 (Left(v), bs1) |
|
171 } |
|
172 case (ALT(r1, r2), S::bs) => { |
|
173 val (v, bs1) = decode_aux(r2, bs) |
|
174 (Right(v), bs1) |
|
175 } |
|
176 case (SEQ(r1, r2), bs) => { |
|
177 val (v1, bs1) = decode_aux(r1, bs) |
|
178 val (v2, bs2) = decode_aux(r2, bs1) |
|
179 (Sequ(v1, v2), bs2) |
|
180 } |
|
181 case (STAR(r1), Z::bs) => { |
|
182 val (v, bs1) = decode_aux(r1, bs) |
|
183 val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1) |
|
184 (Stars(v::vs), bs2) |
|
185 } |
|
186 case (STAR(_), S::bs) => (Stars(Nil), bs) |
|
187 } |
|
188 |
|
189 def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match { |
|
190 case (v, Nil) => v |
|
191 case _ => throw new Exception("Not decodable") |
|
192 } |
|
193 |
|
194 def encode(v: Val) : Bits = v match { |
|
195 case Empty => Nil |
|
196 case Chr(c) => Nil |
|
197 case Left(v) => Z :: encode(v) |
|
198 case Right(v) => S :: encode(v) |
|
199 case Sequ(v1, v2) => encode(v1) ::: encode(v2) |
|
200 case Stars(Nil) => List(S) |
|
201 case Stars(v::vs) => Z :: encode(v) ::: encode(Stars(vs)) |
|
202 } |
|
203 |
|
204 |
|
205 // nullable function: tests whether the aregular |
|
206 // expression can recognise the empty string |
|
207 def bnullable (r: ARexp) : Boolean = r match { |
|
208 case AZERO => false |
|
209 case AONE(_) => true |
|
210 case ACHAR(_,_) => false |
|
211 case AALTS(_, rs) => rs.exists(bnullable) |
|
212 case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2) |
|
213 case ASTAR(_, _) => true |
|
214 } |
|
215 |
|
216 def bmkeps(r: ARexp) : Bits = r match { |
|
217 case AONE(bs) => bs |
|
218 case AALTS(bs, r::Nil) => bs ++ bmkeps(r) |
|
219 case AALTS(bs, r::rs) => |
|
220 if (bnullable(r)) bs ++ bmkeps(r) else bmkeps(AALTS(bs, rs)) |
|
221 case ASEQ(bs, r1, r2) => bs ++ bmkeps(r1) ++ bmkeps(r2) |
|
222 case ASTAR(bs, r) => bs ++ List(S) |
|
223 } |
|
224 |
|
225 // derivative of a regular expression w.r.t. a character |
|
226 def bder(c: Char, r: ARexp) : ARexp = r match { |
|
227 case AZERO => AZERO |
|
228 case AONE(_) => AZERO |
|
229 case ACHAR(bs, d) => if (c == d) AONE(bs) else AZERO |
|
230 case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _))) |
|
231 case ASEQ(bs, r1, r2) => |
|
232 if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2))) |
|
233 else ASEQ(bs, bder(c, r1), r2) |
|
234 case ASTAR(bs, r) => ASEQ(bs, fuse(List(Z), bder(c, r)), ASTAR(Nil, r)) |
|
235 } |
|
236 |
|
237 // derivative w.r.t. a string (iterates der) |
|
238 @tailrec |
|
239 def bders (s: List[Char], r: ARexp) : ARexp = s match { |
|
240 case Nil => r |
|
241 case c::s => bders(s, bder(c, r)) |
|
242 } |
|
243 |
|
244 // main unsimplified lexing function (produces a value) |
|
245 def blex(r: ARexp, s: List[Char]) : Bits = s match { |
|
246 case Nil => if (bnullable(r)) bmkeps(r) else throw new Exception("Not matched") |
|
247 case c::cs => blex(bder(c, r), cs) |
|
248 } |
|
249 |
|
250 def pre_blexing(r: ARexp, s: String) : Bits = blex(r, s.toList) |
|
251 def blexing(r: Rexp, s: String) : Val = decode(r, pre_blexing(internalise(r), s)) |
|
252 |
|
253 |
|
254 // example by Tudor |
|
255 val reg = ((("a".%)) ~ ("b" | "c")).% |
|
256 |
|
257 println(blexing(reg, "aab")) |
|
258 |
|
259 |
|
260 //======================= |
|
261 // simplification |
|
262 // |
|
263 |
|
264 |
|
265 def flts(rs: List[ARexp]) : List[ARexp] = rs match { |
|
266 case Nil => Nil |
|
267 case AZERO :: rs => flts(rs) |
|
268 case AALTS(bs, rs1) :: rs => rs1.map(fuse(bs, _)) ++ flts(rs) |
|
269 case r1 :: rs => r1 :: flts(rs) |
|
270 } |
|
271 |
|
272 def distinctBy[B, C](xs: List[B], |
|
273 f: B => C, |
|
274 acc: List[C] = Nil): List[B] = xs match { |
|
275 case Nil => Nil |
|
276 case x::xs => { |
|
277 val res = f(x) |
|
278 if (acc.contains(res)) distinctBy(xs, f, acc) |
|
279 else x::distinctBy(xs, f, res::acc) |
|
280 } |
|
281 } |
|
282 |
|
283 |
|
284 def bsimp(r: ARexp): ARexp = r match { |
|
285 case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { |
|
286 case (AZERO, _) => AZERO |
|
287 case (_, AZERO) => AZERO |
|
288 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
289 //case (AALTS(bs, rs), r2) => AALTS(bs, rs.map(ASEQ(Nil, _, r2))) |
|
290 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
291 } |
|
292 case AALTS(bs1, rs) => distinctBy(flts(rs.map(bsimp)), erase) match { |
|
293 case Nil => AZERO |
|
294 case r::Nil => fuse(bs1, r) |
|
295 case rs => AALTS(bs1, rs) |
|
296 } |
|
297 case r => r |
|
298 } |
|
299 |
|
300 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { |
|
301 case Nil => if (bnullable(r)) bmkeps(r) |
|
302 else throw new Exception("Not matched") |
|
303 case c::cs => blex(bsimp(bder(c, r)), cs) |
|
304 } |
|
305 |
|
306 def blexing_simp(r: Rexp, s: String) : Val = |
|
307 decode(r, blex_simp(internalise(r), s.toList)) |
|
308 |
|
309 println(blexing_simp(reg, "aab")) |
|
310 |
|
311 // bsimp2 by Chengsong |
|
312 |
|
313 def pos_i(rs: List[ARexp], v: Val): Int = (rs, v) match { |
|
314 case (r::Nil, v1) => 0 |
|
315 case ( r::rs1, Right(v)) => pos_i(rs1, v) + 1 |
|
316 case ( r::rs1, Left(v) ) => 0 |
|
317 } |
|
318 |
|
319 def pos_v(rs: List[ARexp], v: Val): Val = (rs, v) match { |
|
320 case (r::Nil, v1) => v1 |
|
321 case (r::rs1, Right(v)) => pos_v(rs1, v) |
|
322 case (r::rs1, Left(v) ) => v |
|
323 } |
|
324 |
|
325 def unify(rs: List[ARexp], v: Val): Val = { |
|
326 Position(pos_i(rs, v), pos_v(rs, v)) |
|
327 } |
|
328 |
|
329 // coat does the job of "coating" a value |
|
330 // given the number of right outside it |
|
331 def coat(v: Val, i: Int) : Val = i match { |
|
332 case 0 => v |
|
333 case i => if (i > 0) coat(Right(v), i - 1) else { println(v, i); throw new Exception("coat minus")} |
|
334 } |
|
335 |
|
336 def distinctBy2[B, C](v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match { |
|
337 case Nil => (res, v) |
|
338 case (x::xs) => { |
|
339 val re = f(x) |
|
340 if (acc.contains(re)) v match { |
|
341 case Position(i, v0) => distinctBy2(Position(i - 1, v0), xs, f, acc, res) |
|
342 case _ => throw new Exception("dB2") |
|
343 } |
|
344 else distinctBy2(v, xs, f, re::acc, x::res) |
|
345 } |
|
346 } |
|
347 |
|
348 |
|
349 def flats(rs: List[ARexp]): List[ARexp] = rs match { |
|
350 case Nil => Nil |
|
351 case AZERO :: rs1 => flats(rs1) |
|
352 case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) |
|
353 case r1 :: rs2 => r1 :: flats(rs2) |
|
354 } |
|
355 |
|
356 |
|
357 |
|
358 def flats2(front: List[ARexp], i: Int, rs: List[ARexp], v: Val): (List[ARexp], Val) = v match { |
|
359 case Position(j, v0) => { |
|
360 if (i < 0) (front ::: flats(rs), Position(j, v0) ) |
|
361 else if(i == 0){ |
|
362 rs match { |
|
363 case AALTS(bs, rs1) :: rs2 => ( (front ::: rs1.map(fuse(bs, _))):::flats(rs2), Position(j + rs1.length - 1, pos_v(rs1, v0))) |
|
364 case r::rs2 => (front ::: List(r) ::: flats(rs2), Position(j, v0)) |
|
365 case _ => throw new Exception("flats2 i = 0") |
|
366 } |
|
367 } |
|
368 else{ |
|
369 rs match { |
|
370 case AZERO::rs1 => flats2(front, i - 1, rs1, Position(j - 1, v0)) |
|
371 case AALTS(bs, rs1) ::rs2 => flats2(front:::rs1.map(fuse(bs, _)), i - 1, rs2, Position(j + rs1.length - 1, v0)) |
|
372 case r::rs1 => flats2(front:::List(r), i - 1, rs1, Position(j, v0)) |
|
373 case _ => throw new Exception("flats2 i>0") |
|
374 } |
|
375 } |
|
376 } |
|
377 case _ => throw new Exception("flats2 error") |
|
378 } |
|
379 |
|
380 def deunify(rs_length: Int, v: Val): Val = v match{ |
|
381 case Position(i, v0) => { |
|
382 if (i <0 ) { println(rs_length, v); throw new Exception("deunify minus") } |
|
383 else if (rs_length == 1) {println(v); v} |
|
384 else if (rs_length - 1 == i) coat(v0, i) |
|
385 else coat(Left(v0), i) |
|
386 } |
|
387 case _ => throw new Exception("deunify error") |
|
388 } |
|
389 |
|
390 |
|
391 def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r, v) match { |
|
392 case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { |
|
393 case ((AZERO, _), (_, _)) => (AZERO, Undefined) |
|
394 case ((_, _), (AZERO, _)) => (AZERO, Undefined) |
|
395 case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s) |
|
396 // v2 tells how to retrieve bits in r2s, which is enough as the bits |
|
397 // of the first part of the sequence has already been integrated to |
|
398 // the top level of the second regx. |
|
399 case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) |
|
400 } |
|
401 case (AALTS(bs1, rs), v) => { |
|
402 val vlist = unify(rs, v) |
|
403 vlist match { |
|
404 case Position(i, v0) => { |
|
405 val v_simp = bsimp2(rs(i), v0)._2 |
|
406 val rs_simp = rs.map(bsimp) |
|
407 val flat_res = flats2( Nil, i, rs_simp, Position(i, v_simp) ) |
|
408 val dist_res = distinctBy2(flat_res._2, flat_res._1, erase) |
|
409 val rs_new = dist_res._1 |
|
410 val v_new = deunify(rs_new.length, dist_res._2) |
|
411 rs_new match { |
|
412 case Nil => (AZERO, Undefined) |
|
413 case s :: Nil => (fuse(bs1, s), v_new) |
|
414 case rs => (AALTS(bs1, rs), v_new) |
|
415 } |
|
416 } |
|
417 case _ => throw new Exception("Funny vlist bsimp2") |
|
418 } |
|
419 } |
|
420 // STAR case |
|
421 // case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
|
422 case (r, v) => (r, v) |
|
423 } |
|
424 |
|
425 |
|
426 |
|
427 val dr = ASEQ(List(),AALTS(List(S),List(AONE(List(Z)), AONE(List(S)))),ASTAR(List(),AALTS(List(),List(ACHAR(List(Z),'a'), ACHAR(List(S),'a'))))) |
|
428 val dv = Sequ(Left(Empty),Stars(List())) |
|
429 println(bsimp2(dr, dv)) |
|
430 |
|
431 |
|
432 /* |
|
433 def blex_simp2(r: ARexp, s: List[Char]) : Bits = s match { |
|
434 case Nil => if (bnullable(r)) bmkeps(r) |
|
435 else throw new Exception("Not matched") |
|
436 case c::cs => blex(bsimp2(bder(c, r)), cs) |
|
437 } |
|
438 |
|
439 def blexing_simp2(r: Rexp, s: String) : Val = |
|
440 decode(r, blex_simp2(internalise(r), s.toList)) |
|
441 |
|
442 println(blexing_simp2(reg, "aab")) |
|
443 */ |
|
444 |
|
445 // extracts a string from value |
|
446 def flatten(v: Val) : String = v match { |
|
447 case Empty => "" |
|
448 case Chr(c) => c.toString |
|
449 case Left(v) => flatten(v) |
|
450 case Right(v) => flatten(v) |
|
451 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
|
452 case Stars(vs) => vs.map(flatten).mkString |
|
453 } |
|
454 |
|
455 // extracts an environment from a value |
|
456 def env(v: Val) : List[(String, String)] = v match { |
|
457 case Empty => Nil |
|
458 case Chr(c) => Nil |
|
459 case Left(v) => env(v) |
|
460 case Right(v) => env(v) |
|
461 case Sequ(v1, v2) => env(v1) ::: env(v2) |
|
462 case Stars(vs) => vs.flatMap(env) |
|
463 } |
|
464 |
|
465 // Some Tests |
|
466 //============ |
|
467 |
|
468 /* |
|
469 def time_needed[T](i: Int, code: => T) = { |
|
470 val start = System.nanoTime() |
|
471 for (j <- 1 to i) code |
|
472 val end = System.nanoTime() |
|
473 (end - start)/(i * 1.0e9) |
|
474 } |
|
475 |
|
476 |
|
477 val evil1 = (("a").%) ~ "b" |
|
478 val evil2 = (((("a").%).%).%) ~ "b" |
|
479 val evil3 = (("a"~"a") | ("a")).% |
|
480 |
|
481 for(i <- 1 to 10000 by 1000) { |
|
482 println(time_needed(1, blex_simp(internalise(evil1), ("a"*i + "b").toList))) |
|
483 } |
|
484 |
|
485 for(i <- 1 to 10000 by 1000) { |
|
486 println(time_needed(1, blexing_simp(evil1, "a"*i + "b"))) |
|
487 } |
|
488 |
|
489 for(i <- 1 to 10000 by 1000) { |
|
490 println(time_needed(1, blexing_simp(evil2, "a"*i + "b"))) |
|
491 } |
|
492 |
|
493 for(i <- 1 to 10000 by 1000) { |
|
494 println(time_needed(1, blexing_simp(evil3, "a"*i))) |
|
495 } |
|
496 */ |
|
497 |
|
498 |
|
499 |
|
500 |
|
501 |
|
502 ///////////////////////// |
|
503 ///////////////////////// |
|
504 ///// Below not relevant |
|
505 |
|
506 /* |
|
507 val rf = ("a" | "ab") ~ ("ab" | "") |
|
508 println(pre_blexing(internalise(rf), "ab")) |
|
509 println(blexing(rf, "ab")) |
|
510 println(blexing_simp(rf, "ab")) |
|
511 |
|
512 val r0 = ("a" | "ab") ~ ("b" | "") |
|
513 println(pre_blexing(internalise(r0), "ab")) |
|
514 println(blexing(r0, "ab")) |
|
515 println(blexing_simp(r0, "ab")) |
|
516 |
|
517 val r1 = ("a" | "ab") ~ ("bcd" | "cd") |
|
518 println(blexing(r1, "abcd")) |
|
519 println(blexing_simp(r1, "abcd")) |
|
520 |
|
521 println(blexing((("" | "a") ~ ("ab" | "b")), "ab")) |
|
522 println(blexing_simp((("" | "a") ~ ("ab" | "b")), "ab")) |
|
523 |
|
524 println(blexing((("" | "a") ~ ("b" | "ab")), "ab")) |
|
525 println(blexing_simp((("" | "a") ~ ("b" | "ab")), "ab")) |
|
526 |
|
527 println(blexing((("" | "a") ~ ("c" | "ab")), "ab")) |
|
528 println(blexing_simp((("" | "a") ~ ("c" | "ab")), "ab")) |
|
529 |
|
530 |
|
531 // Sulzmann's tests |
|
532 //================== |
|
533 |
|
534 val sulzmann = ("a" | "b" | "ab").% |
|
535 |
|
536 println(blexing(sulzmann, "a" * 10)) |
|
537 println(blexing_simp(sulzmann, "a" * 10)) |
|
538 |
|
539 for (i <- 0 to 4000 by 500) { |
|
540 println(i + ": " + "%.5f".format(time_needed(1, blexing_simp(sulzmann, "a" * i)))) |
|
541 } |
|
542 |
|
543 for (i <- 0 to 15 by 5) { |
|
544 println(i + ": " + "%.5f".format(time_needed(1, blexing_simp(sulzmann, "ab" * i)))) |
|
545 } |
|
546 |
|
547 */ |
|
548 |
|
549 |
|
550 // some automatic testing |
|
551 |
|
552 /* |
|
553 def clear() = { |
|
554 print("") |
|
555 //print("\33[H\33[2J") |
|
556 } |
|
557 |
|
558 def merge[A](l1: LazyList[A], l2: LazyList[A], l3: LazyList[A]) : LazyList[A] = |
|
559 l1.head #:: l2.head #:: l3.head #:: merge(l1.tail, l2.tail, l3.tail) |
|
560 |
|
561 |
|
562 // enumerates regular expressions until a certain depth |
|
563 def enum(rs: LazyList[Rexp]) : LazyList[Rexp] = { |
|
564 rs #::: enum( (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++ |
|
565 (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++ |
|
566 (for (r1 <- rs) yield STAR(r1))) |
|
567 } |
|
568 |
|
569 |
|
570 enum(LazyList(ZERO, ONE, CHAR('a'), CHAR('b'))).take(200).force.mkString("\n") |
|
571 enum(LazyList(ZERO, ONE, CHAR('a'), CHAR('b'))).take(200_000).force |
|
572 |
|
573 |
|
574 |
|
575 import scala.util.Try |
|
576 |
|
577 def test_mkeps(r: Rexp) = { |
|
578 val res1 = Try(Some(mkeps(r))).getOrElse(None) |
|
579 val res2 = Try(Some(decode(r, bmkeps(internalise(r))))).getOrElse(None) |
|
580 if (res1 != res2) println(s"Mkeps disagrees on ${r}") |
|
581 if (res1 != res2) Some(r) else (None) |
|
582 } |
|
583 |
|
584 println("Testing mkeps") |
|
585 enum(LazyList(ZERO, ONE, CHAR('a'), CHAR('b'))).take(100).exists(test_mkeps(_).isDefined) |
|
586 //enum(3, "ab").map(test_mkeps).toSet |
|
587 //enum(3, "abc").map(test_mkeps).toSet |
|
588 |
|
589 |
|
590 //enumerates strings of length n over alphabet cs |
|
591 def strs(n: Int, cs: String) : Set[String] = { |
|
592 if (n == 0) Set("") |
|
593 else { |
|
594 val ss = strs(n - 1, cs) |
|
595 ss ++ |
|
596 (for (s <- ss; c <- cs.toList) yield c + s) |
|
597 } |
|
598 } |
|
599 |
|
600 //tests lexing and lexingB |
|
601 def tests_inj(ss: Set[String])(r: Rexp) = { |
|
602 clear() |
|
603 println(s"Testing ${r}") |
|
604 for (s <- ss.par) yield { |
|
605 val res1 = Try(Some(alexing(r, s))).getOrElse(None) |
|
606 val res2 = Try(Some(alexing_simp(r, s))).getOrElse(None) |
|
607 if (res1 != res2) println(s"Disagree on ${r} and ${s}") |
|
608 if (res1 != res2) println(s" ${res1} != ${res2}") |
|
609 if (res1 != res2) Some((r, s)) else None |
|
610 } |
|
611 } |
|
612 |
|
613 //println("Testing lexing 1") |
|
614 //enum(2, "ab").map(tests_inj(strs(2, "ab"))).toSet |
|
615 //println("Testing lexing 2") |
|
616 //enum(2, "ab").map(tests_inj(strs(3, "abc"))).toSet |
|
617 //println("Testing lexing 3") |
|
618 //enum(3, "ab").map(tests_inj(strs(3, "abc"))).toSet |
|
619 |
|
620 |
|
621 def tests_alexer(ss: Set[String])(r: Rexp) = { |
|
622 clear() |
|
623 println(s"Testing ${r}") |
|
624 for (s <- ss.par) yield { |
|
625 val d = der('b', r) |
|
626 val ad = bder('b', internalise(r)) |
|
627 val res1 = Try(Some(encode(inj(r, 'a', alexing(d, s))))).getOrElse(None) |
|
628 val res2 = Try(Some(pre_alexing(ad, s))).getOrElse(None) |
|
629 if (res1 != res2) println(s"Disagree on ${r} and 'a'::${s}") |
|
630 if (res1 != res2) println(s" ${res1} != ${res2}") |
|
631 if (res1 != res2) Some((r, s)) else None |
|
632 } |
|
633 } |
|
634 |
|
635 println("Testing alexing 1") |
|
636 println(enum(2, "ab").map(tests_alexer(strs(2, "ab"))).toSet) |
|
637 |
|
638 |
|
639 def values(r: Rexp) : Set[Val] = r match { |
|
640 case ZERO => Set() |
|
641 case ONE => Set(Empty) |
|
642 case CHAR(c) => Set(Chr(c)) |
|
643 case ALT(r1, r2) => (for (v1 <- values(r1)) yield Left(v1)) ++ |
|
644 (for (v2 <- values(r2)) yield Right(v2)) |
|
645 case SEQ(r1, r2) => for (v1 <- values(r1); v2 <- values(r2)) yield Sequ(v1, v2) |
|
646 case STAR(r) => (Set(Stars(Nil)) ++ |
|
647 (for (v <- values(r)) yield Stars(List(v)))) |
|
648 // to do more would cause the set to be infinite |
|
649 } |
|
650 |
|
651 def tests_bder(c: Char)(r: Rexp) = { |
|
652 val d = der(c, r) |
|
653 val vals = values(d) |
|
654 for (v <- vals) { |
|
655 println(s"Testing ${r} and ${v}") |
|
656 val res1 = retrieve(bder(c, internalise(r)), v) |
|
657 val res2 = encode(inj(r, c, decode(d, retrieve(internalise(der(c, r)), v)))) |
|
658 if (res1 != res2) println(s"Disagree on ${r}, ${v} and der = ${d}") |
|
659 if (res1 != res2) println(s" ${res1} != ${res2}") |
|
660 if (res1 != res2) Some((r, v)) else None |
|
661 } |
|
662 } |
|
663 |
|
664 println("Testing bder/der") |
|
665 println(enum(2, "ab").map(tests_bder('a')).toSet) |
|
666 |
|
667 val er = SEQ(ONE,CHAR('a')) |
|
668 val ev = Right(Empty) |
|
669 val ed = ALT(SEQ(ZERO,CHAR('a')),ONE) |
|
670 |
|
671 retrieve(internalise(ed), ev) // => [true] |
|
672 internalise(er) |
|
673 bder('a', internalise(er)) |
|
674 retrieve(bder('a', internalise(er)), ev) // => [] |
|
675 decode(ed, List(true)) // gives the value for derivative |
|
676 decode(er, List()) // gives the value for original value |
|
677 |
|
678 |
|
679 val dr = STAR(CHAR('a')) |
|
680 val dr_der = SEQ(ONE,STAR(CHAR('a'))) // derivative of dr |
|
681 val dr_val = Sequ(Empty,Stars(List())) // value of dr_def |
|
682 |
|
683 |
|
684 val res1 = retrieve(internalise(der('a', dr)), dr_val) // => [true] |
|
685 val res2 = retrieve(bder('a', internalise(dr)), dr_val) // => [false, true] |
|
686 decode(dr_der, res1) // gives the value for derivative |
|
687 decode(dr, res2) // gives the value for original value |
|
688 |
|
689 encode(inj(dr, 'a', decode(dr_der, res1))) |
|
690 |
|
691 */ |