1 import scala.language.implicitConversions |
1 import scala.language.implicitConversions |
2 import scala.language.reflectiveCalls |
2 import scala.language.reflectiveCalls |
3 import scala.annotation.tailrec |
3 import scala.annotation.tailrec |
4 |
4 |
5 |
5 // standard regular expressions |
6 abstract class Rexp |
6 abstract class Rexp |
7 case object ZERO extends Rexp |
7 case object ZERO extends Rexp |
8 case object ONE extends Rexp |
8 case object ONE extends Rexp |
9 case class CHAR(c: Char) extends Rexp |
9 case class CHAR(c: Char) extends Rexp |
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
12 case class STAR(r: Rexp) extends Rexp |
12 case class STAR(r: Rexp) extends Rexp |
13 |
13 |
14 |
|
15 |
|
16 abstract class Bit |
14 abstract class Bit |
17 case object Z extends Bit |
15 case object Z extends Bit |
18 case object S extends Bit |
16 case object S extends Bit |
19 |
17 |
20 type Bits = List[Bit] |
18 type Bits = List[Bit] |
21 |
19 |
|
20 // annotated regular expressions |
22 abstract class ARexp |
21 abstract class ARexp |
23 case object AZERO extends ARexp |
22 case object AZERO extends ARexp |
24 case class AONE(bs: Bits) extends ARexp |
23 case class AONE(bs: Bits) extends ARexp |
25 case class ACHAR(bs: Bits, c: Char) extends ARexp |
24 case class ACHAR(bs: Bits, c: Char) extends ARexp |
26 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
25 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
27 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
26 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
28 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
27 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
29 |
28 |
|
29 // an abbreviation for binary alternatives |
30 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) |
30 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) |
31 |
31 |
32 abstract class Val |
32 abstract class Val |
33 case object Empty extends Val |
33 case object Empty extends Val |
34 case class Chr(c: Char) extends Val |
34 case class Chr(c: Char) extends Val |
35 case class Sequ(v1: Val, v2: Val) extends Val |
35 case class Sequ(v1: Val, v2: Val) extends Val |
36 case class Left(v: Val) extends Val |
36 case class Left(v: Val) extends Val |
37 case class Right(v: Val) extends Val |
37 case class Right(v: Val) extends Val |
38 case class Stars(vs: List[Val]) extends Val |
38 case class Stars(vs: List[Val]) extends Val |
39 case class Position(i: Int, v: Val) extends Val // for testing purposes |
39 |
40 case object Undefined extends Val // for testing purposes |
|
41 |
40 |
42 // some convenience for typing in regular expressions |
41 // some convenience for typing in regular expressions |
43 def charlist2rexp(s: List[Char]): Rexp = s match { |
42 def charlist2rexp(s: List[Char]): Rexp = s match { |
44 case Nil => ONE |
43 case Nil => ONE |
45 case c::Nil => CHAR(c) |
44 case c::Nil => CHAR(c) |
60 def ~ (r: Rexp) = SEQ(s, r) |
59 def ~ (r: Rexp) = SEQ(s, r) |
61 def ~ (r: String) = SEQ(s, r) |
60 def ~ (r: String) = SEQ(s, r) |
62 } |
61 } |
63 |
62 |
64 |
63 |
65 // nullable function: tests whether the regular |
|
66 // expression can recognise the empty string |
|
67 def nullable(r: Rexp) : Boolean = r match { |
|
68 case ZERO => false |
|
69 case ONE => true |
|
70 case CHAR(_) => false |
|
71 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
|
72 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
73 case STAR(_) => true |
|
74 } |
|
75 |
|
76 // derivative of a regular expression w.r.t. a character |
|
77 def der(c: Char, r: Rexp) : Rexp = r match { |
|
78 case ZERO => ZERO |
|
79 case ONE => ZERO |
|
80 case CHAR(d) => if (c == d) ONE else ZERO |
|
81 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
|
82 case SEQ(r1, r2) => |
|
83 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
|
84 else SEQ(der(c, r1), r2) |
|
85 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
86 } |
|
87 |
|
88 // derivative w.r.t. a string (iterates der) |
|
89 def ders(s: List[Char], r: Rexp) : Rexp = s match { |
|
90 case Nil => r |
|
91 case c::s => ders(s, der(c, r)) |
|
92 } |
|
93 |
|
94 // mkeps and injection part |
|
95 def mkeps(r: Rexp) : Val = r match { |
|
96 case ONE => Empty |
|
97 case ALT(r1, r2) => |
|
98 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
|
99 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
|
100 case STAR(r) => Stars(Nil) |
|
101 } |
|
102 |
|
103 |
|
104 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
|
105 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
|
106 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
|
107 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
|
108 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
|
109 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
|
110 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
|
111 case (CHAR(d), Empty) => Chr(c) |
|
112 } |
|
113 |
|
114 // main lexing function (produces a value) |
|
115 // - no simplification |
|
116 def lex(r: Rexp, s: List[Char]) : Val = s match { |
|
117 case Nil => if (nullable(r)) mkeps(r) |
|
118 else throw new Exception("Not matched") |
|
119 case c::cs => inj(r, c, lex(der(c, r), cs)) |
|
120 } |
|
121 |
|
122 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) |
|
123 |
|
124 |
|
125 |
|
126 // Bitcoded + Annotation |
64 // Bitcoded + Annotation |
127 //======================= |
65 //======================= |
128 |
66 |
129 //erase function: extracts the regx from Aregex |
67 //erase function: extracts the Rexp from ARexp |
130 def erase(r:ARexp): Rexp = r match{ |
68 def erase(r:ARexp): Rexp = r match{ |
131 case AZERO => ZERO |
69 case AZERO => ZERO |
132 case AONE(_) => ONE |
70 case AONE(_) => ONE |
133 case ACHAR(bs, c) => CHAR(c) |
71 case ACHAR(bs, c) => CHAR(c) |
134 case AALTS(bs, Nil) => ZERO |
72 case AALTS(bs, Nil) => ZERO |
304 } |
241 } |
305 |
242 |
306 def blexing_simp(r: Rexp, s: String) : Val = |
243 def blexing_simp(r: Rexp, s: String) : Val = |
307 decode(r, blex_simp(internalise(r), s.toList)) |
244 decode(r, blex_simp(internalise(r), s.toList)) |
308 |
245 |
309 println(blexing_simp(reg, "aab")) |
246 //println(blexing_simp(reg, "aab")) |
310 |
247 |
311 // bsimp2 by Chengsong |
|
312 |
|
313 def pos_i(rs: List[ARexp], v: Val): Int = (rs, v) match { |
|
314 case (r::Nil, v1) => 0 |
|
315 case ( r::rs1, Right(v)) => pos_i(rs1, v) + 1 |
|
316 case ( r::rs1, Left(v) ) => 0 |
|
317 } |
|
318 |
|
319 def pos_v(rs: List[ARexp], v: Val): Val = (rs, v) match { |
|
320 case (r::Nil, v1) => v1 |
|
321 case (r::rs1, Right(v)) => pos_v(rs1, v) |
|
322 case (r::rs1, Left(v) ) => v |
|
323 } |
|
324 |
|
325 def unify(rs: List[ARexp], v: Val): Val = { |
|
326 Position(pos_i(rs, v), pos_v(rs, v)) |
|
327 } |
|
328 |
|
329 // coat does the job of "coating" a value |
|
330 // given the number of right outside it |
|
331 def coat(v: Val, i: Int) : Val = i match { |
|
332 case 0 => v |
|
333 case i => if (i > 0) coat(Right(v), i - 1) else { println(v, i); throw new Exception("coat minus")} |
|
334 } |
|
335 |
|
336 def distinctBy2[B, C](v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match { |
|
337 case Nil => (res, v) |
|
338 case (x::xs) => { |
|
339 val re = f(x) |
|
340 if (acc.contains(re)) v match { |
|
341 case Position(i, v0) => distinctBy2(Position(i - 1, v0), xs, f, acc, res) |
|
342 case _ => throw new Exception("dB2") |
|
343 } |
|
344 else distinctBy2(v, xs, f, re::acc, x::res) |
|
345 } |
|
346 } |
|
347 |
|
348 |
|
349 def flats(rs: List[ARexp]): List[ARexp] = rs match { |
|
350 case Nil => Nil |
|
351 case AZERO :: rs1 => flats(rs1) |
|
352 case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) |
|
353 case r1 :: rs2 => r1 :: flats(rs2) |
|
354 } |
|
355 |
|
356 |
|
357 |
|
358 def flats2(front: List[ARexp], i: Int, rs: List[ARexp], v: Val): (List[ARexp], Val) = v match { |
|
359 case Position(j, v0) => { |
|
360 if (i < 0) (front ::: flats(rs), Position(j, v0) ) |
|
361 else if(i == 0){ |
|
362 rs match { |
|
363 case AALTS(bs, rs1) :: rs2 => ( (front ::: rs1.map(fuse(bs, _))):::flats(rs2), Position(j + rs1.length - 1, pos_v(rs1, v0))) |
|
364 case r::rs2 => (front ::: List(r) ::: flats(rs2), Position(j, v0)) |
|
365 case _ => throw new Exception("flats2 i = 0") |
|
366 } |
|
367 } |
|
368 else{ |
|
369 rs match { |
|
370 case AZERO::rs1 => flats2(front, i - 1, rs1, Position(j - 1, v0)) |
|
371 case AALTS(bs, rs1) ::rs2 => flats2(front:::rs1.map(fuse(bs, _)), i - 1, rs2, Position(j + rs1.length - 1, v0)) |
|
372 case r::rs1 => flats2(front:::List(r), i - 1, rs1, Position(j, v0)) |
|
373 case _ => throw new Exception("flats2 i>0") |
|
374 } |
|
375 } |
|
376 } |
|
377 case _ => throw new Exception("flats2 error") |
|
378 } |
|
379 |
|
380 def deunify(rs_length: Int, v: Val): Val = v match{ |
|
381 case Position(i, v0) => { |
|
382 if (i <0 ) { println(rs_length, v); throw new Exception("deunify minus") } |
|
383 else if (rs_length == 1) {println(v); v} |
|
384 else if (rs_length - 1 == i) coat(v0, i) |
|
385 else coat(Left(v0), i) |
|
386 } |
|
387 case _ => throw new Exception("deunify error") |
|
388 } |
|
389 |
|
390 |
|
391 def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r, v) match { |
|
392 case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { |
|
393 case ((AZERO, _), (_, _)) => (AZERO, Undefined) |
|
394 case ((_, _), (AZERO, _)) => (AZERO, Undefined) |
|
395 case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s) |
|
396 // v2 tells how to retrieve bits in r2s, which is enough as the bits |
|
397 // of the first part of the sequence has already been integrated to |
|
398 // the top level of the second regx. |
|
399 case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) |
|
400 } |
|
401 case (AALTS(bs1, rs), v) => { |
|
402 val vlist = unify(rs, v) |
|
403 vlist match { |
|
404 case Position(i, v0) => { |
|
405 val v_simp = bsimp2(rs(i), v0)._2 |
|
406 val rs_simp = rs.map(bsimp) |
|
407 val flat_res = flats2( Nil, i, rs_simp, Position(i, v_simp) ) |
|
408 val dist_res = distinctBy2(flat_res._2, flat_res._1, erase) |
|
409 val rs_new = dist_res._1 |
|
410 val v_new = deunify(rs_new.length, dist_res._2) |
|
411 rs_new match { |
|
412 case Nil => (AZERO, Undefined) |
|
413 case s :: Nil => (fuse(bs1, s), v_new) |
|
414 case rs => (AALTS(bs1, rs), v_new) |
|
415 } |
|
416 } |
|
417 case _ => throw new Exception("Funny vlist bsimp2") |
|
418 } |
|
419 } |
|
420 // STAR case |
|
421 // case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
|
422 case (r, v) => (r, v) |
|
423 } |
|
424 |
|
425 |
|
426 |
|
427 val dr = ASEQ(List(),AALTS(List(S),List(AONE(List(Z)), AONE(List(S)))),ASTAR(List(),AALTS(List(),List(ACHAR(List(Z),'a'), ACHAR(List(S),'a'))))) |
|
428 val dv = Sequ(Left(Empty),Stars(List())) |
|
429 println(bsimp2(dr, dv)) |
|
430 |
|
431 |
|
432 /* |
|
433 def blex_simp2(r: ARexp, s: List[Char]) : Bits = s match { |
|
434 case Nil => if (bnullable(r)) bmkeps(r) |
|
435 else throw new Exception("Not matched") |
|
436 case c::cs => blex(bsimp2(bder(c, r)), cs) |
|
437 } |
|
438 |
|
439 def blexing_simp2(r: Rexp, s: String) : Val = |
|
440 decode(r, blex_simp2(internalise(r), s.toList)) |
|
441 |
|
442 println(blexing_simp2(reg, "aab")) |
|
443 */ |
|
444 |
248 |
445 // extracts a string from value |
249 // extracts a string from value |
446 def flatten(v: Val) : String = v match { |
250 def flatten(v: Val) : String = v match { |
447 case Empty => "" |
251 case Empty => "" |
448 case Chr(c) => c.toString |
252 case Chr(c) => c.toString |