2 import scala.language.implicitConversions |
2 import scala.language.implicitConversions |
3 import scala.language.reflectiveCalls |
3 import scala.language.reflectiveCalls |
4 import scala.annotation.tailrec |
4 import scala.annotation.tailrec |
5 import scala.util.Try |
5 import scala.util.Try |
6 |
6 |
|
7 // for escaping strings |
7 def escape(raw: String) : String = { |
8 def escape(raw: String) : String = { |
8 import scala.reflect.runtime.universe._ |
9 import scala.reflect.runtime.universe._ |
9 Literal(Constant(raw)).toString |
10 Literal(Constant(raw)).toString |
10 } |
11 } |
11 |
12 |
119 // START OF NON-BITCODE PART |
120 // START OF NON-BITCODE PART |
120 // |
121 // |
121 |
122 |
122 // nullable function: tests whether the regular |
123 // nullable function: tests whether the regular |
123 // expression can recognise the empty string |
124 // expression can recognise the empty string |
124 def nullable (r: Rexp) : Boolean = r match { |
125 def nullable(r: Rexp) : Boolean = r match { |
125 case ZERO => false |
126 case ZERO => false |
126 case ONE => true |
127 case ONE => true |
127 case PRED(_, _) => false |
128 case PRED(_, _) => false |
128 case ALTS(rs) => rs.exists(nullable) |
129 case ALTS(rs) => rs.exists(nullable) |
129 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
130 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
130 case STAR(_) => true |
131 case STAR(_) => true |
131 case RECD(_, r) => nullable(r) |
132 case RECD(_, r) => nullable(r) |
132 } |
133 } |
133 |
134 |
134 // derivative of a regular expression w.r.t. a character |
135 // derivative of a regular expression w.r.t. a character |
135 def der (c: Char, r: Rexp) : Rexp = r match { |
136 def der(c: Char, r: Rexp) : Rexp = r match { |
136 case ZERO => ZERO |
137 case ZERO => ZERO |
137 case ONE => ZERO |
138 case ONE => ZERO |
138 case PRED(f, _) => if (f(c)) ONE else ZERO |
139 case PRED(f, _) => if (f(c)) ONE else ZERO |
139 case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2))) |
140 case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2))) |
140 case SEQ(r1, r2) => |
141 case SEQ(r1, r2) => |
234 case SEQ(r1, r2) => { |
235 case SEQ(r1, r2) => { |
235 val (r1s, f1s) = simp(r1) |
236 val (r1s, f1s) = simp(r1) |
236 val (r2s, f2s) = simp(r2) |
237 val (r2s, f2s) = simp(r2) |
237 (r1s, r2s) match { |
238 (r1s, r2s) match { |
238 case (ZERO, _) => (ZERO, F_ERROR) |
239 case (ZERO, _) => (ZERO, F_ERROR) |
239 case (_, ZERO) => (ZERO, F_ERROR) |
240 //case (_, ZERO) => (ZERO, F_ERROR) |
240 case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) |
241 case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) |
241 case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) |
242 //case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) |
242 case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) |
243 case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) |
243 } |
244 } |
244 } |
245 } |
245 case RECD(x, r1) => { |
246 case RECD(x, r1) => { |
246 val (r1s, f1s) = simp(r1) |
247 val (r1s, f1s) = simp(r1) |
302 rs.map(size).sum |
303 rs.map(size).sum |
303 |
304 |
304 //-------------------------------------------------------------------- |
305 //-------------------------------------------------------------------- |
305 // BITCODED PART |
306 // BITCODED PART |
306 |
307 |
|
308 def retrieve(r: ARexp, v: Val) : List[Boolean] = (r, v) match { |
|
309 case (AONE(bs), Empty) => bs |
|
310 case (ACHAR(bs, c), Chr(d)) => bs |
|
311 case (AALTS(bs, r::Nil), v) => bs ++ retrieve(r, v) |
|
312 case (AALTS(bs, r::rs), Left(v)) => bs ++ retrieve(r, v) |
|
313 case (AALTS(bs, r::rs), Right(v)) => bs ++ retrieve(AALTS(Nil, rs), v) |
|
314 case (ASEQ(bs, r1, r2), Sequ(v1, v2)) => |
|
315 bs ++ retrieve(r1, v1) ++ retrieve(r2, v2) |
|
316 case (ASTAR(bs, r), Stars(Nil)) => bs ++ List(S) |
|
317 case (ASTAR(bs, r), Stars(v::vs)) => |
|
318 bs ++ List(Z) ++ retrieve(r, v) ++ retrieve(ASTAR(Nil, r), Stars(vs)) |
|
319 } |
|
320 |
|
321 |
307 |
322 |
308 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
323 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
309 case AZERO => AZERO |
324 case AZERO => AZERO |
310 case AONE(cs) => AONE(bs ++ cs) |
325 case AONE(cs) => AONE(bs ++ cs) |
311 case APRED(cs, f, s) => APRED(bs ++ cs, f, s) |
326 case APRED(cs, f, s) => APRED(bs ++ cs, f, s) |
461 decode(r, blex_simp(internalise(r), s.toList)) |
476 decode(r, blex_simp(internalise(r), s.toList)) |
462 |
477 |
463 |
478 |
464 def btokenise_simp(r: Rexp, s: String) = |
479 def btokenise_simp(r: Rexp, s: String) = |
465 env(blexing_simp(r, s)).map(esc2) |
480 env(blexing_simp(r, s)).map(esc2) |
|
481 |
|
482 // Quick example |
|
483 |
|
484 val r : Rexp = ZERO | "a" |
|
485 |
|
486 lexing(r, "a") |
|
487 |
|
488 val a0 = internalise(r) |
|
489 val a1 = bder('a', a0) |
|
490 val a1s = bsimp(bder('a', a0)) |
|
491 |
|
492 val a2 = bmkeps(a1) |
|
493 val a2s = bmkeps(a1s) |
|
494 |
|
495 val v = decode(r, a2) |
|
496 val vs = decode(r, a2s) |
|
497 |
|
498 |
|
499 |
|
500 val Rr : Rexp = ONE ~ "a" |
|
501 |
|
502 lexing(Rr, "a") |
|
503 |
|
504 val Ra0 = internalise(Rr) |
|
505 astring(Ra0) |
|
506 val Ra1 = bder('a', Ra0) |
|
507 astring(Ra1) |
|
508 val Ra1s = bsimp(bder('a', Ra0)) |
|
509 astring(Ra1s) |
|
510 |
|
511 val Ra2 = bmkeps(Ra1) |
|
512 val Ra2s = bmkeps(Ra1s) |
|
513 |
|
514 val Rv = decode(Rr, Ra2) |
|
515 val Rvs = decode(Rr, Ra2s) |
466 |
516 |
467 |
517 |
468 |
518 |
469 // INCLUDING SIMPLIFICATION UNDER STARS |
519 // INCLUDING SIMPLIFICATION UNDER STARS |
470 |
520 |
528 |
578 |
529 def btokenise2_simp(r: Rexp, s: String) = |
579 def btokenise2_simp(r: Rexp, s: String) = |
530 env(blexing2_simp(r, s)).map(esc2) |
580 env(blexing2_simp(r, s)).map(esc2) |
531 |
581 |
532 |
582 |
|
583 |
|
584 |
|
585 |
|
586 |
|
587 //============================================ |
533 // Parser for regexes |
588 // Parser for regexes |
534 |
589 |
535 case class Parser(s: String) { |
590 case class Parser(s: String) { |
536 var i = 0 |
591 var i = 0 |
537 |
592 |