30 |
30 |
31 // usual regular expressions |
31 // usual regular expressions |
32 abstract class Rexp |
32 abstract class Rexp |
33 case object ZERO extends Rexp |
33 case object ZERO extends Rexp |
34 case object ONE extends Rexp |
34 case object ONE extends Rexp |
35 case class PRED(f: Char => Boolean, s: String = "_") extends Rexp |
35 case class PRED(f: Char => Boolean, s: String = "_") extends Rexp { |
|
36 override def toString = "'" ++ s ++ "'" |
|
37 } |
36 case class ALTS(rs: List[Rexp]) extends Rexp |
38 case class ALTS(rs: List[Rexp]) extends Rexp |
37 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
39 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
38 case class STAR(r: Rexp) extends Rexp |
40 case class STAR(r: Rexp) extends Rexp |
39 case class RECD(x: String, r: Rexp) extends Rexp |
41 case class RECD(x: String, r: Rexp) extends Rexp |
40 |
42 |
47 |
49 |
48 // annotated regular expressions |
50 // annotated regular expressions |
49 abstract class ARexp |
51 abstract class ARexp |
50 case object AZERO extends ARexp |
52 case object AZERO extends ARexp |
51 case class AONE(bs: Bits) extends ARexp |
53 case class AONE(bs: Bits) extends ARexp |
52 case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp |
54 case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp { |
|
55 override def toString = "'" ++ s ++ "'" |
|
56 } |
53 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
57 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
54 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
58 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
55 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
59 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
56 |
60 |
57 // abbreviations |
61 // abbreviations |
92 def $ (r: Rexp) = RECD(s, r) |
96 def $ (r: Rexp) = RECD(s, r) |
93 } |
97 } |
94 |
98 |
95 |
99 |
96 // string of a regular expressions - for testing purposes |
100 // string of a regular expressions - for testing purposes |
97 def string(r: Rexp): String = r match { |
101 def string(r: Rexp, s: String = ""): String = r match { |
98 case ZERO => "0" |
102 case ZERO => "0" |
99 case ONE => "1" |
103 case ONE => "1" |
100 case PRED(_, s) => s |
104 case PRED(_, s1) => s1 |
101 case ALTS(rs) => rs.map(string).mkString("[", "|", "]") |
105 case ALTS(rs) => rs.map(string(_, s ++ " ")).mkString("[", ",\n" ++ s ++ "|", "]") |
102 case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |
106 case SEQ(r1, r2) => { |
103 case STAR(r) => s"{${string(r)}}*" |
107 val s1 = string(r1, s) |
104 case RECD(x, r) => s"(${x}! ${string(r)})" |
108 val i = (s1 ++ "\n").toList.indexOf('\n') |
|
109 val s2 = string(r2, (" " * i) + " ") |
|
110 s"${s1} ~ ${s2}" |
|
111 } |
|
112 case STAR(r) => s"<${string(r, s ++ " ")}>*" |
|
113 case RECD(x, r) => s"(${x}! ${string(r, s)})" |
105 } |
114 } |
106 |
115 |
107 def strings(rs: Set[Rexp]): String = |
116 def strings(rs: Set[Rexp]): String = |
108 rs.map(string).mkString("{", "|", "}") |
117 rs.map(string(_, "")).mkString("{", ",\n|", "}") |
109 |
118 |
110 // string of an annotated regular expressions - for testing purposes |
119 // string of an annotated regular expressions - for testing purposes |
111 |
120 def astring(a: ARexp, s: String = ""): String = a match { |
112 def astring(a: ARexp): String = a match { |
|
113 case AZERO => "0" |
121 case AZERO => "0" |
114 case AONE(_) => "1" |
122 case AONE(_) => "1" |
115 case APRED(_, _, s) => s |
123 case APRED(_, _, s) => s |
116 case AALTS(_, rs) => rs.map(astring).mkString("[", "|", "]") |
124 case AALTS(_, rs) => rs.map(astring(_, s ++ " ")).mkString("[", ",\n" ++ s ++ "|", "]") |
117 case ASEQ(_, r1, r2) => s"(${astring(r1)} ~ ${astring(r2)})" |
125 case ASEQ(_, r1, r2) => { |
118 case ASTAR(_, r) => s"{${astring(r)}}*" |
126 val s1 = astring(r1, s) |
119 } |
127 val i = (s1 ++ "\n").toList.indexOf('\n') |
|
128 val s2 = astring(r2, (" " * i) + " ") |
|
129 s"${s1} ~ ${s2}" |
|
130 } |
|
131 case ASTAR(_, r) => s"<${astring(r, s ++ " ")}>*" |
|
132 } |
|
133 |
120 |
134 |
121 |
135 |
122 //-------------------------------------------------------------- |
136 //-------------------------------------------------------------- |
123 // START OF NON-BITCODE PART |
137 // START OF NON-BITCODE PART |
124 // |
138 // |
321 // translation into ARexps |
333 // translation into ARexps |
322 def internalise(r: Rexp) : ARexp = r match { |
334 def internalise(r: Rexp) : ARexp = r match { |
323 case ZERO => AZERO |
335 case ZERO => AZERO |
324 case ONE => AONE(Nil) |
336 case ONE => AONE(Nil) |
325 case PRED(f, s) => APRED(Nil, f, s) |
337 case PRED(f, s) => APRED(Nil, f, s) |
|
338 case ALTS(List(r1)) => |
|
339 AALTS(Nil, List(fuse(List(Z), internalise(r1)))) |
326 case ALTS(List(r1, r2)) => |
340 case ALTS(List(r1, r2)) => |
327 AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2)))) |
341 AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2)))) |
328 case ALTS(r1::rs) => { |
342 case ALTS(r1::rs) => { |
329 val AALTS(Nil, rs2) = internalise(ALTS(rs)) |
343 val AALTS(Nil, rs2) = internalise(ALTS(rs)) |
330 AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _))) |
344 AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _))) |
431 case AZERO :: rs1 => flats(rs1) |
445 case AZERO :: rs1 => flats(rs1) |
432 case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) |
446 case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) |
433 case r1 :: rs2 => r1 :: flats(rs2) |
447 case r1 :: rs2 => r1 :: flats(rs2) |
434 } |
448 } |
435 |
449 |
|
450 def stack(r1: ARexp, r2: ARexp) = r1 match { |
|
451 case AONE(bs2) => fuse(bs2, r2) |
|
452 case _ => ASEQ(Nil, r1, r2) |
|
453 } |
|
454 |
436 def bsimp(r: ARexp): ARexp = r match { |
455 def bsimp(r: ARexp): ARexp = r match { |
437 case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { |
456 case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { |
438 case (AZERO, _) => AZERO |
457 case (AZERO, _) => AZERO |
439 case (_, AZERO) => AZERO |
458 case (_, AZERO) => AZERO |
440 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
459 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
441 //case (AALTS(bs2, rs), r2s) => |
460 case (AALTS(bs2, rs), r2s) => |
442 // AALTS(bs1 ++ bs2, rs.map(ASEQ(Nil, _, r2s))) |
461 AALTS(bs1 ++ bs2, rs.map(stack(_, r2s))) |
443 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
462 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
444 } |
463 } |
445 case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp)), erase) match { |
464 case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp)), erase) match { |
446 case Nil => AZERO |
465 case Nil => AZERO |
447 case r :: Nil => fuse(bs1, r) |
466 case r :: Nil => fuse(bs1, r) |
448 case rs => AALTS(bs1, rs) |
467 case rs2 => AALTS(bs1, rs2) |
449 } |
468 } |
450 //case ASTAR(bs1, r1) => ASTAR(bs1, bsimp(r1)) |
469 //case ASTAR(bs1, r1) => ASTAR(bs1, bsimp(r1)) |
451 case r => r |
470 case r => r |
452 } |
471 } |
453 |
472 |
477 def bsimp_full(r: ARexp): ARexp = r match { |
496 def bsimp_full(r: ARexp): ARexp = r match { |
478 case ASEQ(bs1, r1, r2) => (bsimp_full(r1), bsimp_full(r2)) match { |
497 case ASEQ(bs1, r1, r2) => (bsimp_full(r1), bsimp_full(r2)) match { |
479 case (AZERO, _) => AZERO |
498 case (AZERO, _) => AZERO |
480 case (_, AZERO) => AZERO |
499 case (_, AZERO) => AZERO |
481 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
500 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
482 //case (AALTS(bs2, rs), r2s) => |
501 case (AALTS(bs2, rs), r2s) => |
483 // AALTS(bs1 ++ bs2, rs.map(ASEQ(Nil, _, r2s))) |
502 AALTS(bs1 ++ bs2, rs.map(ASEQ(Nil, _, r2s))) |
484 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
503 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
485 } |
504 } |
486 case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp_full)), erase) match { |
505 case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp_full)), erase) match { |
487 case Nil => AZERO |
506 case Nil => AZERO |
488 case r :: Nil => fuse(bs1, r) |
507 case r :: Nil => fuse(bs1, r) |
677 |
698 |
678 |
699 |
679 // Some Small Tests |
700 // Some Small Tests |
680 //================== |
701 //================== |
681 |
702 |
|
703 // string of a regular expressions - for testing purposes |
|
704 |
|
705 |
|
706 string(ALTS(List("a","b",ALTS(List("0","1","2")),"c"))) |
|
707 string(ALTS(List("a","b",ALTS(List("0","1",ALTS(List("X","Y")),"2")),"c"))) |
|
708 string(ALTS(List("aa","b",ALTS(List("0","1","2")),"c"))) |
|
709 string(ALTS(List("aa","b",SEQ("Q", ALTS(List("0","1","2"))),"c"))) |
|
710 string(ALTS(List("aa","b",SEQ(SEQ("Q", ALTS(List("0","1","2"))),"W"),"c"))) |
|
711 string(ALTS(List("aa","b",SEQ("Q", STAR(ALTS(List("0","1","2")))),"c"))) |
|
712 string(ALTS(List("aaa","bbbb",ALTS(List("000","1111","2222")),"ccccc"))) |
|
713 |
682 println("Small tests") |
714 println("Small tests") |
683 |
715 |
684 val q = STAR(STAR("bb" | ("a" | "b"))) |
716 val q = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c'))))))) |
685 val qs = "bb" |
717 val qs = "cccc" |
|
718 |
|
719 //val q = STAR(STAR("bb" | ("a" | "b"))) |
|
720 //val qs = "bbbbbbb" |
|
721 |
|
722 println("Size Bit " + asize(bders_simp(qs.toList, internalise(q)))) |
|
723 println("Size Pder " + psize(pders_simp(qs.toList, q))) |
|
724 println(astring(bders_simp(qs.toList, internalise(q)))) |
|
725 println(strings(pders_simp(qs.toList, q))) |
686 |
726 |
687 println("Size Bit " + asize(bders_simp(qs.toList, internalise(q)))) |
727 println("Size Bit " + asize(bders_simp(qs.toList, internalise(q)))) |
688 println("Size Bitf " + asize(bders_simp_full(qs.toList, internalise(q)))) |
728 println("Size Bitf " + asize(bders_simp_full(qs.toList, internalise(q)))) |
689 println("Size Bit2 " + asize(bders2_simp(qs.toList, internalise(q)))) |
729 println("Size Bit2 " + asize(bders2_simp(qs.toList, internalise(q)))) |
690 println("Size Old " + size(ders_simp(qs.toList, q))) |
730 println("Size Old " + size(ders_simp(qs.toList, q))) |
691 println("Size Pder " + psize(pders(qs.toList, q))) |
731 println("Size Pder " + psize(pders(qs.toList, q))) |
692 println("Size Pder simp " + psize(pders_simp(qs.toList, q))) |
732 println("Size Pder simp " + psize(pders_simp(qs.toList, q))) |
693 |
733 |
694 println(astring(bders_simp(qs.toList, internalise(q)))) |
734 println(astring(bders_simp(qs.toList, internalise(q)))) |
695 println(astring(bders_simp_full(qs.toList, internalise(q)))) |
735 //println(astring(bders_simp_full(qs.toList, internalise(q)))) |
696 println(string(ders_simp(qs.toList, q))) |
736 //println(string(ders_simp(qs.toList, q))) |
697 println(strings(pders(qs.toList, q))) |
737 //println(strings(pders(qs.toList, q))) |
698 println(strings(pders_simp(qs.toList, q))) |
738 println(strings(pders_simp(qs.toList, q))) |
|
739 |
|
740 |
699 |
741 |
700 System.exit(0) |
742 System.exit(0) |
701 |
743 |
702 val re1 = STAR("a" | "aa") |
744 val re1 = STAR("a" | "aa") |
703 println(astring(bders_simp("".toList, internalise(re1)))) |
745 println(astring(bders_simp("".toList, internalise(re1)))) |