1 import RexpRelated._ |
|
2 import RexpRelated.Rexp._ |
|
3 import Spiral._ |
|
4 import scala.collection.mutable.ArrayBuffer |
|
5 abstract class BRexp |
|
6 case object BZERO extends BRexp |
|
7 case object BONE extends BRexp |
|
8 case class BCHAR(c: Char) extends BRexp |
|
9 case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp |
|
10 case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp |
|
11 case class BSTAR(r: BRexp) extends BRexp |
|
12 |
|
13 object BRexp{ |
|
14 def brternalise(r: Rexp) : BRexp = r match {//remember the initial shadowed bit should be set to Z as there |
|
15 //are no enclosing STAR or SEQ |
|
16 case ZERO => BZERO |
|
17 case ONE => BONE |
|
18 case CHAR(c) => BCHAR(c) |
|
19 case ALTS(rs) => BALTS(Z, rs.map(brternalise)) |
|
20 case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) ) |
|
21 case STAR(r) => BSTAR(brternalise(r)) |
|
22 case RECD(x, r) => brternalise(r) |
|
23 } |
|
24 def brnullable (r: BRexp) : Boolean = r match { |
|
25 case BZERO => false |
|
26 case BONE => true |
|
27 case BCHAR(_) => false |
|
28 case BALTS(bs, rs) => rs.exists(brnullable) |
|
29 case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2) |
|
30 case BSTAR(_) => true |
|
31 } |
|
32 //this function tells bspill when converting a brexp into a list of rexps |
|
33 //the conversion : (a+b)*c -> {a*c, b*c} can only happen when a+b is generated from scratch (e.g. when deriving a seq) |
|
34 //or is from the original regex but have been "touched" (i.e. have been derived) |
|
35 //Why make this distinction? Look at the following example: |
|
36 //r = (c+cb)+c(a+b) |
|
37 // r\c = (1+b)+(a+b) |
|
38 //after simplification |
|
39 // (r\c)simp= 1+b+a |
|
40 //we lost the structure that tells us 1+b should be grouped together and a grouped as itself |
|
41 //however in our brexp simplification, |
|
42 //(r\c)br_simp = 1+b+(a+b) |
|
43 //we do not allow the bracket to be opened as it is from the original expression and have not been touched |
|
44 def brder(c: Char, r: BRexp) : BRexp = r match { |
|
45 case BZERO => BZERO |
|
46 case BONE => BZERO |
|
47 case BCHAR(d) => if (c == d) BONE else BZERO |
|
48 case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD |
|
49 case BSEQ(r1, r2) => |
|
50 if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c~r2 r2's structure is maintained together with its splittablility bit |
|
51 else BSEQ(brder(c, r1), r2) |
|
52 case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r)) |
|
53 } |
|
54 def bflat(rs: List[BRexp]): List[BRexp] = { |
|
55 rs match { |
|
56 case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier. |
|
57 case BZERO :: rs1 => bflat(rs1) |
|
58 case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2) |
|
59 case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2) |
|
60 case r1 :: rs2 => r1 :: bflat(rs2) |
|
61 } |
|
62 } |
|
63 def stflat(rs: List[BRexp]): List[BRexp] = { |
|
64 //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length) |
|
65 //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil May add early termination later. |
|
66 rs match { |
|
67 case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier. |
|
68 case BZERO :: rs1 => bflat(rs1) |
|
69 case BALTS(_, rs1) :: rs2 => rs1 ::: bflat(rs2) |
|
70 case r1 :: rs2 => r1 :: bflat(rs2) |
|
71 } |
|
72 } |
|
73 def berase(r:BRexp): Rexp = r match{ |
|
74 case BZERO => ZERO |
|
75 case BONE => ONE |
|
76 case BCHAR(f) => CHAR(f) |
|
77 case BALTS(bs, rs) => ALTS(rs.map(berase(_))) |
|
78 case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2)) |
|
79 case BSTAR(r)=> STAR(berase(r)) |
|
80 } |
|
81 def br_simp(r: BRexp): BRexp = r match { |
|
82 case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match { |
|
83 case (BZERO, _) => BZERO |
|
84 case (_, BZERO) => BZERO |
|
85 case (BONE, r2s) => r2s |
|
86 case (r1s, r2s) => BSEQ(r1s, r2s) |
|
87 } |
|
88 case BALTS(bs, rs) => { |
|
89 //assert(bs.length == rs.length - 1) |
|
90 val dist_res = if(bs == S) {//all S |
|
91 val rs_simp = rs.map(br_simp) |
|
92 val flat_res = bflat(rs_simp) |
|
93 distinctBy(flat_res, berase) |
|
94 } |
|
95 else{//not allowed to simplify (all Z) |
|
96 rs |
|
97 } |
|
98 dist_res match { |
|
99 case Nil => {BZERO} |
|
100 case s :: Nil => { s} |
|
101 case rs => {BALTS(bs, rs)} |
|
102 } |
|
103 } |
|
104 //case BSTAR(r) => BSTAR(r) |
|
105 case r => r |
|
106 } |
|
107 |
|
108 def strong_br_simp(r: BRexp): BRexp = r match { |
|
109 case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match { |
|
110 case (BZERO, _) => BZERO |
|
111 case (_, BZERO) => BZERO |
|
112 case (BONE, r2s) => r2s |
|
113 case (r1s, r2s) => BSEQ(r1s, r2s) |
|
114 } |
|
115 case BALTS(bs, rs) => { |
|
116 //assert(bs.length == rs.length - 1) |
|
117 val dist_res = {//all S |
|
118 val rs_simp = rs.map(strong_br_simp) |
|
119 val flat_res = stflat(rs_simp) |
|
120 distinctBy(flat_res, berase) |
|
121 } |
|
122 dist_res match { |
|
123 case Nil => {BZERO} |
|
124 case s :: Nil => { s} |
|
125 case rs => {BALTS(bs, rs)} |
|
126 } |
|
127 } |
|
128 case BSTAR(r) => BSTAR(strong_br_simp(r)) |
|
129 case r => r |
|
130 } |
|
131 //we want to bound the size by a function bspill s.t. |
|
132 //bspill(ders_simp(r,s)) ⊂ PD(r) |
|
133 //and bspill is size-preserving (up to a constant factor) |
|
134 //so we bound |ders_simp(r,s)| by |PD(r)| |
|
135 //where we already have a nice bound for |PD(r)|: t^2*n^2 in Antimirov's paper |
|
136 |
|
137 //the function bspill converts a brexp into a list of rexps |
|
138 //the conversion mainly about this: (r1+r2)*r3*r4*.....rn -> {r1*r3*r4*....rn, r2*r3*r4*.....rn} |
|
139 //but this is not always allowed |
|
140 //sometimes, we just leave the expression as it is: |
|
141 //eg1: (a+b)*c -> {(a+b)*c} |
|
142 //eg2: r1+r2 -> {r1+r2} instead of {r1, r2} |
|
143 //why? |
|
144 //if we always return {a, b} when we encounter a+b, the property |
|
145 //bspill(ders_simp(r,s)) ⊂ PD(r) |
|
146 //does not always hold |
|
147 //for instance |
|
148 //if we call the bspill that always returns {r1, r2} when encountering r1+r2 "bspilli" |
|
149 //then bspilli( ders_simp( (c+cb)+c(a+b), c ) ) == bspilli(1+b+a) = {1, b, a} |
|
150 //However the letter a does not belong to PD( (c+cb)+c(a+b) ) |
|
151 //then we can no longer say ders_simp(r,s)'s size is bounded by PD(r) because the former contains something the latter does not have |
|
152 //In order to make sure the inclusion holds, we have to find out why new terms appeared in the bspilli set that don't exist in the PD(r) set |
|
153 //Why a does not belong to PD((c+cb)+c(a+b))? |
|
154 //PD(r1+r2) = PD(r1) union PD(r2) => PD((c+cb)+c(a+b)) = PD(c+cb) union PD(c(a+b)) |
|
155 //So the only possibility for PD to include a must be in the latter part of the regex, namely, c(a+b) |
|
156 //we have lemma that PD(r) = union of pders(s, r) where s is taken over all strings whose length does not exceed depth(r) |
|
157 //so PD(r) ⊂ pder(empty_string, r) union pder(c, r) union pder(ca, r) union pder(cb, r) where r = c(a+b) |
|
158 //RHS = {1} union pder(c, c(a+b)) |
|
159 //Observe that pder(c, c(a+b)) == {a+b} |
|
160 //a and b are together, from the original regular expression (c+cb)+c(a+b). |
|
161 //but by our simplification, we first flattened this a+b into the same level with 1+b, then |
|
162 //removed duplicates of b, thereby destroying the structure in a+b and making this term a, instead of a+b |
|
163 //But in PD(r) we only have a+b, no a |
|
164 //one ad hoc solution might be to try this bspill(ders_simp(r,s)) ⊂ PD(r) union {all letters} |
|
165 //But this does not hold either according to experiment. |
|
166 //So we need to make sure the structure r1+r2 is not simplified away if it is from the original expression |
|
167 |
|
168 |
|
169 |
|
170 def bspill(r: BRexp): List[Rexp] = { |
|
171 r match { |
|
172 case BALTS(b, rs) => { |
|
173 if(b == S) |
|
174 rs.flatMap(bspill) |
|
175 else |
|
176 List(ALTS(rs.map(berase))) |
|
177 } |
|
178 case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) ) |
|
179 case BZERO => List() |
|
180 case r => List(berase(r)) |
|
181 } |
|
182 |
|
183 } |
|
184 |
|
185 } |
|