0
|
1 |
import RexpRelated._
|
|
2 |
import RexpRelated.Rexp._
|
|
3 |
import Spiral._
|
|
4 |
import scala.collection.mutable.ArrayBuffer
|
|
5 |
abstract class BRexp
|
|
6 |
case object BZERO extends BRexp
|
|
7 |
case object BONE extends BRexp
|
|
8 |
case class BCHAR(c: Char) extends BRexp
|
|
9 |
case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp
|
|
10 |
case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp
|
|
11 |
case class BSTAR(r: BRexp) extends BRexp
|
|
12 |
|
|
13 |
object BRexp{
|
|
14 |
def brternalise(r: Rexp) : BRexp = r match {//remember the initial shadowed bit should be set to Z as there
|
|
15 |
//are no enclosing STAR or SEQ
|
|
16 |
case ZERO => BZERO
|
|
17 |
case ONE => BONE
|
|
18 |
case CHAR(c) => BCHAR(c)
|
|
19 |
case ALTS(rs) => BALTS(Z, rs.map(brternalise))
|
|
20 |
case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) )
|
|
21 |
case STAR(r) => BSTAR(brternalise(r))
|
|
22 |
case RECD(x, r) => brternalise(r)
|
|
23 |
}
|
|
24 |
def brnullable (r: BRexp) : Boolean = r match {
|
|
25 |
case BZERO => false
|
|
26 |
case BONE => true
|
|
27 |
case BCHAR(_) => false
|
|
28 |
case BALTS(bs, rs) => rs.exists(brnullable)
|
|
29 |
case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2)
|
|
30 |
case BSTAR(_) => true
|
|
31 |
}
|
1
|
32 |
//this function tells bspill when converting a brexp into a list of rexps
|
|
33 |
//the conversion : (a+b)*c -> {a*c, b*c} can only happen when a+b is generated from scratch (e.g. when deriving a seq)
|
|
34 |
//or is from the original regex but have been "touched" (i.e. have been derived)
|
|
35 |
//Why make this distinction? Look at the following example:
|
|
36 |
//r = (c+cb)+c(a+b)
|
|
37 |
// r\c = (1+b)+(a+b)
|
|
38 |
//after simplification
|
|
39 |
// (r\c)simp= 1+b+a
|
|
40 |
//we lost the structure that tells us 1+b should be grouped together and a grouped as itself
|
|
41 |
//however in our brexp simplification,
|
|
42 |
//(r\c)br_simp = 1+b+(a+b)
|
|
43 |
//we do not allow the bracket to be opened as it is from the original expression and have not been touched
|
0
|
44 |
def brder(c: Char, r: BRexp) : BRexp = r match {
|
|
45 |
case BZERO => BZERO
|
|
46 |
case BONE => BZERO
|
|
47 |
case BCHAR(d) => if (c == d) BONE else BZERO
|
|
48 |
case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD
|
|
49 |
case BSEQ(r1, r2) =>
|
|
50 |
if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c~r2 r2's structure is maintained together with its splittablility bit
|
|
51 |
else BSEQ(brder(c, r1), r2)
|
|
52 |
case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r))
|
|
53 |
}
|
|
54 |
def bflat(rs: List[BRexp]): List[BRexp] = {
|
|
55 |
//println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
|
|
56 |
//assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil May add early termination later.
|
|
57 |
rs match {
|
|
58 |
case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
|
|
59 |
case BZERO :: rs1 => bflat(rs1)
|
|
60 |
case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2)
|
|
61 |
case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2)
|
|
62 |
case r1 :: rs2 => r1 :: bflat(rs2)
|
|
63 |
}
|
|
64 |
}
|
|
65 |
def stflat(rs: List[BRexp]): List[BRexp] = {
|
|
66 |
//println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
|
|
67 |
//assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil May add early termination later.
|
|
68 |
rs match {
|
|
69 |
case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
|
|
70 |
case BZERO :: rs1 => bflat(rs1)
|
|
71 |
case BALTS(_, rs1) :: rs2 => rs1 ::: bflat(rs2)
|
|
72 |
case r1 :: rs2 => r1 :: bflat(rs2)
|
|
73 |
}
|
|
74 |
}
|
|
75 |
def berase(r:BRexp): Rexp = r match{
|
|
76 |
case BZERO => ZERO
|
|
77 |
case BONE => ONE
|
|
78 |
case BCHAR(f) => CHAR(f)
|
|
79 |
case BALTS(bs, rs) => ALTS(rs.map(berase(_)))
|
|
80 |
case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2))
|
|
81 |
case BSTAR(r)=> STAR(berase(r))
|
|
82 |
}
|
|
83 |
def br_simp(r: BRexp): BRexp = r match {
|
|
84 |
case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match {
|
|
85 |
case (BZERO, _) => BZERO
|
|
86 |
case (_, BZERO) => BZERO
|
|
87 |
case (BONE, r2s) => r2s
|
|
88 |
case (r1s, r2s) => BSEQ(r1s, r2s)
|
|
89 |
}
|
|
90 |
case BALTS(bs, rs) => {
|
|
91 |
//assert(bs.length == rs.length - 1)
|
|
92 |
val dist_res = if(bs == S) {//all S
|
|
93 |
val rs_simp = rs.map(br_simp)
|
|
94 |
val flat_res = bflat(rs_simp)
|
|
95 |
distinctBy(flat_res, berase)
|
|
96 |
}
|
|
97 |
else{//not allowed to simplify (all Z)
|
|
98 |
rs
|
|
99 |
}
|
|
100 |
dist_res match {
|
|
101 |
case Nil => {BZERO}
|
|
102 |
case s :: Nil => { s}
|
|
103 |
case rs => {BALTS(bs, rs)}
|
|
104 |
}
|
|
105 |
}
|
|
106 |
//case BSTAR(r) => BSTAR(r)
|
|
107 |
case r => r
|
|
108 |
}
|
|
109 |
|
|
110 |
def strong_br_simp(r: BRexp): BRexp = r match {
|
|
111 |
case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match {
|
|
112 |
case (BZERO, _) => BZERO
|
|
113 |
case (_, BZERO) => BZERO
|
|
114 |
case (BONE, r2s) => r2s
|
|
115 |
case (r1s, r2s) => BSEQ(r1s, r2s)
|
|
116 |
}
|
|
117 |
case BALTS(bs, rs) => {
|
|
118 |
//assert(bs.length == rs.length - 1)
|
|
119 |
val dist_res = {//all S
|
|
120 |
val rs_simp = rs.map(strong_br_simp)
|
|
121 |
val flat_res = stflat(rs_simp)
|
|
122 |
distinctBy(flat_res, berase)
|
|
123 |
}
|
|
124 |
dist_res match {
|
|
125 |
case Nil => {BZERO}
|
|
126 |
case s :: Nil => { s}
|
|
127 |
case rs => {BALTS(bs, rs)}
|
|
128 |
}
|
|
129 |
}
|
|
130 |
//case BSTAR(r) => BSTAR(r)
|
|
131 |
case r => r
|
|
132 |
}
|
|
133 |
|
|
134 |
def break_chain(bs: Bit, rs: List[BRexp]): List[Rexp] = {
|
|
135 |
//do it in a functional style
|
|
136 |
if(bs == S)
|
|
137 |
rs.flatMap(bspill)
|
|
138 |
else
|
|
139 |
List(ALTS(rs.map(berase)))
|
|
140 |
}
|
|
141 |
|
1
|
142 |
//this function converts a brexp into a list of rexps
|
|
143 |
//the conversion mainly about this: (a+b)*c -> {a*c, b*c} if a+b is generated from scratch (e.g. when deriving a seq)
|
|
144 |
//or is from the original regex but have been "touched" (i.e. have been derived)
|
|
145 |
//Why make this distinction? Look at the following example:
|
|
146 |
//r = (c+cb)+c(a+b)
|
|
147 |
// r\c = (1+b)+(a+b)
|
|
148 |
//after simplification
|
|
149 |
// (r\c)simp= 1+b+a
|
|
150 |
//we lost the structure that tells us 1+b should be grouped together and a grouped as itself
|
|
151 |
//however in our brexp simplification,
|
|
152 |
//(r\c)br_simp = 1+b+(a+b)
|
|
153 |
//we do not allow the bracket to be opened as it is from the original expression and have not been touched
|
|
154 |
def bspill(r: BRexp): List[Rexp] = {
|
|
155 |
r match {
|
0
|
156 |
case BALTS(bs, rs) => {
|
|
157 |
break_chain(bs, rs)
|
|
158 |
}
|
|
159 |
case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) )
|
|
160 |
case BZERO => List()
|
|
161 |
case r => List(berase(r))
|
|
162 |
}
|
|
163 |
|
|
164 |
}
|
|
165 |
|
|
166 |
} |