author | Christian Urban <urbanc@in.tum.de> |
Sat, 09 Jun 2018 21:02:04 +0100 | |
changeset 552 | 40fa0f628dc4 |
parent 550 | 71fc4a7a7039 |
child 579 | 1a521448d211 |
permissions | -rw-r--r-- |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
1 |
import scala.language.implicitConversions |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
import scala.language.reflectiveCalls |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
4 |
abstract class Rexp |
426
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
5 |
case object ZERO extends Rexp |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
6 |
case object ONE extends Rexp |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
case class CHAR(c: Char) extends Rexp |
549 | 8 |
case class ALTS(rs: List[Rexp]) extends Rexp |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
9 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
10 |
case class STAR(r: Rexp) extends Rexp |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
|
549 | 12 |
// ALT is now an abbreviation |
13 |
def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2)) |
|
14 |
||
15 |
// proj is now used instead for Left and Right |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
16 |
abstract class Val |
354
86b2aeae3e98
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
352
diff
changeset
|
17 |
case object Empty extends Val |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
18 |
case class Chr(c: Char) extends Val |
520 | 19 |
case class Sequ(v1: Val, v2: Val) extends Val |
549 | 20 |
case class Proj(n: Int, v: Val) extends Val |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
21 |
case class Stars(vs: List[Val]) extends Val |
549 | 22 |
case class Rec(s: String, v: Val) extends Val |
23 |
||
24 |
// for manipulating projections |
|
25 |
def IncProj(m: Int, v: Val) = v match { |
|
26 |
case Proj(n, v) => Proj(n + m, v) |
|
27 |
} |
|
28 |
||
29 |
def DecProj(m: Int, v: Val) = v match { |
|
30 |
case Proj(n, v) => Proj(n - m, v) |
|
31 |
} |
|
32 |
||
33 |
||
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
34 |
// some convenience for typing in regular expressions |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
35 |
def charlist2rexp(s : List[Char]): Rexp = s match { |
426
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
36 |
case Nil => ONE |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
case c::Nil => CHAR(c) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
|
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
implicit def RexpOps(r: Rexp) = new { |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
def | (s: Rexp) = ALT(r, s) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
def % = STAR(r) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
45 |
def ~ (s: Rexp) = SEQ(r, s) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
46 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
47 |
|
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
implicit def stringOps(s: String) = new { |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
def | (r: Rexp) = ALT(s, r) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
def | (r: String) = ALT(s, r) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
def % = STAR(s) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
def ~ (r: Rexp) = SEQ(s, r) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
53 |
def ~ (r: String) = SEQ(s, r) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
54 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
55 |
|
549 | 56 |
// string of a regular expression - for testing purposes |
57 |
def string(r: Rexp) : String = r match { |
|
58 |
case ZERO => "0" |
|
59 |
case ONE => "1" |
|
60 |
case CHAR(c) => c.toString |
|
61 |
case ALTS(rs) => rs.map(string).mkString("[", "|", "]") |
|
62 |
case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" |
|
63 |
case SEQ(ONE, CHAR(c)) => s"1${c}" |
|
64 |
case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |
|
65 |
case STAR(r) => s"(${string(r)})*" |
|
66 |
} |
|
67 |
||
68 |
// size of a regular expression - for testing purposes |
|
69 |
def size(r: Rexp) : Int = r match { |
|
70 |
case ZERO => 1 |
|
71 |
case ONE => 1 |
|
72 |
case CHAR(_) => 1 |
|
73 |
case ALTS(rs) => 1 + rs.map(size).sum |
|
74 |
case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
75 |
case STAR(r) => 1 + size(r) |
|
76 |
} |
|
77 |
||
78 |
||
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
79 |
// nullable function: tests whether the regular |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
80 |
// expression can recognise the empty string |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
81 |
def nullable (r: Rexp) : Boolean = r match { |
426
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
82 |
case ZERO => false |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
83 |
case ONE => true |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
84 |
case CHAR(_) => false |
549 | 85 |
case ALTS(rs) => rs.exists(nullable) |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
86 |
case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
87 |
case STAR(_) => true |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
88 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
89 |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
90 |
// derivative of a regular expression w.r.t. a character |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
91 |
def der (c: Char, r: Rexp) : Rexp = r match { |
426
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
92 |
case ZERO => ZERO |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
93 |
case ONE => ZERO |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
94 |
case CHAR(d) => if (c == d) ONE else ZERO |
549 | 95 |
case ALTS(rs) => ALTS(rs.map(der(c, _))) |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
case SEQ(r1, r2) => |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
97 |
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
98 |
else SEQ(der(c, r1), r2) |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
99 |
case STAR(r) => SEQ(der(c, r), STAR(r)) |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
100 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
101 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
102 |
// derivative w.r.t. a string (iterates der) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
103 |
def ders (s: List[Char], r: Rexp) : Rexp = s match { |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
104 |
case Nil => r |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
105 |
case c::s => ders(s, der(c, r)) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
106 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
107 |
|
550 | 108 |
val test : Rexp= STAR("a" | "aa") |
109 |
size(test) |
|
110 |
size(der('a', test)) |
|
111 |
size(der('a', der('a', test))) |
|
112 |
||
113 |
size(ders("aaaaaa".toList, test)) |
|
114 |
string(ders("aaaaaa".toList, test)) |
|
115 |
||
116 |
||
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
117 |
// extracts a string from value |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
118 |
def flatten(v: Val) : String = v match { |
354
86b2aeae3e98
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
352
diff
changeset
|
119 |
case Empty => "" |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
120 |
case Chr(c) => c.toString |
549 | 121 |
case Proj(_, v) => flatten(v) |
542 | 122 |
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
123 |
case Stars(vs) => vs.map(flatten).mkString |
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
124 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
125 |
|
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
126 |
|
549 | 127 |
// mkeps |
128 |
def mkeps(r: Rexp) : Val = r match { |
|
129 |
case ONE => Empty |
|
130 |
case ALTS(r1::rs) => |
|
131 |
if (nullable(r1)) Proj(0, mkeps(r1)) |
|
132 |
else IncProj(1, mkeps(ALTS(rs))) |
|
133 |
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
|
134 |
case STAR(r) => Stars(Nil) |
|
135 |
} |
|
136 |
||
137 |
// injection |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
138 |
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
520 | 139 |
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
140 |
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
|
549 | 141 |
case (SEQ(r1, r2), Proj(0, Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
142 |
case (SEQ(r1, r2), Proj(1, v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
|
143 |
case (ALTS(rs), Proj(n, v1)) => Proj(n, inj(rs(n), c, v1)) |
|
354
86b2aeae3e98
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
352
diff
changeset
|
144 |
case (CHAR(d), Empty) => Chr(c) |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
145 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
146 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
147 |
// main lexing function (produces a value) |
549 | 148 |
// - does not simplify |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
149 |
def lex(r: Rexp, s: List[Char]) : Val = s match { |
549 | 150 |
case Nil => { |
151 |
//println(s"Size of the last regex: ${size(r)}") |
|
152 |
if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
|
153 |
} |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
154 |
case c::cs => inj(r, c, lex(der(c, r), cs)) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
155 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
156 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
157 |
def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
158 |
|
549 | 159 |
lexing("a" | ZERO, "a") |
160 |
lexing(ZERO | "a", "a") |
|
450
d91a1748a9be
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
426
diff
changeset
|
161 |
|
d91a1748a9be
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
426
diff
changeset
|
162 |
lexing(("ab" | "a") ~ ("b" | ONE), "ab") |
d91a1748a9be
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
426
diff
changeset
|
163 |
|
549 | 164 |
|
165 |
// removing duplicate regular expressions |
|
166 |
val unit = (ZERO, F_ERROR(_)) |
|
167 |
||
168 |
def dups2(xs: List[(Rexp, Val => Val)], |
|
169 |
acc: List[(Rexp, Val => Val)] = Nil) : List[(Rexp, Val => Val)] = xs match { |
|
170 |
case Nil => acc |
|
171 |
case (x, y)::xs => |
|
172 |
if (acc.map(_._1).contains(x)) dups2(xs, acc :+ unit) |
|
173 |
else dups2(xs, acc :+ (x, y)) |
|
174 |
} |
|
175 |
||
176 |
def dups(xs: List[(Rexp, Val => Val)]) : List[(Rexp, Val => Val)] = { |
|
177 |
val out = dups2(xs) |
|
178 |
//if (out != xs) { |
|
179 |
// println() |
|
180 |
// println(s"Input ${string(ALTS(xs.map(_._1)))}") |
|
181 |
// println(s"Ouput ${string(ALTS(out.map(_._1)))}") |
|
182 |
//} |
|
183 |
out |
|
184 |
} |
|
185 |
||
186 |
||
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
187 |
// some "rectification" functions for simplification |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
188 |
def F_ID(v: Val): Val = v |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
189 |
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { |
520 | 190 |
case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
191 |
} |
354
86b2aeae3e98
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
352
diff
changeset
|
192 |
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = |
520 | 193 |
(v:Val) => Sequ(f1(Empty), f2(v)) |
354
86b2aeae3e98
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
352
diff
changeset
|
194 |
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = |
520 | 195 |
(v:Val) => Sequ(f1(v), f2(Empty)) |
549 | 196 |
def F_ERROR(v: Val): Val = throw new Exception("error") |
197 |
def F_PRINT(v: Val): Val = { |
|
198 |
println(s"value is ${v}") |
|
199 |
throw new Exception("caught error") |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
200 |
} |
549 | 201 |
|
202 |
def flats(rs: List[Rexp], seen: Set[Rexp]) : (List[Rexp], Val => Val) = { |
|
203 |
//println(s"I am flats: ${string(ALTS(rs))}") |
|
204 |
//println(s"The size of seen is ${seen.size}, ${seen.map(string)}") |
|
205 |
rs match { |
|
206 |
case Nil => (Nil, F_ERROR) |
|
207 |
case r::rs1 if seen.contains(simp(r)._1) => { |
|
208 |
//println(s" I remove ${string(r)}") |
|
209 |
val (rs2, f) = flats(rs1, seen) |
|
210 |
(rs2, (v:Val) => IncProj(1, f(v))) |
|
211 |
} |
|
212 |
case ZERO::rs1 => { |
|
213 |
val (rs2, f) = flats(rs1, seen) |
|
214 |
(rs2, (v:Val) => IncProj(1, f(v))) |
|
215 |
} |
|
216 |
case ALTS(rs0)::rs1 => { |
|
217 |
val (rss, f1) = flats(rs0, seen) |
|
218 |
val (rs2, f2) = flats(rs1, rss.toSet ++ seen) |
|
219 |
(rss:::rs2, (v:Val) => v match { |
|
220 |
case Proj(n, vn) => |
|
221 |
if (n < rss.length) Proj(0, f1(Proj(n, vn))) |
|
222 |
else IncProj(1, f2(Proj(n - rss.length, vn))) |
|
223 |
}) |
|
224 |
} |
|
225 |
case r1::rs2 => { |
|
226 |
val (r1s, f1) = simp(r1) |
|
227 |
val (rs3, f2) = flats(rs2, seen + r1s) |
|
228 |
(r1s::rs3, (v:Val) => v match { |
|
229 |
case Proj(0, vn) => Proj(0, f1(vn)) |
|
230 |
case Proj(n, vn) => IncProj(1, f2(Proj(n - 1, vn))) |
|
231 |
}) |
|
232 |
} |
|
233 |
}} |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
234 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
235 |
// simplification of regular expressions returning also an |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
236 |
// rectification function; no simplification under STAR |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
237 |
def simp(r: Rexp): (Rexp, Val => Val) = r match { |
549 | 238 |
case ALTS(rs) => { |
239 |
val (rs_s, fs_s) = flats(rs, Set()) |
|
240 |
rs_s match { |
|
241 |
case Nil => (ZERO, F_ERROR) |
|
242 |
case r::Nil => (r, (v:Val) => fs_s(Proj(0, v))) |
|
243 |
case rs_sd => (ALTS(rs_sd), fs_s) |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
244 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
245 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
246 |
case SEQ(r1, r2) => { |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
247 |
val (r1s, f1s) = simp(r1) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
248 |
val (r2s, f2s) = simp(r2) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
249 |
(r1s, r2s) match { |
426
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
250 |
case (ZERO, _) => (ZERO, F_ERROR) |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
251 |
case (_, ZERO) => (ZERO, F_ERROR) |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
252 |
case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) |
0debe6f41396
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
354
diff
changeset
|
253 |
case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) |
549 | 254 |
case (ALTS(rs), r2s) => (ALTS(rs.map(SEQ(_, r2s))), |
255 |
(v:Val) => v match { |
|
256 |
case Proj(n, Sequ(v1, v2)) => Sequ(f1s(Proj(n, v1)), f2s(v2)) |
|
257 |
} |
|
258 |
) |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
259 |
case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
260 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
261 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
262 |
case r => (r, F_ID) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
263 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
264 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
265 |
def lex_simp(r: Rexp, s: List[Char]) : Val = s match { |
549 | 266 |
case Nil => { |
267 |
//println(s"Size of the last regex: ${size(r)}") |
|
268 |
//println(s"${string(r)}") |
|
269 |
if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
|
270 |
} |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
271 |
case c::cs => { |
549 | 272 |
val rd = der(c, r) |
273 |
val (r_simp, f_simp) = simp(rd) |
|
552 | 274 |
println(s"BEFORE ${string(rd)}") |
275 |
println(s"AFTER ${string(r_simp)}") |
|
549 | 276 |
val rec = lex_simp(r_simp, cs) |
277 |
inj(r, c, f_simp(rec)) |
|
164
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
278 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
279 |
} |
6c1d214c39ef
added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
280 |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
281 |
def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList) |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
282 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
283 |
|
549 | 284 |
lexing_simp("ab" | "aa", "ab") |
285 |
lexing_simp("ab" | "aa", "aa") |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
286 |
|
549 | 287 |
lexing(STAR("a" | "aa"), "aaaaa") |
288 |
lexing_simp(STAR("a" | "aa"), "aaaaa") |
|
450
d91a1748a9be
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
426
diff
changeset
|
289 |
|
549 | 290 |
lexing(STAR("a" | "aa"), "aaaaaaaaaaa") |
291 |
lexing_simp(STAR("a" | "aa"), "aaaaaaaaaaa") |
|
292 |
||
293 |
lexing_simp(STAR("a" | "aa"), "a" * 2) |
|
294 |
lexing_simp(STAR("a" | "aa"), "a" * 3) |
|
295 |
lexing_simp(STAR("a" | "aa"), "a" * 4) |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
296 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
297 |
|
549 | 298 |
lexing_simp(STAR("a" | "aa"), "a" * 20) |
299 |
lexing_simp(STAR("a" | "aa"), "a" * 2000) |
|
300 |
||
301 |
lexing(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab") |
|
302 |
lexing_simp(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab") |
|
303 |
||
304 |
lexing(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab") |
|
305 |
lexing_simp(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab") |
|
306 |
||
307 |
lexing(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa") |
|
308 |
lexing_simp(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa") |
|
309 |
||
310 |
lexing_simp(ONE | ZERO, "") |
|
311 |
lexing_simp(ZERO | ONE, "") |
|
312 |
||
313 |
lexing("a" | ZERO, "a") |
|
314 |
lexing_simp("a" | ZERO, "a") |
|
315 |
lexing(ZERO | "a", "a") |
|
316 |
lexing_simp(ZERO | "a", "a") |
|
317 |
||
318 |
lexing(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a") |
|
319 |
lexing_simp(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a") |
|
320 |
lexing(ALTS(List("a")), "a") |
|
321 |
lexing_simp(ALTS(List("a")), "a") |
|
322 |
||
323 |
lexing_simp(("a" | ZERO) | ZERO, "a") |
|
324 |
lexing_simp("a" | (ZERO | ZERO), "a") |
|
325 |
lexing_simp(ZERO | ("a" | ZERO), "a") |
|
326 |
||
327 |
||
328 |
lexing_simp("abc", "abc") |
|
329 |
||
330 |
lexing_simp("abc" | ONE, "abc") |
|
331 |
||
332 |
lexing(("a" | "ab") ~ ("b" | ""), "ab") |
|
333 |
lexing_simp(("a" | "ab") ~ ("b" | ""), "ab") |
|
334 |
lexing_simp(("ba" | "c" | "ab"), "ab") |
|
335 |
||
336 |
lexing(ALTS(List(ALTS(Nil), "c", "ab")), "ab") |
|
337 |
lexing_simp(ALTS(List(ALTS(Nil), "c", "ab")), "ab") |
|
338 |
||
339 |
lexing(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab") |
|
340 |
lexing_simp(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab") |
|
341 |
||
342 |
lexing(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab") |
|
343 |
lexing_simp(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab") |
|
344 |
||
345 |
lexing(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab") |
|
346 |
lexing_simp(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab") |
|
347 |
||
348 |
lexing(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab") |
|
349 |
lexing_simp(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab") |
|
350 |
||
351 |
lexing(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab") |
|
352 |
lexing_simp(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab") |
|
353 |
||
354 |
lexing(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab") |
|
355 |
lexing_simp(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab") |
|
356 |
||
357 |
||
358 |
lexing_simp(ALTS(List("ba", "c", "ab")), "ab") |
|
359 |
||
360 |
lexing(ALTS(List("a", "ab", "a")), "ab") |
|
361 |
lexing_simp(ALTS(List("a", "ab", "a")), "ab") |
|
362 |
||
363 |
lexing(STAR("a" | "aa"), "aa") |
|
364 |
lexing_simp(STAR("a" | "aa"), "aa") |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
365 |
|
542 | 366 |
|
367 |
||
368 |
||
549 | 369 |
def enum(n: Int, s: String) : Set[Rexp] = n match { |
370 |
case 0 => Set(ZERO, ONE) ++ s.toSet.map(CHAR) |
|
371 |
case n => { |
|
372 |
val rs = enum(n - 1, s) |
|
373 |
rs ++ |
|
374 |
(for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++ |
|
375 |
(for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++ |
|
376 |
(for (r1 <- rs) yield STAR(r1)) |
|
377 |
} |
|
542 | 378 |
} |
379 |
||
549 | 380 |
def strs(n: Int, cs: String) : Set[String] = { |
381 |
if (n == 0) Set("") |
|
382 |
else { |
|
383 |
val ss = strs(n - 1, cs) |
|
384 |
ss ++ |
|
385 |
(for (s <- ss; c <- cs.toList) yield c + s) |
|
386 |
} |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
387 |
} |
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
388 |
|
549 | 389 |
import scala.util.Try |
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
390 |
|
549 | 391 |
def tests(n: Int, m: Int, s: String) = { |
392 |
val rs = enum(n, s) |
|
393 |
val ss = strs(m, s) |
|
394 |
println(s"cases generated: ${rs.size} regexes and ${ss.size} strings") |
|
395 |
for (r1 <- rs.par; s1 <- ss.par) yield { |
|
396 |
val res1 = Try(Some(lexing(r1, s1))).getOrElse(None) |
|
397 |
val res2 = Try(Some(lexing_simp(r1, s1))).getOrElse(None) |
|
398 |
if (res1 != res2) println(s"Disagree on ${r1} and ${s1}") |
|
399 |
if (res1 != res2) Some((r1, s1)) else None |
|
400 |
} |
|
401 |
} |
|
402 |
||
403 |
println("Testing") |
|
404 |
println(tests(2,7,"abc")) |
|
352
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
405 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
406 |
|
1e1b0fe66107
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
165
diff
changeset
|
407 |