author | Chengsong |
Tue, 02 Jul 2019 11:22:39 +0100 | |
changeset 37 | 17d8e7599a01 |
parent 17 | 3241b1e71633 |
child 59 | 8ff7b7508824 |
permissions | -rw-r--r-- |
0 | 1 |
package RexpRelated |
2 |
import scala.language.implicitConversions |
|
3 |
import scala.language.reflectiveCalls |
|
4 |
import scala.annotation.tailrec |
|
5 |
import scala.util.Try |
|
6 |
||
7 |
// A single bit of the bit-code that describes how a regular expression matched.
abstract class Bit
// Emitted by `code` for a Left value and for the end of a Stars list.
case object Z extends Bit
// Emitted by `code` for a Right value and for each further Stars element.
case object S extends Bit
|
12
768b833d6230
removed C(c) The retrieve and code in the previous version is still not correct and will crash. no prob now.
Chengsong
parents:
11
diff
changeset
|
10 |
//case class C(c: Char) extends Bit |
0 | 11 |
|
12 |
||
13 |
// Plain (un-annotated) regular expressions.
abstract class Rexp
// Matches nothing (see `nullable`/`der`: never nullable, derivative is ZERO).
case object ZERO extends Rexp
// Matches only the empty string.
case object ONE extends Rexp
// Matches exactly the character `c`.
case class CHAR(c: Char) extends Rexp
// Alternative over the list `rs`.
case class ALTS(rs: List[Rexp]) extends Rexp
// `r1` followed by `r2`.
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
// Zero or more repetitions of `r`.
case class STAR(r: Rexp) extends Rexp
// `r` labelled with the name `x`; `env` reports the matched text under that name.
case class RECD(x: String, r: Rexp) extends Rexp
|
21 |
||
22 |
||
23 |
||
24 |
object Rexp{ |
|
25 |
// A bit-code is a list of bits.
type Bits = List[Bit]
// abbreviations
type Mon = (Char, Rexp)
type Lin = Set[Mon]
// Binary alternative expressed through the n-ary ALTS constructor.
def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
// One-or-more repetitions, encoded as r followed by r*.
def PLUS(r: Rexp) = SEQ(r, STAR(r))
// Binary annotated alternative carrying the bits `bs`.
def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
|
32 |
||
33 |
||
34 |
// Keeps the first element of `xs` for every distinct key `f(x)`, preserving order.
// Keys already listed in `acc` count as seen, so elements mapping to them are dropped.
//
// Implemented as a tail-recursive loop over a Set of seen keys: the previous version
// recursed once per kept element (stack overflow on long lists) and did a linear
// `contains` scan per element (quadratic time).
// NOTE(review): membership now relies on hashCode being consistent with equals for C,
// which holds for case classes such as Rexp.
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = {
  @tailrec
  def loop(rest: List[B], seen: Set[C], keptReversed: List[B]): List[B] = rest match {
    case Nil => keptReversed.reverse
    case head :: tail =>
      val key = f(head)
      if (seen.contains(key)) loop(tail, seen, keptReversed)
      else loop(tail, seen + key, head :: keptReversed)
  }
  loop(xs, acc.toSet, Nil)
}
|
42 |
// some convenience for typing in regular expressions |
|
43 |
// Turns a list of characters into a right-nested sequence of CHARs;
// the empty list becomes ONE and a single character becomes a bare CHAR.
def charlist2rexp(s : List[Char]): Rexp = {
  val atoms: List[Rexp] = s.map(ch => CHAR(ch))
  atoms.reduceRightOption[Rexp]((first, rest) => SEQ(first, rest)).getOrElse(ONE)
}
// Lets string literals be used wherever a Rexp is expected.
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
|
49 |
||
50 |
// Infix syntax for building regular expressions from a Rexp:
//   r | s  (alternative),  r.%  (star),  r ~ s  (sequence).
// A named class replaces the former anonymous structural type, so calls are
// dispatched statically instead of through reflection.
final class RexpOpsSyntax(r: Rexp) {
  def | (s: Rexp): ALTS = ALT(r, s)
  def % : STAR = STAR(r)
  def ~ (s: Rexp): SEQ = SEQ(r, s)
}
implicit def RexpOps(r: Rexp): RexpOpsSyntax = new RexpOpsSyntax(r)
|
55 |
||
56 |
// Infix syntax for building regular expressions starting from a string literal:
//   s | r, s.%, s ~ r, and  s $ r  (record r under the name s).
// The String receiver/argument is converted through the implicit string2rexp.
// A named class replaces the former anonymous structural type, so calls are
// dispatched statically instead of through reflection.
final class StringOpsSyntax(s: String) {
  def | (r: Rexp): ALTS = ALT(s, r)
  def | (r: String): ALTS = ALT(s, r)
  def % : STAR = STAR(s)
  def ~ (r: Rexp): SEQ = SEQ(s, r)
  def ~ (r: String): SEQ = SEQ(s, r)
  def $ (r: Rexp): RECD = RECD(s, r)
}
implicit def stringOps(s: String): StringOpsSyntax = new StringOpsSyntax(s)
|
64 |
||
65 |
// translation into ARexps |
|
66 |
// Prepends `bs` to the bits stored at the top of the annotated regex `r`.
// AZERO carries no bits and is returned unchanged.
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
  case ASTAR(own, inner) => ASTAR(bs ++ own, inner)
  case ASEQ(own, first, second) => ASEQ(bs ++ own, first, second)
  case AALTS(own, alternatives) => AALTS(bs ++ own, alternatives)
  case ACHAR(own, ch) => ACHAR(bs ++ own, ch)
  case AONE(own) => AONE(bs ++ own)
  case AZERO => AZERO
}
|
74 |
||
75 |
// Translates a plain regex into an annotated one with empty top-level bits.
// Alternatives are tagged so that the i-th of n branches carries i S-bits followed
// by a Z, except the last branch which carries only the S-bits; this is the layout
// that decode_aux and retrieve read back. RECD labels are dropped.
//
// Unlike the previous version, ALTS(List(r)) and ALTS(Nil) no longer throw MatchError:
// a single alternative gets no bit (matching the ALTS(r::Nil) case of decode_aux),
// and an empty alternative becomes AZERO.
// NOTE(review): AZERO for ALTS(Nil) mirrors what bsimp returns for an empty
// alternative list - confirm this is the intended reading.
def internalise(r: Rexp) : ARexp = {
  // Annotated branches of an n-ary alternative, already tagged with their Z/S prefix.
  def branches(rs: List[Rexp]): List[ARexp] = rs match {
    case Nil => Nil
    case last :: Nil => List(internalise(last))
    case head :: tail =>
      fuse(List(Z), internalise(head)) :: branches(tail).map(fuse(List(S), _))
  }
  r match {
    case ZERO => AZERO
    case ONE => AONE(Nil)
    case CHAR(c) => ACHAR(Nil, c)
    case ALTS(Nil) => AZERO
    case ALTS(rs) => AALTS(Nil, branches(rs))
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
    case STAR(inner) => ASTAR(Nil, internalise(inner))
    case RECD(_, inner) => internalise(inner)
  }
}
|
89 |
||
90 |
internalise(("a" | "ab") ~ ("b" | "")) |
|
5 | 91 |
|
0 | 92 |
// Reads a value for `r` off the front of `bs` and returns it with the unread bits.
// ONE and CHAR consume nothing; a multi-branch ALTS consumes Z (Left, first branch)
// or S (Right, remaining branches); STAR consumes S per iteration and Z to stop.
//
// A bit sequence that does not fit `r` now fails with the same "Not decodable"
// exception that decode throws, instead of a MatchError / NoSuchElementException.
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
  case (ONE, rest) => (Empty, rest)
  case (CHAR(f), rest) => (Chr(f), rest)
  // A single-branch alternative carries no bit of its own; this seems to arise
  // only when a regex was simplified before taking derivatives, e.g. alt("a").
  case (ALTS(only :: Nil), rest) => decode_aux(only, rest)
  case (ALTS(first :: _), Z :: rest) =>
    val (v, remaining) = decode_aux(first, rest)
    (Left(v), remaining)
  case (ALTS(_ :: others), S :: rest) =>
    val (v, remaining) = decode_aux(ALTS(others), rest)
    (Right(v), remaining)
  case (SEQ(r1, r2), rest) =>
    val (v1, afterFirst) = decode_aux(r1, rest)
    val (v2, afterSecond) = decode_aux(r2, afterFirst)
    (Sequ(v1, v2), afterSecond)
  case (STAR(r1), S :: rest) =>
    val (v, afterItem) = decode_aux(r1, rest)
    decode_aux(STAR(r1), afterItem) match {
      case (Stars(vs), remaining) => (Stars(v :: vs), remaining)
      case _ => throw new Exception("Not decodable")
    }
  case (STAR(_), Z :: rest) => (Stars(Nil), rest)
  case (RECD(x, r1), rest) =>
    val (v, remaining) = decode_aux(r1, rest)
    (Rec(x, v), remaining)
  case _ => throw new Exception("Not decodable")
}
|
123 |
||
124 |
// Decodes `bs` against `r`; every bit must be consumed, otherwise decoding fails.
def decode(r: Rexp, bs: Bits) = {
  val (value, leftover) = decode_aux(r, bs)
  if (leftover.isEmpty) value
  else throw new Exception("Not decodable")
}
|
5 | 128 |
|
11
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
5
diff
changeset
|
129 |
// Encodes a value as a bit sequence: Left -> Z, Right -> S, each Stars element -> S
// with a closing Z; Empty and Chr contribute no bits.
// Rec values (produced by decode_aux for RECD) are now handled as well: the label
// has no bit of its own, so only the wrapped value is encoded. Previously a Rec
// caused a MatchError.
def code(v: Val): Bits = v match {
  case Empty => Nil
  case Chr(_) => Nil
  case Left(inner) => Z :: code(inner)
  case Right(inner) => S :: code(inner)
  case Sequ(v1, v2) => code(v1) ::: code(v2)
  case Stars(Nil) => Z :: Nil
  case Stars(first :: rest) => S :: code(first) ::: code(Stars(rest))
  case Rec(_, inner) => code(inner)
}
0 | 138 |
|
11
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
5
diff
changeset
|
139 |
|
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
5
diff
changeset
|
140 |
// Collects the bits of annotated regex `r` along the path described by value `v`.
def retrieve(r: ARexp, v: Val): Bits = (r, v) match {
  case (AONE(bits), Empty) => bits
  case (ACHAR(bits, _), Chr(_)) => bits
  // a single-branch alternative has no Left/Right wrapper around its value
  case (AALTS(bits, only :: Nil), value) => bits ++ retrieve(only, value)
  case (AALTS(bits, branches), Left(inner)) => bits ++ retrieve(branches.head, inner)
  case (AALTS(bits, branches), Right(inner)) =>
    bits ++ retrieve(AALTS(Nil, branches.tail), inner)
  case (ASEQ(bits, a1, a2), Sequ(v1, v2)) =>
    bits ++ retrieve(a1, v1) ++ retrieve(a2, v2)
  case (ASTAR(bits, _), Stars(Nil)) => bits ++ List(Z)
  case (ASTAR(bits, body), Stars(first :: rest)) =>
    bits ++ List(S) ++ retrieve(body, first) ++ retrieve(ASTAR(Nil, body), Stars(rest))
}
0 | 150 |
//erase function: extracts the regx from Aregex |
151 |
// Strips all bit annotations, giving back the underlying plain regex.
def erase(r:ARexp): Rexp = r match{
  case ASTAR(_, inner) => STAR(erase(inner))
  case ASEQ(_, first, second) => SEQ(erase(first), erase(second))
  case AALTS(_, branches) => ALTS(branches.map(erase))
  case ACHAR(_, ch) => CHAR(ch)
  case AONE(_) => ONE
  case AZERO => ZERO
}
|
159 |
||
160 |
//--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART |
|
161 |
// nullable function: tests whether the regular |
|
162 |
// expression can recognise the empty string |
|
163 |
// True when `r` can match the empty string.
def nullable (r: Rexp) : Boolean = r match {
  case ZERO | CHAR(_) => false
  case ONE | STAR(_) => true
  case ALTS(branches) => branches.exists(nullable)
  case SEQ(first, second) => nullable(first) && nullable(second)
  case RECD(_, inner) => nullable(inner)
}
|
173 |
||
174 |
// derivative of a regular expression w.r.t. a character |
|
175 |
// Derivative of `r` with respect to the character `c`.
// The alternative case now works for any number of branches (the derivative is
// taken branch-wise); before, only two-branch ALTS matched and every other
// arity threw a MatchError. Two-branch results are unchanged.
def der (c: Char, r: Rexp) : Rexp = r match {
  case ZERO => ZERO
  case ONE => ZERO
  case CHAR(f) => if (c == f) ONE else ZERO
  case ALTS(rs) => ALTS(rs.map(der(c, _)))
  case SEQ(r1, r2) =>
    // when r1 can match the empty string, c may also be consumed by r2
    if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
    else SEQ(der(c, r1), r2)
  case STAR(inner) => SEQ(der(c, inner), STAR(inner))
  case RECD(_, inner) => der(c, inner)
}
|
187 |
||
188 |
// The string that the value `v` stands for.
def flatten(v: Val) : String = v match {
  case Empty => ""
  case Chr(ch) => ch.toString
  case Left(inner) => flatten(inner)
  case Right(inner) => flatten(inner)
  case Rec(_, inner) => flatten(inner)
  case Sequ(first, second) => flatten(first) + flatten(second)
  case Stars(items) => items.map(flatten).mkString
}
|
197 |
||
198 |
// extracts an environment from a value |
|
199 |
// Extracts the (record name, matched text) pairs from a value, outermost record first.
def env(v: Val) : List[(String, String)] = v match {
  case Empty => Nil
  case Chr(_) => Nil
  case Left(inner) => env(inner)
  case Right(inner) => env(inner)
  case Rec(name, inner) => (name, flatten(inner)) :: env(inner)
  case Sequ(first, second) => env(first) ::: env(second)
  case Stars(items) => items.flatMap(env)
}
|
208 |
||
209 |
||
210 |
// injection part |
|
211 |
// Builds the value for how a nullable regex matches the empty string.
// Not defined for regexes that are not nullable (ZERO, CHAR).
//
// Alternatives of any arity are now supported, using the same nesting that
// decode_aux produces: a single branch has no wrapper, otherwise the first
// nullable branch is chosen as Left(..) or the search continues under Right(..).
// For two branches this gives exactly the previous result.
def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case ALTS(only :: Nil) => mkeps(only)
  case ALTS(first :: others) =>
    if (nullable(first)) Left(mkeps(first)) else Right(mkeps(ALTS(others)))
  case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
  case STAR(_) => Stars(Nil)
  case RECD(x, inner) => Rec(x, mkeps(inner))
}
|
220 |
||
221 |
// Injects the character `c` back into `v`, a value for the derivative of `r`
// with respect to `c`, giving a value for `r` itself.
//
// Alternatives of any arity are now supported, following the nesting used by
// decode_aux: a single branch has no Left/Right wrapper, Left addresses the first
// branch and Right the alternative over the remaining ones. Two-branch
// alternatives behave exactly as before.
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  case (STAR(inner), Sequ(v1, Stars(vs))) => Stars(inj(inner, c, v1) :: vs)
  case (SEQ(r1, _), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
  case (SEQ(r1, _), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
  case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
  case (ALTS(only :: Nil), value) => inj(only, c, value)
  case (ALTS(first :: _), Left(v1)) => Left(inj(first, c, v1))
  case (ALTS(_ :: others), Right(v2)) => Right(inj(ALTS(others), c, v2))
  case (CHAR(_), Empty) => Chr(c)
  case (RECD(x, inner), _) => Rec(x, inj(inner, c, v))
}
|
232 |
// Lexes `s` with `r` by derivatives and injection, without simplification.
// Throws Exception("Not matched") when `r` does not match.
def lex(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
  } else {
    val ch = s.head
    inj(r, ch, lex(der(ch, r), s.tail))
  }

// String front-end for lex.
def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
|
238 |
||
239 |
// some "rectification" functions for simplification |
|
240 |
// Rectification functions: they rebuild a value for the original regex from a
// value for its simplified form.

// Nothing was simplified.
def F_ID(v: Val): Val = v
// The left branch was dropped; the value belongs to the right one.
def F_RIGHT(f: Val => Val) = (value: Val) => Right(f(value))
// The right branch was dropped; the value belongs to the left one.
def F_LEFT(f: Val => Val) = (value: Val) => Left(f(value))
// Both branches were kept; rectify inside whichever branch was taken.
def F_ALT(f1: Val => Val, f2: Val => Val) = (value: Val) => value match {
  case Left(inner) => Left(f1(inner))
  case Right(inner) => Right(f2(inner))
}
// Both sequence components were kept.
def F_SEQ(f1: Val => Val, f2: Val => Val) = (value: Val) => value match {
  case Sequ(first, second) => Sequ(f1(first), f2(second))
}
// The first component simplified to ONE and was removed.
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
  (value: Val) => Sequ(f1(Empty), f2(value))
// The second component simplified to ONE and was removed.
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
  (value: Val) => Sequ(f1(value), f2(Empty))
// Rectify underneath a record label.
def F_RECD(f: Val => Val) = (value: Val) => value match {
  case Rec(name, inner) => Rec(name, f(inner))
}
// Used where the simplified regex is ZERO, for which no value can exist.
def F_ERROR(v: Val): Val = throw new Exception("error")
|
258 |
||
259 |
// simplification of regular expressions returning also an |
|
260 |
// rectification function; no simplification under STAR |
|
261 |
// Simplifies a regex and returns, alongside it, the rectification function that
// maps values of the simplified regex back to values of the original.
// Only two-branch alternatives, sequences and records are simplified; anything
// else (including everything under STAR) is returned unchanged with F_ID.
def simp(r: Rexp): (Rexp, Val => Val) = r match {
  case ALTS(List(left, right)) =>
    val (leftS, fLeft) = simp(left)
    val (rightS, fRight) = simp(right)
    (leftS, rightS) match {
      case (ZERO, _) => (rightS, F_RIGHT(fRight))
      case (_, ZERO) => (leftS, F_LEFT(fLeft))
      case _ =>
        if (leftS == rightS) (leftS, F_LEFT(fLeft))
        else (ALTS(List(leftS, rightS)), F_ALT(fLeft, fRight))
    }
  case SEQ(first, second) =>
    val (firstS, fFirst) = simp(first)
    val (secondS, fSecond) = simp(second)
    (firstS, secondS) match {
      case (ZERO, _) => (ZERO, F_ERROR)
      case (_, ZERO) => (ZERO, F_ERROR)
      case (ONE, _) => (secondS, F_SEQ_Empty1(fFirst, fSecond))
      case (_, ONE) => (firstS, F_SEQ_Empty2(fFirst, fSecond))
      case _ => (SEQ(firstS, secondS), F_SEQ(fFirst, fSecond))
    }
  case RECD(name, inner) =>
    val (innerS, fInner) = simp(inner)
    (RECD(name, innerS), F_RECD(fInner))
  case other => (other, F_ID)
}
|
289 |
/* |
|
290 |
val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long] |
|
291 |
val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long] |
|
292 |
*/ |
|
293 |
// Like lex, but simplifies every derivative and rectifies the value on the way back.
// Throws Exception("Not matched") when `r` does not match.
def lex_simp(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
  } else {
    val ch = s.head
    val (simplified, rectify) = simp(der(ch, r))
    inj(r, ch, rectify(lex_simp(simplified, s.tail)))
  }

// String front-end for lex_simp.
def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
|
307 |
||
308 |
//println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")) |
|
309 |
||
310 |
// filters out all white spaces |
|
311 |
// Lexes `s` and returns the recorded tokens, leaving out those labelled "w".
def tokenise(r: Rexp, s: String) = {
  val tokens = env(lexing_simp(r, s))
  tokens.filter { case (label, _) => label != "w" }
}
|
313 |
||
314 |
||
315 |
//reads the string from a file |
|
316 |
// Reads the whole file `name` into a string (default platform encoding).
// The underlying Source is now closed once the content has been read; before,
// the file handle was left open.
def fromFile(name: String) : String = {
  val source = scala.io.Source.fromFile(name)
  try source.mkString
  finally source.close()
}
|
318 |
||
319 |
// Tokenises the content of the file `name` with `r`.
def tokenise_file(r: Rexp, name: String) = {
  val contents = fromFile(name)
  tokenise(r, contents)
}
|
321 |
||
322 |
// Testing |
|
323 |
//============ |
|
324 |
||
325 |
// Evaluates `code` once, prints the elapsed wall-clock time in seconds,
// and returns the result of the evaluation.
def time[T](code: => T) = {
  val startedAt = System.nanoTime()
  val outcome = code
  val finishedAt = System.nanoTime()
  println((finishedAt - startedAt)/1.0e9)
  outcome
}
|
332 |
||
333 |
//--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART |
|
334 |
||
335 |
// bnullable function: tests whether the aregular |
|
336 |
// expression can recognise the empty string |
|
337 |
// True when the annotated regex `r` can match the empty string.
def bnullable (r: ARexp) : Boolean = r match {
  case AZERO | ACHAR(_, _) => false
  case AONE(_) | ASTAR(_, _) => true
  case AALTS(_, branches) => branches.exists(bnullable)
  case ASEQ(_, first, second) => bnullable(first) && bnullable(second)
}
|
345 |
||
346 |
// Bit-code for how the nullable annotated regex `r` matches the empty string.
// In an alternative the first nullable branch is used; a star stops at once (Z).
def mkepsBC(r: ARexp) : Bits = r match {
  case AONE(bits) => bits
  case ASTAR(bits, _) => bits ++ List(Z)
  case ASEQ(bits, first, second) => bits ++ mkepsBC(first) ++ mkepsBC(second)
  case AALTS(bits, branches) =>
    val firstNullable = branches.indexWhere(bnullable)
    bits ++ mkepsBC(branches(firstNullable))
}
|
355 |
||
356 |
// derivative of a regular expression w.r.t. a character |
|
357 |
// Derivative of the annotated regex `r` with respect to the character `c`.
def bder(c: Char, r: ARexp) : ARexp = r match {
  case AZERO | AONE(_) => AZERO
  case ACHAR(bits, f) => if (c == f) AONE(bits) else AZERO
  case AALTS(bits, branches) => AALTS(bits, branches.map(branch => bder(c, branch)))
  case ASEQ(bits, first, second) =>
    if (!bnullable(first)) ASEQ(bits, bder(c, first), second)
    else {
      // c is consumed either by `first`, or by `second` after `first` matched the
      // empty string; in the latter case the bits for that empty match are kept.
      val viaFirst = ASEQ(Nil, bder(c, first), second)
      val viaSecond = fuse(mkepsBC(first), bder(c, second))
      AALT(bits, viaFirst, viaSecond)
    }
  case ASTAR(bits, body) =>
    // one more iteration is started, marked with S
    ASEQ(bits, fuse(List(S), bder(c, body)), ASTAR(Nil, body))
}
|
367 |
||
368 |
||
369 |
// Derivative with respect to a string: applies der for each character, left to right.
def ders (s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r)((current, ch) => der(ch, current))
|
373 |
||
374 |
// derivative w.r.t. a string (iterates bder) |
|
375 |
// Derivative with respect to a string: applies bder for each character, left to right.
def bders (s: List[Char], r: ARexp) : ARexp =
  s.foldLeft(r)((current, ch) => bder(ch, current))
|
380 |
||
381 |
// Flattens one level of a list of alternatives: AZERO entries are dropped and the
// branches of a nested AALTS are spliced in, each fused with the nested node's bits.
def flats(rs: List[ARexp]): List[ARexp] = rs.flatMap {
  case AZERO => Nil
  case AALTS(bits, inner) => inner.map(fuse(bits, _))
  case other => List(other)
}
17 | 387 |
/* |
15 | 388 |
def remove(v: Val): Val = v match{ |
389 |
case Right(v1) => v1 |
|
390 |
case Left(v1) => v1 |
|
391 |
case _ => throw new Exception("Not removable") |
|
17 | 392 |
}*/ |
15 | 393 |
// Wraps `v` in `i` layers of Right, and in exactly one layer when i <= 1.
def augment(v: Val, i: Int): Val =
  (1 until i).foldLeft(Right(v): Val)((wrapped, _) => Right(wrapped))
394 |
//an overly complex version |
|
395 |
/* |
|
396 |
if(rel_dist >0){//the regex we are dealing with is not what v points at |
|
397 |
rs match{ |
|
398 |
case Nil => throw new Exception("Trying to simplify a non-existent value") |
|
399 |
case AZERO :: rs1 => flats_vsimp(rs1, rel_dist - 1, remove(v)) |
|
400 |
case AALTS(bs, rs1) :: rs2 => flats_vsimp(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2 |
|
401 |
case r1 :: rs2 => flats_vsimp(rs2, rel_dist - 1, v) |
|
402 |
} |
|
0 | 403 |
} |
15 | 404 |
else if(rel_dist == 0){//we are dealing with regex v is pointing to -- "v->r itself" |
405 |
rs match{//r1 cannot be zero |
|
406 |
AALTS(bs, rs1) :: rs2 => flats_vsimp( ) |
|
407 |
AZERO::rs2 => throw new Exception("Value corresponds to zero") |
|
408 |
r1::rs2 => flats_vsimp(rs2, rel_dist - 1, v) |
|
409 |
} |
|
410 |
||
411 |
} |
|
412 |
else{ |
|
413 |
||
414 |
} |
|
415 |
*/ |
|
416 |
// Index shift caused by flattening the first `position` entries of `rs`:
// each AZERO contributes -1, each AALTS contributes (number of its branches - 1),
// anything else contributes 0.
def flats_vsimp(rs: List[ARexp], position: Int): Int =
  if (position == 0) 0
  else rs match {
    case Nil => 0
    case AZERO :: rest => flats_vsimp(rest, position - 1) - 1
    case AALTS(_, inner) :: rest => inner.length - 1 + flats_vsimp(rest, position - 1)
    case _ :: rest => flats_vsimp(rest, position - 1)
  }
|
0 | 423 |
// Plain-regex counterpart of flats: drops ZERO entries and splices in the
// branches of nested ALTS (one level only).
def rflats(rs: List[Rexp]): List[Rexp] = rs.flatMap {
  case ZERO => Nil
  case ALTS(inner) => inner
  case other => List(other)
}
|
429 |
var flats_time = 0L |
|
430 |
var dist_time = 0L |
|
431 |
||
432 |
// Simplifies an annotated regex. Sequences collapse around AZERO and a leading
// AONE; alternatives are simplified branch-wise, flattened and de-duplicated
// (compared after erasing bits). Nothing is simplified under ASTAR.
def bsimp(r: ARexp): ARexp = r match {
  case AALTS(bs1, branches) =>
    val flattened = flats(branches.map(bsimp))
    distinctBy(flattened, erase) match {
      case Nil => AZERO
      case single :: Nil => fuse(bs1, single)
      case several => AALTS(bs1, several)
    }
  case ASEQ(bs1, r1, r2) =>
    val first = bsimp(r1)
    val second = bsimp(r2)
    (first, second) match {
      case (AZERO, _) => AZERO
      case (_, AZERO) => AZERO
      case (AONE(bs2), _) => fuse(bs1 ++ bs2, second)
      case _ => ASEQ(bs1, first, second)
    }
  case other => other
}
|
17 | 452 |
// Index of the branch of `rs` that the Left/Right-nested value `v` points at.
// The list `rs` is needed to know where the nesting of Rights stops.
def find_pos(v: Val, rs: List[ARexp]): Int = rs match {
  case _ :: Nil => 0
  case _ :: rest => v match {
    case Right(inner) => find_pos(inner, rest) + 1
    case Left(_) => 0
  }
}
|
458 |
// find_pos for a single regex: the branch index when `r` is an alternative, else 0.
def find_pos_aux(v: Val, r: ARexp): Int = r match {
  case AALTS(_, branches) => find_pos(v, branches)
  case _ => 0
}
17 | 462 |
// Strips the outer Left/Right layers that select a branch of the alternatives `rs`,
// returning the value for the selected branch itself. `rs` bounds how many
// Right layers belong to this alternative.
def remove(v: Val, rs: List[ARexp]) : Val = rs match {
  case _ :: Nil => v
  case _ :: rest => v match {
    case Right(inner) => remove(inner, rest)
    case Left(inner) => inner
  }
}
16 | 469 |
// False when a Left is reached while peeling off the Right layers of `v`;
// true when the peeling ends at any other value.
// (Explicit `return`s removed; each branch is the result expression.)
def simple_end(v: Val): Boolean = v match {
  case Left(_) => false
  case Right(inner) => simple_end(inner)
  case _ => true
}
|
17 | 474 |
// True when nothing survives flattening after index `position` in `rs`
// and `v` itself ends simply (see simple_end).
def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {
  val nothingAfter = flats(rs.drop(position + 1)).isEmpty
  simple_end(v) && nothingAfter
}
|
16 | 480 |
// Counterpart of remove: re-wraps `vs` in the Left/Right layers that `v` carries
// for the alternatives `rs`.
def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = v match {
  case Left(_) if rs.nonEmpty => Left(vs)
  case Right(inner) => rs match {
    case _ :: Nil => Right(vs)
    case _ :: rest => Right(get_coat(inner, rest, vs))
  }
}
|
485 |
// Wraps `v` in `i` layers of Right.
// A count of zero or less returns `v` unchanged. Before, a negative count (which
// bsimp2 can produce via `front_part.length - 1` on an empty list) kept recursing
// until the counter wrapped around.
@tailrec
def coat(v: Val, i: Int) : Val =
  if (i <= 0) v else coat(Right(v), i - 1)
|
17 | 489 |
//This version takes a regex and a value, return a simplified regex and its corresponding simplified value |
490 |
// Simplifies `r` like bsimp and, alongside, transforms the value `v` (a value for
// `r`) into a value for the simplified regex.
// NOTE(review): `undefined` is not defined in the visible part of this file; it is
// presumably a placeholder for "no value exists" - confirm where it comes from.
def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
  case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
    case ((AZERO, _), (_, _) )=> (AZERO, undefined)
    case ((_, _), (AZERO, _)) => (AZERO, undefined)
    // v2s alone is enough: the bits of the first component have been fused onto
    // the top level of the second regex.
    case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )
    case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s))
  }
  case (AALTS(bs1, rs), v) => {
    // phase 1: aalts(bs1, rs) => aalts(bs1, flat_res) and v => vf
    // index of the branch of rs that v selects
    val init_ind = find_pos(v, rs)
    // strip the outer Left/Right layers of v so that it matches rs(init_ind)
    val vs = bsimp2(rs(init_ind), remove(v, rs))
    val rs_simp = rs.map(bsimp)
    val vs_kernel = rs_simp(init_ind) match {
      // strip the second layer of Left/Right as well
      case AALTS(bs2, rs2) => remove(vs._2, rs2)
      case r => vs._2
    }
    val flat_res = flats(rs_simp)
    val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
    val r_s = rs_simp(init_ind)//or vs._1
    // how far flattening moves the selected branch
    val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind))
    val new_ind = init_ind + shift
    // intended invariant at this point: vf is a value for ALTS(bs1, flat_res)
    // NOTE(review): vf and r_s are computed but not used below.
    val vf = coat(vs_for_coating, new_ind)

    // phase 2: aalts(bs1, flat_res) => aalts(bs1, dist_res) and vf => vdb
    val dist_res = distinctBy(flat_res, erase)
    // de-duplicated prefix up to and including the selected branch
    val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
    // equal lengths mean the selected branch is the last one after de-duplication,
    // so it gets no Left wrapper
    val vdb = if(dist_res.length == front_part.length )
    {
      coat(vs_kernel, front_part.length - 1)
    }
    else{
      coat(Left(vs_kernel), front_part.length - 1)
    }
    // Author's note: vdb is assumed not to need further transformation because
    // this phase does not change the regex enough to affect the value; this
    // still needs verification.
    dist_res match {
      case Nil => (AZERO, undefined)
      case s :: Nil => (fuse(bs1, s), vdb)
      case rs => (AALTS(bs1, rs), vdb)
    }
  }
  // nothing is simplified under ASTAR or for atomic regexes
  case (r, v) => (r, v)
}
// The value component of bsimp2.
def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
|
554 |
/*This version was first intended for returning a function however a value would be simpler. |
|
15 | 555 |
def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{ |
556 |
case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match { |
|
557 |
case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
|
558 |
case ((_, _), (AZERO, _)) => (AZERO, undefined) |
|
559 |
case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } ) |
|
560 |
case ((r1s, f1), (r2s, f2)) => (ASEQ(bs1, r1s, r2s), lambda v => v match {case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))} |
|
561 |
} |
|
562 |
case AALTS(bs1, rs) => { |
|
563 |
val init_ind = find_pos(v, rs) |
|
564 |
val vs = bsimp2(rs[init_ind], remove(v, rs))//remove all the outer layers of left and right in v to match the regx rs[i] |
|
565 |
val rs_simp = rs.map(bsimp) |
|
566 |
val vs_kernel = rs_simp[init_ind] match { |
|
567 |
case AALTS(bs2, rs2) => remove(vs, rs_simp[init_ind])//remove the secondary layer of left and right |
|
568 |
case r => vs |
|
569 |
} |
|
570 |
val vs_for_coating = if(isend(vs, rs_simp, init_ind)) vs_kernel else Left(vs_kernel) |
|
571 |
||
572 |
val r_s = rs_simp[init_ind] |
|
573 |
val shift = flats_vsimp(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind]) |
|
574 |
val vf = coat(vs_for_coating, shift + init_ind) |
|
575 |
||
576 |
val flat_res = flats(rs_simp)//flats2 returns a list of regex and a single v |
|
577 |
val dist_res = distinctBy(flat_res, erase) |
|
578 |
dist_res match { |
|
579 |
case Nil => AZERO |
|
580 |
case s :: Nil => fuse(bs1, s) |
|
581 |
case rs => AALTS(bs1, rs) |
|
582 |
} |
|
583 |
} |
|
584 |
//case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
|
585 |
case r => r |
|
16 | 586 |
}*/ |
5 | 587 |
// A stronger variant of bsimp: in addition to bsimp's rules, a sequence whose
// simplified first component is an alternative is distributed over that
// alternative's branches.
def super_bsimp(r: ARexp): ARexp = r match {
  case AALTS(bs1, branches) =>
    val flattened = flats(branches.map(super_bsimp))
    distinctBy(flattened, erase) match {
      case Nil => AZERO
      case single :: Nil => fuse(bs1, single)
      case several => AALTS(bs1, several)
    }
  case ASEQ(bs1, r1, r2) =>
    val first = super_bsimp(r1)
    val second = super_bsimp(r2)
    (first, second) match {
      case (AZERO, _) => AZERO
      case (_, AZERO) => AZERO
      // original author's question: what if this has the form (r1, alts(rs))?
      case (AONE(bs2), _) => fuse(bs1 ++ bs2, second)
      case (AALTS(bs2, alts), _) =>
        AALTS(bs1 ++ bs2, alts.map {
          case AONE(bs3) => fuse(bs3, second)
          case alt => ASEQ(Nil, alt, second)
        })
      case _ => ASEQ(bs1, first, second)
    }
  case other => other
}
|
608 |
||
5 | 609 |
|
0 | 610 |
// A weaker simplification on plain regexes: in a sequence only the first component
// is simplified; alternatives are simplified branch-wise, flattened and
// de-duplicated; the body of a star is simplified too.
//
// The flattened list is now the one that gets de-duplicated. Before, it was
// computed and then ignored (`rs_simp.distinct`), so ZERO branches and nested
// alternatives survived and the `Nil => ZERO` case was only reachable for an
// empty input list.
// NOTE(review): confirm that skipping the flattening was not a deliberate part of
// the "weakening".
def simp_weakened(r: Rexp): Rexp = r match {
  case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
    case (ZERO, _) => ZERO
    case (_, ZERO) => ZERO
    case (ONE, r2s) => r2s
    case (r1s, r2s) => SEQ(r1s, r2s)
  }
  case ALTS(rs) =>
    val flat_res = rflats(rs.map(simp_weakened))
    flat_res.distinct match {
      case Nil => ZERO
      case single :: Nil => single
      case several => ALTS(several)
    }
  case STAR(inner) => STAR(simp_weakened(inner))
  case other => other
}
|
630 |
||
631 |
// Derivative with respect to a string, simplifying with bsimp after every character.
def bders_simp (s: List[Char], r: ARexp) : ARexp =
  s.foldLeft(r)((current, ch) => bsimp(bder(ch, current)))
|
14 | 635 |
//----------------------------------------------------------------------------experiment bsimp |
636 |
/* |
|
637 |
||
0 | 638 |
*/ |
639 |
/* |
|
640 |
def time[T](code: => T) = { |
|
641 |
val start = System.nanoTime() |
|
642 |
val result = code |
|
643 |
val end = System.nanoTime() |
|
644 |
println((end - start)/1.0e9) |
|
645 |
result |
|
646 |
} |
|
647 |
*/ |
|
648 |
// main unsimplified lexing function (produces a value) |
|
649 |
// Main unsimplified bit-coded lexer: takes the derivative for every character of
// `s` and returns the bit-code of the empty-string match of the final regex.
// Throws Exception("Not matched") when that regex is not nullable.
def blex(r: ARexp, s: List[Char]) : Bits = {
  val residual = s.foldLeft(r)((current, ch) => bder(ch, current))
  if (bnullable(residual)) mkepsBC(residual)
  else throw new Exception("Not matched")
}
|
656 |
||
657 |
// Bit-code produced by the unsimplified lexer for `s`, without decoding.
def bpre_lexing(r: Rexp, s: String) = {
  val annotated = internalise(r)
  blex(annotated, s.toList)
}
// Unsimplified bit-coded lexing: lex to bits, then decode them against `r`.
def blexing(r: Rexp, s: String) : Val = {
  val bits = blex(internalise(r), s.toList)
  decode(r, bits)
}
0 | 659 |
|
660 |
var bder_time = 0L |
|
661 |
var bsimp_time = 0L |
|
662 |
var mkepsBC_time = 0L |
|
663 |
var small_de = 2 |
|
664 |
var big_de = 5 |
|
665 |
var usual_de = 3 |
|
666 |
||
667 |
// Bit-coded lexer that simplifies with bsimp after every derivative step.
// Throws Exception("Not matched") when the final regex is not nullable.
def blex_simp(r: ARexp, s: List[Char]) : Bits = {
  val residual = s.foldLeft(r)((current, ch) => bsimp(bder(ch, current)))
  if (bnullable(residual)) mkepsBC(residual)
  else throw new Exception("Not matched")
}
|
5 | 681 |
// Bit-coded lexer that simplifies with super_bsimp after every derivative step.
// Throws Exception("Not matched") when the final regex is not nullable.
def super_blex_simp(r: ARexp, s: List[Char]): Bits = {
  val residual = s.foldLeft(r)((current, ch) => super_bsimp(bder(ch, current)))
  if (bnullable(residual)) mkepsBC(residual)
  else throw new Exception("Not matched")
}
|
0 | 692 |
// The simplified derivative of `r` with respect to `s` (the regex, not its bit-code).
def blex_real_simp(r: ARexp, s: List[Char]): ARexp =
  s.foldLeft(r)((current, ch) => bsimp(bder(ch, current)))
|
696 |
||
697 |
||
698 |
// size (number of nodes) of a regular expression, for testing purposes
|
699 |
/** Number of constructor nodes in a regular expression.
  *
  * Leaves (`ZERO`, `ONE`, `CHAR`) count as 1; every other constructor counts
  * as 1 plus the sizes of its sub-expressions.
  *
  * `RECD` is handled as well: it is a declared `Rexp` constructor, and the
  * previous match had no case for it, so `size` threw a `MatchError` on any
  * expression containing a record.
  *
  * @param r the regular expression to measure
  * @return the node count of `r`
  */
def size(r: Rexp): Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
  case ALTS(rs) => 1 + rs.map(size).sum
  case STAR(inner) => 1 + size(inner)
  // a record wraps exactly one sub-expression; count the wrapper like STAR
  case RECD(_, inner) => 1 + size(inner)
}
|
707 |
||
708 |
/** Size of an annotated regular expression: strips the annotations with
  * `erase` and counts the nodes of the result with `size`. */
def asize(a: ARexp) = {
  val plain = erase(a)
  size(plain)
}
|
709 |
||
710 |
||
711 |
// decoding does not work yet |
|
712 |
/** Lexes `s` against `r` using the simplifying lexer `blex_simp` on the
  * internalised expression, then decodes the bits against the original `r`. */
def blexing_simp(r: Rexp, s: String) =
  decode(r, blex_simp(internalise(r), s.toList))
|
716 |
/** Lexes `s` against `r` using `super_blex_simp` on the internalised
  * expression, then decodes the bits against the original `r`. */
def super_blexing_simp(r: Rexp, s: String) = {
  val bits = super_blex_simp(internalise(r), s.toList)
  decode(r, bits)
}
719 |
||
720 |
||
721 |
||
722 |
||
723 |
||
724 |
// Lexing Rules for a Small While Language |
|
725 |
||
726 |
//symbols |
|
727 |
/* |
|
728 |
val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_)) |
|
729 |
||
730 |
//digits |
|
731 |
val DIGIT = PRED("0123456789".contains(_)) |
|
732 |
//identifiers |
|
733 |
val ID = SYM ~ (SYM | DIGIT).% |
|
734 |
//numbers |
|
735 |
val NUM = STAR(DIGIT) |
|
736 |
//keywords |
|
737 |
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false" |
|
738 |
val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false")) |
|
739 |
//semicolons |
|
740 |
val SEMI: Rexp = ";" |
|
741 |
//operators |
|
742 |
val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" |
|
743 |
val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/")) |
|
744 |
//whitespaces |
|
745 |
val WHITESPACE = PLUS(" " | "\n" | "\t") |
|
746 |
//parentheses |
|
747 |
val RPAREN: Rexp = ")" |
|
748 |
val LPAREN: Rexp = "(" |
|
749 |
val BEGIN: Rexp = "{" |
|
750 |
val END: Rexp = "}" |
|
751 |
//strings...but probably needs not |
|
752 |
val STRING: Rexp = "\"" ~ SYM.% ~ "\"" |
|
753 |
||
754 |
||
755 |
||
756 |
val WHILE_REGS = (("k" $ KEYWORD) | |
|
757 |
("i" $ ID) | |
|
758 |
("o" $ OP) | |
|
759 |
("n" $ NUM) | |
|
760 |
("s" $ SEMI) | |
|
761 |
("str" $ STRING) | |
|
762 |
("p" $ (LPAREN | RPAREN)) | |
|
763 |
("b" $ (BEGIN | END)) | |
|
764 |
("w" $ WHITESPACE)).% |
|
765 |
||
766 |
val AWHILE_REGS = ( |
|
767 |
ALTS( |
|
768 |
List( |
|
769 |
("k" $ AKEYWORD), |
|
770 |
("i" $ ID), |
|
771 |
("o" $ AOP) , |
|
772 |
("n" $ NUM) , |
|
773 |
("s" $ SEMI) , |
|
774 |
("str" $ STRING), |
|
775 |
("p" $ (LPAREN | RPAREN)), |
|
776 |
("b" $ (BEGIN | END)), |
|
777 |
("w" $ WHITESPACE) |
|
778 |
) |
|
779 |
) |
|
780 |
).% |
|
781 |
||
782 |
*/ |
|
783 |
||
784 |
||
785 |
//--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING) |
|
786 |
/* |
|
787 |
// Two Simple While programs |
|
788 |
//======================== |
|
789 |
println("prog0 test") |
|
790 |
||
791 |
val prog0 = """read n""" |
|
792 |
println(env(lexing_simp(WHILE_REGS, prog0))) |
|
793 |
println(tokenise(WHILE_REGS, prog0)) |
|
794 |
||
795 |
println("prog1 test") |
|
796 |
||
797 |
val prog1 = """read n; write (n)""" |
|
798 |
println(tokenise(WHILE_REGS, prog1)) |
|
799 |
||
800 |
*/ |
|
801 |
// Bigger Tests |
|
802 |
//============== |
|
803 |
||
804 |
/** Renders `raw` by wrapping it in a reflection `Literal(Constant(..))` tree
  * and returning that tree's `toString`.
  *
  * @param raw the string to render
  * @return the string form of the literal tree built from `raw`
  */
def escape(raw: String): String = {
  import scala.reflect.runtime.{universe => ru}
  val literal = ru.Literal(ru.Constant(raw))
  literal.toString
}
|
808 |
||
809 |
// Sample While-language program text: reads n, iterates with minus1/minus2
// while n > 0, then writes the result. Referenced only by the commented-out
// lexer experiments in this section.
val prog2 = """ |

810 |
write "Fib"; |

811 |
read n; |

812 |
minus1 := 0; |

813 |
minus2 := 1; |

814 |
while n > 0 do { |

815 |
temp := minus2; |

816 |
minus2 := minus1 + minus2; |

817 |
minus1 := temp; |

818 |
n := n - 1 |

819 |
}; |

820 |
write "Result"; |

821 |
write minus2 |

822 |
""" |
|
823 |
||
824 |
// Sample While-language program text: three nested countdown loops over
// x, y and z, each starting from `start` (1000). Referenced only by the
// commented-out tokenise experiment in this section.
val prog3 = """ |

825 |
start := 1000; |

826 |
x := start; |

827 |
y := start; |

828 |
z := start; |

829 |
while 0 < x do { |

830 |
while 0 < y do { |

831 |
while 0 < z do { |

832 |
z := z - 1 |

833 |
}; |

834 |
z := start; |

835 |
y := y - 1 |

836 |
}; |

837 |
y := start; |

838 |
x := x - 1 |

839 |
} |

840 |
""" |
|
841 |
/* |
|
842 |
for(i <- 400 to 400 by 1){ |
|
843 |
println(i+":") |
|
844 |
blexing_simp(WHILE_REGS, prog2 * i) |
|
845 |
} */ |
|
846 |
||
847 |
/* |
|
848 |
for (i <- 2 to 5){ |
|
849 |
for(j <- 1 to 3){ |
|
850 |
println(i,j) |
|
851 |
small_de = i |
|
852 |
usual_de = i + j |
|
853 |
big_de = i + 2*j |
|
854 |
blexing_simp(AWHILE_REGS, prog2 * 100) |
|
855 |
} |
|
856 |
}*/ |
|
857 |
||
858 |
/* |
|
859 |
println("Tokens of prog2") |
|
860 |
println(tokenise(WHILE_REGS, prog2).mkString("\n")) |
|
861 |
||
862 |
val fib_tokens = tokenise(WHILE_REGS, prog2) |
|
863 |
fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n") |
|
864 |
||
865 |
||
866 |
val test_tokens = tokenise(WHILE_REGS, prog3) |
|
867 |
test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n") |
|
868 |
*/ |
|
869 |
||
870 |
/* |
|
871 |
println("time test for blexing_simp") |
|
872 |
for (i <- 1 to 1 by 1) { |
|
873 |
lexing_simp(WHILE_REGS, prog2 * i) |
|
874 |
blexing_simp(WHILE_REGS, prog2 * i) |
|
875 |
for( j <- 0 to each_simp_timeb.length - 1){ |
|
876 |
if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 ) |
|
877 |
println(j, each_simp_timeb(j), each_simp_time(j)) |
|
878 |
} |
|
879 |
} |
|
880 |
*/ |
|
881 |
||
882 |
||
883 |
//--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING) |
|
884 |
||
885 |
||
886 |
||
887 |
/** Hook called at the start of a test run. It currently prints only the
  * empty string; the escape-sequence variant below is disabled. */
def clear() = {
  //print("\33[H\33[2J")
  print("")
}
|
891 |
||
892 |
//testing the two lexings produce the same value |
|
893 |
//enumerates all strings of length at most n over alphabet cs
|
894 |
/** All strings of length at most `n` over the alphabet `cs`.
  *
  * The result always contains the empty string. Each round extends every
  * string found so far by one character of `cs` at the front and keeps the
  * shorter strings too, so after `n` rounds all lengths `0..n` are present.
  *
  * Implemented as a fold rather than recursion so that it is stack-safe. A
  * negative `n` is rejected up front; the recursive version never reached
  * its base case for negative `n` and died with a `StackOverflowError`.
  *
  * @param n  maximum string length; must be non-negative
  * @param cs the alphabet; each character is one symbol
  * @return the set of all strings over `cs` with length between 0 and `n`
  * @throws IllegalArgumentException if `n` is negative
  */
def strs(n: Int, cs: String): Set[String] = {
  require(n >= 0, s"strs: length bound must be non-negative, got $n")
  (1 to n).foldLeft(Set("")) { (shorter, _) =>
    shorter ++ (for (s <- shorter; c <- cs.toList) yield s"$c$s")
  }
}
|
902 |
/** Lazily enumerates regular expressions over the characters of `s`.
  *
  * Level 0 is `ZERO`, `ONE` and one `CHAR` per character of `s`. Level `n`
  * is everything from level `n - 1`, followed by all `ALT` and `SEQ` pairs
  * and all `STAR`s built from level `n - 1`.
  *
  * @param n nesting level to enumerate up to
  * @param s characters used for the `CHAR` leaves
  * @return a stream of the expressions, lower levels first
  */
def enum(n: Int, s: String): Stream[Rexp] = n match {
  case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
  case depth =>
    val smaller = enum(depth - 1, s)
    // the combinations stay inside the #::: chain so they are built only on demand
    smaller #:::
      smaller.flatMap(left => smaller.map(right => ALT(left, right))) #:::
      smaller.flatMap(left => smaller.map(right => SEQ(left, right))) #:::
      smaller.map(inner => STAR(inner))
}
|
912 |
||
913 |
//tests blexing and lexing |
|
914 |
/** Cross-checks `lexing_simp` against `super_blexing_simp` on every string
  * of `ss`, in parallel.
  *
  * Each lexer call is wrapped in `Try`, so a lexer that throws counts as
  * "no result" (`None`). Disagreements are printed and returned.
  *
  * @param ss input strings to lex
  * @param r  regular expression under test
  * @return one entry per string: `Some((r, s))` where the lexers disagree, otherwise `None`
  */
def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
  clear()
  //println(s"Testing ${r}")
  ss.par.map { s =>
    val res1 = Try(lexing_simp(r, s)).toOption
    val res2 = Try(super_blexing_simp(r, s)).toOption
    if (res1 == res2) None
    else {
      println(s"Disagree on ${r} and ${s}")
      println(s" ${res1} != ${res2}")
      Some((r, s))
    }
  }
}
|
925 |
||
926 |
||
927 |
||
5 | 928 |
|
0 | 929 |
/* |
930 |
def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = { |
|
931 |
for (s <- ss){ |
|
932 |
||
933 |
val der_res = bder(c, ar) |
|
934 |
val simp_res = bsimp(der_res) |
|
935 |
println(asize(der_res)) |
|
936 |
println(asize(simp_res)) |
|
937 |
single_expression_explorer(simp_res, (sc - c)) |
|
938 |
} |
|
939 |
}*/ |
|
940 |
||
941 |
//single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c')) |
|
942 |
||
943 |
||
944 |
} |
|
945 |
||
946 |
import Rexp.Bits |
|
947 |
// Annotated regular expressions: every constructor except AZERO carries a
// bit list `bs` (Rexp.Bits) in addition to its sub-expressions.
abstract class ARexp |

948 |
// annotated expression with no annotation and no sub-expression
case object AZERO extends ARexp |

949 |
// leaf carrying only bits
case class AONE(bs: Bits) extends ARexp |

950 |
// leaf carrying bits and a single character
case class ACHAR(bs: Bits, f: Char) extends ARexp |

951 |
// n-ary alternative over the annotated expressions in `rs`
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |

952 |
// sequence of two annotated expressions
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |

953 |
// star over one annotated expression
case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
|
954 |
||
955 |
||
956 |
||
957 |
// Values: the result type of the lexing functions (e.g. `blexing` returns a Val).
abstract class Val |

958 |
// value with no content
case object Empty extends Val |

959 |
// value holding a single character
case class Chr(c: Char) extends Val |

960 |
// pair of two values
case class Sequ(v1: Val, v2: Val) extends Val |

961 |
// NOTE: within this file, Left and Right shadow the standard library's Either constructors
case class Left(v: Val) extends Val |

962 |
case class Right(v: Val) extends Val |

963 |
// list of values
case class Stars(vs: List[Val]) extends Val |

964 |
// value `v` labelled with the string `x`
case class Rec(x: String, v: Val) extends Val |

17 | 965 |
// NOTE(review): purpose not visible here — presumably a placeholder value; confirm against its users
case object undefined extends Val |
0 | 966 |
//case class Pos(i: Int, v: Val) extends Val |
967 |
// NOTE(review): purpose not visible here — confirm against its users
case object Prd extends Val |