49 case class ACHARSET(bs: Bits, f: Char => Boolean) extends ARexp |
49 case class ACHARSET(bs: Bits, f: Char => Boolean) extends ARexp |
50 |
50 |
51 // an abbreviation for binary alternatives |
51 // an abbreviation for binary alternatives |
52 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) |
52 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) |
53 |
53 |
54 abstract class Val |
54 |
55 case object Empty extends Val |
|
56 case class Chr(c: Char) extends Val |
|
57 case class Sequ(v1: Val, v2: Val) extends Val |
|
58 case class Left(v: Val) extends Val |
|
59 case class Right(v: Val) extends Val |
|
60 case class Stars(vs: List[Val]) extends Val |
|
61 case class Recd(x: String, v: Val) extends Val |
|
62 |
|
63 // some convenience for typing in regular expressions |
55 // some convenience for typing in regular expressions |
64 def charlist2rexp(s: List[Char]): Rexp = s match { |
56 def charlist2rexp(s: List[Char]): Rexp = s match { |
65 case Nil => ONE |
57 case Nil => ONE |
66 case c::Nil => CHAR(c) |
58 case c::Nil => CHAR(c) |
67 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
59 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
132 |
113 |
133 // example |
114 // example |
134 // internalise(("a" | "ab") ~ ("b" | "")) |
115 // internalise(("a" | "ab") ~ ("b" | "")) |
135 |
116 |
136 |
117 |
137 // decoding of a value from a bitsequence |
|
138 // (this is not tail-recursive and therefore a potential bottleneck) |
|
139 def vdecode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { |
|
140 case (ONE, bs) => (Empty, bs) |
|
141 case (ALT(r1, r2), Z::bs) => { |
|
142 val (v, bs1) = vdecode_aux(r1, bs) |
|
143 (Left(v), bs1) |
|
144 } |
|
145 case (ALT(r1, r2), S::bs) => { |
|
146 val (v, bs1) = vdecode_aux(r2, bs) |
|
147 (Right(v), bs1) |
|
148 } |
|
149 case (SEQ(r1, r2), bs) => { |
|
150 val (v1, bs1) = vdecode_aux(r1, bs) |
|
151 val (v2, bs2) = vdecode_aux(r2, bs1) |
|
152 (Sequ(v1, v2), bs2) |
|
153 } |
|
154 case (STAR(r1), Z::bs) => { |
|
155 val (v, bs1) = vdecode_aux(r1, bs) |
|
156 val (Stars(vs), bs2) = vdecode_aux(STAR(r1), bs1) |
|
157 (Stars(v::vs), bs2) |
|
158 } |
|
159 case (STAR(_), S::bs) => (Stars(Nil), bs) |
|
160 case (RECD(s, r1), bs) => |
|
161 val (v, bs1) = vdecode_aux(r1, bs) |
|
162 (Recd(s, v), bs1) |
|
163 case (CHARSET(_), C(c)::bs) => (Chr(c), bs) |
|
164 } |
|
165 |
|
166 def vdecode(r: Rexp, bs: Bits) = vdecode_aux(r, bs) match { |
|
167 case (v, Nil) => v |
|
168 case _ => throw new Exception("Not decodable") |
|
169 } |
|
170 |
|
171 // decoding of sequence of string tokens from a bitsequence |
118 // decoding of sequence of string tokens from a bitsequence |
172 // tail-recursive version using an accumulator (alternative for |
119 // tail-recursive version using an accumulator |
173 // vdecode) |
|
174 @tailrec |
120 @tailrec |
175 def sdecode_aux(rs: List[Rexp], bs: Bits, acc: List[String]) : List[String] = (rs, bs) match { |
121 def sdecode_aux(rs: List[Rexp], bs: Bits, acc: List[String]) : List[String] = (rs, bs) match { |
176 case (Nil, _) => acc |
122 case (Nil, _) => acc |
177 case (_, Nil) => acc |
123 case (_, Nil) => acc |
178 case (ONE::rest, bs) => sdecode_aux(rest, bs, acc) |
124 case (ONE::rest, bs) => sdecode_aux(rest, bs, acc) |
281 case Nil => if (bnullable(r)) bmkeps(r) |
227 case Nil => if (bnullable(r)) bmkeps(r) |
282 else throw new Exception("Not matched") |
228 else throw new Exception("Not matched") |
283 case c::cs => blex_simp(bsimp(bder(c, r)), cs) |
229 case c::cs => blex_simp(bsimp(bder(c, r)), cs) |
284 } |
230 } |
285 |
231 |
286 // blexing_simp decodes a value from the bitsequence (potentially slow) |
232 // blexing_simp decodes a string-list from the bitsequence |
287 // blexing2_simp decodes a string-list from the bitsequence |
233 def blexing_simp(r: Rexp, s: String) : List[String] = |
288 def blexing_simp(r: Rexp, s: String) : Val = |
|
289 vdecode(r, blex_simp(internalise(r), s.toList)) |
|
290 |
|
291 def blexing2_simp(r: Rexp, s: String) : List[String] = |
|
292 sdecode(r, blex_simp(internalise(r), s.toList)) |
234 sdecode(r, blex_simp(internalise(r), s.toList)) |
293 |
235 |
294 |
236 |
295 //println(blexing_simp(reg, "aab")) |
237 //println(blexing_simp(reg, "aab")) |
296 |
238 |
297 |
239 |
298 // extracts a string from value |
240 |
299 def flatten(v: Val) : String = v match { |
241 def size(r: Rexp) : Int = r match { |
300 case Empty => "" |
242 case ZERO => 1 |
301 case Chr(c) => c.toString |
243 case ONE => 1 |
302 case Left(v) => flatten(v) |
244 case CHARSET(_) => 1 |
303 case Right(v) => flatten(v) |
245 case ALT(r1, r2) => 1 + size(r1) + size(r2) |
304 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
246 case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
305 case Stars(vs) => vs.map(flatten).mkString |
247 case STAR(r) => 1 + size(r) |
306 } |
248 case RECD(_, r) => 1 + size(r) |
307 |
249 } |
308 // extracts an environment from a value |
250 |
309 def env(v: Val) : List[(String, String)] = v match { |
251 def bsize(r: ARexp) = size(erase(r)) |
310 case Empty => Nil |
|
311 case Chr(c) => Nil |
|
312 case Left(v) => env(v) |
|
313 case Right(v) => env(v) |
|
314 case Sequ(v1, v2) => env(v1) ::: env(v2) |
|
315 case Stars(vs) => vs.flatMap(env) |
|
316 case Recd(x, v) => (x, flatten(v))::env(v) |
|
317 } |
|
318 |
|
319 def bsize(a: ARexp) = size(erase(a)) |
|
320 |
252 |
321 // Some Tests |
253 // Some Tests |
322 //============ |
254 //============ |
323 |
|
324 |
255 |
325 def time_needed[T](i: Int, code: => T) = { |
256 def time_needed[T](i: Int, code: => T) = { |
326 val start = System.nanoTime() |
257 val start = System.nanoTime() |
327 for (j <- 1 to i) code |
258 for (j <- 1 to i) code |
328 val end = System.nanoTime() |
259 val end = System.nanoTime() |
329 (end - start)/(i * 1.0e9) |
260 (end - start)/(i * 1.0e9) |
330 } |
261 } |
331 |
262 |
|
263 val ones = SEQ(SEQ(CHAR('/'), CHAR('*')), SEQ(STAR(CHAR('1')), SEQ(CHAR('*'), CHAR('/')))) |
|
264 println("sizes unsimplified") |
|
265 println(bsize(bders("/*".toList, internalise(ones)))) // 12 |
|
266 println(bsize(bders("/*1".toList, internalise(ones)))) // 25 |
|
267 println(bsize(bders("/*11".toList, internalise(ones)))) // 34 |
|
268 println(bsize(bders("/*111".toList, internalise(ones)))) // 43 |
|
269 println(bsize(bders("/*1111".toList, internalise(ones)))) // 52 |
|
270 println("sizes simplified") |
|
271 println(bsize(bsimp(bders("/*".toList, internalise(ones))))) // 6 |
|
272 println(bsize(bsimp(bders("/*1".toList, internalise(ones))))) // 6 |
|
273 println(bsize(bsimp(bders("/*11".toList, internalise(ones))))) // 6 |
|
274 println(bsize(bsimp(bders("/*111".toList, internalise(ones))))) // 6 |
|
275 println(bsize(bsimp(bders("/*1111".toList, internalise(ones))))) // 6 |
|
276 |
|
277 println("ones:") |
|
278 for(i <- 0 to 10000 by 1000) { |
|
279 println(s"$i: ${time_needed(1, blexing_simp(ones, "/*" ++ "1" * i ++ "*/"))}") |
|
280 } |
|
281 |
|
282 |
|
283 |
|
284 |
|
285 System.exit(1) |
|
286 |
332 val evil1 = STAR(STAR("a")) ~ "b" |
287 val evil1 = STAR(STAR("a")) ~ "b" |
333 val evil2 = STAR(STAR(STAR("a"))) ~ "b" |
288 val evil2 = STAR(STAR(STAR("a"))) ~ "b" |
334 val evil3 = STAR("aa" | "a") |
289 val evil3 = STAR("aa" | "a") |
335 |
290 |
336 /* |
|
337 println("evil1") |
291 println("evil1") |
338 for(i <- 0 to 10000 by 1000) { |
292 for(i <- 0 to 10000 by 1000) { |
339 println(time_needed(1, blexing2_simp(evil1, "a"*i ++ "b"))) |
293 println(time_needed(1, blexing_simp(evil1, "a" * i ++ "b"))) |
340 } |
294 } |
341 */ |
295 |
342 |
296 |
343 /* |
|
344 println("evil2") |
297 println("evil2") |
345 for(i <- 0 to 10000 by 1000) { |
298 for(i <- 0 to 10000 by 1000) { |
346 println(time_needed(1, blexing2_simp(evil2, "a"*i ++ "b"))) |
299 println(time_needed(1, blexing_simp(evil2, "a" * i ++ "b"))) |
347 } |
300 } |
348 */ |
301 |
349 |
302 |
350 /* |
|
351 println("evil3") |
303 println("evil3") |
352 for(i <- 0 to 10000 by 1000) { |
304 for(i <- 0 to 10000 by 1000) { |
353 println(time_needed(1, blexing2_simp(evil3, "a"*i))) |
305 println(time_needed(1, blexing_simp(evil3, "a" * i))) |
354 } |
306 } |
355 */ |
|
356 |
307 |
357 // WHILE LANGUAGE |
308 // WHILE LANGUAGE |
358 //================ |
309 //================ |
359 def PLUS(r: Rexp) = r ~ r.% |
310 def PLUS(r: Rexp) = r ~ r.% |
360 def RANGE(s: String) = CHARSET(s.toSet) |
311 def RANGE(s: String) = CHARSET(s.toSet) |
382 ("w" $ WHITESPACE)).% |
333 ("w" $ WHITESPACE)).% |
383 |
334 |
384 |
335 |
385 // Some Simple While Tests |
336 // Some Simple While Tests |
386 //======================== |
337 //======================== |
|
338 println("WHILE TESTS") |
|
339 |
|
340 |
387 |
341 |
388 val prog0 = """read n""" |
342 val prog0 = """read n""" |
389 println(s"test: $prog0") |
343 println(s"test: $prog0") |
390 println(env(blexing_simp(WHILE_REGS, prog0))) |
344 println(blexing_simp(WHILE_REGS, prog0)) |
391 println(blexing2_simp(WHILE_REGS, prog0)) |
|
392 |
345 |
393 val prog1 = """read n; write n""" |
346 val prog1 = """read n; write n""" |
394 println(s"test: $prog1") |
347 println(s"test: $prog1") |
395 println(env(blexing_simp(WHILE_REGS, prog1))) |
348 println(blexing_simp(WHILE_REGS, prog1)) |
396 println(blexing2_simp(WHILE_REGS, prog1)) |
|
397 |
349 |
398 val prog2 = """ |
350 val prog2 = """ |
399 write "Fib"; |
351 write "Fib"; |
400 read n; |
352 read n; |
401 minus1 := 0; |
353 minus1 := 0; |