|
1 // A simple lexer inspired by work of Sulzmann & Lu |
|
2 //================================================== |
|
3 // |
|
4 // Call the test cases with |
|
5 // |
|
6 // amm lexer.sc small |
|
7 // amm lexer.sc fib |
|
8 // amm lexer.sc loops |
|
9 // amm lexer.sc email |
|
10 // |
|
11 // amm lexer.sc all |
|
12 |
|
13 |
|
14 |
|
15 // regular expressions including records |
|
16 abstract class Rexp |
|
17 case object ZERO extends Rexp |
|
18 case object ONE extends Rexp |
|
19 case object ANYCHAR extends Rexp |
|
20 case class CHAR(c: Char) extends Rexp |
|
21 case class ALTS(r1: Rexp, r2: Rexp) extends Rexp |
|
22 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
23 case class STAR(r: Rexp) extends Rexp |
|
24 case class RECD(x: String, r: Rexp) extends Rexp |
|
25 case class NTIMES(n: Int, r: Rexp) extends Rexp |
|
26 case class OPTIONAL(r: Rexp) extends Rexp |
|
27 case class NOT(r: Rexp) extends Rexp |
|
28 // records for extracting strings or tokens |
|
29 |
|
30 // values |
|
31 abstract class Val |
|
32 case object Failure extends Val |
|
33 case object Empty extends Val |
|
34 case class Chr(c: Char) extends Val |
|
35 case class Sequ(v1: Val, v2: Val) extends Val |
|
36 case class Left(v: Val) extends Val |
|
37 case class Right(v: Val) extends Val |
|
38 case class Stars(vs: List[Val]) extends Val |
|
39 case class Rec(x: String, v: Val) extends Val |
|
40 case class Ntime(vs: List[Val]) extends Val |
|
41 case class Optionall(v: Val) extends Val |
|
42 case class Nots(s: String) extends Val |
|
43 |
|
44 |
|
45 |
|
46 abstract class Bit |
|
47 case object Z extends Bit |
|
48 case object S extends Bit |
|
49 |
|
50 |
|
51 type Bits = List[Bit] |
|
52 |
|
53 abstract class ARexp |
|
54 case object AZERO extends ARexp |
|
55 case class AONE(bs: Bits) extends ARexp |
|
56 case class ACHAR(bs: Bits, c: Char) extends ARexp |
|
57 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
|
58 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
|
59 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
|
60 case class ANOT(bs: Bits, r: ARexp) extends ARexp |
|
61 case class AANYCHAR(bs: Bits) extends ARexp |
|
62 |
|
63 import scala.util.Try |
|
64 |
|
65 trait Generator[+T] { |
|
66 self => // an alias for "this" |
|
67 def generate(): T |
|
68 |
|
69 def gen(n: Int) : List[T] = |
|
70 if (n == 0) Nil else self.generate() :: gen(n - 1) |
|
71 |
|
72 def map[S](f: T => S): Generator[S] = new Generator[S] { |
|
73 def generate = f(self.generate()) |
|
74 } |
|
75 def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] { |
|
76 def generate = f(self.generate()).generate() |
|
77 } |
|
78 |
|
79 |
|
80 } |
|
81 |
|
82 // tests a property according to a given random generator |
|
83 def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) { |
|
84 for (_ <- 0 until amount) { |
|
85 val value = r.generate() |
|
86 assert(pred(value), s"Test failed for: $value") |
|
87 } |
|
88 println(s"Test passed $amount times") |
|
89 } |
|
90 def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) { |
|
91 for (_ <- 0 until amount) { |
|
92 val valueR = r.generate() |
|
93 val valueS = s.generate() |
|
94 assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS") |
|
95 } |
|
96 println(s"Test passed $amount times") |
|
97 } |
|
98 |
|
99 // random integers |
|
100 val integers = new Generator[Int] { |
|
101 val rand = new java.util.Random |
|
102 def generate() = rand.nextInt() |
|
103 } |
|
104 |
|
105 // random booleans |
|
106 val booleans = integers.map(_ > 0) |
|
107 |
|
108 // random integers in the range lo and high |
|
109 def range(lo: Int, hi: Int): Generator[Int] = |
|
110 for (x <- integers) yield (lo + x.abs % (hi - lo)).abs |
|
111 |
|
112 // random characters |
|
113 def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar) |
|
114 val chars = chars_range('a', 'z') |
|
115 |
|
116 |
|
117 def oneOf[T](xs: T*): Generator[T] = |
|
118 for (idx <- range(0, xs.length)) yield xs(idx) |
|
119 |
|
120 def single[T](x: T) = new Generator[T] { |
|
121 def generate() = x |
|
122 } |
|
123 |
|
124 def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] = |
|
125 for (x <- t; y <- u) yield (x, y) |
|
126 |
|
127 // lists |
|
128 def emptyLists = single(Nil) |
|
129 |
|
130 def nonEmptyLists : Generator[List[Int]] = |
|
131 for (head <- integers; tail <- lists) yield head :: tail |
|
132 |
|
133 def lists: Generator[List[Int]] = for { |
|
134 kind <- booleans |
|
135 list <- if (kind) emptyLists else nonEmptyLists |
|
136 } yield list |
|
137 |
|
138 def char_list(len: Int): Generator[List[Char]] = { |
|
139 if(len <= 0) single(Nil) |
|
140 else{ |
|
141 for { |
|
142 c <- chars_range('a', 'c') |
|
143 tail <- char_list(len - 1) |
|
144 } yield c :: tail |
|
145 } |
|
146 } |
|
147 |
|
148 def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString |
|
149 |
|
150 def sampleString(r: Rexp) : List[String] = r match { |
|
151 case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions |
|
152 case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) ) |
|
153 case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up") |
|
154 case ONE => "" :: Nil |
|
155 case ZERO => Nil |
|
156 case CHAR(c) => c.toString :: Nil |
|
157 |
|
158 } |
|
159 |
|
160 def stringsFromRexp(r: Rexp) : List[String] = |
|
161 breakIntoTerms(r).flatMap(r => sampleString(r)) |
|
162 |
|
163 |
|
164 // (simple) binary trees |
|
165 trait Tree[T] |
|
166 case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T] |
|
167 case class Leaf[T](x: T) extends Tree[T] |
|
168 |
|
169 def leafs[T](t: Generator[T]): Generator[Leaf[T]] = |
|
170 for (x <- t) yield Leaf(x) |
|
171 |
|
172 def inners[T](t: Generator[T]): Generator[Inner[T]] = |
|
173 for (l <- trees(t); r <- trees(t)) yield Inner(l, r) |
|
174 |
|
175 def trees[T](t: Generator[T]): Generator[Tree[T]] = |
|
176 for (kind <- range(0, 2); |
|
177 tree <- if (kind == 0) leafs(t) else inners(t)) yield tree |
|
178 |
|
179 // regular expressions |
|
180 |
|
181 // generates random leaf-regexes; prefers CHAR-regexes |
|
182 def leaf_rexp() : Generator[Rexp] = |
|
183 for (kind <- range(0, 5); |
|
184 c <- chars_range('a', 'd')) yield |
|
185 kind match { |
|
186 case 0 => ZERO |
|
187 case 1 => ONE |
|
188 case _ => CHAR(c) |
|
189 } |
|
190 |
|
191 // generates random inner regexes with maximum depth d |
|
192 def inner_rexp(d: Int) : Generator[Rexp] = |
|
193 for (kind <- range(0, 3); |
|
194 l <- rexp(d); |
|
195 r <- rexp(d)) |
|
196 yield kind match { |
|
197 case 0 => ALTS(l, r) |
|
198 case 1 => SEQ(l, r) |
|
199 case 2 => STAR(r) |
|
200 } |
|
201 |
|
202 // generates random regexes with maximum depth d; |
|
203 // prefers inner regexes in 2/3 of the cases |
|
204 def rexp(d: Int = 100): Generator[Rexp] = |
|
205 for (kind <- range(0, 3); |
|
206 r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r |
|
207 |
|
208 |
|
209 // some test functions for rexps |
|
210 def height(r: Rexp) : Int = r match { |
|
211 case ZERO => 1 |
|
212 case ONE => 1 |
|
213 case CHAR(_) => 1 |
|
214 case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max |
|
215 case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max |
|
216 case STAR(r) => 1 + height(r) |
|
217 } |
|
218 |
|
219 // def size(r: Rexp) : Int = r match { |
|
220 // case ZERO => 1 |
|
221 // case ONE => 1 |
|
222 // case CHAR(_) => 1 |
|
223 // case ALTS(r1, r2) => 1 + size(r1) + size(r2) |
|
224 // case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
225 // case STAR(r) => 1 + size(r) |
|
226 // } |
|
227 |
|
228 // randomly subtracts 1 or 2 from the STAR case |
|
229 def size_faulty(r: Rexp) : Int = r match { |
|
230 case ZERO => 1 |
|
231 case ONE => 1 |
|
232 case CHAR(_) => 1 |
|
233 case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2) |
|
234 case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2) |
|
235 case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate |
|
236 } |
|
237 |
|
238 |
|
239 |
|
240 // some convenience for typing in regular expressions |
|
241 |
|
242 def charlist2rexp(s : List[Char]): Rexp = s match { |
|
243 case Nil => ONE |
|
244 case c::Nil => CHAR(c) |
|
245 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
246 } |
|
247 implicit def string2rexp(s : String) : Rexp = |
|
248 charlist2rexp(s.toList) |
|
249 |
|
250 implicit def RexpOps(r: Rexp) = new { |
|
251 def | (s: Rexp) = ALTS(r, s) |
|
252 def % = STAR(r) |
|
253 def ~ (s: Rexp) = SEQ(r, s) |
|
254 } |
|
255 |
|
256 implicit def stringOps(s: String) = new { |
|
257 def | (r: Rexp) = ALTS(s, r) |
|
258 def | (r: String) = ALTS(s, r) |
|
259 def % = STAR(s) |
|
260 def ~ (r: Rexp) = SEQ(s, r) |
|
261 def ~ (r: String) = SEQ(s, r) |
|
262 def $ (r: Rexp) = RECD(s, r) |
|
263 } |
|
264 |
|
265 def nullable(r: Rexp) : Boolean = r match { |
|
266 case ZERO => false |
|
267 case ONE => true |
|
268 case CHAR(_) => false |
|
269 case ANYCHAR => false |
|
270 case ALTS(r1, r2) => nullable(r1) || nullable(r2) |
|
271 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
272 case STAR(_) => true |
|
273 case RECD(_, r1) => nullable(r1) |
|
274 case NTIMES(n, r) => if (n == 0) true else nullable(r) |
|
275 case OPTIONAL(r) => true |
|
276 case NOT(r) => !nullable(r) |
|
277 } |
|
278 |
|
279 def der(c: Char, r: Rexp) : Rexp = r match { |
|
280 case ZERO => ZERO |
|
281 case ONE => ZERO |
|
282 case CHAR(d) => if (c == d) ONE else ZERO |
|
283 case ANYCHAR => ONE |
|
284 case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2)) |
|
285 case SEQ(r1, r2) => |
|
286 if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2)) |
|
287 else SEQ(der(c, r1), r2) |
|
288 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
289 case RECD(_, r1) => der(c, r1) |
|
290 case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO |
|
291 case OPTIONAL(r) => der(c, r) |
|
292 case NOT(r) => NOT(der(c, r)) |
|
293 } |
|
294 |
|
295 |
|
296 // extracts a string from a value |
|
297 def flatten(v: Val) : String = v match { |
|
298 case Empty => "" |
|
299 case Chr(c) => c.toString |
|
300 case Left(v) => flatten(v) |
|
301 case Right(v) => flatten(v) |
|
302 case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) |
|
303 case Stars(vs) => vs.map(flatten).mkString |
|
304 case Ntime(vs) => vs.map(flatten).mkString |
|
305 case Optionall(v) => flatten(v) |
|
306 case Rec(_, v) => flatten(v) |
|
307 } |
|
308 |
|
309 |
|
310 // extracts an environment from a value; |
|
311 // used for tokenising a string |
|
312 def env(v: Val) : List[(String, String)] = v match { |
|
313 case Empty => Nil |
|
314 case Chr(c) => Nil |
|
315 case Left(v) => env(v) |
|
316 case Right(v) => env(v) |
|
317 case Sequ(v1, v2) => env(v1) ::: env(v2) |
|
318 case Stars(vs) => vs.flatMap(env) |
|
319 case Ntime(vs) => vs.flatMap(env) |
|
320 case Rec(x, v) => (x, flatten(v))::env(v) |
|
321 case Optionall(v) => env(v) |
|
322 case Nots(s) => ("Negative", s) :: Nil |
|
323 } |
|
324 |
|
325 |
|
326 // The injection and mkeps part of the lexer |
|
327 //=========================================== |
|
328 |
|
329 def mkeps(r: Rexp) : Val = r match { |
|
330 case ONE => Empty |
|
331 case ALTS(r1, r2) => |
|
332 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
|
333 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
|
334 case STAR(r) => Stars(Nil) |
|
335 case RECD(x, r) => Rec(x, mkeps(r)) |
|
336 case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r))) |
|
337 case OPTIONAL(r) => Optionall(Empty) |
|
338 case NOT(rInner) => if(nullable(rInner)) throw new Exception("error") |
|
339 else Nots("")//Nots(s.reverse.toString) |
|
340 // case NOT(ZERO) => Empty |
|
341 // case NOT(CHAR(c)) => Empty |
|
342 // case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2))) |
|
343 // case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2))) |
|
344 // case NOT(STAR(r)) => Stars(Nil) |
|
345 |
|
346 } |
|
347 |
|
348 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
|
349 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
|
350 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
|
351 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
|
352 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
|
353 case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
|
354 case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
|
355 case (CHAR(d), Empty) => Chr(c) |
|
356 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
|
357 case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs) |
|
358 case (OPTIONAL(r), v) => Optionall(inj(r, c, v)) |
|
359 case (NOT(r), Nots(s)) => Nots(c.toString ++ s) |
|
360 case (ANYCHAR, Empty) => Chr(c) |
|
361 } |
|
362 |
|
363 def size(r: Rexp) : Int = r match { |
|
364 case ZERO => 1 |
|
365 case ONE => 1 |
|
366 case CHAR(_) => 1 |
|
367 case ANYCHAR => 1 |
|
368 case NOT(r0) => 1 + size(r0) |
|
369 case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
370 case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum |
|
371 case STAR(r) => 1 + size(r) |
|
372 } |
|
373 |
|
374 //almost as if a zeroable in called on each subexpression, and if so, turn that sub-expression into 0 |
|
375 def light_simp(r: Rexp): Rexp = r match { |
|
376 case ALTS(r1, r2) => (light_simp(r1), light_simp(r2)) match { |
|
377 case (ZERO, ZERO) => ZERO |
|
378 case (r1p, r2p) => ALTS(r1p, r2p) |
|
379 } |
|
380 case SEQ(r1, r2) => (light_simp(r1), light_simp(r2)) match { |
|
381 case (ZERO, _) => ZERO |
|
382 case (_, ZERO) => ZERO |
|
383 case (r1p, r2p) => SEQ(r1p, r2p) |
|
384 } |
|
385 case STAR(r) => STAR(r) |
|
386 case CHAR(d) => CHAR(d) |
|
387 case ONE => ONE |
|
388 case ZERO => ZERO |
|
389 } |
|
390 |
|
391 |
|
392 |
|
393 def simp_lexer(r: Rexp, s: List[Char]): Val = s match{ |
|
394 case Nil => {//println("mkeps!"); println(r); |
|
395 val v = mkeps(r); |
|
396 //println(v); |
|
397 v} |
|
398 case c::cs => { |
|
399 val derc1 = der(c, r); |
|
400 val derc = light_simp(derc1); |
|
401 println("derivative size: "+ size(derc)); |
|
402 val v = simp_lexer(derc, cs); |
|
403 //println("Injection!");println("r: "+r); println("c: "+c); println("v:"+v); |
|
404 inj(r, c, v ) } |
|
405 } |
|
406 |
|
407 |
|
408 def print_lexer(r: Rexp, s: List[Char]): Val = s match{ |
|
409 case Nil => {//println("mkeps!"); println(r); |
|
410 val v = mkeps(r); |
|
411 //println(v); |
|
412 v} |
|
413 case c::cs => { |
|
414 val derc = der(c, r); |
|
415 //val derc = light_simp(derc1); |
|
416 println("derivative size: "+ size(derc)); |
|
417 val v = print_lexer(derc, cs); |
|
418 //println("Injection!");println("r: "+r); println("c: "+c); println("v:"+v); |
|
419 inj(r, c, v ) } |
|
420 } |
|
421 val r = STAR("a"|"aa") ~ ("c") |
|
422 println("unsimplified lexer's intermediate derivative sizes:") |
|
423 val v = print_lexer(r, "aac".toList) |
|
424 println("simplified lexer's intermediate derivative sizes:") |
|
425 val v2 = simp_lexer(r, "aac".toList) |
|
426 //println(v) |
|
427 |
|
428 // some "rectification" functions for simplification |
|
429 |
|
430 |
|
431 |
|
432 |
|
433 // The Lexing Rules for the WHILE Language |
|
434 |
|
435 // bnullable function: tests whether the aregular |
|
436 // expression can recognise the empty string |
|
437 def bnullable (r: ARexp) : Boolean = r match { |
|
438 case AZERO => false |
|
439 case AONE(_) => true |
|
440 case ACHAR(_,_) => false |
|
441 case AALTS(_, rs) => rs.exists(bnullable) |
|
442 case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2) |
|
443 case ASTAR(_, _) => true |
|
444 case ANOT(_, rn) => !bnullable(rn) |
|
445 } |
|
446 |
|
447 def mkepsBC(r: ARexp) : Bits = r match { |
|
448 case AONE(bs) => bs |
|
449 case AALTS(bs, rs) => { |
|
450 val n = rs.indexWhere(bnullable) |
|
451 bs ++ mkepsBC(rs(n)) |
|
452 } |
|
453 case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2) |
|
454 case ASTAR(bs, r) => bs ++ List(Z) |
|
455 case ANOT(bs, rn) => bs |
|
456 } |
|
457 |
|
458 |
|
459 def bder(c: Char, r: ARexp) : ARexp = r match { |
|
460 case AZERO => AZERO |
|
461 case AONE(_) => AZERO |
|
462 case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO |
|
463 case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _))) |
|
464 case ASEQ(bs, r1, r2) => |
|
465 if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil ) |
|
466 else ASEQ(bs, bder(c, r1), r2) |
|
467 case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r)) |
|
468 case ANOT(bs, rn) => ANOT(bs, bder(c, rn)) |
|
469 case AANYCHAR(bs) => AONE(bs) |
|
470 } |
|
471 |
|
472 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
|
473 case AZERO => AZERO |
|
474 case AONE(cs) => AONE(bs ++ cs) |
|
475 case ACHAR(cs, f) => ACHAR(bs ++ cs, f) |
|
476 case AALTS(cs, rs) => AALTS(bs ++ cs, rs) |
|
477 case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) |
|
478 case ASTAR(cs, r) => ASTAR(bs ++ cs, r) |
|
479 case ANOT(cs, r) => ANOT(bs ++ cs, r) |
|
480 } |
|
481 |
|
482 |
|
483 def internalise(r: Rexp) : ARexp = r match { |
|
484 case ZERO => AZERO |
|
485 case ONE => AONE(Nil) |
|
486 case CHAR(c) => ACHAR(Nil, c) |
|
487 //case PRED(f) => APRED(Nil, f) |
|
488 case ALTS(r1, r2) => |
|
489 AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2)))) |
|
490 // case ALTS(r1::rs) => { |
|
491 // val AALTS(Nil, rs2) = internalise(ALTS(rs)) |
|
492 // AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _))) |
|
493 // } |
|
494 case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) |
|
495 case STAR(r) => ASTAR(Nil, internalise(r)) |
|
496 case RECD(x, r) => internalise(r) |
|
497 case NOT(r) => ANOT(Nil, internalise(r)) |
|
498 case ANYCHAR => AANYCHAR(Nil) |
|
499 } |
|
500 |
|
501 |
|
502 def bsimp(r: ARexp): ARexp = |
|
503 { |
|
504 r match { |
|
505 case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { |
|
506 case (AZERO, _) => AZERO |
|
507 case (_, AZERO) => AZERO |
|
508 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
509 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
510 } |
|
511 case AALTS(bs1, rs) => { |
|
512 val rs_simp = rs.map(bsimp(_)) |
|
513 val flat_res = flats(rs_simp) |
|
514 val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase) |
|
515 dist_res match { |
|
516 case Nil => AZERO |
|
517 case s :: Nil => fuse(bs1, s) |
|
518 case rs => AALTS(bs1, rs) |
|
519 } |
|
520 |
|
521 } |
|
522 case r => r |
|
523 } |
|
524 } |
|
525 def strongBsimp(r: ARexp): ARexp = |
|
526 { |
|
527 r match { |
|
528 case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match { |
|
529 case (AZERO, _) => AZERO |
|
530 case (_, AZERO) => AZERO |
|
531 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
532 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
533 } |
|
534 case AALTS(bs1, rs) => { |
|
535 val rs_simp = rs.map(strongBsimp(_)) |
|
536 val flat_res = flats(rs_simp) |
|
537 val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase) |
|
538 dist_res match { |
|
539 case Nil => AZERO |
|
540 case s :: Nil => fuse(bs1, s) |
|
541 case rs => AALTS(bs1, rs) |
|
542 } |
|
543 |
|
544 } |
|
545 case r => r |
|
546 } |
|
547 } |
|
548 |
|
549 def strongBsimp5(r: ARexp): ARexp = |
|
550 { |
|
551 // println("was this called?") |
|
552 r match { |
|
553 case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match { |
|
554 case (AZERO, _) => AZERO |
|
555 case (_, AZERO) => AZERO |
|
556 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
557 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
558 } |
|
559 case AALTS(bs1, rs) => { |
|
560 // println("alts case") |
|
561 val rs_simp = rs.map(strongBsimp5(_)) |
|
562 val flat_res = flats(rs_simp) |
|
563 var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase) |
|
564 var dist2_res = distinctBy5(dist_res) |
|
565 while(dist_res != dist2_res){ |
|
566 dist_res = dist2_res |
|
567 dist2_res = distinctBy5(dist_res) |
|
568 } |
|
569 (dist2_res) match { |
|
570 case Nil => AZERO |
|
571 case s :: Nil => fuse(bs1, s) |
|
572 case rs => AALTS(bs1, rs) |
|
573 } |
|
574 } |
|
575 case r => r |
|
576 } |
|
577 } |
|
578 |
|
579 def bders (s: List[Char], r: ARexp) : ARexp = s match { |
|
580 case Nil => r |
|
581 case c::s => bders(s, bder(c, r)) |
|
582 } |
|
583 |
|
584 def flats(rs: List[ARexp]): List[ARexp] = rs match { |
|
585 case Nil => Nil |
|
586 case AZERO :: rs1 => flats(rs1) |
|
587 case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) |
|
588 case r1 :: rs2 => r1 :: flats(rs2) |
|
589 } |
|
590 |
|
591 def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match { |
|
592 case Nil => Nil |
|
593 case (x::xs) => { |
|
594 val res = f(x) |
|
595 if (acc.contains(res)) distinctBy(xs, f, acc) |
|
596 else x::distinctBy(xs, f, res::acc) |
|
597 } |
|
598 } |
|
599 |
|
600 |
|
601 def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = { |
|
602 r match { |
|
603 case ASEQ(bs, r1, r2) => |
|
604 val termsTruncated = allowableTerms.collect(rt => rt match { |
|
605 case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2)) |
|
606 }) |
|
607 val pruned : ARexp = pruneRexp(r1, termsTruncated) |
|
608 pruned match { |
|
609 case AZERO => AZERO |
|
610 case AONE(bs1) => fuse(bs ++ bs1, r2) |
|
611 case pruned1 => ASEQ(bs, pruned1, r2) |
|
612 } |
|
613 |
|
614 case AALTS(bs, rs) => |
|
615 //allowableTerms.foreach(a => println(shortRexpOutput(a))) |
|
616 val rsp = rs.map(r => |
|
617 pruneRexp(r, allowableTerms) |
|
618 ) |
|
619 .filter(r => r != AZERO) |
|
620 rsp match { |
|
621 case Nil => AZERO |
|
622 case r1::Nil => fuse(bs, r1) |
|
623 case rs1 => AALTS(bs, rs1) |
|
624 } |
|
625 case r => |
|
626 if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO) |
|
627 } |
|
628 } |
|
629 |
|
630 def oneSimp(r: Rexp) : Rexp = r match { |
|
631 case SEQ(ONE, r) => r |
|
632 case SEQ(r1, r2) => SEQ(oneSimp(r1), r2) |
|
633 case r => r//assert r != 0 |
|
634 |
|
635 } |
|
636 |
|
637 |
|
638 def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match { |
|
639 case Nil => Nil |
|
640 case x :: xs => |
|
641 //assert(acc.distinct == acc) |
|
642 val erased = erase(x) |
|
643 if(acc.contains(erased)) |
|
644 distinctBy4(xs, acc) |
|
645 else{ |
|
646 val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r))) |
|
647 //val xp = pruneRexp(x, addToAcc) |
|
648 pruneRexp(x, addToAcc) match { |
|
649 case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc) |
|
650 case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc) |
|
651 } |
|
652 } |
|
653 } |
|
654 |
|
655 // fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp" |
|
656 // where |
|
657 // "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else |
|
658 // case r of (ASEQ bs r1 r2) ⇒ |
|
659 // bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 | |
|
660 // (AALTs bs rs) ⇒ |
|
661 // bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) | |
|
662 // r ⇒ r |
|
663 |
|
664 // def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match { |
|
665 // case r::Nil => SEQ(r, acc) |
|
666 // case Nil => acc |
|
667 // case r1::r2::Nil => SEQ(SEQ(r1, r2), acc) |
|
668 // } |
|
669 |
|
670 |
|
671 //the "fake" Language interpretation: just concatenates! |
|
672 def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match { |
|
673 case Nil => acc |
|
674 case r :: rs1 => |
|
675 // if(acc == ONE) |
|
676 // L(r, rs1) |
|
677 // else |
|
678 L(SEQ(acc, r), rs1) |
|
679 } |
|
680 |
|
681 def rprint(r: Rexp) : Unit = println(shortRexpOutput(r)) |
|
682 def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r))) |
|
683 def aprint(a: ARexp) = println(shortRexpOutput(erase(a))) |
|
684 def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a)))) |
|
685 |
|
686 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = { |
|
687 // println("pakc") |
|
688 // println(shortRexpOutput(erase(r))) |
|
689 // println("acc") |
|
690 // rsprint(acc) |
|
691 // println("ctx---------") |
|
692 // rsprint(ctx) |
|
693 // println("ctx---------end") |
|
694 // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp)) |
|
695 |
|
696 if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms |
|
697 AZERO |
|
698 } |
|
699 else{ |
|
700 r match { |
|
701 case ASEQ(bs, r1, r2) => |
|
702 (pAKC(acc, r1, erase(r2) :: ctx)) match{ |
|
703 case AZERO => |
|
704 AZERO |
|
705 case AONE(bs1) => |
|
706 fuse(bs1, r2) |
|
707 case r1p => |
|
708 ASEQ(bs, r1p, r2) |
|
709 } |
|
710 case AALTS(bs, rs0) => |
|
711 // println("before pruning") |
|
712 // println(s"ctx is ") |
|
713 // ctx.foreach(r => println(shortRexpOutput(r))) |
|
714 // println(s"rs0 is ") |
|
715 // rs0.foreach(r => println(shortRexpOutput(erase(r)))) |
|
716 // println(s"acc is ") |
|
717 // acc.foreach(r => println(shortRexpOutput(r))) |
|
718 rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match { |
|
719 case Nil => |
|
720 // println("after pruning Nil") |
|
721 AZERO |
|
722 case r :: Nil => |
|
723 // println("after pruning singleton") |
|
724 // println(shortRexpOutput(erase(r))) |
|
725 r |
|
726 case rs0p => |
|
727 // println("after pruning non-singleton") |
|
728 AALTS(bs, rs0p) |
|
729 } |
|
730 case r => r |
|
731 } |
|
732 } |
|
733 } |
|
734 |
|
735 def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match { |
|
736 case Nil => |
|
737 Nil |
|
738 case x :: xs => { |
|
739 val erased = erase(x) |
|
740 if(acc.contains(erased)){ |
|
741 distinctBy5(xs, acc) |
|
742 } |
|
743 else{ |
|
744 val pruned = pAKC(acc, x, Nil) |
|
745 val newTerms = breakIntoTerms(erase(pruned)) |
|
746 pruned match { |
|
747 case AZERO => |
|
748 distinctBy5(xs, acc) |
|
749 case xPrime => |
|
750 xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc) |
|
751 } |
|
752 } |
|
753 } |
|
754 } |
|
755 |
|
756 |
|
757 def breakIntoTerms(r: Rexp) : List[Rexp] = r match { |
|
758 case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2)) |
|
759 case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2) |
|
760 case ZERO => Nil |
|
761 case _ => r::Nil |
|
762 } |
|
763 |
|
764 |
|
765 |
|
766 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { |
|
767 case (ONE, bs) => (Empty, bs) |
|
768 case (CHAR(f), bs) => (Chr(f), bs) |
|
769 case (ALTS(r1, r2), Z::bs1) => { |
|
770 val (v, bs2) = decode_aux(r1, bs1) |
|
771 (Left(v), bs2) |
|
772 } |
|
773 case (ALTS(r1, r2), S::bs1) => { |
|
774 val (v, bs2) = decode_aux(r2, bs1) |
|
775 (Right(v), bs2) |
|
776 } |
|
777 case (SEQ(r1, r2), bs) => { |
|
778 val (v1, bs1) = decode_aux(r1, bs) |
|
779 val (v2, bs2) = decode_aux(r2, bs1) |
|
780 (Sequ(v1, v2), bs2) |
|
781 } |
|
782 case (STAR(r1), S::bs) => { |
|
783 val (v, bs1) = decode_aux(r1, bs) |
|
784 //(v) |
|
785 val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1) |
|
786 (Stars(v::vs), bs2) |
|
787 } |
|
788 case (STAR(_), Z::bs) => (Stars(Nil), bs) |
|
789 case (RECD(x, r1), bs) => { |
|
790 val (v, bs1) = decode_aux(r1, bs) |
|
791 (Rec(x, v), bs1) |
|
792 } |
|
793 case (NOT(r), bs) => (Nots(r.toString), bs) |
|
794 } |
|
795 |
|
796 def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match { |
|
797 case (v, Nil) => v |
|
798 case _ => throw new Exception("Not decodable") |
|
799 } |
|
800 |
|
801 |
|
802 def blexing(r: Rexp, s: String) : Option[Val] = { |
|
803 val bit_code = blex(internalise(r), s.toList) |
|
804 bit_code match { |
|
805 case Some(bits) => Some(decode(r, bits)) |
|
806 case None => None |
|
807 } |
|
808 } |
|
809 |
|
810 def blexing_simp(r: Rexp, s: String) : Option[Val] = { |
|
811 val bit_code = blex_simp(internalise(r), s.toList) |
|
812 bit_code match { |
|
813 case Some(bits) => Some(decode(r, bits)) |
|
814 case None => None |
|
815 } |
|
816 } |
|
817 //def simpBlexer(r: Rexp, s: String) : Val = { |
|
818 // Try(blexing_simp(r, s)).getOrElse(Failure) |
|
819 //} |
|
820 |
|
821 def strong_blexing_simp(r: Rexp, s: String) : Val = { |
|
822 decode(r, strong_blex_simp(internalise(r), s.toList)) |
|
823 } |
|
824 |
|
825 def strong_blexing_simp5(r: Rexp, s: String) : Val = { |
|
826 decode(r, strong_blex_simp5(internalise(r), s.toList)) |
|
827 } |
|
828 |
|
829 |
|
830 def strongBlexer(r: Rexp, s: String) : Val = { |
|
831 Try(strong_blexing_simp(r, s)).getOrElse(Failure) |
|
832 } |
|
833 |
|
834 def strongBlexer5(r: Rexp, s: String): Val = { |
|
835 Try(strong_blexing_simp5(r, s)).getOrElse(Failure) |
|
836 } |
|
837 |
|
838 def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match { |
|
839 case Nil => { |
|
840 if (bnullable(r)) { |
|
841 //println(asize(r)) |
|
842 val bits = mkepsBC(r) |
|
843 bits |
|
844 } |
|
845 else |
|
846 throw new Exception("Not matched") |
|
847 } |
|
848 case c::cs => { |
|
849 val der_res = bder(c,r) |
|
850 val simp_res = strongBsimp(der_res) |
|
851 strong_blex_simp(simp_res, cs) |
|
852 } |
|
853 } |
|
854 |
|
855 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match { |
|
856 case Nil => { |
|
857 if (bnullable(r)) { |
|
858 //println(asize(r)) |
|
859 val bits = mkepsBC(r) |
|
860 bits |
|
861 } |
|
862 else |
|
863 throw new Exception("Not matched") |
|
864 } |
|
865 case c::cs => { |
|
866 val der_res = bder(c,r) |
|
867 val simp_res = strongBsimp5(der_res) |
|
868 strong_blex_simp5(simp_res, cs) |
|
869 } |
|
870 } |
|
871 |
|
872 |
|
873 def bders_simp(s: List[Char], r: ARexp) : ARexp = s match { |
|
874 case Nil => r |
|
875 case c::s => |
|
876 //println(erase(r)) |
|
877 bders_simp(s, bsimp(bder(c, r))) |
|
878 } |
|
879 |
|
880 def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match { |
|
881 case Nil => r |
|
882 case c::s => bdersStrong5(s, strongBsimp5(bder(c, r))) |
|
883 } |
|
884 |
|
885 def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r)) |
|
886 |
|
887 def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match { |
|
888 case Nil => r |
|
889 case c::s => bdersStrong(s, strongBsimp(bder(c, r))) |
|
890 } |
|
891 |
|
892 def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r)) |
|
893 |
|
894 def erase(r:ARexp): Rexp = r match{ |
|
895 case AZERO => ZERO |
|
896 case AONE(_) => ONE |
|
897 case ACHAR(bs, c) => CHAR(c) |
|
898 case AALTS(bs, Nil) => ZERO |
|
899 case AALTS(bs, a::Nil) => erase(a) |
|
900 case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as))) |
|
901 case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2)) |
|
902 case ASTAR(cs, r)=> STAR(erase(r)) |
|
903 case ANOT(bs, r) => NOT(erase(r)) |
|
904 case AANYCHAR(bs) => ANYCHAR |
|
905 } |
|
906 |
|
907 |
|
908 def allCharSeq(r: Rexp) : Boolean = r match { |
|
909 case CHAR(c) => true |
|
910 case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2) |
|
911 case _ => false |
|
912 } |
|
913 |
|
914 def flattenSeq(r: Rexp) : String = r match { |
|
915 case CHAR(c) => c.toString |
|
916 case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2) |
|
917 case _ => throw new Error("flatten unflattenable rexp") |
|
918 } |
|
919 |
|
920 def shortRexpOutput(r: Rexp) : String = r match { |
|
921 case CHAR(c) => c.toString |
|
922 case ONE => "1" |
|
923 case ZERO => "0" |
|
924 case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]" |
|
925 case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]" |
|
926 case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")" |
|
927 case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*" |
|
928 case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")" |
|
929 //case RTOP => "RTOP" |
|
930 } |
|
931 |
|
932 def blex(r: ARexp, s: List[Char]) : Option[Bits]= s match { |
|
933 case Nil => { |
|
934 if (bnullable(r)) { |
|
935 val bits = mkepsBC(r) |
|
936 Some(bits) |
|
937 } |
|
938 else |
|
939 None |
|
940 //throw new Exception("Not matched") |
|
941 } |
|
942 case c::cs => { |
|
943 val der_res = bder(c,r) |
|
944 //val simp_res = bsimp(der_res) |
|
945 blex(der_res, cs) |
|
946 } |
|
947 } |
|
948 |
|
949 def blex_simp(r: ARexp, s: List[Char]) : Option[Bits]= s match { |
|
950 case Nil => { |
|
951 if (bnullable(r)) { |
|
952 val bits = mkepsBC(r) |
|
953 Some(bits) |
|
954 } |
|
955 else |
|
956 None |
|
957 //throw new Exception("Not matched") |
|
958 } |
|
959 case c::cs => { |
|
960 val der_res = bder(c,r) |
|
961 val simp_res = bsimp(der_res) |
|
962 blex_simp(simp_res, cs) |
|
963 } |
|
964 } |
|
965 |
|
966 |
|
967 |
|
968 |
|
969 |
|
970 def asize(a: ARexp) = size(erase(a)) |
|
971 |
|
972 //pder related |
|
973 type Mon = (Char, Rexp) |
|
974 type Lin = Set[Mon] |
|
975 |
|
976 def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match { |
|
977 case ZERO => Set() |
|
978 case ONE => rs |
|
979 case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) ) |
|
980 } |
|
981 def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms |
|
982 case ZERO => Set() |
|
983 case ONE => l |
|
984 case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } ) |
|
985 } |
|
986 def lf(r: Rexp): Lin = r match { |
|
987 case ZERO => Set() |
|
988 case ONE => Set() |
|
989 case CHAR(f) => { |
|
990 //val Some(c) = alphabet.find(f) |
|
991 Set((f, ONE)) |
|
992 } |
|
993 case ALTS(r1, r2) => { |
|
994 lf(r1 ) ++ lf(r2) |
|
995 } |
|
996 case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later...... |
|
997 case SEQ(r1, r2) =>{ |
|
998 if (nullable(r1)) |
|
999 cir_prod(lf(r1), r2) ++ lf(r2) |
|
1000 else |
|
1001 cir_prod(lf(r1), r2) |
|
1002 } |
|
1003 } |
|
1004 def lfs(r: Set[Rexp]): Lin = { |
|
1005 r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r)) |
|
1006 } |
|
1007 |
|
1008 def pder(x: Char, t: Rexp): Set[Rexp] = { |
|
1009 val lft = lf(t) |
|
1010 (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2) |
|
1011 } |
|
1012 def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match { |
|
1013 case x::xs => pders(xs, pder(x, t)) |
|
1014 case Nil => Set(t) |
|
1015 } |
|
1016 def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match { |
|
1017 case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t))) |
|
1018 case Nil => ts |
|
1019 } |
|
1020 def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc ) |
|
1021 def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2) |
|
1022 //all implementation of partial derivatives that involve set union are potentially buggy |
|
1023 //because they don't include the original regular term before they are pdered. |
|
1024 //now only pderas is fixed. |
|
1025 def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders. |
|
1026 def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1) |
|
1027 def awidth(r: Rexp) : Int = r match { |
|
1028 case CHAR(c) => 1 |
|
1029 case SEQ(r1, r2) => awidth(r1) + awidth(r2) |
|
1030 case ALTS(r1, r2) => awidth(r1) + awidth(r2) |
|
1031 case ONE => 0 |
|
1032 case ZERO => 0 |
|
1033 case STAR(r) => awidth(r) |
|
1034 } |
|
1035 //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList) |
|
1036 //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList) |
|
1037 def o(r: Rexp) = if (nullable(r)) ONE else ZERO |
|
1038 //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) )) |
|
1039 def pdp(x: Char, r: Rexp) : Set[Rexp] = r match { |
|
1040 case ZERO => Set[Rexp]() |
|
1041 case ONE => Set[Rexp]() |
|
1042 case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]() |
|
1043 case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2) |
|
1044 case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1))) |
|
1045 case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b)) |
|
1046 } |
|
1047 def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match { |
|
1048 case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t))) |
|
1049 case Nil => ts |
|
1050 } |
|
1051 def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc) |
|
1052 |
|
1053 |
|
1054 |
|
1055 def starPrint(r: Rexp) : Unit = r match { |
|
1056 |
|
1057 case SEQ(head, rstar) => |
|
1058 println(shortRexpOutput(head) ++ "~STARREG") |
|
1059 case STAR(rstar) => |
|
1060 println("STARREG") |
|
1061 case ALTS(r1, r2) => |
|
1062 println("(") |
|
1063 starPrint(r1) |
|
1064 println("+") |
|
1065 starPrint(r2) |
|
1066 println(")") |
|
1067 case ZERO => println("0") |
|
1068 } |
|
1069 |
|
1070 // @arg(doc = "small tests") |
|
1071 def n_astar_list(d: Int) : Rexp = { |
|
1072 if(d == 0) STAR("a") |
|
1073 else ALTS(STAR("a" * d), n_astar_list(d - 1)) |
|
1074 } |
|
1075 def n_astar_alts(d: Int) : Rexp = d match { |
|
1076 case 0 => n_astar_list(0) |
|
1077 case d => STAR(n_astar_list(d)) |
|
1078 // case r1 :: r2 :: Nil => ALTS(r1, r2) |
|
1079 // case r1 :: Nil => r1 |
|
1080 // case r :: rs => ALTS(r, n_astar_alts(rs)) |
|
1081 // case Nil => throw new Error("should give at least 1 elem") |
|
1082 } |
|
1083 def n_astar_aux(d: Int) = { |
|
1084 if(d == 0) n_astar_alts(0) |
|
1085 else ALTS(n_astar_alts(d), n_astar_alts(d - 1)) |
|
1086 } |
|
1087 |
|
1088 def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d)) |
|
1089 //val STARREG = n_astar(3) |
|
1090 // ( STAR("a") | |
|
1091 // ("a" | "aa").% | |
|
1092 // ( "a" | "aa" | "aaa").% |
|
1093 // ).% |
|
1094 //( "a" | "aa" | "aaa" | "aaaa").% | |
|
1095 //( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").% |
|
1096 (((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).% |
|
1097 |
|
1098 // @main |
|
1099 |
|
1100 def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){ |
|
1101 (a, b) => b * a / |
|
1102 Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs |
|
1103 } |
|
1104 |
|
1105 def small() = { |
|
1106 //val pderSTAR = pderUNIV(STARREG) |
|
1107 |
|
1108 //val refSize = pderSTAR.map(size(_)).sum |
|
1109 for(n <- 1 to 12){ |
|
1110 val STARREG = n_astar_alts(n) |
|
1111 val iMax = (lcm((1 to n).toList)) |
|
1112 for(i <- 1 to iMax + 1 ){// 100, 400, 800, 840, 841, 900 |
|
1113 val prog0 = "a" * i |
|
1114 //println(s"test: $prog0") |
|
1115 print(n) |
|
1116 print(" ") |
|
1117 // print(i) |
|
1118 // print(" ") |
|
1119 println(asize(bders_simp(prog0.toList, internalise(STARREG)))) |
|
1120 } |
|
1121 } |
|
1122 } |
|
1123 |
|
1124 def generator_test() { |
|
1125 |
|
1126 // test(rexp(7), 1000) { (r: Rexp) => |
|
1127 // val ss = stringsFromRexp(r) |
|
1128 // val boolList = ss.map(s => { |
|
1129 // val simpVal = simpBlexer(r, s) |
|
1130 // val strongVal = strongBlexer(r, s) |
|
1131 // // println(simpVal) |
|
1132 // // println(strongVal) |
|
1133 // (simpVal == strongVal) && (simpVal != None) |
|
1134 // }) |
|
1135 // !boolList.exists(b => b == false) |
|
1136 // } |
|
1137 // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => |
|
1138 // val ss = stringsFromRexp(r) |
|
1139 // val boolList = ss.map(s => { |
|
1140 // val bdStrong = bdersStrong(s.toList, internalise(r)) |
|
1141 // val bdStrong5 = bdersStrong5(s.toList, internalise(r)) |
|
1142 // // println(shortRexpOutput(r)) |
|
1143 // // println(s) |
|
1144 // // println(strongVal) |
|
1145 // // println(strongVal5) |
|
1146 // // (strongVal == strongVal5) |
|
1147 |
|
1148 // if(asize(bdStrong5) > asize(bdStrong)){ |
|
1149 // println(s) |
|
1150 // println(shortRexpOutput(erase(bdStrong5))) |
|
1151 // println(shortRexpOutput(erase(bdStrong))) |
|
1152 // } |
|
1153 // asize(bdStrong5) <= asize(bdStrong) |
|
1154 // }) |
|
1155 // !boolList.exists(b => b == false) |
|
1156 // } |
|
1157 //*** example where bdStrong5 has a smaller size than bdStrong |
|
1158 // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => |
|
1159 //*** depth 5 example bdStrong5 larger size than bdStrong |
|
1160 // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) => |
|
1161 |
|
1162 |
|
1163 |
|
1164 //sanity check from Christian's request |
|
1165 // val r = ("a" | "ab") ~ ("bc" | "c") |
|
1166 // val a = internalise(r) |
|
1167 // val aval = blexing_simp(r, "abc") |
|
1168 // println(aval) |
|
1169 |
|
1170 //sample counterexample:(depth 7) |
|
1171 //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c)))))))) |
|
1172 //(depth5) |
|
1173 //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a)))))) |
|
1174 test(rexp(4), 10000000) { (r: Rexp) => |
|
1175 // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => |
|
1176 val ss = stringsFromRexp(r) |
|
1177 val boolList = ss.map(s => { |
|
1178 val bdStrong = bdersStrong(s.toList, internalise(r)) |
|
1179 val bdStrong5 = bdersStrong5(s.toList, internalise(r)) |
|
1180 val bdFurther5 = strongBsimp5(bdStrong5) |
|
1181 val sizeFurther5 = asize(bdFurther5) |
|
1182 val pdersSet = pderUNIV(r) |
|
1183 val size5 = asize(bdStrong5) |
|
1184 val size0 = asize(bdStrong) |
|
1185 // println(s) |
|
1186 // println("pdersSet size") |
|
1187 // println(pdersSet.size) |
|
1188 // pdersSet.foreach(r => println(shortRexpOutput(r))) |
|
1189 // println("after bdStrong5") |
|
1190 |
|
1191 // println(shortRexpOutput(erase(bdStrong5))) |
|
1192 // println(breakIntoTerms(erase(bdStrong5)).size) |
|
1193 |
|
1194 // println("after bdStrong") |
|
1195 // println(shortRexpOutput(erase(bdStrong))) |
|
1196 // println(breakIntoTerms(erase(bdStrong)).size) |
|
1197 // println(size5, size0, sizeFurther5) |
|
1198 // aprint(strongBsimp5(bdStrong)) |
|
1199 // println(asize(strongBsimp5(bdStrong5))) |
|
1200 // println(s) |
|
1201 size5 <= size0 |
|
1202 }) |
|
1203 // println(boolList) |
|
1204 //!boolList.exists(b => b == false) |
|
1205 !boolList.exists(b => b == false) |
|
1206 } |
|
1207 |
|
1208 |
|
1209 } |
|
1210 //small() |
|
1211 // generator_test() |
|
1212 |
|
1213 // 1 |
|
1214 |
|
1215 // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))), |
|
1216 // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))), |
|
1217 // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))), |
|
1218 // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE)) |
|
1219 |
|
1220 |
|
1221 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty)) |
|
1222 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty)) |
|
1223 |
|
1224 |
|
1225 //def time[R](block: => R): R = { |
|
1226 // val t0 = System.nanoTime() |
|
1227 // val result = block // call-by-name |
|
1228 // val t1 = System.nanoTime() |
|
1229 // println(" " + (t1 - t0)/1e9 + "") |
|
1230 // result |
|
1231 //} |
|
1232 //val astarstarB = STAR(STAR("a"))~("b") |
|
1233 //val data_points_x_axis = List.range(0, 60000, 3000) |
|
1234 //for (i <- data_points_x_axis) {print(i); time { blexing(astarstarB, ("a"*i)) } } |
|
1235 ////println(data_points_x_axis.zip( List(0,1,3,4))) |
|
1236 //for(i <- List.range(0, 40000, 3000)) print(i + " ") |
|
1237 |