|
1 // A simple lexer inspired by work of Sulzmann & Lu |
|
2 //================================================== |
|
3 // |
|
4 // Call the test cases with |
|
5 // |
|
6 // amm lexer.sc small |
|
7 // amm lexer.sc fib |
|
8 // amm lexer.sc loops |
|
9 // amm lexer.sc email |
|
10 // |
|
11 // amm lexer.sc all |
|
12 |
|
13 |
|
14 |
|
15 // regular expressions including records |
|
16 abstract class Rexp |
|
17 case object ZERO extends Rexp |
|
18 case object ONE extends Rexp |
|
19 case object ANYCHAR extends Rexp |
|
20 case class CHAR(c: Char) extends Rexp |
|
21 case class ALTS(r1: Rexp, r2: Rexp) extends Rexp |
|
22 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
23 case class STAR(r: Rexp) extends Rexp |
|
24 case class RECD(x: String, r: Rexp) extends Rexp |
|
25 case class NTIMES(n: Int, r: Rexp) extends Rexp |
|
26 case class OPTIONAL(r: Rexp) extends Rexp |
|
27 case class NOT(r: Rexp) extends Rexp |
|
28 // records for extracting strings or tokens |
|
29 |
|
30 // values |
|
31 abstract class Val |
|
32 case object Failure extends Val |
|
33 case object Empty extends Val |
|
34 case class Chr(c: Char) extends Val |
|
35 case class Sequ(v1: Val, v2: Val) extends Val |
|
36 case class Left(v: Val) extends Val |
|
37 case class Right(v: Val) extends Val |
|
38 case class Stars(vs: List[Val]) extends Val |
|
39 case class Rec(x: String, v: Val) extends Val |
|
40 case class Ntime(vs: List[Val]) extends Val |
|
41 case class Optionall(v: Val) extends Val |
|
42 case class Nots(s: String) extends Val |
|
43 |
|
44 |
|
45 |
|
46 abstract class Bit |
|
47 case object Z extends Bit |
|
48 case object S extends Bit |
|
49 |
|
50 |
|
51 type Bits = List[Bit] |
|
52 |
|
53 abstract class ARexp |
|
54 case object AZERO extends ARexp |
|
55 case class AONE(bs: Bits) extends ARexp |
|
56 case class ACHAR(bs: Bits, c: Char) extends ARexp |
|
57 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
|
58 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
|
59 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
|
60 case class ANOT(bs: Bits, r: ARexp) extends ARexp |
|
61 case class AANYCHAR(bs: Bits) extends ARexp |
|
62 |
|
63 import scala.util.Try |
|
64 |
|
65 trait Generator[+T] { |
|
66 self => // an alias for "this" |
|
67 def generate(): T |
|
68 |
|
69 def gen(n: Int) : List[T] = |
|
70 if (n == 0) Nil else self.generate() :: gen(n - 1) |
|
71 |
|
72 def map[S](f: T => S): Generator[S] = new Generator[S] { |
|
73 def generate = f(self.generate()) |
|
74 } |
|
75 def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] { |
|
76 def generate = f(self.generate()).generate() |
|
77 } |
|
78 |
|
79 |
|
80 } |
|
81 |
|
82 // tests a property according to a given random generator |
|
83 def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) { |
|
84 for (_ <- 0 until amount) { |
|
85 val value = r.generate() |
|
86 assert(pred(value), s"Test failed for: $value") |
|
87 } |
|
88 println(s"Test passed $amount times") |
|
89 } |
|
90 def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) { |
|
91 for (_ <- 0 until amount) { |
|
92 val valueR = r.generate() |
|
93 val valueS = s.generate() |
|
94 assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS") |
|
95 } |
|
96 println(s"Test passed $amount times") |
|
97 } |
|
98 |
|
99 // random integers |
|
100 val integers = new Generator[Int] { |
|
101 val rand = new java.util.Random |
|
102 def generate() = rand.nextInt() |
|
103 } |
|
104 |
|
105 // random booleans |
|
106 val booleans = integers.map(_ > 0) |
|
107 |
|
108 // random integers in the range lo and high |
|
109 def range(lo: Int, hi: Int): Generator[Int] = |
|
110 for (x <- integers) yield (lo + x.abs % (hi - lo)).abs |
|
111 |
|
112 // random characters |
|
113 def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar) |
|
114 val chars = chars_range('a', 'z') |
|
115 |
|
116 |
|
117 def oneOf[T](xs: T*): Generator[T] = |
|
118 for (idx <- range(0, xs.length)) yield xs(idx) |
|
119 |
|
120 def single[T](x: T) = new Generator[T] { |
|
121 def generate() = x |
|
122 } |
|
123 |
|
124 def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] = |
|
125 for (x <- t; y <- u) yield (x, y) |
|
126 |
|
127 // lists |
|
128 def emptyLists = single(Nil) |
|
129 |
|
130 def nonEmptyLists : Generator[List[Int]] = |
|
131 for (head <- integers; tail <- lists) yield head :: tail |
|
132 |
|
133 def lists: Generator[List[Int]] = for { |
|
134 kind <- booleans |
|
135 list <- if (kind) emptyLists else nonEmptyLists |
|
136 } yield list |
|
137 |
|
138 def char_list(len: Int): Generator[List[Char]] = { |
|
139 if(len <= 0) single(Nil) |
|
140 else{ |
|
141 for { |
|
142 c <- chars_range('a', 'c') |
|
143 tail <- char_list(len - 1) |
|
144 } yield c :: tail |
|
145 } |
|
146 } |
|
147 |
|
148 def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString |
|
149 |
|
150 def sampleString(r: Rexp) : List[String] = r match { |
|
151 case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions |
|
152 case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) ) |
|
153 case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up") |
|
154 case ONE => "" :: Nil |
|
155 case ZERO => Nil |
|
156 case CHAR(c) => c.toString :: Nil |
|
157 |
|
158 } |
|
159 |
|
160 def stringsFromRexp(r: Rexp) : List[String] = |
|
161 breakIntoTerms(r).flatMap(r => sampleString(r)) |
|
162 |
|
163 |
|
164 // (simple) binary trees |
|
165 trait Tree[T] |
|
166 case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T] |
|
167 case class Leaf[T](x: T) extends Tree[T] |
|
168 |
|
169 def leafs[T](t: Generator[T]): Generator[Leaf[T]] = |
|
170 for (x <- t) yield Leaf(x) |
|
171 |
|
172 def inners[T](t: Generator[T]): Generator[Inner[T]] = |
|
173 for (l <- trees(t); r <- trees(t)) yield Inner(l, r) |
|
174 |
|
175 def trees[T](t: Generator[T]): Generator[Tree[T]] = |
|
176 for (kind <- range(0, 2); |
|
177 tree <- if (kind == 0) leafs(t) else inners(t)) yield tree |
|
178 |
|
179 // regular expressions |
|
180 |
|
181 // generates random leaf-regexes; prefers CHAR-regexes |
|
182 def leaf_rexp() : Generator[Rexp] = |
|
183 for (kind <- range(0, 5); |
|
184 c <- chars_range('a', 'd')) yield |
|
185 kind match { |
|
186 case 0 => ZERO |
|
187 case 1 => ONE |
|
188 case _ => CHAR(c) |
|
189 } |
|
190 |
|
191 // generates random inner regexes with maximum depth d |
|
192 def inner_rexp(d: Int) : Generator[Rexp] = |
|
193 for (kind <- range(0, 3); |
|
194 l <- rexp(d); |
|
195 r <- rexp(d)) |
|
196 yield kind match { |
|
197 case 0 => ALTS(l, r) |
|
198 case 1 => SEQ(l, r) |
|
199 case 2 => STAR(r) |
|
200 } |
|
201 |
|
202 // generates random regexes with maximum depth d; |
|
203 // prefers inner regexes in 2/3 of the cases |
|
204 def rexp(d: Int = 100): Generator[Rexp] = |
|
205 for (kind <- range(0, 3); |
|
206 r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r |
|
207 |
|
208 |
|
209 // some test functions for rexps |
|
210 def height(r: Rexp) : Int = r match { |
|
211 case ZERO => 1 |
|
212 case ONE => 1 |
|
213 case CHAR(_) => 1 |
|
214 case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max |
|
215 case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max |
|
216 case STAR(r) => 1 + height(r) |
|
217 } |
|
218 |
|
219 // def size(r: Rexp) : Int = r match { |
|
220 // case ZERO => 1 |
|
221 // case ONE => 1 |
|
222 // case CHAR(_) => 1 |
|
223 // case ALTS(r1, r2) => 1 + size(r1) + size(r2) |
|
224 // case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
225 // case STAR(r) => 1 + size(r) |
|
226 // } |
|
227 |
|
228 // randomly subtracts 1 or 2 from the STAR case |
|
229 def size_faulty(r: Rexp) : Int = r match { |
|
230 case ZERO => 1 |
|
231 case ONE => 1 |
|
232 case CHAR(_) => 1 |
|
233 case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2) |
|
234 case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2) |
|
235 case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate |
|
236 } |
|
237 |
|
238 |
|
239 |
|
240 // some convenience for typing in regular expressions |
|
241 |
|
242 def charlist2rexp(s : List[Char]): Rexp = s match { |
|
243 case Nil => ONE |
|
244 case c::Nil => CHAR(c) |
|
245 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
246 } |
|
247 implicit def string2rexp(s : String) : Rexp = |
|
248 charlist2rexp(s.toList) |
|
249 |
|
250 implicit def RexpOps(r: Rexp) = new { |
|
251 def | (s: Rexp) = ALTS(r, s) |
|
252 def % = STAR(r) |
|
253 def ~ (s: Rexp) = SEQ(r, s) |
|
254 } |
|
255 |
|
256 implicit def stringOps(s: String) = new { |
|
257 def | (r: Rexp) = ALTS(s, r) |
|
258 def | (r: String) = ALTS(s, r) |
|
259 def % = STAR(s) |
|
260 def ~ (r: Rexp) = SEQ(s, r) |
|
261 def ~ (r: String) = SEQ(s, r) |
|
262 def $ (r: Rexp) = RECD(s, r) |
|
263 } |
|
264 |
|
265 def nullable(r: Rexp) : Boolean = r match { |
|
266 case ZERO => false |
|
267 case ONE => true |
|
268 case CHAR(_) => false |
|
269 case ANYCHAR => false |
|
270 case ALTS(r1, r2) => nullable(r1) || nullable(r2) |
|
271 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
272 case STAR(_) => true |
|
273 case RECD(_, r1) => nullable(r1) |
|
274 case NTIMES(n, r) => if (n == 0) true else nullable(r) |
|
275 case OPTIONAL(r) => true |
|
276 case NOT(r) => !nullable(r) |
|
277 } |
|
278 |
|
279 def der(c: Char, r: Rexp) : Rexp = r match { |
|
280 case ZERO => ZERO |
|
281 case ONE => ZERO |
|
282 case CHAR(d) => if (c == d) ONE else ZERO |
|
283 case ANYCHAR => ONE |
|
284 case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2)) |
|
285 case SEQ(r1, r2) => |
|
286 if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2)) |
|
287 else SEQ(der(c, r1), r2) |
|
288 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
289 case RECD(_, r1) => der(c, r1) |
|
290 case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO |
|
291 case OPTIONAL(r) => der(c, r) |
|
292 case NOT(r) => NOT(der(c, r)) |
|
293 } |
|
294 |
|
295 |
|
296 // extracts a string from a value |
|
297 def flatten(v: Val) : String = v match { |
|
298 case Empty => "" |
|
299 case Chr(c) => c.toString |
|
300 case Left(v) => flatten(v) |
|
301 case Right(v) => flatten(v) |
|
302 case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) |
|
303 case Stars(vs) => vs.map(flatten).mkString |
|
304 case Ntime(vs) => vs.map(flatten).mkString |
|
305 case Optionall(v) => flatten(v) |
|
306 case Rec(_, v) => flatten(v) |
|
307 } |
|
308 |
|
309 |
|
310 // extracts an environment from a value; |
|
311 // used for tokenising a string |
|
312 def env(v: Val) : List[(String, String)] = v match { |
|
313 case Empty => Nil |
|
314 case Chr(c) => Nil |
|
315 case Left(v) => env(v) |
|
316 case Right(v) => env(v) |
|
317 case Sequ(v1, v2) => env(v1) ::: env(v2) |
|
318 case Stars(vs) => vs.flatMap(env) |
|
319 case Ntime(vs) => vs.flatMap(env) |
|
320 case Rec(x, v) => (x, flatten(v))::env(v) |
|
321 case Optionall(v) => env(v) |
|
322 case Nots(s) => ("Negative", s) :: Nil |
|
323 } |
|
324 |
|
325 |
|
326 // The injection and mkeps part of the lexer |
|
327 //=========================================== |
|
328 |
|
329 def mkeps(r: Rexp) : Val = r match { |
|
330 case ONE => Empty |
|
331 case ALTS(r1, r2) => |
|
332 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
|
333 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
|
334 case STAR(r) => Stars(Nil) |
|
335 case RECD(x, r) => Rec(x, mkeps(r)) |
|
336 case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r))) |
|
337 case OPTIONAL(r) => Optionall(Empty) |
|
338 case NOT(rInner) => if(nullable(rInner)) throw new Exception("error") |
|
339 else Nots("")//Nots(s.reverse.toString) |
|
340 // case NOT(ZERO) => Empty |
|
341 // case NOT(CHAR(c)) => Empty |
|
342 // case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2))) |
|
343 // case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2))) |
|
344 // case NOT(STAR(r)) => Stars(Nil) |
|
345 |
|
346 } |
|
347 |
|
348 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
|
349 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
|
350 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
|
351 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
|
352 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
|
353 case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
|
354 case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
|
355 case (CHAR(d), Empty) => Chr(c) |
|
356 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
|
357 case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs) |
|
358 case (OPTIONAL(r), v) => Optionall(inj(r, c, v)) |
|
359 case (NOT(r), Nots(s)) => Nots(c.toString ++ s) |
|
360 case (ANYCHAR, Empty) => Chr(c) |
|
361 } |
|
362 |
|
363 // some "rectification" functions for simplification |
|
364 |
|
365 |
|
366 |
|
367 |
|
368 // The Lexing Rules for the WHILE Language |
|
369 |
|
370 // bnullable function: tests whether the aregular |
|
371 // expression can recognise the empty string |
|
372 def bnullable (r: ARexp) : Boolean = r match { |
|
373 case AZERO => false |
|
374 case AONE(_) => true |
|
375 case ACHAR(_,_) => false |
|
376 case AALTS(_, rs) => rs.exists(bnullable) |
|
377 case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2) |
|
378 case ASTAR(_, _) => true |
|
379 case ANOT(_, rn) => !bnullable(rn) |
|
380 } |
|
381 |
|
382 def mkepsBC(r: ARexp) : Bits = r match { |
|
383 case AONE(bs) => bs |
|
384 case AALTS(bs, rs) => { |
|
385 val n = rs.indexWhere(bnullable) |
|
386 bs ++ mkepsBC(rs(n)) |
|
387 } |
|
388 case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2) |
|
389 case ASTAR(bs, r) => bs ++ List(Z) |
|
390 case ANOT(bs, rn) => bs |
|
391 } |
|
392 |
|
393 |
|
394 def bder(c: Char, r: ARexp) : ARexp = r match { |
|
395 case AZERO => AZERO |
|
396 case AONE(_) => AZERO |
|
397 case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO |
|
398 case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _))) |
|
399 case ASEQ(bs, r1, r2) => |
|
400 if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil ) |
|
401 else ASEQ(bs, bder(c, r1), r2) |
|
402 case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r)) |
|
403 case ANOT(bs, rn) => ANOT(bs, bder(c, rn)) |
|
404 case AANYCHAR(bs) => AONE(bs) |
|
405 } |
|
406 |
|
407 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
|
408 case AZERO => AZERO |
|
409 case AONE(cs) => AONE(bs ++ cs) |
|
410 case ACHAR(cs, f) => ACHAR(bs ++ cs, f) |
|
411 case AALTS(cs, rs) => AALTS(bs ++ cs, rs) |
|
412 case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) |
|
413 case ASTAR(cs, r) => ASTAR(bs ++ cs, r) |
|
414 case ANOT(cs, r) => ANOT(bs ++ cs, r) |
|
415 } |
|
416 |
|
417 |
|
418 def internalise(r: Rexp) : ARexp = r match { |
|
419 case ZERO => AZERO |
|
420 case ONE => AONE(Nil) |
|
421 case CHAR(c) => ACHAR(Nil, c) |
|
422 //case PRED(f) => APRED(Nil, f) |
|
423 case ALTS(r1, r2) => |
|
424 AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2)))) |
|
425 // case ALTS(r1::rs) => { |
|
426 // val AALTS(Nil, rs2) = internalise(ALTS(rs)) |
|
427 // AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _))) |
|
428 // } |
|
429 case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) |
|
430 case STAR(r) => ASTAR(Nil, internalise(r)) |
|
431 case RECD(x, r) => internalise(r) |
|
432 case NOT(r) => ANOT(Nil, internalise(r)) |
|
433 case ANYCHAR => AANYCHAR(Nil) |
|
434 } |
|
435 |
|
436 |
|
437 def bsimp(r: ARexp): ARexp = |
|
438 { |
|
439 r match { |
|
440 case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { |
|
441 case (AZERO, _) => AZERO |
|
442 case (_, AZERO) => AZERO |
|
443 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
444 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
445 } |
|
446 case AALTS(bs1, rs) => { |
|
447 val rs_simp = rs.map(bsimp(_)) |
|
448 val flat_res = flats(rs_simp) |
|
449 val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase) |
|
450 dist_res match { |
|
451 case Nil => AZERO |
|
452 case s :: Nil => fuse(bs1, s) |
|
453 case rs => AALTS(bs1, rs) |
|
454 } |
|
455 |
|
456 } |
|
457 case r => r |
|
458 } |
|
459 } |
|
460 def strongBsimp(r: ARexp): ARexp = |
|
461 { |
|
462 r match { |
|
463 case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match { |
|
464 case (AZERO, _) => AZERO |
|
465 case (_, AZERO) => AZERO |
|
466 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
467 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
468 } |
|
469 case AALTS(bs1, rs) => { |
|
470 val rs_simp = rs.map(strongBsimp(_)) |
|
471 val flat_res = flats(rs_simp) |
|
472 val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase) |
|
473 dist_res match { |
|
474 case Nil => AZERO |
|
475 case s :: Nil => fuse(bs1, s) |
|
476 case rs => AALTS(bs1, rs) |
|
477 } |
|
478 |
|
479 } |
|
480 case r => r |
|
481 } |
|
482 } |
|
483 |
|
484 def strongBsimp5(r: ARexp): ARexp = |
|
485 { |
|
486 // println("was this called?") |
|
487 r match { |
|
488 case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match { |
|
489 case (AZERO, _) => AZERO |
|
490 case (_, AZERO) => AZERO |
|
491 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
492 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
493 } |
|
494 case AALTS(bs1, rs) => { |
|
495 // println("alts case") |
|
496 val rs_simp = rs.map(strongBsimp5(_)) |
|
497 val flat_res = flats(rs_simp) |
|
498 var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase) |
|
499 var dist2_res = distinctBy5(dist_res) |
|
500 while(dist_res != dist2_res){ |
|
501 dist_res = dist2_res |
|
502 dist2_res = distinctBy5(dist_res) |
|
503 } |
|
504 (dist2_res) match { |
|
505 case Nil => AZERO |
|
506 case s :: Nil => fuse(bs1, s) |
|
507 case rs => AALTS(bs1, rs) |
|
508 } |
|
509 } |
|
510 case r => r |
|
511 } |
|
512 } |
|
513 |
|
514 def bders (s: List[Char], r: ARexp) : ARexp = s match { |
|
515 case Nil => r |
|
516 case c::s => bders(s, bder(c, r)) |
|
517 } |
|
518 |
|
519 def flats(rs: List[ARexp]): List[ARexp] = rs match { |
|
520 case Nil => Nil |
|
521 case AZERO :: rs1 => flats(rs1) |
|
522 case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) |
|
523 case r1 :: rs2 => r1 :: flats(rs2) |
|
524 } |
|
525 |
|
526 def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match { |
|
527 case Nil => Nil |
|
528 case (x::xs) => { |
|
529 val res = f(x) |
|
530 if (acc.contains(res)) distinctBy(xs, f, acc) |
|
531 else x::distinctBy(xs, f, res::acc) |
|
532 } |
|
533 } |
|
534 |
|
535 |
|
536 def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = { |
|
537 r match { |
|
538 case ASEQ(bs, r1, r2) => |
|
539 val termsTruncated = allowableTerms.collect(rt => rt match { |
|
540 case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2)) |
|
541 }) |
|
542 val pruned : ARexp = pruneRexp(r1, termsTruncated) |
|
543 pruned match { |
|
544 case AZERO => AZERO |
|
545 case AONE(bs1) => fuse(bs ++ bs1, r2) |
|
546 case pruned1 => ASEQ(bs, pruned1, r2) |
|
547 } |
|
548 |
|
549 case AALTS(bs, rs) => |
|
550 //allowableTerms.foreach(a => println(shortRexpOutput(a))) |
|
551 val rsp = rs.map(r => |
|
552 pruneRexp(r, allowableTerms) |
|
553 ) |
|
554 .filter(r => r != AZERO) |
|
555 rsp match { |
|
556 case Nil => AZERO |
|
557 case r1::Nil => fuse(bs, r1) |
|
558 case rs1 => AALTS(bs, rs1) |
|
559 } |
|
560 case r => |
|
561 if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO) |
|
562 } |
|
563 } |
|
564 |
|
565 def oneSimp(r: Rexp) : Rexp = r match { |
|
566 case SEQ(ONE, r) => r |
|
567 case SEQ(r1, r2) => SEQ(oneSimp(r1), r2) |
|
568 case r => r//assert r != 0 |
|
569 |
|
570 } |
|
571 |
|
572 |
|
573 def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match { |
|
574 case Nil => Nil |
|
575 case x :: xs => |
|
576 //assert(acc.distinct == acc) |
|
577 val erased = erase(x) |
|
578 if(acc.contains(erased)) |
|
579 distinctBy4(xs, acc) |
|
580 else{ |
|
581 val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r))) |
|
582 //val xp = pruneRexp(x, addToAcc) |
|
583 pruneRexp(x, addToAcc) match { |
|
584 case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc) |
|
585 case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc) |
|
586 } |
|
587 } |
|
588 } |
|
589 |
|
590 // fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp" |
|
591 // where |
|
592 // "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else |
|
593 // case r of (ASEQ bs r1 r2) ⇒ |
|
594 // bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 | |
|
595 // (AALTs bs rs) ⇒ |
|
596 // bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) | |
|
597 // r ⇒ r |
|
598 |
|
599 // def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match { |
|
600 // case r::Nil => SEQ(r, acc) |
|
601 // case Nil => acc |
|
602 // case r1::r2::Nil => SEQ(SEQ(r1, r2), acc) |
|
603 // } |
|
604 |
|
605 |
|
606 //the "fake" Language interpretation: just concatenates! |
|
607 def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match { |
|
608 case Nil => acc |
|
609 case r :: rs1 => |
|
610 // if(acc == ONE) |
|
611 // L(r, rs1) |
|
612 // else |
|
613 L(SEQ(acc, r), rs1) |
|
614 } |
|
615 |
|
616 def rprint(r: Rexp) : Unit = println(shortRexpOutput(r)) |
|
617 def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r))) |
|
618 def aprint(a: ARexp) = println(shortRexpOutput(erase(a))) |
|
619 def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a)))) |
|
620 |
|
621 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = { |
|
622 // println("pakc") |
|
623 // println(shortRexpOutput(erase(r))) |
|
624 // println("acc") |
|
625 // rsprint(acc) |
|
626 // println("ctx---------") |
|
627 // rsprint(ctx) |
|
628 // println("ctx---------end") |
|
629 // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp)) |
|
630 |
|
631 if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms |
|
632 AZERO |
|
633 } |
|
634 else{ |
|
635 r match { |
|
636 case ASEQ(bs, r1, r2) => |
|
637 (pAKC(acc, r1, erase(r2) :: ctx)) match{ |
|
638 case AZERO => |
|
639 AZERO |
|
640 case AONE(bs1) => |
|
641 fuse(bs1, r2) |
|
642 case r1p => |
|
643 ASEQ(bs, r1p, r2) |
|
644 } |
|
645 case AALTS(bs, rs0) => |
|
646 // println("before pruning") |
|
647 // println(s"ctx is ") |
|
648 // ctx.foreach(r => println(shortRexpOutput(r))) |
|
649 // println(s"rs0 is ") |
|
650 // rs0.foreach(r => println(shortRexpOutput(erase(r)))) |
|
651 // println(s"acc is ") |
|
652 // acc.foreach(r => println(shortRexpOutput(r))) |
|
653 rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match { |
|
654 case Nil => |
|
655 // println("after pruning Nil") |
|
656 AZERO |
|
657 case r :: Nil => |
|
658 // println("after pruning singleton") |
|
659 // println(shortRexpOutput(erase(r))) |
|
660 r |
|
661 case rs0p => |
|
662 // println("after pruning non-singleton") |
|
663 AALTS(bs, rs0p) |
|
664 } |
|
665 case r => r |
|
666 } |
|
667 } |
|
668 } |
|
669 |
|
670 def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match { |
|
671 case Nil => |
|
672 Nil |
|
673 case x :: xs => { |
|
674 val erased = erase(x) |
|
675 if(acc.contains(erased)){ |
|
676 distinctBy5(xs, acc) |
|
677 } |
|
678 else{ |
|
679 val pruned = pAKC(acc, x, Nil) |
|
680 val newTerms = breakIntoTerms(erase(pruned)) |
|
681 pruned match { |
|
682 case AZERO => |
|
683 distinctBy5(xs, acc) |
|
684 case xPrime => |
|
685 xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc) |
|
686 } |
|
687 } |
|
688 } |
|
689 } |
|
690 |
|
691 |
|
692 def breakIntoTerms(r: Rexp) : List[Rexp] = r match { |
|
693 case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2)) |
|
694 case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2) |
|
695 case ZERO => Nil |
|
696 case _ => r::Nil |
|
697 } |
|
698 |
|
699 |
|
700 |
|
701 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { |
|
702 case (ONE, bs) => (Empty, bs) |
|
703 case (CHAR(f), bs) => (Chr(f), bs) |
|
704 case (ALTS(r1, r2), Z::bs1) => { |
|
705 val (v, bs2) = decode_aux(r1, bs1) |
|
706 (Left(v), bs2) |
|
707 } |
|
708 case (ALTS(r1, r2), S::bs1) => { |
|
709 val (v, bs2) = decode_aux(r2, bs1) |
|
710 (Right(v), bs2) |
|
711 } |
|
712 case (SEQ(r1, r2), bs) => { |
|
713 val (v1, bs1) = decode_aux(r1, bs) |
|
714 val (v2, bs2) = decode_aux(r2, bs1) |
|
715 (Sequ(v1, v2), bs2) |
|
716 } |
|
717 case (STAR(r1), S::bs) => { |
|
718 val (v, bs1) = decode_aux(r1, bs) |
|
719 //(v) |
|
720 val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1) |
|
721 (Stars(v::vs), bs2) |
|
722 } |
|
723 case (STAR(_), Z::bs) => (Stars(Nil), bs) |
|
724 case (RECD(x, r1), bs) => { |
|
725 val (v, bs1) = decode_aux(r1, bs) |
|
726 (Rec(x, v), bs1) |
|
727 } |
|
728 case (NOT(r), bs) => (Nots(r.toString), bs) |
|
729 } |
|
730 |
|
731 def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match { |
|
732 case (v, Nil) => v |
|
733 case _ => throw new Exception("Not decodable") |
|
734 } |
|
735 |
|
736 |
|
737 |
|
738 def blexing_simp(r: Rexp, s: String) : Val = { |
|
739 val bit_code = blex_simp(internalise(r), s.toList) |
|
740 decode(r, bit_code) |
|
741 } |
|
742 def simpBlexer(r: Rexp, s: String) : Val = { |
|
743 Try(blexing_simp(r, s)).getOrElse(Failure) |
|
744 } |
|
745 |
|
746 def strong_blexing_simp(r: Rexp, s: String) : Val = { |
|
747 decode(r, strong_blex_simp(internalise(r), s.toList)) |
|
748 } |
|
749 |
|
750 def strong_blexing_simp5(r: Rexp, s: String) : Val = { |
|
751 decode(r, strong_blex_simp5(internalise(r), s.toList)) |
|
752 } |
|
753 |
|
754 |
|
755 def strongBlexer(r: Rexp, s: String) : Val = { |
|
756 Try(strong_blexing_simp(r, s)).getOrElse(Failure) |
|
757 } |
|
758 |
|
759 def strongBlexer5(r: Rexp, s: String): Val = { |
|
760 Try(strong_blexing_simp5(r, s)).getOrElse(Failure) |
|
761 } |
|
762 |
|
763 def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match { |
|
764 case Nil => { |
|
765 if (bnullable(r)) { |
|
766 //println(asize(r)) |
|
767 val bits = mkepsBC(r) |
|
768 bits |
|
769 } |
|
770 else |
|
771 throw new Exception("Not matched") |
|
772 } |
|
773 case c::cs => { |
|
774 val der_res = bder(c,r) |
|
775 val simp_res = strongBsimp(der_res) |
|
776 strong_blex_simp(simp_res, cs) |
|
777 } |
|
778 } |
|
779 |
|
780 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match { |
|
781 case Nil => { |
|
782 if (bnullable(r)) { |
|
783 //println(asize(r)) |
|
784 val bits = mkepsBC(r) |
|
785 bits |
|
786 } |
|
787 else |
|
788 throw new Exception("Not matched") |
|
789 } |
|
790 case c::cs => { |
|
791 val der_res = bder(c,r) |
|
792 val simp_res = strongBsimp5(der_res) |
|
793 strong_blex_simp5(simp_res, cs) |
|
794 } |
|
795 } |
|
796 |
|
797 |
|
798 def bders_simp(s: List[Char], r: ARexp) : ARexp = s match { |
|
799 case Nil => r |
|
800 case c::s => |
|
801 //println(erase(r)) |
|
802 bders_simp(s, bsimp(bder(c, r))) |
|
803 } |
|
804 |
|
805 def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match { |
|
806 case Nil => r |
|
807 case c::s => bdersStrong5(s, strongBsimp5(bder(c, r))) |
|
808 } |
|
809 |
|
810 def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r)) |
|
811 |
|
812 def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match { |
|
813 case Nil => r |
|
814 case c::s => bdersStrong(s, strongBsimp(bder(c, r))) |
|
815 } |
|
816 |
|
817 def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r)) |
|
818 |
|
819 def erase(r:ARexp): Rexp = r match{ |
|
820 case AZERO => ZERO |
|
821 case AONE(_) => ONE |
|
822 case ACHAR(bs, c) => CHAR(c) |
|
823 case AALTS(bs, Nil) => ZERO |
|
824 case AALTS(bs, a::Nil) => erase(a) |
|
825 case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as))) |
|
826 case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2)) |
|
827 case ASTAR(cs, r)=> STAR(erase(r)) |
|
828 case ANOT(bs, r) => NOT(erase(r)) |
|
829 case AANYCHAR(bs) => ANYCHAR |
|
830 } |
|
831 |
|
832 |
|
833 def allCharSeq(r: Rexp) : Boolean = r match { |
|
834 case CHAR(c) => true |
|
835 case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2) |
|
836 case _ => false |
|
837 } |
|
838 |
|
839 def flattenSeq(r: Rexp) : String = r match { |
|
840 case CHAR(c) => c.toString |
|
841 case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2) |
|
842 case _ => throw new Error("flatten unflattenable rexp") |
|
843 } |
|
844 |
|
845 def shortRexpOutput(r: Rexp) : String = r match { |
|
846 case CHAR(c) => c.toString |
|
847 case ONE => "1" |
|
848 case ZERO => "0" |
|
849 case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]" |
|
850 case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]" |
|
851 case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")" |
|
852 case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*" |
|
853 case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")" |
|
854 //case RTOP => "RTOP" |
|
855 } |
|
856 |
|
857 |
|
858 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { |
|
859 case Nil => { |
|
860 if (bnullable(r)) { |
|
861 val bits = mkepsBC(r) |
|
862 bits |
|
863 } |
|
864 else |
|
865 throw new Exception("Not matched") |
|
866 } |
|
867 case c::cs => { |
|
868 val der_res = bder(c,r) |
|
869 val simp_res = bsimp(der_res) |
|
870 blex_simp(simp_res, cs) |
|
871 } |
|
872 } |
|
873 |
|
874 |
|
875 |
|
876 |
|
877 def size(r: Rexp) : Int = r match { |
|
878 case ZERO => 1 |
|
879 case ONE => 1 |
|
880 case CHAR(_) => 1 |
|
881 case ANYCHAR => 1 |
|
882 case NOT(r0) => 1 + size(r0) |
|
883 case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
884 case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum |
|
885 case STAR(r) => 1 + size(r) |
|
886 } |
|
887 |
|
888 def asize(a: ARexp) = size(erase(a)) |
|
889 |
|
890 //pder related |
|
891 type Mon = (Char, Rexp) |
|
892 type Lin = Set[Mon] |
|
893 |
|
894 def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match { |
|
895 case ZERO => Set() |
|
896 case ONE => rs |
|
897 case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) ) |
|
898 } |
|
899 def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms |
|
900 case ZERO => Set() |
|
901 case ONE => l |
|
902 case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } ) |
|
903 } |
|
904 def lf(r: Rexp): Lin = r match { |
|
905 case ZERO => Set() |
|
906 case ONE => Set() |
|
907 case CHAR(f) => { |
|
908 //val Some(c) = alphabet.find(f) |
|
909 Set((f, ONE)) |
|
910 } |
|
911 case ALTS(r1, r2) => { |
|
912 lf(r1 ) ++ lf(r2) |
|
913 } |
|
914 case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later...... |
|
915 case SEQ(r1, r2) =>{ |
|
916 if (nullable(r1)) |
|
917 cir_prod(lf(r1), r2) ++ lf(r2) |
|
918 else |
|
919 cir_prod(lf(r1), r2) |
|
920 } |
|
921 } |
|
922 def lfs(r: Set[Rexp]): Lin = { |
|
923 r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r)) |
|
924 } |
|
925 |
|
926 def pder(x: Char, t: Rexp): Set[Rexp] = { |
|
927 val lft = lf(t) |
|
928 (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2) |
|
929 } |
|
930 def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match { |
|
931 case x::xs => pders(xs, pder(x, t)) |
|
932 case Nil => Set(t) |
|
933 } |
|
934 def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match { |
|
935 case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t))) |
|
936 case Nil => ts |
|
937 } |
|
938 def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc ) |
|
939 def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2) |
|
940 //all implementation of partial derivatives that involve set union are potentially buggy |
|
941 //because they don't include the original regular term before they are pdered. |
|
942 //now only pderas is fixed. |
|
943 def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders. |
|
944 def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1) |
|
945 def awidth(r: Rexp) : Int = r match { |
|
946 case CHAR(c) => 1 |
|
947 case SEQ(r1, r2) => awidth(r1) + awidth(r2) |
|
948 case ALTS(r1, r2) => awidth(r1) + awidth(r2) |
|
949 case ONE => 0 |
|
950 case ZERO => 0 |
|
951 case STAR(r) => awidth(r) |
|
952 } |
|
953 //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList) |
|
954 //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList) |
|
955 def o(r: Rexp) = if (nullable(r)) ONE else ZERO |
|
956 //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) )) |
|
957 def pdp(x: Char, r: Rexp) : Set[Rexp] = r match { |
|
958 case ZERO => Set[Rexp]() |
|
959 case ONE => Set[Rexp]() |
|
960 case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]() |
|
961 case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2) |
|
962 case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1))) |
|
963 case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b)) |
|
964 } |
|
965 def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match { |
|
966 case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t))) |
|
967 case Nil => ts |
|
968 } |
|
969 def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc) |
|
970 |
|
971 |
|
972 |
|
973 def starPrint(r: Rexp) : Unit = r match { |
|
974 |
|
975 case SEQ(head, rstar) => |
|
976 println(shortRexpOutput(head) ++ "~STARREG") |
|
977 case STAR(rstar) => |
|
978 println("STARREG") |
|
979 case ALTS(r1, r2) => |
|
980 println("(") |
|
981 starPrint(r1) |
|
982 println("+") |
|
983 starPrint(r2) |
|
984 println(")") |
|
985 case ZERO => println("0") |
|
986 } |
|
987 |
|
988 // @arg(doc = "small tests") |
|
989 def n_astar_list(d: Int) : Rexp = { |
|
990 if(d == 0) STAR("a") |
|
991 else ALTS(STAR("a" * d), n_astar_list(d - 1)) |
|
992 } |
|
993 def n_astar_alts(d: Int) : Rexp = d match { |
|
994 case 0 => n_astar_list(0) |
|
995 case d => STAR(n_astar_list(d)) |
|
996 // case r1 :: r2 :: Nil => ALTS(r1, r2) |
|
997 // case r1 :: Nil => r1 |
|
998 // case r :: rs => ALTS(r, n_astar_alts(rs)) |
|
999 // case Nil => throw new Error("should give at least 1 elem") |
|
1000 } |
|
1001 def n_astar_aux(d: Int) = { |
|
1002 if(d == 0) n_astar_alts(0) |
|
1003 else ALTS(n_astar_alts(d), n_astar_alts(d - 1)) |
|
1004 } |
|
1005 |
|
1006 def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d)) |
|
1007 //val STARREG = n_astar(3) |
|
1008 // ( STAR("a") | |
|
1009 // ("a" | "aa").% | |
|
1010 // ( "a" | "aa" | "aaa").% |
|
1011 // ).% |
|
1012 //( "a" | "aa" | "aaa" | "aaaa").% | |
|
1013 //( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").% |
|
1014 (((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).% |
|
1015 |
|
1016 // @main |
|
1017 |
|
1018 def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){ |
|
1019 (a, b) => b * a / |
|
1020 Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs |
|
1021 } |
|
1022 |
|
1023 def small() = { |
|
1024 //val pderSTAR = pderUNIV(STARREG) |
|
1025 |
|
1026 //val refSize = pderSTAR.map(size(_)).sum |
|
1027 for(n <- 6 to 6){ |
|
1028 val STARREG = n_astar(n) |
|
1029 val iMax = (lcm((1 to n).toList)) |
|
1030 for(i <- 1 to iMax + 50){// 100, 400, 800, 840, 841, 900 |
|
1031 val prog0 = "a" * i |
|
1032 //println(s"test: $prog0") |
|
1033 print(n) |
|
1034 print(" ") |
|
1035 // print(i) |
|
1036 // print(" ") |
|
1037 println(asize(bders_simp(prog0.toList, internalise(STARREG)))) |
|
1038 } |
|
1039 } |
|
1040 } |
|
1041 |
|
1042 def generator_test() { |
|
1043 |
|
1044 // test(rexp(7), 1000) { (r: Rexp) => |
|
1045 // val ss = stringsFromRexp(r) |
|
1046 // val boolList = ss.map(s => { |
|
1047 // val simpVal = simpBlexer(r, s) |
|
1048 // val strongVal = strongBlexer(r, s) |
|
1049 // // println(simpVal) |
|
1050 // // println(strongVal) |
|
1051 // (simpVal == strongVal) && (simpVal != None) |
|
1052 // }) |
|
1053 // !boolList.exists(b => b == false) |
|
1054 // } |
|
1055 // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => |
|
1056 // val ss = stringsFromRexp(r) |
|
1057 // val boolList = ss.map(s => { |
|
1058 // val bdStrong = bdersStrong(s.toList, internalise(r)) |
|
1059 // val bdStrong5 = bdersStrong5(s.toList, internalise(r)) |
|
1060 // // println(shortRexpOutput(r)) |
|
1061 // // println(s) |
|
1062 // // println(strongVal) |
|
1063 // // println(strongVal5) |
|
1064 // // (strongVal == strongVal5) |
|
1065 |
|
1066 // if(asize(bdStrong5) > asize(bdStrong)){ |
|
1067 // println(s) |
|
1068 // println(shortRexpOutput(erase(bdStrong5))) |
|
1069 // println(shortRexpOutput(erase(bdStrong))) |
|
1070 // } |
|
1071 // asize(bdStrong5) <= asize(bdStrong) |
|
1072 // }) |
|
1073 // !boolList.exists(b => b == false) |
|
1074 // } |
|
1075 //*** example where bdStrong5 has a smaller size than bdStrong |
|
1076 // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => |
|
1077 //*** depth 5 example bdStrong5 larger size than bdStrong |
|
1078 // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) => |
|
1079 |
|
1080 |
|
1081 |
|
1082 //sanity check from Christian's request |
|
1083 // val r = ("a" | "ab") ~ ("bc" | "c") |
|
1084 // val a = internalise(r) |
|
1085 // val aval = blexing_simp(r, "abc") |
|
1086 // println(aval) |
|
1087 |
|
1088 //sample counterexample:(depth 7) |
|
1089 //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c)))))))) |
|
1090 //(depth5) |
|
1091 //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a)))))) |
|
1092 test(rexp(4), 10000000) { (r: Rexp) => |
|
1093 // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => |
|
1094 val ss = stringsFromRexp(r) |
|
1095 val boolList = ss.map(s => { |
|
1096 val bdStrong = bdersStrong(s.toList, internalise(r)) |
|
1097 val bdStrong5 = bdersStrong5(s.toList, internalise(r)) |
|
1098 val bdFurther5 = strongBsimp5(bdStrong5) |
|
1099 val sizeFurther5 = asize(bdFurther5) |
|
1100 val pdersSet = pderUNIV(r) |
|
1101 val size5 = asize(bdStrong5) |
|
1102 val size0 = asize(bdStrong) |
|
1103 // println(s) |
|
1104 // println("pdersSet size") |
|
1105 // println(pdersSet.size) |
|
1106 // pdersSet.foreach(r => println(shortRexpOutput(r))) |
|
1107 // println("after bdStrong5") |
|
1108 |
|
1109 // println(shortRexpOutput(erase(bdStrong5))) |
|
1110 // println(breakIntoTerms(erase(bdStrong5)).size) |
|
1111 |
|
1112 // println("after bdStrong") |
|
1113 // println(shortRexpOutput(erase(bdStrong))) |
|
1114 // println(breakIntoTerms(erase(bdStrong)).size) |
|
1115 // println(size5, size0, sizeFurther5) |
|
1116 // aprint(strongBsimp5(bdStrong)) |
|
1117 // println(asize(strongBsimp5(bdStrong5))) |
|
1118 // println(s) |
|
1119 size5 <= size0 |
|
1120 }) |
|
1121 // println(boolList) |
|
1122 //!boolList.exists(b => b == false) |
|
1123 !boolList.exists(b => b == false) |
|
1124 } |
|
1125 |
|
1126 |
|
1127 } |
|
1128 small() |
|
1129 // generator_test() |
|
1130 |
|
1131 // 1 |
|
1132 |
|
1133 // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))), |
|
1134 // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))), |
|
1135 // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))), |
|
1136 // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE)) |
|
1137 |
|
1138 |
|
1139 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty)) |
|
1140 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty)) |