29 |
29 |
30 // usual regular expressions |
30 // usual regular expressions |
31 abstract class Rexp |
31 abstract class Rexp |
32 case object ZERO extends Rexp |
32 case object ZERO extends Rexp |
33 case object ONE extends Rexp |
33 case object ONE extends Rexp |
34 case class PRED(f: Char => Boolean) extends Rexp |
34 case class PRED(f: Char => Boolean, s: String = "_") extends Rexp |
35 case class ALTS(rs: List[Rexp]) extends Rexp |
35 case class ALTS(rs: List[Rexp]) extends Rexp |
36 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
36 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
37 case class STAR(r: Rexp) extends Rexp |
37 case class STAR(r: Rexp) extends Rexp |
38 case class RECD(x: String, r: Rexp) extends Rexp |
38 case class RECD(x: String, r: Rexp) extends Rexp |
39 |
39 |
40 |
40 |
41 // abbreviations |
41 // abbreviations |
42 def CHAR(c: Char) = PRED(_ == c) |
42 def CHAR(c: Char) = PRED(_ == c, c.toString) |
43 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2)) |
43 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2)) |
44 def PLUS(r: Rexp) = SEQ(r, STAR(r)) |
44 def PLUS(r: Rexp) = SEQ(r, STAR(r)) |
45 |
45 |
46 // annotated regular expressions |
46 // annotated regular expressions |
47 abstract class ARexp |
47 abstract class ARexp |
48 case object AZERO extends ARexp |
48 case object AZERO extends ARexp |
49 case class AONE(bs: Bits) extends ARexp |
49 case class AONE(bs: Bits) extends ARexp |
50 case class APRED(bs: Bits, f: Char => Boolean) extends ARexp |
50 case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp |
51 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
51 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp |
52 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
52 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp |
53 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
53 case class ASTAR(bs: Bits, r: ARexp) extends ARexp |
54 |
54 |
55 // abbreviations |
55 // abbreviations |
90 def $ (r: Rexp) = RECD(s, r) |
90 def $ (r: Rexp) = RECD(s, r) |
91 } |
91 } |
92 |
92 |
93 |
93 |
94 // string of a regular expressions - for testing purposes |
94 // string of a regular expressions - for testing purposes |
95 def string(r: Rexp): String = r match { |
95 def string(r: Rexp): String = r match { |
96 case ZERO => "0" |
96 case ZERO => "0" |
97 case ONE => "1" |
97 case ONE => "1" |
98 case PRED(_) => "_" |
98 case PRED(_, s) => s |
99 case ALTS(rs) => rs.map(string).mkString("[", "|", "]") |
99 case ALTS(rs) => rs.map(string).mkString("[", "|", "]") |
100 case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |
100 case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |
101 case STAR(r) => s"{${string(r)}}*" |
101 case STAR(r) => s"{${string(r)}}*" |
102 case RECD(x, r) => s"(${x}! ${string(r)})" |
102 case RECD(x, r) => s"(${x}! ${string(r)})" |
103 } |
103 } |
|
104 |
|
105 // string of an annotated regular expressions - for testing purposes |
|
106 |
|
107 def astring(a: ARexp): String = a match { |
|
108 case AZERO => "0" |
|
109 case AONE(_) => "1" |
|
110 case APRED(_, _, s) => s |
|
111 case AALTS(_, rs) => rs.map(astring).mkString("[", "|", "]") |
|
112 case ASEQ(_, r1, r2) => s"(${astring(r1)} ~ ${astring(r2)})" |
|
113 case ASTAR(_, r) => s"{${astring(r)}}*" |
|
114 } |
|
115 |
104 |
116 |
105 //-------------------------------------------------------------------------------------------------------- |
117 //-------------------------------------------------------------------------------------------------------- |
106 // START OF NON-BITCODE PART |
118 // START OF NON-BITCODE PART |
107 // |
119 // |
108 |
120 |
109 // nullable function: tests whether the regular |
121 // nullable function: tests whether the regular |
110 // expression can recognise the empty string |
122 // expression can recognise the empty string |
111 def nullable (r: Rexp) : Boolean = r match { |
123 def nullable (r: Rexp) : Boolean = r match { |
112 case ZERO => false |
124 case ZERO => false |
113 case ONE => true |
125 case ONE => true |
114 case PRED(_) => false |
126 case PRED(_, _) => false |
115 case ALTS(rs) => rs.exists(nullable) |
127 case ALTS(rs) => rs.exists(nullable) |
116 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
128 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
117 case STAR(_) => true |
129 case STAR(_) => true |
118 case RECD(_, r) => nullable(r) |
130 case RECD(_, r) => nullable(r) |
119 } |
131 } |
120 |
132 |
121 // derivative of a regular expression w.r.t. a character |
133 // derivative of a regular expression w.r.t. a character |
122 def der (c: Char, r: Rexp) : Rexp = r match { |
134 def der (c: Char, r: Rexp) : Rexp = r match { |
123 case ZERO => ZERO |
135 case ZERO => ZERO |
124 case ONE => ZERO |
136 case ONE => ZERO |
125 case PRED(f) => if (f(c)) ONE else ZERO |
137 case PRED(f, _) => if (f(c)) ONE else ZERO |
126 case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2))) |
138 case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2))) |
127 case SEQ(r1, r2) => |
139 case SEQ(r1, r2) => |
128 if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2))) |
140 if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2))) |
129 else SEQ(der(c, r1), r2) |
141 else SEQ(der(c, r1), r2) |
130 case STAR(r) => SEQ(der(c, r), STAR(r)) |
142 case STAR(r) => SEQ(der(c, r), STAR(r)) |
169 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
181 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
170 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
182 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
171 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
183 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
172 case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1)) |
184 case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1)) |
173 case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2)) |
185 case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2)) |
174 case (PRED(_), Empty) => Chr(c) |
186 case (PRED(_, _), Empty) => Chr(c) |
175 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
187 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
176 } |
188 } |
177 |
189 |
178 // lexing without simplification |
190 // lexing without simplification |
179 def lex(r: Rexp, s: List[Char]) : Val = s match { |
191 def lex(r: Rexp, s: List[Char]) : Val = s match { |
262 |
274 |
263 |
275 |
264 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
276 def fuse(bs: Bits, r: ARexp) : ARexp = r match { |
265 case AZERO => AZERO |
277 case AZERO => AZERO |
266 case AONE(cs) => AONE(bs ++ cs) |
278 case AONE(cs) => AONE(bs ++ cs) |
267 case APRED(cs, f) => APRED(bs ++ cs, f) |
279 case APRED(cs, f, s) => APRED(bs ++ cs, f, s) |
268 case AALTS(cs, rs) => AALTS(bs ++ cs, rs) |
280 case AALTS(cs, rs) => AALTS(bs ++ cs, rs) |
269 case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) |
281 case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) |
270 case ASTAR(cs, r) => ASTAR(bs ++ cs, r) |
282 case ASTAR(cs, r) => ASTAR(bs ++ cs, r) |
271 } |
283 } |
272 |
284 |
273 // translation into ARexps |
285 // translation into ARexps |
274 def internalise(r: Rexp) : ARexp = r match { |
286 def internalise(r: Rexp) : ARexp = r match { |
275 case ZERO => AZERO |
287 case ZERO => AZERO |
276 case ONE => AONE(Nil) |
288 case ONE => AONE(Nil) |
277 case PRED(f) => APRED(Nil, f) |
289 case PRED(f, s) => APRED(Nil, f, s) |
278 case ALTS(List(r1, r2)) => |
290 case ALTS(List(r1, r2)) => |
279 AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2)))) |
291 AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2)))) |
280 case ALTS(r1::rs) => { |
292 case ALTS(r1::rs) => { |
281 val AALTS(Nil, rs2) = internalise(ALTS(rs)) |
293 val AALTS(Nil, rs2) = internalise(ALTS(rs)) |
282 AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _))) |
294 AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _))) |
289 internalise(("a" | "ab") ~ ("b" | "")) |
301 internalise(("a" | "ab") ~ ("b" | "")) |
290 |
302 |
291 // decoding of values from bit sequences |
303 // decoding of values from bit sequences |
292 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { |
304 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { |
293 case (ONE, bs) => (Empty, bs) |
305 case (ONE, bs) => (Empty, bs) |
294 case (PRED(f), C(c)::bs) => (Chr(c), bs) |
306 case (PRED(f, _), C(c)::bs) => (Chr(c), bs) |
295 case (ALTS(r::Nil), bs) => decode_aux(r, bs) |
307 case (ALTS(r::Nil), bs) => decode_aux(r, bs) |
296 case (ALTS(rs), bs) => bs match { |
308 case (ALTS(rs), bs) => bs match { |
297 case Z::bs1 => { |
309 case Z::bs1 => { |
298 val (v, bs2) = decode_aux(rs.head, bs1) |
310 val (v, bs2) = decode_aux(rs.head, bs1) |
299 (Left(v), bs2) |
311 (Left(v), bs2) |
328 |
340 |
329 //erase function: extracts a Rexp from Arexp |
341 //erase function: extracts a Rexp from Arexp |
330 def erase(r: ARexp) : Rexp = r match{ |
342 def erase(r: ARexp) : Rexp = r match{ |
331 case AZERO => ZERO |
343 case AZERO => ZERO |
332 case AONE(_) => ONE |
344 case AONE(_) => ONE |
333 case APRED(bs, f) => PRED(f) |
345 case APRED(bs, f, s) => PRED(f, s) |
334 case AALTS(bs, rs) => ALTS(rs.map(erase(_))) |
346 case AALTS(bs, rs) => ALTS(rs.map(erase(_))) |
335 case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2)) |
347 case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2)) |
336 case ASTAR(cs, r)=> STAR(erase(r)) |
348 case ASTAR(cs, r)=> STAR(erase(r)) |
337 } |
349 } |
338 |
350 |
340 // bnullable function: tests whether the aregular |
352 // bnullable function: tests whether the aregular |
341 // expression can recognise the empty string |
353 // expression can recognise the empty string |
342 def bnullable (r: ARexp) : Boolean = r match { |
354 def bnullable (r: ARexp) : Boolean = r match { |
343 case AZERO => false |
355 case AZERO => false |
344 case AONE(_) => true |
356 case AONE(_) => true |
345 case APRED(_,_) => false |
357 case APRED(_,_,_) => false |
346 case AALTS(_, rs) => rs.exists(bnullable) |
358 case AALTS(_, rs) => rs.exists(bnullable) |
347 case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2) |
359 case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2) |
348 case ASTAR(_, _) => true |
360 case ASTAR(_, _) => true |
349 } |
361 } |
350 |
362 |
360 |
372 |
361 // derivative of a regular expression w.r.t. a character |
373 // derivative of a regular expression w.r.t. a character |
362 def bder(c: Char, r: ARexp) : ARexp = r match { |
374 def bder(c: Char, r: ARexp) : ARexp = r match { |
363 case AZERO => AZERO |
375 case AZERO => AZERO |
364 case AONE(_) => AZERO |
376 case AONE(_) => AZERO |
365 case APRED(bs, f) => if (f(c)) AONE(bs:::List(C(c))) else AZERO |
377 case APRED(bs, f, _) => if (f(c)) AONE(bs:::List(C(c))) else AZERO |
366 case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _))) |
378 case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _))) |
367 case ASEQ(bs, r1, r2) => |
379 case ASEQ(bs, r1, r2) => |
368 if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2))) |
380 if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2))) |
369 else ASEQ(bs, bder(c, r1), r2) |
381 else ASEQ(bs, bder(c, r1), r2) |
370 case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r)) |
382 case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r)) |
576 print("Old: " + time(tokenise_simp(WHILE_REGS, fib_prog * i))) |
588 print("Old: " + time(tokenise_simp(WHILE_REGS, fib_prog * i))) |
577 print(" Bit: " + time(btokenise_simp(WHILE_REGS, fib_prog * i))) |
589 print(" Bit: " + time(btokenise_simp(WHILE_REGS, fib_prog * i))) |
578 println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i))) |
590 println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i))) |
579 } |
591 } |
580 |
592 |
|
593 |
581 println("Original " + size(WHILE_REGS)) |
594 println("Original " + size(WHILE_REGS)) |
582 println("Size Bit " + asize(bders_simp((fib_prog * 10).toList, internalise(WHILE_REGS)))) |
595 println("Size Bit " + asize(bders_simp((fib_prog * 1).toList, internalise(WHILE_REGS)))) |
583 println("Size Bitf " + asize(bders_simp_full((fib_prog * 10).toList, internalise(WHILE_REGS)))) |
596 println("Size Bitf " + asize(bders_simp_full((fib_prog * 1).toList, internalise(WHILE_REGS)))) |
584 println("Size Old " + size(ders_simp((fib_prog * 10).toList, WHILE_REGS))) |
597 println("Size Old " + size(ders_simp((fib_prog * 1).toList, WHILE_REGS))) |
585 |
598 |
586 |
599 |
587 //System.exit(0) |
600 System.exit(0) |
588 |
601 |
589 println("Internal sizes test OK or strange") |
602 println("Internal sizes test OK or strange") |
590 |
603 |
591 def perc(p1: Double, p2: Double) : String = |
604 def perc(p1: Double, p2: Double) : String = |
592 f"${(((p1 - p2) / p2) * 100.0) }%5.0f" + "%" |
605 f"${(((p1 - p2) / p2) * 100.0) }%5.0f" + "%" |