diff -r 4e00dd2398ac -r 940530087f30 progs/scala/re-bit.scala --- a/progs/scala/re-bit.scala Fri Apr 01 16:29:33 2016 +0100 +++ b/progs/scala/re-bit.scala Tue Apr 05 09:27:36 2016 +0100 @@ -9,6 +9,7 @@ case class ALT(r1: Rexp, r2: Rexp) extends Rexp case class SEQ(r1: Rexp, r2: Rexp) extends Rexp case class STAR(r: Rexp) extends Rexp +case class RECD(x: String, r: Rexp) extends Rexp abstract class ARexp case object AZERO extends ARexp @@ -25,6 +26,7 @@ case class Left(v: Val) extends Val case class Right(v: Val) extends Val case class Stars(vs: List[Val]) extends Val +case class Rec(x: String, v: Val) extends Val // some convenience for typing in regular expressions def charlist2rexp(s : List[Char]): Rexp = s match { @@ -46,6 +48,7 @@ def % = STAR(s) def ~ (r: Rexp) = SEQ(s, r) def ~ (r: String) = SEQ(s, r) + def $ (r: Rexp) = RECD(s, r) } // translation into ARexps @@ -65,6 +68,7 @@ case ALT(r1, r2) => AALT(Nil, fuse(List(false), internalise(r1)), fuse(List(true), internalise(r2))) case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) case STAR(r) => ASTAR(Nil, internalise(r)) + case RECD(x, r) => internalise(r) } internalise(("a" | "ab") ~ ("b" | "")) @@ -86,12 +90,16 @@ val (v2, bs2) = decode_aux(r2, bs1) (Sequ(v1, v2), bs2) } - case (STAR(r), false::bs) => { - val (v, bs1) = decode_aux(r, bs) - val (Stars(vs), bs2) = decode_aux(STAR(r), bs1) + case (STAR(r1), false::bs) => { + val (v, bs1) = decode_aux(r1, bs) + val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1) (Stars(v::vs), bs2) } - case (STAR(r), true::bs) => (Stars(Nil), bs) + case (STAR(_), true::bs) => (Stars(Nil), bs) + case (RECD(x, r1), bs) => { + val (v, bs1) = decode_aux(r1, bs) + (Rec(x, v), bs1) + } } def decode(r: Rexp, bs: List[Boolean]) = decode_aux(r, bs) match { @@ -145,6 +153,54 @@ def lexing(r: Rexp, s: String) : Val = decode(r, lex(internalise(r), s.toList)) + + +def simp(r: ARexp): ARexp = r match { + case ASEQ(bs1, r1, r2) => (simp(r1), simp(r2)) match { + case (AZERO, _) => AZERO + case (_, AZERO) => AZERO + case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) + case (r1s, r2s) => ASEQ(bs1, r1s, r2s) + } + case AALT(bs1, r1, r2) => (simp(r1), simp(r2)) match { + case (AZERO, r2s) => fuse(bs1, r2s) + case (r1s, AZERO) => fuse(bs1, r1s) + case (r1s, r2s) => AALT(bs1, r1s, r2s) + } + case r => r +} + +def lex_simp(r: ARexp, s: List[Char]) : List[Boolean] = s match { + case Nil => if (nullable(r)) mkepsBC(r) else throw new Exception("Not matched") + case c::cs => lex(simp(der(c, r)), cs) +} + +def lexing_simp(r: Rexp, s: String) : Val = decode(r, lex_simp(internalise(r), s.toList)) + + + +// extracts a string from value +def flatten(v: Val) : String = v match { + case Empty => "" + case Chr(c) => c.toString + case Left(v) => flatten(v) + case Right(v) => flatten(v) + case Sequ(v1, v2) => flatten(v1) + flatten(v2) + case Stars(vs) => vs.map(flatten).mkString + case Rec(_, v) => flatten(v) +} + +// extracts an environment from a value +def env(v: Val) : List[(String, String)] = v match { + case Empty => Nil + case Chr(c) => Nil + case Left(v) => env(v) + case Right(v) => env(v) + case Sequ(v1, v2) => env(v1) ::: env(v2) + case Stars(vs) => vs.flatMap(env) + case Rec(x, v) => (x, flatten(v))::env(v) +} + // Some Tests //============ @@ -160,14 +216,20 @@ val r0 = ("a" | "ab") ~ ("b" | "") println(lexing(r0, "ab")) +println(lexing_simp(r0, "ab")) val r1 = ("a" | "ab") ~ ("bcd" | "cd") println(lexing(r1, "abcd")) +println(lexing_simp(r1, "abcd")) println(lexing((("" | "a") ~ ("ab" | "b")), "ab")) +println(lexing_simp((("" | "a") ~ ("ab" | "b")), "ab")) + println(lexing((("" | "a") ~ ("b" | "ab")), "ab")) +println(lexing_simp((("" | "a") ~ ("b" | "ab")), "ab")) + println(lexing((("" | "a") ~ ("c" | "ab")), "ab")) - +println(lexing_simp((("" | "a") ~ ("c" | "ab")), "ab")) // Two Simple Tests for the While Language @@ -190,24 +252,41 @@ val END: Rexp = "}" val STRING: Rexp = "\"" ~ SYM.% ~ "\"" -val WHILE_REGS = (KEYWORD | - ID | - OP | - NUM | - SEMI | - LPAREN | RPAREN | - BEGIN | END | - WHITESPACE).% - +val WHILE_REGS = (("k" $ KEYWORD) | + ("i" $ ID) | + ("o" $ OP) | + ("n" $ NUM) | + ("s" $ SEMI) | + ("str" $ STRING) | + ("p" $ (LPAREN | RPAREN)) | + ("b" $ (BEGIN | END)) | + ("w" $ WHITESPACE)).% println("prog0 test") val prog0 = """read n""" -println(lexing(WHILE_REGS, prog0)) +println(env(lexing(WHILE_REGS, prog0))) +println(env(lexing_simp(WHILE_REGS, prog0))) println("prog1 test") val prog1 = """read n; write (n)""" -println(lexing(WHILE_REGS, prog1)) +println(env(lexing(WHILE_REGS, prog1))) +println(env(lexing_simp(WHILE_REGS, prog1))) +// Sulzmann's tests +//================== + +val sulzmann = ("a" | "b" | "ab").% + +println(lexing(sulzmann, "a" * 10)) +println(lexing_simp(sulzmann, "a" * 10)) + +for (i <- 1 to 6501 by 500) { + println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "a" * i)))) +} + +for (i <- 1 to 16 by 5) { + println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i)))) +}