updated
authorChristian Urban <urbanc@in.tum.de>
Thu, 07 Feb 2019 10:52:41 +0000
changeset 305 6e2cef17a9b3
parent 304 82a99eec5b73
child 306 6756b026c5fe
updated
exps/both.scala
exps/profile.scala
text/proposal.tex
--- a/exps/both.scala	Mon Feb 04 13:10:26 2019 +0000
+++ b/exps/both.scala	Thu Feb 07 10:52:41 2019 +0000
@@ -31,7 +31,7 @@
 abstract class Rexp 
 case object ZERO extends Rexp
 case object ONE extends Rexp
-case class PRED(f: Char => Boolean) extends Rexp
+case class PRED(f: Char => Boolean, s: String = "_") extends Rexp 
 case class ALTS(rs: List[Rexp]) extends Rexp 
 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
 case class STAR(r: Rexp) extends Rexp 
@@ -39,7 +39,7 @@
 
 
 // abbreviations
-def CHAR(c: Char) = PRED(_ == c)
+def CHAR(c: Char) = PRED(_ == c, c.toString)
 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
 def PLUS(r: Rexp) = SEQ(r, STAR(r))
 
@@ -47,7 +47,7 @@
 abstract class ARexp 
 case object AZERO extends ARexp
 case class AONE(bs: Bits) extends ARexp
-case class APRED(bs: Bits, f: Char => Boolean) extends ARexp
+case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp
 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
@@ -92,15 +92,27 @@
 
 
 // string of a regular expressions - for testing purposes
-  def string(r: Rexp): String = r match {
-    case ZERO => "0"
-    case ONE => "1"
-    case PRED(_) => "_"
-    case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
-    case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
-    case STAR(r) => s"{${string(r)}}*"
-    case RECD(x, r) => s"(${x}! ${string(r)})"
-  }
+def string(r: Rexp): String = r match {
+  case ZERO => "0"
+  case ONE => "1"
+  case PRED(_, s) => s
+  case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
+  case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
+  case STAR(r) => s"{${string(r)}}*"
+  case RECD(x, r) => s"(${x}! ${string(r)})"
+}
+
+// string of an annotated regular expressions - for testing purposes
+
+def astring(a: ARexp): String = a match {
+  case AZERO => "0"
+  case AONE(_) => "1"
+  case APRED(_, _, s) => s
+  case AALTS(_, rs) => rs.map(astring).mkString("[", "|", "]")
+  case ASEQ(_, r1, r2) => s"(${astring(r1)} ~ ${astring(r2)})"
+  case ASTAR(_, r) => s"{${astring(r)}}*"
+}
+ 
 
 //--------------------------------------------------------------------------------------------------------
 // START OF NON-BITCODE PART
@@ -111,7 +123,7 @@
 def nullable (r: Rexp) : Boolean = r match {
   case ZERO => false
   case ONE => true
-  case PRED(_) => false
+  case PRED(_, _) => false
   case ALTS(rs) => rs.exists(nullable)
   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   case STAR(_) => true
@@ -122,7 +134,7 @@
 def der (c: Char, r: Rexp) : Rexp = r match {
   case ZERO => ZERO
   case ONE => ZERO
-  case PRED(f) => if (f(c)) ONE else ZERO
+  case PRED(f, _) => if (f(c)) ONE else ZERO
   case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
   case SEQ(r1, r2) => 
     if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
@@ -171,7 +183,7 @@
   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
   case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
-  case (PRED(_), Empty) => Chr(c) 
+  case (PRED(_, _), Empty) => Chr(c) 
   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
 }
 
@@ -264,7 +276,7 @@
 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
   case AZERO => AZERO
   case AONE(cs) => AONE(bs ++ cs)
-  case APRED(cs, f) => APRED(bs ++ cs, f)
+  case APRED(cs, f, s) => APRED(bs ++ cs, f, s)
   case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
   case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
   case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
@@ -274,7 +286,7 @@
 def internalise(r: Rexp) : ARexp = r match {
   case ZERO => AZERO
   case ONE => AONE(Nil)
-  case PRED(f) => APRED(Nil, f)
+  case PRED(f, s) => APRED(Nil, f, s)
   case ALTS(List(r1, r2)) => 
     AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
   case ALTS(r1::rs) => {
@@ -291,7 +303,7 @@
 // decoding of values from bit sequences
 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
   case (ONE, bs) => (Empty, bs)
-  case (PRED(f), C(c)::bs) => (Chr(c), bs)
+  case (PRED(f, _), C(c)::bs) => (Chr(c), bs)
   case (ALTS(r::Nil), bs) => decode_aux(r, bs)
   case (ALTS(rs), bs) => bs match {
     case Z::bs1 => {
@@ -330,7 +342,7 @@
 def erase(r: ARexp) : Rexp = r match{
   case AZERO => ZERO
   case AONE(_) => ONE
-  case APRED(bs, f) => PRED(f)
+  case APRED(bs, f, s) => PRED(f, s)
   case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
   case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
   case ASTAR(cs, r)=> STAR(erase(r))
@@ -342,7 +354,7 @@
 def bnullable (r: ARexp) : Boolean = r match {
   case AZERO => false
   case AONE(_) => true
-  case APRED(_,_) => false
+  case APRED(_,_,_) => false
   case AALTS(_, rs) => rs.exists(bnullable)
   case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
   case ASTAR(_, _) => true
@@ -362,7 +374,7 @@
 def bder(c: Char, r: ARexp) : ARexp = r match {
   case AZERO => AZERO
   case AONE(_) => AZERO
-  case APRED(bs, f) => if (f(c)) AONE(bs:::List(C(c))) else AZERO
+  case APRED(bs, f, _) => if (f(c)) AONE(bs:::List(C(c))) else AZERO
   case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
   case ASEQ(bs, r1, r2) => 
     if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2)))
@@ -378,7 +390,6 @@
   case c::s => bders(s, bder(c, r))
 }
 
-
 def flats(rs: List[ARexp]): List[ARexp] = rs match {
     case Nil => Nil
     case AZERO :: rs1 => flats(rs1)
@@ -404,6 +415,7 @@
 
 
 
+
 def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
   case Nil => r
   case c::s => bders_simp(s, bsimp(bder(c, r)))
@@ -485,7 +497,7 @@
 def size(r: Rexp) : Int = r match {
   case ZERO => 1
   case ONE => 1
-  case PRED(_) => 1
+  case PRED(_,_) => 1
   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
   case ALTS(rs) => 1 + rs.map(size).sum
   case STAR(r) => 1 + size(r)
@@ -498,9 +510,9 @@
 // Lexing Rules for a Small While Language
 
 //symbols
-val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
+val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_), "SYM")
 //digits
-val DIGIT = PRED("0123456789".contains(_))
+val DIGIT = PRED("0123456789".contains(_), "NUM")
 //identifiers
 val ID = SYM ~ (SYM | DIGIT).% 
 //numbers
@@ -578,13 +590,14 @@
   println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i)))
 }
 
+
 println("Original " + size(WHILE_REGS))
-println("Size Bit  " + asize(bders_simp((fib_prog * 10).toList, internalise(WHILE_REGS))))
-println("Size Bitf " + asize(bders_simp_full((fib_prog * 10).toList, internalise(WHILE_REGS))))
-println("Size Old  " + size(ders_simp((fib_prog * 10).toList, WHILE_REGS)))
+println("Size Bit  " + asize(bders_simp((fib_prog * 1).toList, internalise(WHILE_REGS))))
+println("Size Bitf " + asize(bders_simp_full((fib_prog * 1).toList, internalise(WHILE_REGS))))
+println("Size Old  " + size(ders_simp((fib_prog * 1).toList, WHILE_REGS)))
 
 
-//System.exit(0)
+System.exit(0)
 
 println("Internal sizes test OK or strange")
 
@@ -662,3 +675,4 @@
 
 
 
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/exps/profile.scala	Thu Feb 07 10:52:41 2019 +0000
@@ -0,0 +1,637 @@
+
+import scala.language.implicitConversions    
+import scala.language.reflectiveCalls
+import scala.annotation.tailrec   
+import scala.util.Try
+
+def escape(raw: String) : String = {
+  import scala.reflect.runtime.universe._
+  Literal(Constant(raw)).toString
+}
+
+def esc2(r: (String, String)) = (escape(r._1), escape(r._2))
+
+def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+  case Nil => Nil
+  case (x::xs) => {
+    val res = f(x)
+    if (acc.contains(res)) distinctBy(xs, f, acc)  
+    else x::distinctBy(xs, f, res::acc)
+  }
+} 
+
+abstract class Bit
+case object Z extends Bit
+case object S extends Bit
+case class C(c: Char) extends Bit
+
+type Bits = List[Bit]
+
+// usual regular expressions
+abstract class Rexp 
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class PRED(f: Char => Boolean, s: String = "_") extends Rexp 
+case class ALTS(rs: List[Rexp]) extends Rexp 
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
+case class STAR(r: Rexp) extends Rexp 
+case class RECD(x: String, r: Rexp) extends Rexp
+
+
+// abbreviations
+def CHAR(c: Char) = PRED(_ == c, c.toString)
+def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+// annotated regular expressions
+abstract class ARexp 
+case object AZERO extends ARexp
+case class AONE(bs: Bits) extends ARexp
+case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp
+case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
+case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
+case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
+
+// abbreviations
+def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
+
+// values
+abstract class Val
+case object Empty extends Val
+case class Chr(c: Char) extends Val
+case class Sequ(v1: Val, v2: Val) extends Val
+case class Left(v: Val) extends Val
+case class Right(v: Val) extends Val
+case class Stars(vs: List[Val]) extends Val
+case class Rec(x: String, v: Val) extends Val
+
+
+   
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]): Rexp = s match {
+  case Nil => ONE
+  case c::Nil => CHAR(c)
+  case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+implicit def RexpOps(r: Rexp) = new {
+  def | (s: Rexp) = ALT(r, s)
+  def % = STAR(r)
+  def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+  def | (r: Rexp) = ALT(s, r)
+  def | (r: String) = ALT(s, r)
+  def % = STAR(s)
+  def ~ (r: Rexp) = SEQ(s, r)
+  def ~ (r: String) = SEQ(s, r)
+  def $ (r: Rexp) = RECD(s, r)
+}
+
+
+// string of a regular expressions - for testing purposes
+def string(r: Rexp): String = r match {
+  case ZERO => "0"
+  case ONE => "1"
+  case PRED(_, s) => s
+  case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
+  case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
+  case STAR(r) => s"{${string(r)}}*"
+  case RECD(x, r) => s"(${x}! ${string(r)})"
+}
+
+// string of an annotated regular expressions - for testing purposes
+
+def astring(a: ARexp): String = a match {
+  case AZERO => "0"
+  case AONE(_) => "1"
+  case APRED(_, _, s) => s
+  case AALTS(_, rs) => rs.map(astring).mkString("[", "|", "]")
+  case ASEQ(_, r1, r2) => s"(${astring(r1)} ~ ${astring(r2)})"
+  case ASTAR(_, r) => s"{${astring(r)}}*"
+}
+ 
+
+//--------------------------------------------------------------------------------------------------------
+// START OF NON-BITCODE PART
+//
+
+// nullable function: tests whether the regular 
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+  case ZERO => false
+  case ONE => true
+  case PRED(_, _) => false
+  case ALTS(rs) => rs.exists(nullable)
+  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+  case STAR(_) => true
+  case RECD(_, r) => nullable(r)
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+  case ZERO => ZERO
+  case ONE => ZERO
+  case PRED(f, _) => if (f(c)) ONE else ZERO
+  case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
+  case SEQ(r1, r2) => 
+    if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
+    else SEQ(der(c, r1), r2)
+  case STAR(r) => SEQ(der(c, r), STAR(r))
+  case RECD(_, r1) => der(c, r1)
+}
+
+
+def flatten(v: Val) : String = v match {
+  case Empty => ""
+  case Chr(c) => c.toString
+  case Left(v) => flatten(v)
+  case Right(v) => flatten(v)
+  case Sequ(v1, v2) => flatten(v1) + flatten(v2)
+  case Stars(vs) => vs.map(flatten).mkString
+  case Rec(_, v) => flatten(v)
+}
+
+// extracts an environment from a value
+def env(v: Val) : List[(String, String)] = v match {
+  case Empty => Nil
+  case Chr(c) => Nil
+  case Left(v) => env(v)
+  case Right(v) => env(v)
+  case Sequ(v1, v2) => env(v1) ::: env(v2)
+  case Stars(vs) => vs.flatMap(env)
+  case Rec(x, v) => (x, flatten(v))::env(v)
+}
+
+
+// injection part
+def mkeps(r: Rexp) : Val = r match {
+  case ONE => Empty
+  case ALTS(List(r1, r2)) => 
+    if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+  case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+  case STAR(r) => Stars(Nil)
+  case RECD(x, r) => Rec(x, mkeps(r))
+}
+
+def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+  case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+  case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+  case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+  case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+  case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
+  case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
+  case (PRED(_, _), Empty) => Chr(c) 
+  case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+}
+
+// lexing without simplification
+def lex(r: Rexp, s: List[Char]) : Val = s match {
+  case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
+  case c::cs => inj(r, c, lex(der(c, r), cs))
+}
+
+def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
+
+//println(lexing(("ab" | "ab") ~ ("b" | ONE), "ab"))
+
+// some "rectification" functions for simplification
+def F_ID(v: Val): Val = v
+def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+  case Right(v) => Right(f2(v))
+  case Left(v) => Left(f1(v))
+}
+def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+  case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+}
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
+  (v:Val) => Sequ(f1(Empty), f2(v))
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
+  (v:Val) => Sequ(f1(v), f2(Empty))
+def F_RECD(f: Val => Val) = (v:Val) => v match {
+  case Rec(x, v) => Rec(x, f(v))
+}
+def F_ERROR(v: Val): Val = throw new Exception("error")
+
+// simplification of regular expressions returning also an
+// rectification function; no simplification under STAR 
+
+var profile_simp : Int = 0
+
+def simp(r: Rexp): (Rexp, Val => Val) = {
+  profile_simp =  profile_simp + 1
+  r match {
+  case ALTS(List(r1, r2)) => {
+    val (r1s, f1s) = simp(r1)
+    val (r2s, f2s) = simp(r2)
+    (r1s, r2s) match {
+      case (ZERO, _) => (r2s, F_RIGHT(f2s))
+      case (_, ZERO) => (r1s, F_LEFT(f1s))
+      case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+                else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
+    }
+  }
+  case SEQ(r1, r2) => {
+    val (r1s, f1s) = simp(r1)
+    val (r2s, f2s) = simp(r2)
+    (r1s, r2s) match {
+      case (ZERO, _) => (ZERO, F_ERROR)
+      case (_, ZERO) => (ZERO, F_ERROR)
+      case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+      case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+      case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+    }
+  }
+  case RECD(x, r1) => {
+    val (r1s, f1s) = simp(r1)
+    (RECD(x, r1s), F_RECD(f1s))
+  }
+  case r => (r, F_ID)
+}}
+
+def ders_simp(s: List[Char], r: Rexp) : Rexp = s match {
+  case Nil => r
+  case c::s => ders_simp(s, simp(der(c, r))._1)
+}
+
+
+def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+  case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
+  case c::cs => {
+    val (r_simp, f_simp) = simp(der(c, r))
+    inj(r, c, f_simp(lex_simp(r_simp, cs)))
+  }
+}
+
+def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
+
+println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
+
+
+def tokenise_simp(r: Rexp, s: String) = env(lexing_simp(r, s)).map(esc2)
+
+//--------------------------------------------------------------------------------------------------------
+// BITCODED PART
+
+
+def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+  case AZERO => AZERO
+  case AONE(cs) => AONE(bs ++ cs)
+  case APRED(cs, f, s) => APRED(bs ++ cs, f, s)
+  case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+  case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+  case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+}
+
+// translation into ARexps
+def internalise(r: Rexp) : ARexp = r match {
+  case ZERO => AZERO
+  case ONE => AONE(Nil)
+  case PRED(f, s) => APRED(Nil, f, s)
+  case ALTS(List(r1, r2)) => 
+    AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+  case ALTS(r1::rs) => {
+     val AALTS(Nil, rs2) = internalise(ALTS(rs))
+     AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+  }
+  case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+  case STAR(r) => ASTAR(Nil, internalise(r))
+  case RECD(x, r) => internalise(r)
+}
+
+internalise(("a" | "ab") ~ ("b" | ""))
+
+// decoding of values from bit sequences
+def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+  case (ONE, bs) => (Empty, bs)
+  case (PRED(f, _), C(c)::bs) => (Chr(c), bs)
+  case (ALTS(r::Nil), bs) => decode_aux(r, bs)
+  case (ALTS(rs), bs) => bs match {
+    case Z::bs1 => {
+      val (v, bs2) = decode_aux(rs.head, bs1)
+      (Left(v), bs2)
+    }
+    case S::bs1 => {
+      val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
+      (Right(v), bs2)			
+    }
+  }
+  case (SEQ(r1, r2), bs) => {
+    val (v1, bs1) = decode_aux(r1, bs)
+    val (v2, bs2) = decode_aux(r2, bs1)
+    (Sequ(v1, v2), bs2)
+  }
+  case (STAR(r1), S::bs) => {
+    val (v, bs1) = decode_aux(r1, bs)
+    val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+    (Stars(v::vs), bs2)
+  }
+  case (STAR(_), Z::bs) => (Stars(Nil), bs)
+  case (RECD(x, r1), bs) => {
+    val (v, bs1) = decode_aux(r1, bs)
+    (Rec(x, v), bs1)
+  }
+}
+
+def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+  case (v, Nil) => v
+  case _ => throw new Exception("Not decodable")
+}
+
+
+//erase function: extracts a Rexp from Arexp
+def erase(r: ARexp) : Rexp = r match{
+  case AZERO => ZERO
+  case AONE(_) => ONE
+  case APRED(bs, f, s) => PRED(f, s)
+  case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
+  case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+  case ASTAR(cs, r)=> STAR(erase(r))
+}
+
+
+// bnullable function: tests whether the aregular 
+// expression can recognise the empty string
+def bnullable (r: ARexp) : Boolean = r match {
+  case AZERO => false
+  case AONE(_) => true
+  case APRED(_,_,_) => false
+  case AALTS(_, rs) => rs.exists(bnullable)
+  case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+  case ASTAR(_, _) => true
+}
+
+def bmkeps(r: ARexp) : Bits = r match {
+  case AONE(bs) => bs
+  case AALTS(bs, rs) => {
+    val n = rs.indexWhere(bnullable)
+    bs ++ bmkeps(rs(n))
+  }
+  case ASEQ(bs, r1, r2) => bs ++ bmkeps(r1) ++ bmkeps(r2)
+  case ASTAR(bs, r) => bs ++ List(Z)
+}
+
+// derivative of a regular expression w.r.t. a character
+def bder(c: Char, r: ARexp) : ARexp = r match {
+  case AZERO => AZERO
+  case AONE(_) => AZERO
+  case APRED(bs, f, _) => if (f(c)) AONE(bs:::List(C(c))) else AZERO
+  case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+  case ASEQ(bs, r1, r2) => 
+    if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2)))
+    else ASEQ(bs, bder(c, r1), r2)
+  case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+}
+
+
+// derivative w.r.t. a string (iterates bder)
+@tailrec
+def bders (s: List[Char], r: ARexp) : ARexp = s match {
+  case Nil => r
+  case c::s => bders(s, bder(c, r))
+}
+
+
+def inc_profile(m: Map[ARexp, Int], a: ARexp) : Map[ARexp, Int] = {
+  val current =  m.getOrElse(a, 0) 
+  m + (a -> (current + 1))
+}
+
+var profile_bsimp_args : Map[ARexp, Int] = Map()
+var profile_bsimp : Int = 0
+var profile_flats : Int = 0
+
+
+def flats(rs: List[ARexp]): List[ARexp] = {
+  profile_flats = profile_flats + 1
+  rs match {
+    case Nil => Nil
+    case AZERO :: rs1 => flats(rs1)
+    case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+    case r1 :: rs2 => r1 :: flats(rs2)
+}}
+
+def bsimp(r: ARexp): ARexp = {
+  //profile_bsimp = inc_profile(profile_bsimp, r)
+  profile_bsimp = profile_bsimp + 1
+  r match {
+  case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+      case (AZERO, _) => AZERO
+      case (_, AZERO) => AZERO
+      case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+      case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+  }
+  case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp)), erase) match {
+    case Nil => AZERO
+    case r :: Nil => fuse(bs1, r)
+    case rs => AALTS(bs1, rs)  
+  }
+  //case ASTAR(bs1, r1) => ASTAR(bs1, bsimp(r1))
+  case r => r
+  }
+}
+
+
+
+
+def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
+  case Nil => r
+  case c::s => bders_simp(s, bsimp(bder(c, r)))
+}
+
+def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+  case Nil => if (bnullable(r)) bmkeps(r)
+	      else throw new Exception("Not matched")
+  case c::cs => blex_simp(bsimp(bder(c, r)), cs)
+}
+
+
+def blexing_simp(r: Rexp, s: String) : Val = 
+ decode(r, blex_simp(internalise(r), s.toList))
+
+
+def btokenise_simp(r: Rexp, s: String) = env(blexing_simp(r, s)).map(esc2)
+
+
+
+// INCLUDING SIMPLIFICATION UNDER STARS
+
+def bsimp_full(r: ARexp): ARexp = r match {
+  case ASEQ(bs1, r1, r2) => (bsimp_full(r1), bsimp_full(r2)) match {
+      case (AZERO, _) => AZERO
+      case (_, AZERO) => AZERO
+      case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+      case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+  }
+  case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp_full)), erase) match {
+    case Nil => AZERO
+    case r :: Nil => fuse(bs1, r)
+    case rs => AALTS(bs1, rs)  
+  }
+  case ASTAR(bs1, r1) => ASTAR(bs1, bsimp_full(r1))
+  case r => r
+}
+
+def bders_simp_full(s: List[Char], r: ARexp) : ARexp = s match {
+  case Nil => r
+  case c::s => bders_simp_full(s, bsimp_full(bder(c, r)))
+}
+
+def blex_simp_full(r: ARexp, s: List[Char]) : Bits = s match {
+  case Nil => if (bnullable(r)) bmkeps(r)
+	      else throw new Exception("Not matched")
+  case c::cs => blex_simp_full(bsimp_full(bder(c, r)), cs)
+}
+
+
+def blexing_simp_full(r: Rexp, s: String) : Val = 
+ decode(r, blex_simp_full(internalise(r), s.toList))
+
+
+def btokenise_simp_full(r: Rexp, s: String) = env(blexing_simp_full(r, s)).map(esc2)
+
+
+
+//   Testing
+//============
+
+def time[T](code: => T) = {
+  val start = System.nanoTime()
+  val result = code
+  val end = System.nanoTime()
+  ((end - start)/1.0e9).toString
+  //result
+}
+
+def timeR[T](code: => T) = {
+  val start = System.nanoTime()
+  for (i <- 1 to 10) code
+  val result = code
+  val end = System.nanoTime()
+  (result, (end - start))
+}
+
+//size: of a Aregx for testing purposes 
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case PRED(_,_) => 1
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case ALTS(rs) => 1 + rs.map(size).sum
+  case STAR(r) => 1 + size(r)
+  case RECD(_, r) => size(r)
+}
+
+def asize(a: ARexp) = size(erase(a))
+
+
+// Lexing Rules for a Small While Language
+
+//symbols
+val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_), "SYM")
+//digits
+val DIGIT = PRED("0123456789".contains(_), "NUM")
+//identifiers
+val ID = SYM ~ (SYM | DIGIT).% 
+//numbers
+val NUM = STAR(DIGIT)
+//keywords
+val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
+//semicolons
+val SEMI: Rexp = ";"
+//operators
+val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
+//whitespaces
+val WHITESPACE = PLUS(" " | "\n" | "\t")
+//parentheses
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+//strings...but probably needs not
+val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+
+
+
+val WHILE_REGS = (("k" $ KEYWORD) | 
+                  ("i" $ ID) | 
+                  ("o" $ OP) | 
+                  ("n" $ NUM) | 
+                  ("s" $ SEMI) | 
+                  ("str" $ STRING) |
+                  ("p" $ (LPAREN | RPAREN)) | 
+                  ("b" $ (BEGIN | END)) | 
+                  ("w" $ WHITESPACE)).%
+
+// Bigger Tests
+//==============
+
+
+val fib_prog = """
+write "Fib";
+read n;
+minus1 := 0;
+minus2 := 1;
+while n > 0 do {
+  temp := minus2;
+  minus2 := minus1 + minus2;
+  minus1 := temp;
+  n := n - 1
+};
+write "Result";
+write minus2
+"""
+
+
+
+
+profile_simp = 0
+profile_bsimp = 0
+profile_flats = 0
+
+println("Original " + size(WHILE_REGS))
+println("Size Bit  " + asize(bders_simp((fib_prog * 20).toList, internalise(WHILE_REGS))) + "  " + profile_bsimp +  " + " + profile_flats)
+println("Size Old  " + size(ders_simp((fib_prog * 20).toList, WHILE_REGS)) + "  " + profile_simp)
+
+System.exit(0)
+
+println("Internal sizes test OK or strange")
+
+def perc(p1: Double, p2: Double) : String =
+  f"${(((p1 - p2) / p2) * 100.0) }%5.0f" + "%"
+
+def ders_test(n: Int, s: List[Char], r: Rexp, a: ARexp) : (Rexp, ARexp) = s match {
+  case Nil => (r, a)
+  case c::s => {
+    // derivative 
+    val (rd1, tr1) = timeR(der(c, r))
+    val (ad1, ta1) = timeR(bder(c, a))
+    val trs1 = f"${tr1}%.5f"
+    val tas1 = f"${ta1}%.5f"
+    if (tr1 < ta1) println(s"Time strange der  (step) ${n} ${perc(ta1, tr1)} sizes  der ${size(rd1)} ${asize(ad1)}")
+    //simplification
+    val (rd, tr) = timeR(simp(rd1)._1)
+    val (ad, ta) = timeR(bsimp(ad1))
+    val trs = f"${tr}%.5f"
+    val tas = f"${ta}%.5f"
+    //full simplification
+    val (adf, taf) = timeR(bsimp_full(ad1))
+    if (tr < ta) println(s"Time strange simp (step) ${n} ${perc(ta, tr)} sizes simp ${size(rd)} ${asize(ad)}")
+    if (n == 1749 || n == 1734) {
+      println{s"Aregex before bder (size: ${asize(a)})\n ${string(erase(a))}"}
+      println{s"Aregex after bder (size: ${asize(ad1)})\n ${string(erase(ad1))}"}
+      println{s"Aregex after bsimp (size: ${asize(ad)})\n ${string(erase(ad))}"}
+      println{s"Aregex after bsimp_full (size: ${asize(adf)})\n ${string(erase(adf))}"}
+    }
+    ders_test(n + 1, s, rd, ad)
+  }
+}
+
+val prg = (fib_prog * 10).toList
+ders_test(0, prg, WHILE_REGS, internalise(WHILE_REGS))
+
+
--- a/text/proposal.tex	Mon Feb 04 13:10:26 2019 +0000
+++ b/text/proposal.tex	Thu Feb 07 10:52:41 2019 +0000
@@ -177,7 +177,7 @@
 otherwise the scanning programs would not be useful as a hardening
 technique for servers. The quest for speed with these decisions is
 presently a rather big challenge for the amount of traffic typical
-serves have to cope with and the large number of patterns that might
+servers have to cope with and the large number of patterns that might
 be of interest.  The problem is that when thousands of regular
 expressions are involved, the process of detecting whether a string
 matches a regular expression can be excruciating slow. This might not