run
authorChengsong
Wed, 13 Mar 2019 13:14:38 +0000
changeset 0 902326e1615a
child 1 99f4459d9bb6
run scalac lex_blex_Frankensteined.scala Brexp.scala Element.scala Partial.scala Spiral.scala, then run scala Spiral to see the results
Brexp.scala
Element.scala
Partial.scala
Spiral.scala
lex_blex_Frankensteined.scala
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Brexp.scala	Wed Mar 13 13:14:38 2019 +0000
@@ -0,0 +1,143 @@
+import RexpRelated._
+import RexpRelated.Rexp._
+import Spiral._
+import scala.collection.mutable.ArrayBuffer
+abstract class BRexp
+case object BZERO extends BRexp
+case object BONE extends BRexp
+case class BCHAR(c: Char) extends BRexp
+case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp 
+case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp 
+case class BSTAR(r: BRexp) extends BRexp 
+
+object BRexp{
+  def brternalise(r: Rexp) : BRexp = r match {
+  //remember: the initial bit is set to Z, as there is no enclosing STAR or SEQ yet
+    case ZERO => BZERO
+    case ONE => BONE
+    case CHAR(c) => BCHAR(c)
+    case ALTS(rs) => BALTS(Z,  rs.map(brternalise))
+    case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) )
+    case STAR(r) => BSTAR(brternalise(r))
+    case RECD(x, r) => brternalise(r)
+  }
+  def brnullable (r: BRexp) : Boolean = r match {
+    case BZERO => false
+    case BONE => true
+    case BCHAR(_) => false
+    case BALTS(bs, rs) => rs.exists(brnullable)
+    case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2)
+    case BSTAR(_) => true
+  }
+  def brder(c: Char, r: BRexp) : BRexp = r match {
+    case BZERO => BZERO
+    case BONE => BZERO
+    case BCHAR(d) => if (c == d) BONE else BZERO
+    case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD
+    case BSEQ(r1, r2) => 
+      if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c ~ r2, r2's structure is maintained together with its splittability bit
+      else BSEQ(brder(c, r1), r2)
+    case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r))
+  }
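+  // Illustration (sketch): brternalise(ALT("a", "b")) gives BALTS(Z, List(BCHAR('a'), BCHAR('b')));
+  // brder('a', _) of that term yields BALTS(S, List(BONE, BZERO)), i.e. the derivative
+  // flips the bit to S, marking the alternative as splittable for bspill.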
+  def bflat(rs: List[BRexp]): List[BRexp] = {
+    //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
+    //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil  May add early termination later.
+    rs match {
+      case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
+      case BZERO :: rs1 => bflat(rs1)
+      case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2)
+      case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2)
+      case r1 :: rs2 => r1 :: bflat(rs2)
+    }
+  }
+  def stflat(rs: List[BRexp]): List[BRexp] = {
+    //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
+    //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil  May add early termination later.
+    rs match {
+      case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
+      case BZERO :: rs1 => stflat(rs1)
+      case BALTS(_, rs1) :: rs2 => rs1 ::: stflat(rs2)
+      case r1 :: rs2 => r1 :: stflat(rs2)
+    }
+  }
+  def berase(r:BRexp): Rexp = r match{
+    case BZERO => ZERO
+    case BONE => ONE
+    case BCHAR(f) => CHAR(f)
+    case BALTS(bs, rs) => ALTS(rs.map(berase(_)))
+    case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2))
+    case BSTAR(r)=> STAR(berase(r))
+  }
+  def br_simp(r: BRexp): BRexp = r match {
+    case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match {
+        case (BZERO, _) => BZERO
+        case (_, BZERO) => BZERO
+        case (BONE, r2s) => r2s 
+        case (r1s, r2s) => BSEQ(r1s, r2s)
+    }
+    case BALTS(bs, rs) => {
+      //assert(bs.length == rs.length - 1)
+      val dist_res = if(bs == S) {//bit S: splittable, so flatten and deduplicate
+        val rs_simp = rs.map(br_simp)
+        val flat_res = bflat(rs_simp)
+        distinctBy(flat_res, berase)
+      }
+      else{//bit Z: not allowed to flatten or deduplicate
+        rs
+      }
+      dist_res match {
+        case Nil => {BZERO}
+        case s :: Nil => { s}
+        case rs => {BALTS(bs, rs)}
+      }
+    }
+    //case BSTAR(r) => BSTAR(r)
+    case r => r
+  }
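+  // Sketch: br_simp(BALTS(S, List(BCHAR('a'), BCHAR('a')))) == BCHAR('a'), because an
+  // S-marked alternative may be flattened and deduplicated (modulo berase), whereas a
+  // Z-marked alternative is neither flattened nor deduplicated.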
+
+  def strong_br_simp(r: BRexp): BRexp = r match {
+    case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match {
+        case (BZERO, _) => BZERO
+        case (_, BZERO) => BZERO
+        case (BONE, r2s) => r2s 
+        case (r1s, r2s) => BSEQ(r1s, r2s)
+    }
+    case BALTS(bs, rs) => {
+      //assert(bs.length == rs.length - 1)
+      val dist_res = {//always flatten, regardless of the bit
+        val rs_simp = rs.map(strong_br_simp)
+        val flat_res = stflat(rs_simp)
+        distinctBy(flat_res, berase)
+      }
+      dist_res match {
+        case Nil => {BZERO}
+        case s :: Nil => { s}
+        case rs => {BALTS(bs, rs)}
+      }
+    }
+    //case BSTAR(r) => BSTAR(r)
+    case r => r
+  }
+
+  def break_chain(bs: Bit, rs: List[BRexp]): List[Rexp] = {
+    //do it in a functional style
+    if(bs == S)
+      rs.flatMap(bspill)
+    else
+      List(ALTS(rs.map(berase)))
+  }
+  
+  def bspill(r: BRexp): List[Rexp] = {//need to take SEQ(SEQ...) and many other structs into consideration
+      r match {//TODO: break chain rs according to bs
+          case BALTS(bs, rs) => {
+            break_chain(bs, rs)
+          }
+            //rs.flatMap(r1 =>  bspill(r1)  ).toSet    how to write if r's bit says no, do not split it with the adjacent regex?
+          case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) )
+          case BZERO => List()
+          case r => List(berase(r))
+      }
+    
+  }
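+  // Sketch: bspill(BALTS(S, List(BCHAR('a'), BCHAR('b')))) == List(CHAR('a'), CHAR('b')),
+  // while with bit Z the alternative is kept whole: List(ALTS(List(CHAR('a'), CHAR('b')))).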
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Element.scala	Wed Mar 13 13:14:38 2019 +0000
@@ -0,0 +1,89 @@
+import Element.elem
+abstract class Element{
+  def contents: Array[String]
+
+  def height: Int = contents.length
+  def width: Int = contents(0).length
+
+  
+
+  def above(that: Element): Element = {
+    //new ArrayElement(this.contents ++ that.contents)
+    val this1 = this widen that.width
+    val that1 = that widen this.width
+    elem(this1.contents ++ that1.contents)
+  }
+  def left_align(that: Element): Element = {
+    if (this.width == that.width){
+      this above that
+    }
+    else if (this.width < that.width) {
+      (this beside elem(' ', that.width - this.width, this.height)) above that
+    }
+    else {
+      this above (that beside elem(' ', this.width - that.width, that.height))
+    }
+  }
+  def up_align(that: Element): Element = {
+    if (this.height == that.height){
+      this beside that
+    }
+    else if (this.height < that.height) {
+      (this above elem(' ', this.width, that.height - this.height)) beside that
+    }
+    else {
+      this beside (that above elem(' ', that.width, this.height - that.height))
+    }  
+  }
+  def beside(that: Element): Element = {
+    val this1 = this heighten that.height
+    val that1 = that heighten this.height
+    elem(
+      for ((line1, line2) <- this1.contents zip that1.contents)
+      yield line1 + line2)
+  }
+  def widen(w: Int): Element = 
+    if(w <= width) this
+    else {
+      val left = Element.elem(' ', (w - width) / 2, height)
+      val right = Element.elem(' ', w - width - left.width, height)
+      left beside this beside right
+    }
+  def heighten(h: Int): Element = 
+    if (h <= height) this
+    else {
+      val top = Element.elem(' ', width, (h - height) / 2)
+      val bot = Element.elem(' ', width, h - height - top.height)
+      top above this above bot
+    }
+  override def toString = contents mkString "\n"
+}
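+// Usage sketch: elem("ab") above elem("cd") renders as "ab" on top of "cd";
+// beside joins contents line by line, and widen/heighten pad with spaces so the pieces align.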
+object Element {
+  private class ArrayElement(
+    val contents: Array[String]
+  ) extends Element
+
+  private class LineElement(s: String) extends Element {
+    val contents = Array(s)
+    override def width = s.length
+    override def height = 1
+  }
+
+  private class UniformElement(
+    ch: Char,
+    override val width: Int,
+    override val height: Int
+  ) extends Element {
+    private val line = ch.toString * width
+    def contents = Array.fill(height)(line)
+  }
+
+  def elem(contents: Array[String]): Element =
+    new ArrayElement(contents)
+    
+  def elem(chr: Char, width: Int, height: Int): Element =
+    new UniformElement(chr, width, height)
+    
+  def elem(line: String): Element =
+      new LineElement(line)
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Partial.scala	Wed Mar 13 13:14:38 2019 +0000
@@ -0,0 +1,129 @@
+import RexpRelated._
+import RexpRelated.Rexp._
+import Spiral._
+object Partial{
+  def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+    case ZERO => Set()
+    case ONE => rs
+    case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
+  }
+  def cir_prod(l: Lin, t: Rexp): Lin = t match {//note: this Lin is different from the Lin in Antimirov's paper; here it does not mean the set of all zero-free linear forms, but simply the type of a set of monomials (a linear form)
+    case ZERO => Set()
+    case ONE => l
+    case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
+  }
+  def lf(r: Rexp): Lin = r match {
+    case ZERO => Set()
+    case ONE => Set()
+    case CHAR(f) => {
+      //val Some(c) = alphabet.find(f) 
+      Set((f, ONE))
+    }
+    case ALTS(rs) => {
+      rs.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+    }
+    case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+    case SEQ(r1, r2) =>{
+      if (nullable(r1))
+        cir_prod(lf(r1), r2) ++ lf(r2)
+      else
+        cir_prod(lf(r1), r2) 
+    }
+  }
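+  // Sketch: lf(SEQ(CHAR('a'), CHAR('b'))) == Set(('a', CHAR('b'))), i.e. the single
+  // monomial "consume 'a', then continue with b".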
+  def lfs(r: Set[Rexp]): Lin = {
+    r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+  }
+
+  def pder(x: Char, t: Rexp): Set[Rexp] = {
+    val lft = lf(t)
+    lft.filter(mon => mon._1 == x).map(mon => mon._2)
+  }
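+  // Sketch: pder('a', SEQ(CHAR('a'), CHAR('b'))) == Set(CHAR('b')), the partial
+  // derivatives of a ~ b w.r.t. the character 'a'.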
+  def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+    case x::xs => pders(xs, pder(x, t))
+    case Nil => Set(t)
+  }
+  def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+    case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+    case Nil => ts 
+  }
+  def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+  def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+  //all implementations of partial derivatives that involve set union are potentially buggy,
+  //because they do not include the original regular expression before it is pdered.
+  //currently only pderas is fixed.
+  def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+  
+  def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+  def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+  def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+  def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+  def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+    case ZERO => Set[Rexp]()
+    case ONE => Set[Rexp]()
+    case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+    case ALTS(rs) => rs.foldLeft(Set[Rexp]())((acc, r) => acc ++ pdp(x, r))
+    case STAR(r1) => pdp(x, r1).map(a => SEQ(a, STAR(r1)))
+    case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+  }
+  def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+    case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+    case Nil => ts   
+  }
+  def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+
+  //this function currently has the problem of counting the same char as different ones 
+  //because they are defined as "pred"
+  def subterms(r: Rexp): Set[Rexp] = {//excluding zeros.
+    r match {
+      case ZERO => {
+        Set[Rexp]()
+      }
+      case SEQ(r1, r2) => {
+        Set(r1, r2) ++ subterms(r1) ++ subterms(r2)
+      }
+      case ALTS(rs) => {
+        rs.toSet ++ rs.foldLeft(Set[Rexp]())((acc, r) => acc ++ subterms(r))
+      }
+      case STAR(r) => {
+        Set(r) ++ subterms(r)
+      }
+      case r => Set(r)
+    }
+  }
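+  // Sketch: subterms(SEQ(CHAR('a'), CHAR('b'))) == Set(CHAR('a'), CHAR('b')); a compound
+  // term is only included when it occurs as a child of another term.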
+  def comp(s: List[Char], t: Rexp) = {
+    //var r = internalise(t)
+    //var setr = Set(t)
+    
+    /*for(i <- 0 to s.length - 1){
+      val mamaipi = bsimp(bder(s(i), r))
+      val mamaimapi = pdps(List(s(i)), setr)
+      //compare dersimp and pder w.r.t each character in string s
+      println("current string: " + s.slice(0,i + 1))
+      println("der+simp: ")
+      println(aregx_tree(mamaipi))
+      println("pder: ")
+      mamaimapi.foreach(m => println(regx_tree(m)))
+      r = mamaipi
+      setr = mamaimapi 
+    }*/
+    for(i <- 1 to 10)
+      println(pderas(Set(t), i).size, i)
+    //val alphabet_star_t = pderas(Set(t), 10)
+    //println("all possible elements in pder (probably...): ")
+    //alphabet_star_t.foreach(r => println(regx_tree(r)))
+  }
+}
+/*    val delta = lfs(t).map(mon => mon._2)                
+    if(delta.size == t.size)
+    {
+      println(delta.size)
+      println("steady: "+delta.size)
+      return delta
+    }
+      
+    else
+    {
+      val a = pderas(delta)
+      println("increment: "+delta.size+a.size)
+      return a
+    }*/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Spiral.scala	Wed Mar 13 13:14:38 2019 +0000
@@ -0,0 +1,351 @@
+import Element.elem
+import RexpRelated._
+import RexpRelated.Rexp._
+import Partial._
+import BRexp._
+import scala.collection.mutable.ListBuffer
+object Spiral{
+
+  val space = elem(" ")
+  val corner = elem("+")
+
+  def spiral(nEdges: Int, direction: Int): Element = {
+    if(nEdges == 1)
+      elem("+")
+    else {
+      val sp = spiral(nEdges - 1, (direction + 3) % 4)
+      def verticalBar = elem('|', 1, sp.height)
+      def horizontalBar = elem('-', sp.width, 1)
+      if(direction == 0)
+        (corner beside horizontalBar) above sp//(sp beside space)
+      else if (direction == 1)
+        sp beside (corner above verticalBar)
+      else if (direction == 2)
+        (space beside sp) above (horizontalBar beside corner)
+      else
+        (verticalBar above corner) beside (space above sp)
+    }
+  }
+  val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c')
+  def bregx_tree(r: BRexp): Element = regx_tree(berase(r))
+  def regx_tree(r: Rexp): Element = aregx_tree(internalise(r))
+  def aregx_tree(r: ARexp): Element = {
+    r match {
+      case ACHAR(bs, d) => {
+        //val Some(d) = alphabet.find(f)
+        d match {
+          case '\n' => elem("\\n")
+          case '\t' => elem("\\t")
+          case ' ' => elem("space")
+          case d => elem(d.toString)
+        }   
+      }
+      case AONE(bs) => {
+        elem("ONE")
+      }
+      case AZERO => {
+        elem("ZERO")
+      }
+      case ASEQ(bs, r1, r2) => {
+        binary_print("SEQ", r1, r2)
+      }
+      case AALTS(bs, rs) => {
+        //elem("Awaiting completion")
+        list_print("ALT", rs)
+      }
+      case ASTAR(bs, r) => {
+        list_print("STA", List(r))
+      }
+    }
+  }
+  val port = elem(" └-")
+  def list_print(name: String, rs: List[ARexp]): Element = {
+    rs match {
+      case r::Nil => {
+        val pref = aregx_tree(r)
+        val head = elem(name)
+        (head left_align (port up_align pref) ) 
+      }
+      case r2::r1::Nil => {
+        binary_print(name, r2, r1)
+      }
+      case r::rs1 => {
+        val pref = aregx_tree(r)
+        val head = elem(name)
+        if (pref.height > 1){
+          val link = elem('|', 1, pref.height - 1)
+          (head left_align ((port above link) beside pref)) left_align tail_print(rs1)    
+        }
+        else{
+          (head left_align (port beside pref) ) left_align tail_print(rs1)
+        }
+      }
+    }
+  }
+  def tail_print(rs: List[ARexp]): Element = {
+    rs match {
+      case r2::r1::Nil => {
+        val pref = aregx_tree(r2)
+        val suff = aregx_tree(r1)
+        if (pref.height > 1){
+          val link = elem('|', 1, pref.height - 1)
+          ((port above link) beside pref) left_align (port up_align suff)
+        }
+        else{
+          (port beside pref) left_align (port up_align suff)
+        } 
+      }
+      case r2::rs1 => {
+        val pref = aregx_tree(r2)
+        
+        if (pref.height > 1){
+          val link = elem('|', 1, pref.height - 1)
+          ((port above link) beside pref) left_align tail_print(rs1)//(port up_align tail_print(rs1) )
+        }
+        else{
+          (port beside pref) left_align tail_print(rs1)//(port up_align tail_print(rs1))
+        } 
+        //pref left_align tail_print(rs1)
+      }
+    }
+  }
+
+  def binary_print(name: String, r1: ARexp, r2: ARexp): Element = {
+    val pref = aregx_tree(r1)
+    val suff = aregx_tree(r2)
+    val head = elem(name)
+    if (pref.height > 1){
+      val link = elem('|', 1, pref.height - 1)
+      (head left_align ((port above link) beside pref) ) left_align (port up_align suff)
+    }
+    else{
+      (head left_align (port beside pref) ) left_align (port up_align suff)
+    }
+  }
+  val arr_of_size = ListBuffer.empty[Int]
+  def spill(r: Rexp, or: Rexp): Set[Rexp] = {
+    if(r == or) 
+      Set(r)
+    else{
+      r match {
+          case ALTS(rs) => rs.flatMap(r1 => spill(r1, or)).toSet
+          case SEQ(ALTS(rs), r3) => rs.flatMap(r1 => spill(r1, or).map(a => if(a == ONE) r3 else SEQ(a, r3)) ).toSet
+          case ZERO => Set()
+          case r => Set(r)
+      }
+    }
+  }
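+  // Sketch: spill(ALTS(List(CHAR('a'), CHAR('b'))), ONE) == Set(CHAR('a'), CHAR('b')):
+  // alternatives are split up into a set, unless the term equals the reference regex or.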
+  def pC(r: Rexp): Set[Rexp] = {//PD's companion
+    r match {
+      case SEQ(r1, r2) => pC(r2)
+      case ALTS(rs) => rs.flatMap(a => pC(a) ).toSet
+      case CHAR(c) => Set(r)
+      case r => Set()
+    }
+  }
+
+  def aspill(ar: ARexp, or: Rexp): Set[Rexp] = spill(erase(ar), or)
+  def illustration(r: Rexp, s: String){
+    var i_like_imperative_style = internalise(r)
+    val all_chars = s.toList
+    for (i <- 0 to s.length - 1){
+      val der_res =  bder(all_chars(i), i_like_imperative_style)
+      val simp_res = bsimp(der_res)
+      println("The original regex, the regex after derivative w.r.t " + all_chars(i) + " and the simplification of the derivative.")
+      println(aregx_tree(i_like_imperative_style) up_align aregx_tree(der_res) up_align aregx_tree(simp_res))
+      //println(asize(i_like_imperative_style), asize(der_res), asize(simp_res))
+      arr_of_size += asize(i_like_imperative_style)
+      //println(asize(simp_res), asize(simp_res) / arr_of_size(0) )
+      i_like_imperative_style = simp_res
+    }
+    arr_of_size += asize(i_like_imperative_style)
+  }
+  val ran = scala.util.Random
+  var alphabet_size = 3
+  def balanced_seq_star_gen(depth: Int, star: Boolean): Rexp = {
+    if(depth == 1){
+      ((ran.nextInt(4) + 97).toChar).toString
+    }
+    else if(star){
+      STAR(balanced_seq_star_gen(depth - 1, false))
+    }
+    else{
+      SEQ(balanced_seq_star_gen(depth - 1, true), balanced_seq_star_gen(depth - 1, true))
+    }
+  }
+  def max(i: Int, j: Int): Int = {
+    if(i > j)
+      i
+    else 
+      j
+  }
+  def random_struct_gen(depth:Int): Rexp = {
+    val dice = ran.nextInt(3)
+    val dice2 = ran.nextInt(3)
+    (dice, depth) match {
+      case (_, 0) => ((ran.nextInt(3) + 97).toChar).toString
+      case (0, i) => STAR(random_struct_gen(max(0, i - 1 - dice2)))
+      case (1, i) => SEQ(random_struct_gen(max(0, i - 1 - dice2)), random_struct_gen(max(0, i - 1 - dice2)))
+      case (2, i) => ALTS( List(random_struct_gen(max(0, i - 1 - dice2)), random_struct_gen(max(0, i - 1 - dice2))) )
+    }
+  }
+  def balanced_struct_gen(depth: Int): Rexp = {
+    val dice = ran.nextInt(3)
+    (dice, depth) match {
+      case (_, 0) => ((ran.nextInt(3) + 97).toChar).toString
+      case (0, i) => STAR(random_struct_gen(depth - 1))
+      case (1, i) => SEQ(random_struct_gen(depth - 1), random_struct_gen(depth - 1))
+      case (2, i) => ALTS( List(random_struct_gen(depth - 1), random_struct_gen(depth - 1) ) )
+    }
+  }
+  def rd_string_gen(alp_size: Int, len: Int): String = {
+    if( len > 0)
+      ((ran.nextInt(alp_size) + 97).toChar).toString + rd_string_gen(alp_size, len - 1)
+    else
+      ((ran.nextInt(alp_size) + 97).toChar).toString
+  }
+  def plot(b: List[Int]){
+    println(b(0),b.max)
+
+  }
+  def dep_exp(depth: List[Int]){
+    for(i <- depth){
+      arr_of_size.clear()
+      val s = rd_string_gen(alphabet_size, (i-8)*(i-8)+10)
+      val r = random_struct_gen(i)
+      println("depth: "+i)
+      illustration(r, s) //"abcabadaaadcabdbabcdaadbabbcbbdabdabbcbdbabdbcdb") 
+      //println("depth: " + i + " general stats:"+ arr_of_size(0), arr_of_size.max, arr_of_size.max/arr_of_size(0))
+      //println("x y label alignment")
+      /*for(i <- 0 to s.length - 1){
+        if(s(i) == '\n')
+          println(i+" "+arr_of_size(i)+" "+"nl"+" -140")
+        else if(s(i) ==  ' ')
+          println(i+" "+arr_of_size(i)+" "+"sp"+" -140")
+        else
+          println(i+" "+arr_of_size(i)+" "+s(i)+" -140")
+      }*/
+      //println(s.length + " " + arr_of_size(s.length) + " ]" + " -140")
+    }
+  }
+  def case_study(ss: List[String], r: Rexp){
+    for(s <- ss){
+      arr_of_size.clear()
+      illustration(r, s)
+      println("x y label alignment")
+      for(i <- 0 to s.length - 1){
+        if(s(i) == '\n')
+          println(i+" "+arr_of_size(i)+" "+"nl"+" -140")
+        else if(s(i) ==  ' ')
+          println(i+" "+arr_of_size(i)+" "+"sp"+" -140")
+        else
+          println(i+" "+arr_of_size(i)+" "+s(i)+" -140")
+      }
+    }
+  }
+  def star_gen(dp: Int): Rexp = {
+    if(dp > 0)
+      STAR(star_gen(dp - 1))
+    else
+      "a"
+  }
+  def strs_gen(len: Int, num: Int): List[String] = {
+    if(num > 0){
+      rd_string_gen(3, len)::strs_gen(len, num - 1)
+    }
+    else{
+      Nil
+    }
+  }
+  def regx_print(r: Rexp): String = {
+    r match {
+      case ZERO =>
+        "ZERO"
+      case CHAR(c) => {
+         //val Some(c) = alphabet.find(f)
+         "\"" + c.toString + "\""
+      }
+      case ONE => {
+        "ONE"
+      }
+      case ALTS(rs) => {
+        "ALTS(List("+(rs.map(regx_print)).foldLeft("")((a, b) => if(a == "") b else a + "," + b)+"))"
+      }
+      case SEQ(r1, r2) => {
+        "SEQ(" + regx_print(r1) + "," + regx_print(r2) + ")"
+      }
+      case STAR(r) => {
+        "STAR(" + regx_print(r) + ")"
+      }
+    }
+  }
+  val mkst = "abcdefghijklmnopqrstuvwxyz"
+  def weak_sub_check(r: Rexp, s: String, i: Int, f: (List[Rexp], Set[Rexp]) => Boolean){
+    //we first compute pders over the set of all strings on the alphabet
+    val pd = pderas(Set(r), i + 4)
+    //then "b-internalise" the regular expression into a brexp(this is essentially 
+    //attaching a bit Z to every alts to signify that they come from the original regular expression)
+    var old = brternalise(r)
+    //this is for comparison between normal simp and the weakened version of simp
+    //normal simp will be performed on syncold
+    //weakened simp will be performed on old
+    var syncold = brternalise(r)
+    val all_chars = s.toList
+    for (i <- 0 to s.length - 1){
+      val syncder_res = brder(all_chars(i), syncold)
+      val syncsimp_res = strong_br_simp(syncder_res)
+      //see brder for detailed explanation
+      //just changes bit Z to S when deriving an ALTS, 
+      //signifying that the structure has been "touched" and
+      //therefore able to be spilled in the bspill function
+      val der_res =  brder(all_chars(i), old)
+      val simp_res = br_simp(der_res)
+      val anatomy = bspill(simp_res)
+      //track whether the number of regular expressions exceeds those in the PD set (remember PD means the pders over A*)
+      if(f(anatomy, pd)  == false){
+        /*println("regular expression")
+        println(regx_tree(r))
+        println("string at " + i)
+        println(s)
+        println("partial derivatives")
+        (pd.foreach(a => println(regx_tree(a))))
+        println("simp result")
+        println(bregx_tree(simp_res))
+        println("bspill result")
+        (anatomy.foreach(a => println(regx_tree(a))))*/
+        println(size(berase(syncsimp_res)))
+        println(size(berase(simp_res)))
+        println(anatomy.map(size).sum)
+        println(pd.map(size).sum)
+      }  
+      old = simp_res
+      syncold = syncsimp_res
+    }
+  }
+  def inclusion_truth(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = {
+    val aset = anatomy.toSet
+    if(aset subsetOf pd){
+      true
+    }
+    else{
+      println("inclusion not true")
+      false
+    }
+  }
+  def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true}
+  def size_expansion_rate(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = if(anatomy.size > (pd.size)*2 ) {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); inclusion_truth(anatomy, pd); false }else {true}
+  
+  def check_all(){
+    for(i <- 1 to 1)
+    {
+        val s = "bbb"//rd_string_gen(alphabet_size, 5)//"ac"//rd_string_gen(alphabet_size, 5)
+        val r = STAR(STAR(ALTS(List(SEQ(CHAR('b'),CHAR('b')), ALTS(List(CHAR('a'), CHAR('b')))))))//balanced_struct_gen(4)//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
+        //subset_check(r, s)
+        weak_sub_check(r, s, 5, size_expansion_rate)
+    }
+  }
+  def main(args: Array[String]) {
+    check_all()   
+  } 
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lex_blex_Frankensteined.scala	Wed Mar 13 13:14:38 2019 +0000
@@ -0,0 +1,880 @@
+package RexpRelated
+import scala.language.implicitConversions    
+import scala.language.reflectiveCalls
+import scala.annotation.tailrec   
+import scala.util.Try
+
+abstract class Bit
+case object Z extends Bit
+case object S extends Bit
+case class C(c: Char) extends Bit
+
+
+abstract class Rexp 
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALTS(rs: List[Rexp]) extends Rexp 
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
+case class STAR(r: Rexp) extends Rexp 
+case class RECD(x: String, r: Rexp) extends Rexp
+
+
+
+object Rexp{
+  type Bits = List[Bit]
+  // abbreviations
+  type Mon = (Char, Rexp)
+  type Lin = Set[Mon]
+  def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
+  def PLUS(r: Rexp) = SEQ(r, STAR(r))
+  def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
+
+
+  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+    case Nil => Nil
+    case (x::xs) => {
+      val res = f(x)
+      if (acc.contains(res)) distinctBy(xs, f, acc)  
+      else x::distinctBy(xs, f, res::acc)
+    }
+  } 
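+  // Sketch: distinctBy(List("a", "b", "A"), (s: String) => s.toLowerCase) == List("a", "b"),
+  // keeping the first element for every key produced by f.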
+  // some convenience for typing in regular expressions
+  def charlist2rexp(s : List[Char]): Rexp = s match {
+    case Nil => ONE
+    case c::Nil => CHAR(c)
+    case c::s => SEQ(CHAR(c), charlist2rexp(s))
+  }
+  implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+  implicit def RexpOps(r: Rexp) = new {
+    def | (s: Rexp) = ALT(r, s)
+    def % = STAR(r)
+    def ~ (s: Rexp) = SEQ(r, s)
+  }
+
+  implicit def stringOps(s: String) = new {
+    def | (r: Rexp) = ALT(s, r)
+    def | (r: String) = ALT(s, r)
+    def % = STAR(s)
+    def ~ (r: Rexp) = SEQ(s, r)
+    def ~ (r: String) = SEQ(s, r)
+    def $ (r: Rexp) = RECD(s, r)
+  }
+
+  // translation into ARexps
+  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+    case AZERO => AZERO
+    case AONE(cs) => AONE(bs ++ cs)
+    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+  }
+
+  def internalise(r: Rexp) : ARexp = r match {
+    case ZERO => AZERO
+    case ONE => AONE(Nil)
+    case CHAR(c) => ACHAR(Nil, c)
+    case ALTS(List(r1, r2)) => 
+      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+    case ALTS(r1::rs) => {
+      val AALTS(Nil, rs2) = internalise(ALTS(rs))
+      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+    }
+    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+    case STAR(r) => ASTAR(Nil, internalise(r))
+    case RECD(x, r) => internalise(r)
+  }
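+  // Sketch: internalise(ALT("a", "b")) ==
+  //   AALTS(Nil, List(ACHAR(List(Z), 'a'), ACHAR(List(S), 'b'))),
+  // i.e. each branch records, via Z/S bits, which alternative it came from.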
+
+  internalise(("a" | "ab") ~ ("b" | ""))
+
+  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+    case (ONE, bs) => (Empty, bs)
+    case (CHAR(_), C(c)::bs) => (Chr(c), bs)
+    case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
+    case (ALTS(rs), bs) => bs match {
+      case Z::bs1 => {
+        val (v, bs2) = decode_aux(rs.head, bs1)
+        (Left(v), bs2)
+      }
+      case S::bs1 => {
+        val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
+        (Right(v), bs2)			
+      }
+    }
+    case (SEQ(r1, r2), bs) => {
+      val (v1, bs1) = decode_aux(r1, bs)
+      val (v2, bs2) = decode_aux(r2, bs1)
+      (Sequ(v1, v2), bs2)
+    }
+    case (STAR(r1), S::bs) => {
+      val (v, bs1) = decode_aux(r1, bs)
+      //println(v)
+      val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+      (Stars(v::vs), bs2)
+    }
+    case (STAR(_), Z::bs) => (Stars(Nil), bs)
+    case (RECD(x, r1), bs) => {
+      val (v, bs1) = decode_aux(r1, bs)
+      (Rec(x, v), bs1)
+    }
+  }
+
+  def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+    case (v, Nil) => v
+    case _ => throw new Exception("Not decodable")
+  }
+
+
+  //erase function: extracts the Rexp from an ARexp
+  def erase(r:ARexp): Rexp = r match{
+    case AZERO => ZERO
+    case AONE(_) => ONE
+    case ACHAR(bs, f) => CHAR(f)
+    case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
+    case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+    case ASTAR(cs, r)=> STAR(erase(r))
+  }
+
+  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
+  // nullable function: tests whether the regular 
+  // expression can recognise the empty string
+  def nullable (r: Rexp) : Boolean = r match {
+    case ZERO => false
+    case ONE => true
+    case CHAR(_) => false
+    case ALTS(rs) => rs.exists(nullable)
+    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+    case STAR(_) => true
+    case RECD(_, r) => nullable(r)
+    //case PLUS(r) => nullable(r)
+  }
+
+  // derivative of a regular expression w.r.t. a character
+  def der (c: Char, r: Rexp) : Rexp = r match {
+    case ZERO => ZERO
+    case ONE => ZERO
+    case CHAR(f) => if (c == f) ONE else ZERO
+    case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
+    case SEQ(r1, r2) => 
+      if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
+      else SEQ(der(c, r1), r2)
+    case STAR(r) => SEQ(der(c, r), STAR(r))
+    case RECD(_, r1) => der(c, r1)
+    //case PLUS(r) => SEQ(der(c, r), STAR(r))
+  }
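+  // Sketch: der('a', SEQ(CHAR('a'), CHAR('b'))) == SEQ(ONE, CHAR('b')), since
+  // CHAR('a') is not nullable.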
+
+  def flatten(v: Val) : String = v match {
+    case Empty => ""
+    case Chr(c) => c.toString
+    case Left(v) => flatten(v)
+    case Right(v) => flatten(v)
+    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
+    case Stars(vs) => vs.map(flatten).mkString
+    case Rec(_, v) => flatten(v)
+  }
+
+  // extracts an environment from a value
+  def env(v: Val) : List[(String, String)] = v match {
+    case Empty => Nil
+    case Chr(c) => Nil
+    case Left(v) => env(v)
+    case Right(v) => env(v)
+    case Sequ(v1, v2) => env(v1) ::: env(v2)
+    case Stars(vs) => vs.flatMap(env)
+    case Rec(x, v) => (x, flatten(v))::env(v)
+  }
+
+
+  // injection part
+  def mkeps(r: Rexp) : Val = r match {
+    case ONE => Empty
+    case ALTS(List(r1, r2)) => 
+      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+    case STAR(r) => Stars(Nil)
+    case RECD(x, r) => Rec(x, mkeps(r))
+    //case PLUS(r) => Stars(List(mkeps(r)))
+  }
+
+  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+    case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
+    case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
+    case (CHAR(_), Empty) => Chr(c) 
+    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+    //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+  }
+  def lex(r: Rexp, s: List[Char]) : Val = s match {
+    case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
+    case c::cs => inj(r, c, lex(der(c, r), cs))
+  }
+
+  def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
+
+  // some "rectification" functions for simplification
+  def F_ID(v: Val): Val = v
+  def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+  def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+  def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+    case Right(v) => Right(f2(v))
+    case Left(v) => Left(f1(v))
+  }
+  def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+    case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+  }
+  def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
+    (v:Val) => Sequ(f1(Empty), f2(v))
+  def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
+    (v:Val) => Sequ(f1(v), f2(Empty))
+  def F_RECD(f: Val => Val) = (v:Val) => v match {
+    case Rec(x, v) => Rec(x, f(v))
+  }
+  def F_ERROR(v: Val): Val = throw new Exception("error")
+
+  // simplification of regular expressions returning also an
+  // rectification function; no simplification under STAR 
+  def simp(r: Rexp): (Rexp, Val => Val) = r match {
+    case ALTS(List(r1, r2)) => {
+      val (r1s, f1s) = simp(r1)
+      val (r2s, f2s) = simp(r2)
+      (r1s, r2s) match {
+        case (ZERO, _) => (r2s, F_RIGHT(f2s))
+        case (_, ZERO) => (r1s, F_LEFT(f1s))
+        case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+                  else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
+      }
+    }
+    case SEQ(r1, r2) => {
+      val (r1s, f1s) = simp(r1)
+      val (r2s, f2s) = simp(r2)
+      (r1s, r2s) match {
+        case (ZERO, _) => (ZERO, F_ERROR)
+        case (_, ZERO) => (ZERO, F_ERROR)
+        case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+        case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+        case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+      }
+    }
+    case RECD(x, r1) => {
+      val (r1s, f1s) = simp(r1)
+      (RECD(x, r1s), F_RECD(f1s))
+    }
+    case r => (r, F_ID)
+  }
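+  // Sketch: simp(ALTS(List(ZERO, CHAR('a'))))._1 == CHAR('a'); the accompanying
+  // rectification function maps a value for the simplified regex back to a value
+  // for the original one.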
+  /*
+  val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
+  val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
+  */
+  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+    case Nil => {
+      if (nullable(r)) {
+        mkeps(r) 
+      }
+      else throw new Exception("Not matched")
+    }
+    case c::cs => {
+      val (r_simp, f_simp) = simp(der(c, r))
+      inj(r, c, f_simp(lex_simp(r_simp, cs)))
+    }
+  }
+
+  def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
+
+  //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
+
+  // filters out all white spaces
+  def tokenise(r: Rexp, s: String) = 
+    env(lexing_simp(r, s)).filterNot { _._1 == "w"}
+
+
+  //reads the string from a file 
+  def fromFile(name: String) : String = 
+    io.Source.fromFile(name).mkString
+
+  def tokenise_file(r: Rexp, name: String) = 
+    tokenise(r, fromFile(name))
+  
+  //   Testing
+  //============
+
+  def time[T](code: => T) = {
+    val start = System.nanoTime()
+    val result = code
+    val end = System.nanoTime()
+    println((end - start)/1.0e9)
+    result
+  }
+
+  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
+
+  // bnullable function: tests whether the annotated regular 
+  // expression can recognise the empty string
+  def bnullable (r: ARexp) : Boolean = r match {
+    case AZERO => false
+    case AONE(_) => true
+    case ACHAR(_,_) => false
+    case AALTS(_, rs) => rs.exists(bnullable)
+    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+    case ASTAR(_, _) => true
+  }
+
+  def mkepsBC(r: ARexp) : Bits = r match {
+    case AONE(bs) => bs
+    case AALTS(bs, rs) => {
+      val n = rs.indexWhere(bnullable)
+      bs ++ mkepsBC(rs(n))
+    }
+    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+    case ASTAR(bs, r) => bs ++ List(Z)
+  }
+
+  // derivative of a regular expression w.r.t. a character
+  def bder(c: Char, r: ARexp) : ARexp = r match {
+    case AZERO => AZERO
+    case AONE(_) => AZERO
+    case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
+    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+    case ASEQ(bs, r1, r2) => 
+      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
+      else ASEQ(bs, bder(c, r1), r2)
+    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+  }
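+  // Sketch: bder('a', internalise(ALT("a", "b"))) ==
+  //   AALTS(Nil, List(AONE(List(Z, C('a'))), AZERO)),
+  // where the bit C('a') records the character that was consumed.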
+
+
+  def ders (s: List[Char], r: Rexp) : Rexp = s match {
+    case Nil => r
+    case c::s => ders(s, der(c, r))
+  }
+
+  // derivative w.r.t. a string (iterates bder)
+  @tailrec
+  def bders (s: List[Char], r: ARexp) : ARexp = s match {
+    case Nil => r
+    case c::s => bders(s, bder(c, r))
+  }
+
+  def flats(rs: List[ARexp]): List[ARexp] = rs match {
+      case Nil => Nil
+      case AZERO :: rs1 => flats(rs1)
+      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+      case r1 :: rs2 => r1 :: flats(rs2)
+    }
+  def rflats(rs: List[Rexp]): List[Rexp] = rs match {
+    case Nil => Nil
+    case ZERO :: rs1 => rflats(rs1)
+    case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
+    case r1 :: rs2 => r1 :: rflats(rs2)
+  }
+  //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
+  //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
+  var flats_time = 0L
+  var dist_time = 0L
+  /*
+  def bsimp(r: ARexp, depth: Int): ARexp = 
+  {
+    r match {
+      case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
+          case (AZERO, _) => AZERO
+          case (_, AZERO) => AZERO
+          case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+          case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+      }
+      case AALTS(bs1, rs) => {
+        depth match {
+          case 0 => {
+            flats(distinctBy(rs, erase)) match {
+              case Nil => AZERO
+              case s :: Nil => fuse(bs1, s)
+              case rs => AALTS(bs1, rs) 
+            } 
+          }
+          case n => {
+            val rs_simp = rs.map(bsimp(_, n - 1))
+            val time2 = System.nanoTime()
+            val flat_res = flats(rs_simp)
+            val time3 = System.nanoTime()
+            val dist_res = distinctBy(flat_res, erase)
+            val time4 = System.nanoTime()
+            flats_time = flats_time + time3 - time2
+            dist_time = dist_time + time4 - time3
+            //flats_time += time3 - time2
+            //dist_time += time4 - time3
+            //distinctBy(flats(rs.map(bsimp)), erase) match {
+            dist_res match {
+              case Nil => AZERO
+              case s :: Nil => fuse(bs1, s)
+              case rs => AALTS(bs1, rs)  
+            }
+          }
+        }
+      }
+      case r => r
+    }
+  }
+  */
+  //----------------------------------------------------------------------------This bsimp is the original slow one
+  
+  def bsimp(r: ARexp): ARexp = r match {
+    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+        case (AZERO, _) => AZERO
+        case (_, AZERO) => AZERO
+        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+    }
+    case AALTS(bs1, rs) => {
+      val rs_simp = rs.map(bsimp)
+      val time2 = System.nanoTime()
+      val flat_res = flats(rs_simp)
+      val time3 = System.nanoTime()
+      val dist_res = distinctBy(flat_res, erase)
+      val time4 = System.nanoTime()
+      flats_time = flats_time + time3 - time2
+      dist_time = dist_time + time4 - time3
+      dist_res match {
+        case Nil => AZERO
+        case s :: Nil => fuse(bs1, s)
+        case rs => AALTS(bs1, rs)  
+      }
+    }
+    case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
+    case r => r
+  }
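+  // Sketch: for r = internalise(ALT("a", "a")), bsimp(bder('a', r)) == AONE(List(Z, C('a'))):
+  // the duplicate branches are removed (modulo erase) and the singleton alternative is fused away.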
+
+  def bsimp_weakened(r: ARexp): ARexp = r match {
+    case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
+        case (AZERO, _) => AZERO
+        case (_, AZERO) => AZERO
+        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+    }
+    case AALTS(bs1, rs) => {
+      val rs_simp = rs.map(bsimp_weakened)
+      val time2 = System.nanoTime()
+      val flat_res = flats(rs_simp)
+      val time3 = System.nanoTime()
+      val dist_res = distinctBy(flat_res, erase)
+      val time4 = System.nanoTime()
+      flats_time = flats_time + time3 - time2
+      dist_time = dist_time + time4 - time3
+      dist_res match {
+        case Nil => AZERO
+        case s :: Nil => fuse(bs1, s)
+        case rs => AALTS(bs1, rs)  
+      }
+    }
+    case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
+    case r => r
+  }
+
+  def simp_weakened(r: Rexp): Rexp = r match {
+    case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
+        case (ZERO, _) => ZERO
+        case (_, ZERO) => ZERO
+        case (ONE, r2s) => r2s
+        case (r1s, r2s) => SEQ(r1s, r2s)
+    }
+    case ALTS(rs) => {
+      val rs_simp = rs.map(simp_weakened)
+      val flat_res = rflats(rs_simp)
+      val dist_res = flat_res.distinct
+      dist_res match {
+        case Nil => ZERO
+        case s :: Nil => s
+        case rs => ALTS(rs)  
+      }
+    }
+    case STAR(r) => STAR(simp_weakened(r))
+    case r => r
+  }
+    
+  
+  //----------------------------------------------------------------------------experiment bsimp
+  /*
+  def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
+    case Nil => r
+    case c::s => bders_simp(s, bsimp(bder(c, r)))
+  }
+  */
+  /*
+  def time[T](code: => T) = {
+    val start = System.nanoTime()
+    val result = code
+    val end = System.nanoTime()
+    println((end - start)/1.0e9)
+    result
+  }
+  */
+  // main unsimplified lexing function (produces a value)
+  def blex(r: ARexp, s: List[Char]) : Bits = s match {
+    case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
+    case c::cs => {
+      val der_res = bder(c,r)
+      blex(der_res, cs)
+    }
+  }
+
+  def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
+  //def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
+
+  var bder_time = 0L
+  var bsimp_time = 0L
+  var mkepsBC_time = 0L
+  var small_de = 2
+  var big_de = 5
+  var usual_de = 3
+  
+  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+    case Nil => {
+      if (bnullable(r)) {
+        //println(asize(r))
+        val time4 = System.nanoTime()
+        val bits = mkepsBC(r)
+        val time5 = System.nanoTime()
+        mkepsBC_time = time5 - time4
+        bits
+      }
+    else throw new Exception("Not matched")
+    }
+    case c::cs => {
+      val time1 = System.nanoTime()
+      val der_res = bder(c,r)
+      val time2 = System.nanoTime()
+      val simp_res = bsimp(der_res)
+      val time3 = System.nanoTime()  
+      bder_time = bder_time + time2 - time1
+      bsimp_time = bsimp_time + time3 - time2
+      blex_simp(simp_res, cs)      
+    }
+  }
+  def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
+    case Nil => r
+    case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
+  }
+
+  //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
+  /*
+  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+    case Nil => {
+      if (bnullable(r)) {
+        //println(asize(r))
+        mkepsBC(r)
+      }
+    else throw new Exception("Not matched")
+    }
+    case c::cs => {
+      val der_res = bder(c,r)
+      val simp_res = bsimp(der_res)  
+      //val simp_res2 = bsimp(simp_res)  
+      //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
+      blex_simp(simp_res, cs)
+    }
+  }
+  */
+  /*
+  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+    case Nil => {
+      if (nullable(r)) {
+        mkeps(r) 
+      }
+      else throw new Exception("Not matched")
+    }
+    case c::cs => {
+      val start = System.nanoTime()
+      val (r_simp, f_simp) = simp(der(c, r))
+      val end = System.nanoTime()
+      println((end - start)/1.0e9)
+      inj(r, c, f_simp(lex_simp(r_simp, cs)))
+    }
+  }
+  */
+
+  //size of a Rexp (and, via erase, of an ARexp) for testing purposes 
+  def size(r: Rexp) : Int = r match {
+    case ZERO => 1
+    case ONE => 1
+    case CHAR(_) => 1
+    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+    case ALTS(rs) => 1 + rs.map(size).sum
+    case STAR(r) => 1 + size(r)
+  }
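+  // Sketch: size(SEQ(CHAR('a'), CHAR('b'))) == 3 (one node for the SEQ plus one per character).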
+
+  def asize(a: ARexp) = size(erase(a))
+
+
+  // decoding does not work yet
+  def blexing_simp(r: Rexp, s: String) = {
+    //flats_time.clear()
+    //dist_time.clear()
+    flats_time = 0L
+    dist_time = 0L
+    bder_time = 0L
+    bsimp_time = 0L
+    mkepsBC_time = 0L
+    val start = System.nanoTime()
+    val bit_code = blex_simp(internalise(r), s.toList)
+    val end = System.nanoTime()
+    println("total time: "+ (end - start)/1.0e9)
+    println("spent on flats: " + (flats_time/(1.0e9)))
+    println("spent on distinctBy: " + (dist_time/(1.0e9)))
+    println("spent on bder: "+ bder_time/1.0e9)
+    println("spent on bsimp: " + bsimp_time/1.0e9)
+    println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
+    //println(s"The length of the string ${s.length}; length of bit sequence:")
+    //println((bit_code.length))
+    //println(final_derivative)
+    //bit_code
+    //decode(r, bit_code) 
+  }
+
+
+
+
+
+  // Lexing Rules for a Small While Language
+
+  //symbols
+  /*
+  val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
+  
+  //digits
+  val DIGIT = PRED("0123456789".contains(_))
+  //identifiers
+  val ID = SYM ~ (SYM | DIGIT).% 
+  //numbers
+  val NUM = STAR(DIGIT)
+  //keywords
+  val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
+  val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
+  //semicolons
+  val SEMI: Rexp = ";"
+  //operators
+  val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
+  val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
+  //whitespaces
+  val WHITESPACE = PLUS(" " | "\n" | "\t")
+  //parentheses
+  val RPAREN: Rexp = ")"
+  val LPAREN: Rexp = "("
+  val BEGIN: Rexp = "{"
+  val END: Rexp = "}"
+  //strings...but probably needs not
+  val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+
+
+
+  val WHILE_REGS = (("k" $ KEYWORD) | 
+                    ("i" $ ID) | 
+                    ("o" $ OP) | 
+                    ("n" $ NUM) | 
+                    ("s" $ SEMI) | 
+                    ("str" $ STRING) |
+                    ("p" $ (LPAREN | RPAREN)) | 
+                    ("b" $ (BEGIN | END)) | 
+                    ("w" $ WHITESPACE)).%
+
+  val AWHILE_REGS = (
+    ALTS(
+      List(
+        ("k" $ AKEYWORD), 
+                    ("i" $ ID),
+                    ("o" $ AOP) , 
+                    ("n" $ NUM) ,
+                    ("s" $ SEMI) ,
+                    ("str" $ STRING), 
+                    ("p" $ (LPAREN | RPAREN)), 
+                    ("b" $ (BEGIN | END)), 
+                    ("w" $ WHITESPACE)
+      )
+    )
+  ).%
+
+*/
+
+
+  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
+  /*
+  // Two Simple While programs
+  //========================
+  println("prog0 test")
+
+  val prog0 = """read n"""
+  println(env(lexing_simp(WHILE_REGS, prog0)))
+  println(tokenise(WHILE_REGS, prog0))
+
+  println("prog1 test")
+
+  val prog1 = """read  n; write (n)"""
+  println(tokenise(WHILE_REGS, prog1))
+
+  */
+  // Bigger Tests
+  //==============
+
+  def escape(raw: String): String = {
+    import scala.reflect.runtime.universe._
+    Literal(Constant(raw)).toString
+  }
+
+  val prog2 = """
+  write "Fib";
+  read n;
+  minus1 := 0;
+  minus2 := 1;
+  while n > 0 do {
+    temp := minus2;
+    minus2 := minus1 + minus2;
+    minus1 := temp;
+    n := n - 1
+  };
+  write "Result";
+  write minus2
+  """
+
+  val prog3 = """
+  start := 1000;
+  x := start;
+  y := start;
+  z := start;
+  while 0 < x do {
+  while 0 < y do {
+    while 0 < z do {
+      z := z - 1
+    };
+    z := start;
+    y := y - 1
+  };     
+  y := start;
+  x := x - 1
+  }
+  """
+  /*
+  for(i <- 400 to 400 by 1){
+    println(i+":")
+    blexing_simp(WHILE_REGS, prog2 * i)
+  } */
+
+    /*
+    for (i <- 2 to 5){
+      for(j <- 1 to 3){
+        println(i,j)
+        small_de = i
+        usual_de = i + j
+        big_de = i + 2*j 
+        blexing_simp(AWHILE_REGS, prog2 * 100)
+      }
+    }*/
+
+  /*
+  println("Tokens of prog2")
+  println(tokenise(WHILE_REGS, prog2).mkString("\n"))
+
+  val fib_tokens = tokenise(WHILE_REGS, prog2)
+  fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
+
+
+  val test_tokens = tokenise(WHILE_REGS, prog3)
+  test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
+  */
+
+  /*
+  println("time test for blexing_simp")
+  for (i <- 1 to 1 by 1) {
+    lexing_simp(WHILE_REGS, prog2 * i)
+    blexing_simp(WHILE_REGS, prog2 * i)
+    for( j <- 0 to each_simp_timeb.length - 1){
+      if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
+        println(j, each_simp_timeb(j), each_simp_time(j))
+    }
+  }
+  */
+
+
+  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
+
+
+
+  def clear() = {
+    print("")
+    //print("\33[H\33[2J")
+  }
+
+  //testing the two lexings produce the same value
+  //enumerates strings of length n over alphabet cs
+  def strs(n: Int, cs: String) : Set[String] = {
+    if (n == 0) Set("")
+    else {
+      val ss = strs(n - 1, cs)
+      ss ++
+      (for (s <- ss; c <- cs.toList) yield c + s)
+    }
+  }
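+  // Sketch: strs(2, "ab") == Set("", "a", "b", "aa", "ab", "ba", "bb"), i.e. all
+  // strings over {a, b} of length at most 2.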
+  def enum(n: Int, s: String) : Stream[Rexp] = n match {
+    case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
+    case n => {  
+      val rs = enum(n - 1, s)
+      rs #:::
+      (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
+      (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
+      (for (r1 <- rs) yield STAR(r1))
+    }
+  }
+
+  //tests blexing and lexing
+  def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
+    clear()
+    //println(s"Testing ${r}")
+    for (s <- ss.par) yield {
+      val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
+      val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
+      if (res1 != res2) println(s"Disagree on ${r} and ${s}")
+      if (res1 != res2) println(s"   ${res1} !=  ${res2}")
+      if (res1 != res2) Some((r, s)) else None
+    }
+  }
+
+
+
+  //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
+  /*
+  def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
+    for (s <- ss){
+      
+      val der_res = bder(c, ar) 
+      val simp_res = bsimp(der_res)
+      println(asize(der_res))
+      println(asize(simp_res))
+      single_expression_explorer(simp_res, (sc - c))
+    }
+  }*/
+
+  //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
+
+
+}
+
+import Rexp.Bits
+abstract class ARexp 
+case object AZERO extends ARexp
+case class AONE(bs: Bits) extends ARexp
+case class ACHAR(bs: Bits, f: Char) extends ARexp
+case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
+case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
+case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
+
+
+
+abstract class Val
+case object Empty extends Val
+case class Chr(c: Char) extends Val
+case class Sequ(v1: Val, v2: Val) extends Val
+case class Left(v: Val) extends Val
+case class Right(v: Val) extends Val
+case class Stars(vs: List[Val]) extends Val
+case class Rec(x: String, v: Val) extends Val
+//case class Pos(i: Int, v: Val) extends Val
+case object Prd extends Val