lex_blex_Frankensteined.scala
author Chengsong
Thu, 07 May 2020 11:36:15 +0100
changeset 150 b51d34113d47
parent 148 c8ef391dd6f7
child 151 73f990bc6843
permissions -rw-r--r--
currnet code

package RexpRelated
import scala.language.implicitConversions    
import scala.language.reflectiveCalls
import scala.annotation.tailrec   
import scala.util.Try

abstract class Bit
case object Z extends Bit
case object S extends Bit
//case class C(c: Char) extends Bit


abstract class Rexp 
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALTS(rs: List[Rexp]) extends Rexp 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp 
case class RECD(x: String, r: Rexp) extends Rexp



object Rexp{
  type Bits = List[Bit]
  // abbreviations
  type Mon = (Char, Rexp)
  type Lin = Set[Mon]
  def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
  def PLUS(r: Rexp) = SEQ(r, STAR(r))
  def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))


  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
    case Nil => Nil
    case (x::xs) => {
      val res = f(x)
      if (acc.contains(res)) distinctBy(xs, f, acc)  
      else x::distinctBy(xs, f, res::acc)
    }
  } 
  // some convenience for typing in regular expressions
  def charlist2rexp(s : List[Char]): Rexp = s match {
    case Nil => ONE
    case c::Nil => CHAR(c)
    case c::s => SEQ(CHAR(c), charlist2rexp(s))
  }
  implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)

  implicit def RexpOps(r: Rexp) = new {
    def | (s: Rexp) = ALT(r, s)
    def % = STAR(r)
    def ~ (s: Rexp) = SEQ(r, s)
  }

  implicit def stringOps(s: String) = new {
    def | (r: Rexp) = ALT(s, r)
    def | (r: String) = ALT(s, r)
    def % = STAR(s)
    def ~ (r: Rexp) = SEQ(s, r)
    def ~ (r: String) = SEQ(s, r)
    def $ (r: Rexp) = RECD(s, r)
  }

  // translation into ARexps
  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
    case AZERO => AZERO
    case AONE(cs) => AONE(bs ++ cs)
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
  }

  def internalise(r: Rexp) : ARexp = r match {
    case ZERO => AZERO
    case ONE => AONE(Nil)
    case CHAR(c) => ACHAR(Nil, c)
    case ALTS(List(r1, r2)) => 
      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
    case ALTS(r1::rs) => {
      val AALTS(Nil, rs2) = internalise(ALTS(rs))
      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
    }
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
    case STAR(r) => ASTAR(Nil, internalise(r))
    case RECD(x, r) => internalise(r)
  }

  internalise(("a" | "ab") ~ ("b" | ""))

  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
    case (ONE, bs) => (Empty, bs)
    case (CHAR(f), bs) => (Chr(f), bs)
    case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems only used when we simp a regex before derivatives and it contains things like alt("a")
    case (ALTS(rs), bs) => bs match {
      case Z::bs1 => {
        val (v, bs2) = decode_aux(rs.head, bs1)
        (Left(v), bs2)
      }
      case S::bs1 => {
        val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
        (Right(v), bs2)			
      }
    }
    case (SEQ(r1, r2), bs) => {
      val (v1, bs1) = decode_aux(r1, bs)
      val (v2, bs2) = decode_aux(r2, bs1)
      (Sequ(v1, v2), bs2)
    }
    case (STAR(r1), S::bs) => {
      val (v, bs1) = decode_aux(r1, bs)
      //println(v)
      val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
      (Stars(v::vs), bs2)
    }
    case (STAR(_), Z::bs) => (Stars(Nil), bs)
    case (RECD(x, r1), bs) => {
      val (v, bs1) = decode_aux(r1, bs)
      (Rec(x, v), bs1)
    }
    case (r, Nil) => (Stars(Nil), Nil)
  }

  def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
    case (v, Nil) => v
    case _ => throw new Exception("Not decodable")
  }

  def code(v: Val): Bits = v match {
    case Empty => Nil
    case Chr(a) => Nil
    case Left(v) => Z :: code(v)
    case Right(v) => S :: code(v)
    case Sequ(v1, v2) => code(v1) ::: code(v2)
    case Stars(Nil) => Z::Nil
    case Stars(v::vs) => S::code(v) ::: code(Stars(vs))
  }

  //note that left and right are not recorded
  //although they guide the retrival
  //in contrast with stars 
  def retrieve(r: ARexp, v: Val): Bits = (r,v) match {
    case (AONE(bs), Empty) => bs
    case (ACHAR(bs, c), Chr(d)) => bs
    case (AALTS(bs, a::Nil), v) => bs ++ retrieve(a, v)
    case (AALTS(bs, as), Left(v)) => bs ++ retrieve(as.head,v)
    case (AALTS(bs, as), Right(v)) => bs ++ retrieve(AALTS(Nil,as.tail),v)
    case (ASEQ(bs, a1, a2), Sequ(v1, v2)) => bs ++ retrieve(a1, v1) ++ retrieve(a2, v2)
    case (ASTAR(bs, a), Stars(Nil)) => bs ++ List(Z) 
    case (ASTAR(bs, a), Stars(v::vs)) => bs ++ List(S) ++ retrieve(a, v) ++ retrieve(ASTAR(Nil, a), Stars(vs))
  }//bug here last clause should not add list(S)
  //erase function: extracts the regx from Aregex
  def erase(r:ARexp): Rexp = r match{
    case AZERO => ZERO
    case AONE(_) => ONE
    case ACHAR(bs, f) => CHAR(f)
    case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
    case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
    case ASTAR(cs, r)=> STAR(erase(r))
  }

  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
  // nullable function: tests whether the regular 
  // expression can recognise the empty string
  def nullable (r: Rexp) : Boolean = r match {
    case ZERO => false
    case ONE => true
    case CHAR(_) => false
    case ALTS(rs) => rs.exists(nullable)
    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    case STAR(_) => true
    case RECD(_, r) => nullable(r)
    //case PLUS(r) => nullable(r)
  }

  // derivative of a regular expression w.r.t. a character
  def der (c: Char, r: Rexp) : Rexp = r match {
    case ZERO => ZERO
    case ONE => ZERO
    case CHAR(f) => if (c == f) ONE else ZERO
    case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
    case SEQ(r1, r2) => 
      if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
      else SEQ(der(c, r1), r2)
    case STAR(r) => SEQ(der(c, r), STAR(r))
    case RECD(_, r1) => der(c, r1)
    //case PLUS(r) => SEQ(der(c, r), STAR(r))
  }

  def ders (s: List[Char], r: Rexp) : Rexp = s match {
    case Nil => r
    case c::s => ders(s, der(c, r))
  }

def der_seqo(r:Rexp, s: List[Char],acc: List[Rexp]) : List[Rexp] = s match{
    case Nil => acc ::: List(r)
    case c::cs => der_seqo(der(c, r), cs, acc ::: List(r))
  }
  def der_seq_revo(r:Rexp, s: List[Char], acc: List[Rexp]): List[Rexp] = s match{
    case Nil => r::acc
    case c::cs =>der_seq_revo(r, cs, ders(s, r) :: acc  )
  }
  def re_closeo(l1: List[Rexp], l2: List[Rexp], re_init: Rexp): Rexp = l1 match {
    case Nil => re_init
    case c::cs => if(nullable(c)) re_closeo(cs, l2.tail, ALT(re_init,  l2.head)  ) 
    else re_closeo(cs, l2.tail, re_init)
  }

  //HERE
  def closed_string_dero(r1: Rexp, r2: Rexp, s: List[Char]): Rexp = {
    val l1 = der_seqo(r1, s, Nil)
    val l2 = der_seq_revo(r2, s, Nil)
    val Re = re_closeo((l1.reverse).tail, l2.tail, SEQ(l1.last, l2.head))
    Re
  }
  //derivative w.r.t string
def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
  case (Nil, r) => r
  case (s, ZERO) => ZERO
  case (s, ONE) => if (s == Nil) ONE else ZERO
  case (s, CHAR(c)) => if (s == List(c)) ONE else 
                       if (s == Nil) CHAR(c) else ZERO
  case (s, ALTS(List(r1, r2))) => ALT(ders2(s, r1), ders2(s, r2))
  case (s, SEQ(r1, r2)) => closed_string_dero(r1, r2, s)
  case (c::cs, STAR(r)) => closed_string_dero(der(c, r), STAR(r), cs)
}

  def flatten(v: Val) : String = v match {
    case Empty => ""
    case Chr(c) => c.toString
    case Left(v) => flatten(v)
    case Right(v) => flatten(v)
    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
    case Stars(vs) => vs.map(flatten).mkString
    case Rec(_, v) => flatten(v)
  }

  // extracts an environment from a value
  def env(v: Val) : List[(String, String)] = v match {
    case Empty => Nil
    case Chr(c) => Nil
    case Left(v) => env(v)
    case Right(v) => env(v)
    case Sequ(v1, v2) => env(v1) ::: env(v2)
    case Stars(vs) => vs.flatMap(env)
    case Rec(x, v) => (x, flatten(v))::env(v)
  }


  // injection part
  def mkeps(r: Rexp) : Val = r match {
    case ONE => Empty
    case ALTS(List(r1, r2)) => 
      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
    case STAR(r) => Stars(Nil)
    case RECD(x, r) => Rec(x, mkeps(r))
    //case PLUS(r) => Stars(List(mkeps(r)))
  }
  def haschr(r: Rexp) : Boolean = r match {
    case CHAR(c) => true
    case STAR(r0) => haschr(r0)
    case SEQ(r1, r2) => haschr(r1) && nullable(r2) || haschr(r2) && nullable(r1)
    case ALTS(List(r1, r2)) => haschr(r1) || haschr(r2)
    case ONE => false
    case ZERO => false
  }
  def haschar(r: Rexp, c: Char) : Boolean = r match {
    case CHAR(d) => if(c == d) true else false
    case SEQ(r1, r2) => if(haschar(r1, c) && nullable(r2)) true else if(haschar(r2, c) && nullable(r1) ) true else false
    case STAR(r) => if(haschar(r, c)) true else false
    case ALTS(List(r1, r2)) => if(haschar(r1, c) || haschar(r2, c)) true else false
    case ONE => false
    case ZERO => false 
  }
  def mkchr(r: Rexp) : Val = r match {
    case SEQ(r1, r2) => 
      if(haschr(r1) && nullable(r2)) Sequ(mkchr(r1), mkeps(r2)) 
      else if(nullable(r1) && haschr(r2)) Sequ(mkeps(r1), mkchr(r2))
      else throw new Exception(r.toString)
    case ALTS(List(r1, r2)) => if (haschr(r1)) Left(mkchr(r1)) else Right(mkchr(r2))
    case STAR(r0) => Stars(List(mkchr(r0)))
    case CHAR(c) => Chr(c)
    case _ => throw new Exception("the regex you gave me can't make a char")
  }
  def mkchar(r: Rexp, c: Char) : Val = r match {
    case CHAR(d) => Chr(c)//if(c == d)  Chr(c) else error
    case ALTS(List(r1, r2)) =>
      if (haschar(r1, c)) Left(mkchar(r1, c)) else Right(mkchar(r2, c))

    case SEQ(r1, r2) => {if(haschar(r1, c)) Sequ(mkchar(r1, c), mkeps(r2)) else Sequ(mkeps(r1), mkchar(r2, c))}
    case STAR(r) =>Stars(List(mkchar(r, c)))
  }
  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
    case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
    case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
    case (CHAR(_), Empty) => Chr(c) 
    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
    //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
  }
  def fuzzy_inj(r: ARexp, c: Char, v: Val) : Val = v match {
    case Stars(vs) => r match {//vs
      case ASTAR(bs, r0) => inj(     erase(r), c, Sequ(mkeps(erase(bder(c, r0))), v)     )
      case ASEQ(bs, r1, r2) => inj(erase(r), c, Sequ(mkeps(erase(bder(c, r1))), v) )
    }
    case Sequ(v1, v2) => r match {
      case ASTAR(bs, r0) => inj(erase(r), c, Sequ(mkchar(erase(bder(c, r0)), c), v2))
      case _ => v
    }
    case _ => v
  }
  /*def gen_rect(r: Rexp) : Val => Val = {
    //lingqi
    //buyao sanle 
    val r1 = bsimp(r)

  }
  def fuzzy_inj(r: ARexp, c: Char, v: Val) : Val = {
    val f = gen_rect(r)
    val vo = f(v)
    inj(r, c, vo)
  }*/
  def lex(r: Rexp, s: List[Char]) : Val = s match {
    case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
    case c::cs => inj(r, c, lex(der(c, r), cs))
  }

  def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)

  // some "rectification" functions for simplification
  def F_ID(v: Val): Val = v
  def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
  def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
  def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
    case Right(v) => Right(f2(v))
    case Left(v) => Left(f1(v))
  }
  def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
    case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
  }
  def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
    (v:Val) => Sequ(f1(Empty), f2(v))
  def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
    (v:Val) => Sequ(f1(v), f2(Empty))
  def F_RECD(f: Val => Val) = (v:Val) => v match {
    case Rec(x, v) => Rec(x, f(v))
  }
  def F_ERROR(v: Val): Val = throw new Exception("error")

  // simplification of regular expressions returning also an
  // rectification function; no simplification under STAR 
  def simp(r: Rexp): (Rexp, Val => Val) = r match {
    case ALTS(List(r1, r2)) => {
      val (r1s, f1s) = simp(r1)
      val (r2s, f2s) = simp(r2)
      (r1s, r2s) match {
        case (ZERO, _) => (r2s, F_RIGHT(f2s))
        case (_, ZERO) => (r1s, F_LEFT(f1s))
        case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
                  else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
      }
    }
    case SEQ(r1, r2) => {
      val (r1s, f1s) = simp(r1)
      val (r2s, f2s) = simp(r2)
      (r1s, r2s) match {
        case (ZERO, _) => (ZERO, F_ERROR)
        case (_, ZERO) => (ZERO, F_ERROR)
        case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
        case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
        case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
      }
    }
    case RECD(x, r1) => {
      val (r1s, f1s) = simp(r1)
      (RECD(x, r1s), F_RECD(f1s))
    }
    case r => (r, F_ID)
  }
  /*
  val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
  val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
  */
  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
    case Nil => {
      if (nullable(r)) {
        mkeps(r) 
      }
      else throw new Exception("Not matched")
    }
    case c::cs => {
      val (r_simp, f_simp) = simp(der(c, r))
      inj(r, c, f_simp(lex_simp(r_simp, cs)))
    }
  }

  def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)

  //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))

  // filters out all white spaces
  def tokenise(r: Rexp, s: String) = 
    env(lexing_simp(r, s)).filterNot { _._1 == "w"}


  //reads the string from a file 
  def fromFile(name: String) : String = 
    io.Source.fromFile(name).mkString

  def tokenise_file(r: Rexp, name: String) = 
    tokenise(r, fromFile(name))
  
  //   Testing
  //============

  def time[T](code: => T) = {
    val start = System.nanoTime()
    val result = code
    val end = System.nanoTime()
    println((end - start)/1.0e9)
    result
  }

  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART

  // bnullable function: tests whether the aregular 
  // expression can recognise the empty string
  def bnullable (r: ARexp) : Boolean = r match {
    case AZERO => false
    case AONE(_) => true
    case ACHAR(_,_) => false
    case AALTS(_, rs) => rs.exists(bnullable)
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
    case ASTAR(_, _) => true
  }

  def mkepsBC(r: ARexp) : Bits = r match {
    case AONE(bs) => bs
    case AALTS(bs, rs) => {
      val n = rs.indexWhere(bnullable)
      bs ++ mkepsBC(rs(n))
    }
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
    case ASTAR(bs, r) => bs ++ List(Z)
  }

  // derivative of a regular expression w.r.t. a character
  def bder(c: Char, r: ARexp) : ARexp = r match {
    case AZERO => AZERO
    case AONE(_) => AZERO
    case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
    case ASEQ(bs, r1, r2) => 
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
      else ASEQ(bs, bder(c, r1), r2)
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
  }
  def bder_rf(c: Char, r: ARexp) : ARexp = r match {
    case AZERO => AZERO
    case AONE(_) => AZERO
    case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder_rf(c, _)))
    case ASEQ(bs, r1, r2) =>
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder_rf(c, r1), r2), fuse(mkepsBC(r1), bder_rf(c, r2)))
      else ASEQ(bs, bder_rf(c, r1), r2)
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder_rf(c, r)), ASTAR(Nil, r))
  }
  // derivative w.r.t. a string (iterates bder)
  @tailrec
  def bders (s: List[Char], r: ARexp) : ARexp = s match {
    case Nil => r
    case c::s => bders(s, bder(c, r))
  }
  
  def bders_rf(s: List[Char], r: ARexp) : ARexp = s match {
    case Nil => r
    case c::s => bders_rf(s, bder_rf(c, r))
  }
  def all_zero_except_alt(rs: List[ARexp], a: ARexp): ARexp = rs match{
    case Nil => a
    case AZERO :: rs1 => all_zero_except_alt(rs1, a)
    case AALTS(bs, rs1) :: rs2 => {
      if (a == AZERO)
        all_zero_except_alt(rs2, AALTS(bs, rs1))
      else
        AZERO
    }
    case r1 :: rs2 => AZERO
  }
  def flats(rs: List[ARexp]): List[ARexp] = rs match {
      case Nil => Nil
      case AZERO :: rs1 => flats(rs1)
      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
      case r1 :: rs2 => r1 :: flats(rs2)
  }
  /*
  def remove(v: Val): Val = v match{
    case Right(v1) => v1
    case Left(v1) => v1
    case _ => throw new Exception("Not removable")
  }*/
  def augment(v: Val, i: Int): Val = if(i > 1) augment(Right(v), i - 1) else Right(v)
//an overly complex version
/*
    if(rel_dist >0){//the regex we are dealing with is not what v points at
      rs match{
        case Nil => throw new Exception("Trying to simplify a non-existent value")
        case AZERO :: rs1 => right_shift(rs1, rel_dist - 1, remove(v))
        case AALTS(bs, rs1) :: rs2 => right_shift(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2
        case r1 :: rs2 => right_shift(rs2, rel_dist - 1, v)
      }
    }
    else if(rel_dist == 0){//we are dealing with regex v is pointing to -- "v->r itself"
      rs match{//r1 cannot be zero
        AALTS(bs, rs1) :: rs2 => right_shift(  )
        AZERO::rs2 => throw new Exception("Value corresponds to zero")
        r1::rs2 => right_shift(rs2, rel_dist - 1, v)
      }

    }
    else{

    }
    */
  //gives how much the regex at i has shifted right after flatten(on a list of simplified regexes)
  def right_shift(rs: List[ARexp], i: Int): Int = (rs, i) match {
    case (_, 0) => 0
    case (Nil, _) => 0
    case (AZERO :: rs1, _) => right_shift(rs1, i - 1) - 1
    case (AALTS(bs, rs1) :: rs2, _) => rs1.length - 1 + right_shift(rs2, i - 1)
    case (r1 :: rs2, _) => right_shift(rs2, i - 1)
  }
  def rflats(rs: List[Rexp]): List[Rexp] = rs match {
    case Nil => Nil
    case ZERO :: rs1 => rflats(rs1)
    case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
    case r1 :: rs2 => r1 :: rflats(rs2)
  }
  var flats_time = 0L
  var dist_time = 0L
  
  def bsimp(r: ARexp): ARexp = r match {
    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
    }
    case AALTS(bs1, rs) => {
      val rs_simp = rs.map(bsimp)
      val flat_res = flats(rs_simp)
      val dist_res = distinctBy(flat_res, erase)
      dist_res match {
        case Nil => AZERO
        case r :: Nil => fuse(bs1, r)
        case rs => AALTS(bs1, rs)  
      }
    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case r => r
  }
  //minimise fuse operation if possible
  def bsimp_rf(r: ARexp):ARexp = r match {
     case ASEQ(bs1, r1, r2) => (bsimp_rf(r1), bsimp_rf(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
    }
    case AALTS(bs1, rs) => {
      //after map simp, before flats, check if all others simplify to 0s, if yes, do not fuse
      val rs_simp = rs.map(bsimp_rf)
      //prevent fuse from happening
      val fuse_alts = all_zero_except_alt(rs_simp, AZERO)//returns AZERO if not the case, return AALTS if yes
      if(fuse_alts == AZERO){
        val flat_res = flats(rs_simp)
        val dist_res = distinctBy(flat_res, erase)
        dist_res match {
          case Nil => AZERO
          case r :: Nil => fuse(bs1, r)
          case rs => AALTS(bs1, rs)  
        }
      }
      else{
        fuse(bs1, fuse_alts)
      }
    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case r => r   
  }
  //only print at the top level

  //find_pos returns the index of a certain regex in a list of regex
  //the leftmost regex is given the index 0 and the regex next to it
  //is given 1 and so on 
  //it needs the value to point out which regex it wants to get index of
  //find_pos_aux does essentially the same thing as find_pos, except that
  //it receives an alts instead of a list of regular expressions
  def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
    case (v, r::Nil) => 0
    case (Right(v), r::rs) => find_pos(v, rs) + 1
    case (Left(v), r::rs) => 0
    //case (v, _) => 0
  }
  def find_pos_aux(v: Val, r: ARexp): Int = r match {
    case AALTS(bs, rs) => find_pos(v, rs)
    case r => 0
  }
  def remove(v: Val, rs: List[ARexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right
    //we have to use r to detect the bound of nested L/Rs
    case (v, r::Nil) => v
    case (Right(v), r::rs) => remove(v, rs) 
    case (Left(v), r::rs) => v 
    //case (v, r::Nil) => v
  }
  def simple_end(v: Val): Boolean = v match {
    case Left(v) => return false
    case Right(v) => return simple_end(v)
    case v => return true
  }

  //tells if rs[i] after flatten is at the right end 
  def isend(v: Val, rs: List[ARexp], i: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
    val rsbh = rs.slice(i + 1, rs.length)
    val out_end = if(flats(rsbh) == Nil) true else false
    val inner_end = simple_end(v)
    inner_end && out_end
  }
  //get the coat of v and wears it on vs
  def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
    case (Right(v), r::Nil) => Right(vs)
    case (Left(v), r::rs) => Left(vs) 
    case (Right(v), r::rs) => Right(get_coat(v, rs, vs))
  }
  //coat does the job of "coating" a value
  //given the number of right outside it
  def coat(v: Val, i: Int) : Val = i match {
    case 0 => v
    case i => if(i > 0) coat(Right(v), i - 1) else {println(v,i); throw new Exception("coat minus")}
  }
  def decoat(v:Val, i: Int) : Val = i match {
    case 0 => v
    case i => v match {
      case Right(v0) => decoat(v0, i - 1)
      case _ => throw new Exception("bad args decoat")
    }
  }
  //given a regex, and a value, return the rectification function for how to rebuild the original value from the simplified value
  
  def vunsimp(r: ARexp, v: Val) : Val => Val = (r, v) match {
    case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
        case ((AZERO, _), (_, _) )=> throw new Exception("bad arguments")
        case ((_, _), (AZERO, _)) => throw new Exception("bad arguments")
        case ((AONE(bs2), v1s) , (r2s, v2s)) => (vtails => Sequ(v1, vunsimp(r2, v2)(vtails)))  //(fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
        case ((r1s, v1s), (r2s, v2s)) => (vsmall => vsmall match {
          case Sequ(v1small, v2small) => Sequ(vunsimp(r1, v1)(v1small), vunsimp(r2, v2)(v2small))
          case _ => {
            println(vsmall) ;
            throw new Exception("bad arguments sequ")
          }
          //(ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
        })
    }
    case (AALTS(bs1, rs), v) => {
      val init_ind = find_pos(v, rs)
      val rightend1 = if(init_ind + 1 == rs.length) true else false
      val inner_rectfunct = vunsimp(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
      val vpointr = bsimp2(rs(init_ind), remove(v, rs))
      val target_vs = vpointr._2
      val target_rs = vpointr._1

      val rs_simp = rs.map(bsimp)
      val target_vs_kernel = target_rs match {
        case AALTS(bs2, rs2) => remove(target_vs, rs2)//remove the secondary layer of left and right
        case r => target_vs
      }
      val target_vs_outerlayers = target_rs match {
        case AALTS(bs2, rs2) => find_pos(target_vs, rs2)//remove the secondary layer of left and right
        case r => 0
      }
      val rightend2 = target_rs match {
        case AALTS(bs2, rs2) => if(find_pos(target_vs, rs2) == rs2.length - 1) true else false
        case r => false
      }
      val isalts = target_rs match {
        case AALTS(bs2, rs2) =>  true 
        case r => false
      }


      val flat_res = flats(rs_simp)
      val flats_shit1 = right_shift(rs_simp, init_ind)
      val flats_shift2 = find_pos_aux(target_vs, target_rs)
      val flats_shift =  flats_shit1 + flats_shift2//right_shift used to be called flats_vsimp
      val new_ind = init_ind + flats_shift

      val dist_res = distinctBy(flat_res, erase)
      val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)

      val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
      {
        coat(target_vs_kernel, front_part.length - 1)
      }
      else{
        coat(Left(target_vs_kernel), front_part.length - 1)
      }
      if(rightend1){
        if(rightend2){
          kernel_coated: Val => 
          decoat(kernel_coated, front_part.length - 1) match {
            case Left(vk) => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts
            case vk => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)
          }
        }
        else{
          kernel_coated: Val => 
          decoat(kernel_coated, front_part.length - 1) match {
            case Left(vk) => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind) 
              else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts
            case vk => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind)
              else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)
          }
        }
      }
      else{
        if(rightend2){
          kernel_coated: Val => 
          decoat(kernel_coated, front_part.length - 1) match {
            case Left(vk) => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts
            case vk => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)
          } 
        }
        else{
          kernel_coated: Val => 
          decoat(kernel_coated, front_part.length - 1) match {
            case Left(vk) => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind)
              else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts
            case vk => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind)
              else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)
          }     
        }

      }

    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case (r, v) => (v => v)
  }
  //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
  //def flats2(r: Arexp, v: val): (ARexp, Val) = (r, v) match{
  //  case (AALTS(bs, rs), Right(v0)) => rs match {
  //    case Nil => (AALTS(bs, Nil), v)
  //    case AZERO::rs0 =>  flats2(AALTS(bs, rs0), v0) 
  //    case 
  //  case (AALTS(bs, rs), Left(v0)) => 
  //}
  //liangren qixinxieli zuo yixie shiqing 
  //shi xinzhaobuxuan de moqi
  //ni yao fanzhangben 
  //na wo jiu zhineng peini fanzhangben 
  //ni xianshuo wo weini zuole haoduohaoduo
  //na wo jiu haohao gen ni fenxifenxi
  //ni daodi zuole shenme 
  //ni meizuo shenme 
  //zhishi yinwei ni lei 
  //suoyi ni juede ni zuole haoduo 
  //zuofanyeshi 
  //shi weile ni shiwu cai zuo name jingxi
  //shiwo de hua meitian sancan huadeshijian 
  //buchaoguo liangxiaoshi
  //ni zuode henduoshihou doushi weile ziji
  //ni zuoshiqing de shihou ye zongshi bawo jiaolai bangmang
  //ni de xiaolv hai hendi 
  //zonghe kanlai
  //ni buhuizhaoguziji meiyoudandang nenglibuzu haizisi
  //suoyi wo zui taoyan gennishuozhexie 
  //shuoli bianbuguolai 
  //jiu ba ren xiaoshihoude shiqing nachulaishuo, lai exin ren
  //nibangwo daoyibeishui, wo buleyujieshou, wo bushuoxiexie 
  //nandao woshuo sheiyaonidaole, zoukai? buyaoni geiwo dao? wo ziji dao?
  //nishuo qiantou doushi ni zaizuo 
  //zheliwoyao buchong
  //quancheng ni yigerenzuo yidunfan de cishu
  //dagai you duoshaoci?
  //kending you jici, dan bushi duoshu
  //duoshu shi wo huilai yihou 
  //ni rangwo lai chaocai huozhe lai jieguan zuowan shengyude gongzuo 
  //ni zuo de zhexieshihou 
  //yiban jiushi lianggecai 
  //huozhe yicai yitang
  //wo hui renwei 
  //zhe shi ni shenti keyi shiying de gongzuoliang
  //moren ni ziji hui zhaoguhao ziji 
  //ruguo ni leile, yehui lai zhaowo bangmang
  //suoyi wo moren ni zai zhe qijian shi meiyou
  //guolao, ruguo you, wo renwei 
  //1 ni meiyou zhaoguhao niziji
  //2 nimeiyou jishi xiangwo xunqiu bangzhu
  //3 ni jibensangshi laodongnengli le,
  //yihou huolu doushi wo lai bao le jiu wanle
  //niziji meinengli jiu buyao dui zuoshi de ren 
  //tiaosanjiansi
  //ni shibushi taomi xigecai jiu hui lei
  //na shuoming
  //yaome ni tili taicha--jibensangshi laodongli
  //yaome ni fangfabuheshi(xiaolvdi, zishiyouwenti daozhi shentilaosun)--ni yinggai tiaozheng ziji zuoshide fangfa, women yiqilai zhaozhao yuanyin
  //cuowu de silushi: nikanwo dou zhemelei le,
  //shuoming wo zuole henduo, ni bu tiliangwo
  //ni zuo zhexie doushi yinggaide
  //yinwei wo shi nimama erqie wo leile
  def pos_i(rs: List[ARexp], v: Val): Int = (rs, v) match {
    case (r::Nil,         v1) => 0
    case ( r::rs1, Right(v) ) => pos_i(rs1, v) + 1
    case ( r::rs1, Left(v)  ) => 0
  }
  def pos_v(rs: List[ARexp], v: Val): Val = (rs, v) match {
    case (r::Nil,         v1) => v1
    case (r::rs1, Right(v)  ) => pos_v(rs1, v)
    case (r::rs1, Left(v)   ) => v
  }
  def unify(rs: List[ARexp], v: Val): Val = {
    Position( pos_i(rs, v), pos_v(rs, v) )
  }
  def deunify(rs_length: Int, v: Val): Val = v match{
    case Position(i, v0) => {
      if(i <0) {println(rs_length, v); throw new Exception("deunify minus")} 
      else if(rs_length == 1) {println(v); v}
      else if(rs_length - 1 == i) coat(v0, i) 
      else coat(Left(v0), i)
      }
    case _ => throw new Exception("deunify error")
  }
  def flats2(front: List[ARexp], i: Int, rs: List[ARexp], v: Val): (List[ARexp], Val) = v match {
    case Position(j, v0) => {
      if (i < 0) (front ::: flats(rs), Position(j, v0) ) 
      else if(i == 0){
        rs match {
          case AALTS(bs, rs1) :: rs2 => ( (front ::: rs1.map(fuse(bs, _))):::flats(rs2), Position(j + rs1.length - 1, pos_v(rs1, v0)))
          case r::rs2 => (front ::: List(r) ::: flats(rs2), Position(j, v0))
          case _ => throw new Exception("flats2 i = 0")
        }
      }
      else{
        rs match {
          case AZERO::rs1 => flats2(front, i - 1, rs1, Position(j - 1, v0))
          case AALTS(bs, rs1) ::rs2 => flats2(front:::rs1.map(fuse(bs, _)), i - 1, rs2, Position(j + rs1.length - 1, v0))
          case r::rs1 => flats2(front:::List(r), i - 1, rs1, Position(j, v0)) 
          case _ => throw new Exception("flats2 i>0")
        }
      }
    }
    case _ => throw new Exception("flats2 error")
  }
  def distinctBy2[B, C](v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match {
    case Nil => (res, v)
    case (x::xs) => {
      val re = f(x)
      if (acc.contains(re)) v match { 
        case Position(i, v0) => distinctBy2(Position(i - 1, v0), xs, f, acc, res)  
        case _ => throw new Exception("dB2")
      }
      else distinctBy2(v, xs, f, re::acc, x::res)
    }
  }
   def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
    case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
        case ((AZERO, _), (_, _) )=> (AZERO, undefined)
        case ((_, _), (AZERO, _)) => (AZERO, undefined)
        case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
        case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
    }
    case (AALTS(bs1, rs), v) => {
      val vlist = unify(rs, v)
      vlist match {
        case Position(i, v0) => {
          val v_simp = bsimp2(rs(i), v0)._2
          val rs_simp = rs.map(bsimp)
          val flat_res = flats2( Nil, i, rs_simp, Position(i, v_simp) )
          val dist_res = distinctBy2(flat_res._2, flat_res._1, erase)
          val rs_new = dist_res._1
          val v_new = deunify(rs_new.length, dist_res._2)
          rs_new match {
            case Nil => (AZERO, undefined)
            case s :: Nil => (fuse(bs1, s), v_new)
            case rs => (AALTS(bs1, rs), v_new)
          }
        }
        case _ => throw new Exception("Funny vlist bsimp2")
      }
    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case (r, v) => (r, v)  
  }
  /*def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
    case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
        case ((AZERO, _), (_, _) )=> (AZERO, undefined)
        case ((_, _), (AZERO, _)) => (AZERO, undefined)
        case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
        case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
    }
    case (AALTS(bs1, rs), v) => {
      val init_ind = find_pos(v, rs)
      val vpointr = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
      val target_sv = vpointr._2
      val target_sr = vpointr._1

      val rs_simp = rs.map(bsimp)
      val target_vs_kernel = target_sr match {
        case AALTS(bs2, rs2) => remove(target_sv, rs2)//if rs(init_ind) after simp is also an alts, we remove the R(....L(v)) outside v
        case r => target_sv
      }
      val flat_res = flats(rs_simp)
      val flats_shift1 = right_shift(rs_simp, init_ind)
      val flats_base = find_pos_aux(target_sv, target_sr)
      val flats_shift =  flats_shift1 + flats_base//right_shift used to be called flats_vsimp
      val new_ind = init_ind + flats_shift

      val dist_res = distinctBy(flat_res, erase)
      val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)

      val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
      {
        coat(target_vs_kernel, front_part.length - 1)
      }
      else{
        coat(Left(target_vs_kernel), front_part.length - 1)
      }
      dist_res match {
        case Nil => (AZERO, undefined)
        case s :: Nil => (fuse(bs1, s), vdb)
        case rs => (AALTS(bs1, rs), vdb)
      }
    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case (r, v) => (r, v)  
  }*/
  //the below are all residuals from the bsimp2 function 
        //val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
        //val vf = coat(vs_for_coating, new_ind)
      //flats2 returns a list of regex and a single v
      //now |- vf: ALTS(bs1, flat_res)
      //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
            //val size_reduction = new_ind + 1 - front_part.length
  def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
  /*This version was first intended for returning a function however a value would be simpler.
  def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
    case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
        case ((AZERO, _), (_, _) )=> (AZERO, undefined)
        case ((_, _), (AZERO, _)) => (AZERO, undefined)
        case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } )
        case ((r1s, f1), (r2s, f2)) => (ASEQ(bs1, r1s, r2s), lambda v => v match {case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))}
    }
    case AALTS(bs1, rs) => {
      val init_ind = find_pos(v, rs)
      val vs = bsimp2(rs[init_ind], remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
      val rs_simp = rs.map(bsimp)
      val vs_kernel = rs_simp[init_ind] match {
        case AALTS(bs2, rs2) => remove(vs, rs_simp[init_ind])//remove the secondary layer of left and right
        case r => vs
      }
      val vs_for_coating = if(isend(vs, rs_simp, init_ind)) vs_kernel else Left(vs_kernel)

      val r_s = rs_simp[init_ind]
      val shift = right_shift(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind])
      val vf = coat(vs_for_coating, shift + init_ind)

      val flat_res = flats(rs_simp)//flats2 returns a list of regex and a single v
      val dist_res = distinctBy(flat_res, erase)
      dist_res match {
        case Nil => AZERO
        case s :: Nil => fuse(bs1, s)
        case rs => AALTS(bs1, rs)  
      }
    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case r => r  
  }*/
  def super_bsimp(r: ARexp): ARexp = r match {
    case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)//万一是(r1, alts(rs))这种形式呢
        case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)} ) ) 
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
    }
    case AALTS(bs1, rs) => {
      val rs_simp = rs.map(super_bsimp)
      val flat_res = flats(rs_simp)
      val dist_res = distinctBy(flat_res, erase)
      dist_res match {
        case Nil => AZERO
        case s :: Nil => fuse(bs1, s)
        case rs => AALTS(bs1, rs)  
      }
    }
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
    case r => r
  }
//nice website http://www.nhc.gov.cn/xcs/kpzs/202002/d228d85eaa19412eaaa4eee740f03101.shtml
//orientalism

  def simp_weakened(r: Rexp): Rexp = r match {
    case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
        case (ZERO, _) => ZERO
        case (_, ZERO) => ZERO
        case (ONE, r2s) => r2s
        case (r1s, r2s) => SEQ(r1s, r2s)
    }
    case ALTS(rs) => {
      val rs_simp = rs.map(simp_weakened)
      val flat_res = rflats(rs_simp)
      val dist_res = rs_simp.distinct
      dist_res match {
        case Nil => ZERO
        case s :: Nil => s
        case rs => ALTS(rs)  
      }
    }
    case STAR(r) => STAR(simp_weakened(r))
    case r => r
  }
    
  def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
    case Nil => r
    case c::s => bders_simp(s, bsimp(bder(c, r)))
  }
  def bders_simp_rf (s: List[Char], r: ARexp) : ARexp = s match {
    case Nil => r
    case c::s => bders_simp_rf(s, bsimp_rf(bder(c, r)))
  }
  
  //----------------------------------------------------------------------------experiment bsimp
  /*

  */
  /*
  def time[T](code: => T) = {
    val start = System.nanoTime()
    val result = code
    val end = System.nanoTime()
    println((end - start)/1.0e9)
    result
  }
  */
  // main unsimplified lexing function (produces a value)
  def blex(s: List[Char], r: ARexp) : Bits = s match {
    case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
    case c::cs => {
      val der_res = bder(c,r)
      blex(cs, der_res)
    }
  }

  def bpre_lexing(r: Rexp, s: String) = blex( s.toList, internalise(r) )
  def blexing(s: String, r: Rexp) : Val = decode(r, blex( s.toList, internalise(r) ) )

  var bder_time = 0L
  var bsimp_time = 0L
  var mkepsBC_time = 0L
  var small_de = 2
  var big_de = 5
  var usual_de = 3
  
  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
    case Nil => {
      if (bnullable(r)) {
        //println(asize(r))
        mkepsBC(r)
      }
    else throw new Exception("Not matched")
    }
    case c::cs => {
      val der_res = bder(c,r)
      val simp_res = bsimp(der_res)
      blex_simp(simp_res, cs)      
    }
  }
  def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
    case Nil => {
      if (bnullable(r)) {
        mkepsBC(r)
      }
      else throw new Exception("Not matched")
    }
    case c::cs => {
      super_blex_simp(super_bsimp(bder(c,r)), cs)
    }
  }
  def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
    case Nil => r
    case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
  }


  //size: of a Aregx for testing purposes 
  def size(r: Rexp) : Int = r match {
    case ZERO => 1
    case ONE => 1
    case CHAR(_) => 1
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
    case ALTS(rs) => 1 + rs.map(size).sum
    case STAR(r) => 1 + size(r)
  }

  def asize(a: ARexp) = size(erase(a))


  // decoding does not work yet
  def blexing_simp(r: Rexp, s: String) = {
    val bit_code = blex_simp(internalise(r), s.toList)
    decode(r, bit_code) 
  }
  def super_blexing_simp(r: Rexp, s: String) = {
    decode(r, super_blex_simp(internalise(r), s.toList))
  }





  // Lexing Rules for a Small While Language

  //symbols
  /*
  val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
  
  //digits
  val DIGIT = PRED("0123456789".contains(_))
  //identifiers
  val ID = SYM ~ (SYM | DIGIT).% 
  //numbers
  val NUM = STAR(DIGIT)
  //keywords
  val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
  val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
  //semicolons
  val SEMI: Rexp = ";"
  //operators
  val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
  val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
  //whitespaces
  val WHITESPACE = PLUS(" " | "\n" | "\t")
  //parentheses
  val RPAREN: Rexp = ")"
  val LPAREN: Rexp = "("
  val BEGIN: Rexp = "{"
  val END: Rexp = "}"
  //strings...but probably needs not
  val STRING: Rexp = "\"" ~ SYM.% ~ "\""



  val WHILE_REGS = (("k" $ KEYWORD) | 
                    ("i" $ ID) | 
                    ("o" $ OP) | 
                    ("n" $ NUM) | 
                    ("s" $ SEMI) | 
                    ("str" $ STRING) |
                    ("p" $ (LPAREN | RPAREN)) | 
                    ("b" $ (BEGIN | END)) | 
                    ("w" $ WHITESPACE)).%

  val AWHILE_REGS = (
    ALTS(
      List(
        ("k" $ AKEYWORD), 
                    ("i" $ ID),
                    ("o" $ AOP) , 
                    ("n" $ NUM) ,
                    ("s" $ SEMI) ,
                    ("str" $ STRING), 
                    ("p" $ (LPAREN | RPAREN)), 
                    ("b" $ (BEGIN | END)), 
                    ("w" $ WHITESPACE)
      )
    )
  ).%

*/


  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
  /*
  // Two Simple While programs
  //========================
  println("prog0 test")

  val prog0 = """read n"""
  println(env(lexing_simp(WHILE_REGS, prog0)))
  println(tokenise(WHILE_REGS, prog0))

  println("prog1 test")

  val prog1 = """read  n; write (n)"""
  println(tokenise(WHILE_REGS, prog1))

  */
  // Bigger Tests
  //==============

  def escape(raw: String): String = {
    import scala.reflect.runtime.universe._
    Literal(Constant(raw)).toString
  }

  val prog2 = """
  write "Fib";
  read n;
  minus1 := 0;
  minus2 := 1;
  while n > 0 do {
    temp := minus2;
    minus2 := minus1 + minus2;
    minus1 := temp;
    n := n - 1
  };
  write "Result";
  write minus2
  """

  val prog3 = """
  start := 1000;
  x := start;
  y := start;
  z := start;
  while 0 < x do {
  while 0 < y do {
    while 0 < z do {
      z := z - 1
    };
    z := start;
    y := y - 1
  };     
  y := start;
  x := x - 1
  }
  """
  /*
  for(i <- 400 to 400 by 1){
    println(i+":")
    blexing_simp(WHILE_REGS, prog2 * i)
  } */

    /*
    for (i <- 2 to 5){
      for(j <- 1 to 3){
        println(i,j)
        small_de = i
        usual_de = i + j
        big_de = i + 2*j 
        blexing_simp(AWHILE_REGS, prog2 * 100)
      }
    }*/

  /*
  println("Tokens of prog2")
  println(tokenise(WHILE_REGS, prog2).mkString("\n"))

  val fib_tokens = tokenise(WHILE_REGS, prog2)
  fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")


  val test_tokens = tokenise(WHILE_REGS, prog3)
  test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
  */

  /*
  println("time test for blexing_simp")
  for (i <- 1 to 1 by 1) {
    lexing_simp(WHILE_REGS, prog2 * i)
    blexing_simp(WHILE_REGS, prog2 * i)
    for( j <- 0 to each_simp_timeb.length - 1){
      if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
        println(j, each_simp_timeb(j), each_simp_time(j))
    }
  }
  */


  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)



  def clear() = {
    print("")
    //print("\33[H\33[2J")
  }

  //testing the two lexings produce the same value
  //enumerates strings of length n over alphabet cs
  def strs(n: Int, cs: String) : Set[String] = {
    if (n == 0) Set("")
    else {
      val ss = strs(n - 1, cs)
      ss ++
      (for (s <- ss; c <- cs.toList) yield c + s)
    }
  }
  def enum(n: Int, s: String) : Stream[Rexp] = n match {
    case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
    case n => {  
      val rs = enum(n - 1, s)
      rs #:::
      (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
      (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
      (for (r1 <- rs) yield STAR(r1))
    }
  }

  //tests blexing and lexing
  def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
    clear()
    //println(s"Testing ${r}")
    for (s <- ss.par) yield {
      val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
      val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
      if (res1 != res2) println(s"Disagree on ${r} and ${s}")
      if (res1 != res2) println(s"   ${res1} !=  ${res2}")
      if (res1 != res2) Some((r, s)) else None
    }
  }



  
  /*
  def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
    for (s <- ss){
      
      val der_res = bder(c, ar) 
      val simp_res = bsimp(der_res)
      println(asize(der_res))
      println(asize(simp_res))
      single_expression_explorer(simp_res, (sc - c))
    }
  }*/

  //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))


}

import Rexp.Bits
abstract class ARexp 
case object AZERO extends ARexp
case class AONE(bs: Bits) extends ARexp
case class ACHAR(bs: Bits, f: Char) extends ARexp
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 



abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
case class Position(i: Int, v: Val) extends Val
case object undefined extends Val
//case class Pos(i: Int, v: Val) extends Val
case object Prd extends Val