lex_blex_Frankensteined.scala
changeset 0 902326e1615a
child 3 f15dccc42c7b
equal deleted inserted replaced
-1:000000000000 0:902326e1615a
       
     1 package RexpRelated
       
     2 import scala.language.implicitConversions    
       
     3 import scala.language.reflectiveCalls
       
     4 import scala.annotation.tailrec   
       
     5 import scala.util.Try
       
     6 
       
     7 abstract class Bit
       
     8 case object Z extends Bit
       
     9 case object S extends Bit
       
    10 case class C(c: Char) extends Bit
       
    11 
       
    12 
       
    13 abstract class Rexp 
       
    14 case object ZERO extends Rexp
       
    15 case object ONE extends Rexp
       
    16 case class CHAR(c: Char) extends Rexp
       
    17 case class ALTS(rs: List[Rexp]) extends Rexp 
       
    18 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    19 case class STAR(r: Rexp) extends Rexp 
       
    20 case class RECD(x: String, r: Rexp) extends Rexp
       
    21 
       
    22 
       
    23 
       
    24 object Rexp{
       
    25   type Bits = List[Bit]
       
    26   // abbreviations
       
    27   type Mon = (Char, Rexp)
       
    28   type Lin = Set[Mon]
       
    29   def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
       
    30   def PLUS(r: Rexp) = SEQ(r, STAR(r))
       
    31   def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
       
    32 
       
    33 
       
    34   def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
       
    35     case Nil => Nil
       
    36     case (x::xs) => {
       
    37       val res = f(x)
       
    38       if (acc.contains(res)) distinctBy(xs, f, acc)  
       
    39       else x::distinctBy(xs, f, res::acc)
       
    40     }
       
    41   } 
       
    42   // some convenience for typing in regular expressions
       
    43   def charlist2rexp(s : List[Char]): Rexp = s match {
       
    44     case Nil => ONE
       
    45     case c::Nil => CHAR(c)
       
    46     case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    47   }
       
    48   implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    49 
       
    50   implicit def RexpOps(r: Rexp) = new {
       
    51     def | (s: Rexp) = ALT(r, s)
       
    52     def % = STAR(r)
       
    53     def ~ (s: Rexp) = SEQ(r, s)
       
    54   }
       
    55 
       
    56   implicit def stringOps(s: String) = new {
       
    57     def | (r: Rexp) = ALT(s, r)
       
    58     def | (r: String) = ALT(s, r)
       
    59     def % = STAR(s)
       
    60     def ~ (r: Rexp) = SEQ(s, r)
       
    61     def ~ (r: String) = SEQ(s, r)
       
    62     def $ (r: Rexp) = RECD(s, r)
       
    63   }
       
    64 
       
    65   // translation into ARexps
       
    66   def fuse(bs: Bits, r: ARexp) : ARexp = r match {
       
    67     case AZERO => AZERO
       
    68     case AONE(cs) => AONE(bs ++ cs)
       
    69     case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
       
    70     case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
       
    71     case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
       
    72     case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
       
    73   }
       
    74 
       
    75   def internalise(r: Rexp) : ARexp = r match {
       
    76     case ZERO => AZERO
       
    77     case ONE => AONE(Nil)
       
    78     case CHAR(c) => ACHAR(Nil, c)
       
    79     case ALTS(List(r1, r2)) => 
       
    80       AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
       
    81     case ALTS(r1::rs) => {
       
    82       val AALTS(Nil, rs2) = internalise(ALTS(rs))
       
    83       AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
       
    84     }
       
    85     case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
       
    86     case STAR(r) => ASTAR(Nil, internalise(r))
       
    87     case RECD(x, r) => internalise(r)
       
    88   }
       
    89 
       
    90   internalise(("a" | "ab") ~ ("b" | ""))
       
    91 
       
    92   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
       
    93     case (ONE, bs) => (Empty, bs)
       
    94     case (PRED(f), C(c)::bs) => (Chr(c), bs)
       
    95     case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
       
    96     case (ALTS(rs), bs) => bs match {
       
    97       case Z::bs1 => {
       
    98         val (v, bs2) = decode_aux(rs.head, bs1)
       
    99         (Left(v), bs2)
       
   100       }
       
   101       case S::bs1 => {
       
   102         val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
       
   103         (Right(v), bs2)			
       
   104       }
       
   105     }
       
   106     case (SEQ(r1, r2), bs) => {
       
   107       val (v1, bs1) = decode_aux(r1, bs)
       
   108       val (v2, bs2) = decode_aux(r2, bs1)
       
   109       (Sequ(v1, v2), bs2)
       
   110     }
       
   111     case (STAR(r1), S::bs) => {
       
   112       val (v, bs1) = decode_aux(r1, bs)
       
   113       //println(v)
       
   114       val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
       
   115       (Stars(v::vs), bs2)
       
   116     }
       
   117     case (STAR(_), Z::bs) => (Stars(Nil), bs)
       
   118     case (RECD(x, r1), bs) => {
       
   119       val (v, bs1) = decode_aux(r1, bs)
       
   120       (Rec(x, v), bs1)
       
   121     }
       
   122   }
       
   123 
       
   124   def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
       
   125     case (v, Nil) => v
       
   126     case _ => throw new Exception("Not decodable")
       
   127   }
       
   128 
       
   129 
       
   130   //erase function: extracts the regx from Aregex
       
   131   def erase(r:ARexp): Rexp = r match{
       
   132     case AZERO => ZERO
       
   133     case AONE(_) => ONE
       
   134     case ACHAR(bs, f) => CHAR(f)
       
   135     case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
       
   136     case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
       
   137     case ASTAR(cs, r)=> STAR(erase(r))
       
   138   }
       
   139 
       
   140   //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
       
   141   // nullable function: tests whether the regular 
       
   142   // expression can recognise the empty string
       
   143   def nullable (r: Rexp) : Boolean = r match {
       
   144     case ZERO => false
       
   145     case ONE => true
       
   146     case CHAR(_) => false
       
   147     case ALTS(rs) => rs.exists(nullable)
       
   148     case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
   149     case STAR(_) => true
       
   150     case RECD(_, r) => nullable(r)
       
   151     //case PLUS(r) => nullable(r)
       
   152   }
       
   153 
       
   154   // derivative of a regular expression w.r.t. a character
       
   155   def der (c: Char, r: Rexp) : Rexp = r match {
       
   156     case ZERO => ZERO
       
   157     case ONE => ZERO
       
   158     case CHAR(f) => if (c == f) ONE else ZERO
       
   159     case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
       
   160     case SEQ(r1, r2) => 
       
   161       if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
       
   162       else SEQ(der(c, r1), r2)
       
   163     case STAR(r) => SEQ(der(c, r), STAR(r))
       
   164     case RECD(_, r1) => der(c, r1)
       
   165     //case PLUS(r) => SEQ(der(c, r), STAR(r))
       
   166   }
       
   167 
       
   168   def flatten(v: Val) : String = v match {
       
   169     case Empty => ""
       
   170     case Chr(c) => c.toString
       
   171     case Left(v) => flatten(v)
       
   172     case Right(v) => flatten(v)
       
   173     case Sequ(v1, v2) => flatten(v1) + flatten(v2)
       
   174     case Stars(vs) => vs.map(flatten).mkString
       
   175     case Rec(_, v) => flatten(v)
       
   176   }
       
   177 
       
   178   // extracts an environment from a value
       
   179   def env(v: Val) : List[(String, String)] = v match {
       
   180     case Empty => Nil
       
   181     case Chr(c) => Nil
       
   182     case Left(v) => env(v)
       
   183     case Right(v) => env(v)
       
   184     case Sequ(v1, v2) => env(v1) ::: env(v2)
       
   185     case Stars(vs) => vs.flatMap(env)
       
   186     case Rec(x, v) => (x, flatten(v))::env(v)
       
   187   }
       
   188 
       
   189 
       
   190   // injection part
       
   191   def mkeps(r: Rexp) : Val = r match {
       
   192     case ONE => Empty
       
   193     case ALTS(List(r1, r2)) => 
       
   194       if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
   195     case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   196     case STAR(r) => Stars(Nil)
       
   197     case RECD(x, r) => Rec(x, mkeps(r))
       
   198     //case PLUS(r) => Stars(List(mkeps(r)))
       
   199   }
       
   200 
       
   201   def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   202     case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   203     case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   204     case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   205     case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   206     case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
       
   207     case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
       
   208     case (CHAR(_), Empty) => Chr(c) 
       
   209     case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   210     //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   211   }
       
   212   def lex(r: Rexp, s: List[Char]) : Val = s match {
       
   213     case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   214     case c::cs => inj(r, c, lex(der(c, r), cs))
       
   215   }
       
   216 
       
   217   def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
       
   218 
       
   219   // some "rectification" functions for simplification
       
   220   def F_ID(v: Val): Val = v
       
   221   def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
       
   222   def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
       
   223   def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   224     case Right(v) => Right(f2(v))
       
   225     case Left(v) => Left(f1(v))
       
   226   }
       
   227   def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   228     case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
       
   229   }
       
   230   def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
       
   231     (v:Val) => Sequ(f1(Empty), f2(v))
       
   232   def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
       
   233     (v:Val) => Sequ(f1(v), f2(Empty))
       
   234   def F_RECD(f: Val => Val) = (v:Val) => v match {
       
   235     case Rec(x, v) => Rec(x, f(v))
       
   236   }
       
   237   def F_ERROR(v: Val): Val = throw new Exception("error")
       
   238 
       
   239   // simplification of regular expressions returning also an
       
   240   // rectification function; no simplification under STAR 
       
   241   def simp(r: Rexp): (Rexp, Val => Val) = r match {
       
   242     case ALTS(List(r1, r2)) => {
       
   243       val (r1s, f1s) = simp(r1)
       
   244       val (r2s, f2s) = simp(r2)
       
   245       (r1s, r2s) match {
       
   246         case (ZERO, _) => (r2s, F_RIGHT(f2s))
       
   247         case (_, ZERO) => (r1s, F_LEFT(f1s))
       
   248         case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
       
   249                   else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
       
   250       }
       
   251     }
       
   252     case SEQ(r1, r2) => {
       
   253       val (r1s, f1s) = simp(r1)
       
   254       val (r2s, f2s) = simp(r2)
       
   255       (r1s, r2s) match {
       
   256         case (ZERO, _) => (ZERO, F_ERROR)
       
   257         case (_, ZERO) => (ZERO, F_ERROR)
       
   258         case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
       
   259         case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
       
   260         case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
       
   261       }
       
   262     }
       
   263     case RECD(x, r1) => {
       
   264       val (r1s, f1s) = simp(r1)
       
   265       (RECD(x, r1s), F_RECD(f1s))
       
   266     }
       
   267     case r => (r, F_ID)
       
   268   }
       
   269   /*
       
   270   val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   271   val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   272   */
       
   273   def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   274     case Nil => {
       
   275       if (nullable(r)) {
       
   276         mkeps(r) 
       
   277       }
       
   278       else throw new Exception("Not matched")
       
   279     }
       
   280     case c::cs => {
       
   281       val (r_simp, f_simp) = simp(der(c, r))
       
   282       inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   283     }
       
   284   }
       
   285 
       
   286   def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
       
   287 
       
   288   //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
       
   289 
       
   290   // filters out all white spaces
       
   291   def tokenise(r: Rexp, s: String) = 
       
   292     env(lexing_simp(r, s)).filterNot { _._1 == "w"}
       
   293 
       
   294 
       
   295   //reads the string from a file 
       
   296   def fromFile(name: String) : String = 
       
   297     io.Source.fromFile(name).mkString
       
   298 
       
   299   def tokenise_file(r: Rexp, name: String) = 
       
   300     tokenise(r, fromFile(name))
       
   301   
       
   302   //   Testing
       
   303   //============
       
   304 
       
   305   def time[T](code: => T) = {
       
   306     val start = System.nanoTime()
       
   307     val result = code
       
   308     val end = System.nanoTime()
       
   309     println((end - start)/1.0e9)
       
   310     result
       
   311   }
       
   312 
       
   313   //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
       
   314 
       
   315   // bnullable function: tests whether the aregular 
       
   316   // expression can recognise the empty string
       
   317   def bnullable (r: ARexp) : Boolean = r match {
       
   318     case AZERO => false
       
   319     case AONE(_) => true
       
   320     case ACHAR(_,_) => false
       
   321     case AALTS(_, rs) => rs.exists(bnullable)
       
   322     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
       
   323     case ASTAR(_, _) => true
       
   324   }
       
   325 
       
   326   def mkepsBC(r: ARexp) : Bits = r match {
       
   327     case AONE(bs) => bs
       
   328     case AALTS(bs, rs) => {
       
   329       val n = rs.indexWhere(bnullable)
       
   330       bs ++ mkepsBC(rs(n))
       
   331     }
       
   332     case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
       
   333     case ASTAR(bs, r) => bs ++ List(Z)
       
   334   }
       
   335 
       
   336   // derivative of a regular expression w.r.t. a character
       
   337   def bder(c: Char, r: ARexp) : ARexp = r match {
       
   338     case AZERO => AZERO
       
   339     case AONE(_) => AZERO
       
   340     case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
       
   341     case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
       
   342     case ASEQ(bs, r1, r2) => 
       
   343       if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
       
   344       else ASEQ(bs, bder(c, r1), r2)
       
   345     case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
       
   346   }
       
   347 
       
   348 
       
   349   def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
   350     case Nil => r
       
   351     case c::s => ders(s, der(c, r))
       
   352   }
       
   353 
       
   354   // derivative w.r.t. a string (iterates bder)
       
   355   @tailrec
       
   356   def bders (s: List[Char], r: ARexp) : ARexp = s match {
       
   357     case Nil => r
       
   358     case c::s => bders(s, bder(c, r))
       
   359   }
       
   360 
       
   361   def flats(rs: List[ARexp]): List[ARexp] = rs match {
       
   362       case Nil => Nil
       
   363       case AZERO :: rs1 => flats(rs1)
       
   364       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
       
   365       case r1 :: rs2 => r1 :: flats(rs2)
       
   366     }
       
   367   def rflats(rs: List[Rexp]): List[Rexp] = rs match {
       
   368     case Nil => Nil
       
   369     case ZERO :: rs1 => rflats(rs1)
       
   370     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
       
   371     case r1 :: rs2 => r1 :: rflats(rs2)
       
   372   }
       
   373   //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   374   //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   375   var flats_time = 0L
       
   376   var dist_time = 0L
       
   377   /*
       
   378   def bsimp(r: ARexp, depth: Int): ARexp = 
       
   379   {
       
   380     r match {
       
   381       case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
       
   382           case (AZERO, _) => AZERO
       
   383           case (_, AZERO) => AZERO
       
   384           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   385           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   386       }
       
   387       case AALTS(bs1, rs) => {
       
   388         depth match {
       
   389           case 0 => {
       
   390             flats(distinctBy(rs, erase)) match {
       
   391               case Nil => AZERO
       
   392               case s :: Nil => fuse(bs1, s)
       
   393               case rs => AALTS(bs1, rs) 
       
   394             } 
       
   395           }
       
   396           case n => {
       
   397             val rs_simp = rs.map(bsimp(_, n - 1))
       
   398             val time2 = System.nanoTime()
       
   399             val flat_res = flats(rs_simp)
       
   400             val time3 = System.nanoTime()
       
   401             val dist_res = distinctBy(flat_res, erase)
       
   402             val time4 = System.nanoTime()
       
   403             flats_time = flats_time + time3 - time2
       
   404             dist_time = dist_time + time4 - time3
       
   405             //flats_time += time3 - time2
       
   406             //dist_time += time4 - time3
       
   407             //distinctBy(flats(rs.map(bsimp)), erase) match {
       
   408             dist_res match {
       
   409               case Nil => AZERO
       
   410               case s :: Nil => fuse(bs1, s)
       
   411               case rs => AALTS(bs1, rs)  
       
   412             }
       
   413           }
       
   414         }
       
   415       }
       
   416       case r => r
       
   417     }
       
   418   }
       
   419   */
       
   420   //----------------------------------------------------------------------------This bsimp is the original slow one
       
   421   
       
   422   def bsimp(r: ARexp): ARexp = r match {
       
   423     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
       
   424         case (AZERO, _) => AZERO
       
   425         case (_, AZERO) => AZERO
       
   426         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   427         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   428     }
       
   429     case AALTS(bs1, rs) => {
       
   430       val rs_simp = rs.map(bsimp)
       
   431       val time2 = System.nanoTime()
       
   432       val flat_res = flats(rs_simp)
       
   433       val time3 = System.nanoTime()
       
   434       val dist_res = distinctBy(flat_res, erase)
       
   435       val time4 = System.nanoTime()
       
   436       flats_time = flats_time + time3 - time2
       
   437       dist_time = dist_time + time4 - time3
       
   438       dist_res match {
       
   439         case Nil => AZERO
       
   440         case s :: Nil => fuse(bs1, s)
       
   441         case rs => AALTS(bs1, rs)  
       
   442       }
       
   443     }
       
   444     case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
       
   445     case r => r
       
   446   }
       
   447 
       
   448   def bsimp_weakened(r: ARexp): ARexp = r match {
       
   449     case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
       
   450         case (AZERO, _) => AZERO
       
   451         case (_, AZERO) => AZERO
       
   452         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   453         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   454     }
       
   455     case AALTS(bs1, rs) => {
       
   456       val rs_simp = rs.map(bsimp_weakened)
       
   457       val time2 = System.nanoTime()
       
   458       val flat_res = flats(rs_simp)
       
   459       val time3 = System.nanoTime()
       
   460       val dist_res = distinctBy(flat_res, erase)
       
   461       val time4 = System.nanoTime()
       
   462       flats_time = flats_time + time3 - time2
       
   463       dist_time = dist_time + time4 - time3
       
   464       dist_res match {
       
   465         case Nil => AZERO
       
   466         case s :: Nil => fuse(bs1, s)
       
   467         case rs => AALTS(bs1, rs)  
       
   468       }
       
   469     }
       
   470     case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
       
   471     case r => r
       
   472   }
       
   473 
       
   474   def simp_weakened(r: Rexp): Rexp = r match {
       
   475     case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
       
   476         case (ZERO, _) => ZERO
       
   477         case (_, ZERO) => ZERO
       
   478         case (ONE, r2s) => r2s
       
   479         case (r1s, r2s) => SEQ(r1s, r2s)
       
   480     }
       
   481     case ALTS(rs) => {
       
   482       val rs_simp = rs.map(simp_weakened)
       
   483       val flat_res = rflats(rs_simp)
       
   484       val dist_res = rs_simp.distinct
       
   485       dist_res match {
       
   486         case Nil => ZERO
       
   487         case s :: Nil => s
       
   488         case rs => ALTS(rs)  
       
   489       }
       
   490     }
       
   491     case STAR(r) => STAR(simp_weakened(r))
       
   492     case r => r
       
   493   }
       
   494     
       
   495   
       
   496   //----------------------------------------------------------------------------experiment bsimp
       
   497   /*
       
   498   def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
       
   499     case Nil => r
       
   500     case c::s => bders_simp(s, bsimp(bder(c, r)))
       
   501   }
       
   502   */
       
   503   /*
       
   504   def time[T](code: => T) = {
       
   505     val start = System.nanoTime()
       
   506     val result = code
       
   507     val end = System.nanoTime()
       
   508     println((end - start)/1.0e9)
       
   509     result
       
   510   }
       
   511   */
       
   512   // main unsimplified lexing function (produces a value)
       
   513   def blex(r: ARexp, s: List[Char]) : Bits = s match {
       
   514     case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
       
   515     case c::cs => {
       
   516       val der_res = bder(c,r)
       
   517       blex(der_res, cs)
       
   518     }
       
   519   }
       
   520 
       
   521   def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
       
   522   //def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
       
   523 
       
   524   var bder_time = 0L
       
   525   var bsimp_time = 0L
       
   526   var mkepsBC_time = 0L
       
   527   var small_de = 2
       
   528   var big_de = 5
       
   529   var usual_de = 3
       
   530   
       
   531   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   532     case Nil => {
       
   533       if (bnullable(r)) {
       
   534         //println(asize(r))
       
   535         val time4 = System.nanoTime()
       
   536         val bits = mkepsBC(r)
       
   537         val time5 = System.nanoTime()
       
   538         mkepsBC_time = time5 - time4
       
   539         bits
       
   540       }
       
   541     else throw new Exception("Not matched")
       
   542     }
       
   543     case c::cs => {
       
   544       val time1 = System.nanoTime()
       
   545       val der_res = bder(c,r)
       
   546       val time2 = System.nanoTime()
       
   547       val simp_res = bsimp(der_res)
       
   548       val time3 = System.nanoTime()  
       
   549       bder_time = bder_time + time2 - time1
       
   550       bsimp_time = bsimp_time + time3 - time2
       
   551       blex_simp(simp_res, cs)      
       
   552     }
       
   553   }
       
   554   def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
       
   555     case Nil => r
       
   556     case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
       
   557   }
       
   558 
       
   559   //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
       
   560   /*
       
   561   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   562     case Nil => {
       
   563       if (bnullable(r)) {
       
   564         //println(asize(r))
       
   565         mkepsBC(r)
       
   566       }
       
   567     else throw new Exception("Not matched")
       
   568     }
       
   569     case c::cs => {
       
   570       val der_res = bder(c,r)
       
   571       val simp_res = bsimp(der_res)  
       
   572       //val simp_res2 = bsimp(simp_res)  
       
   573       //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
       
   574       blex_simp(simp_res, cs)
       
   575     }
       
   576   }
       
   577   */
       
   578   /*
       
   579   def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   580     case Nil => {
       
   581       if (nullable(r)) {
       
   582         mkeps(r) 
       
   583       }
       
   584       else throw new Exception("Not matched")
       
   585     }
       
   586     case c::cs => {
       
   587       val start = System.nanoTime()
       
   588       val (r_simp, f_simp) = simp(der(c, r))
       
   589       val end = System.nanoTime()
       
   590       println((end - start)/1.0e9)
       
   591       inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   592     }
       
   593   }
       
   594   */
       
   595 
       
   596   //size: of a Aregx for testing purposes 
       
   597   def size(r: Rexp) : Int = r match {
       
   598     case ZERO => 1
       
   599     case ONE => 1
       
   600     case CHAR(_) => 1
       
   601     case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
   602     case ALTS(rs) => 1 + rs.map(size).sum
       
   603     case STAR(r) => 1 + size(r)
       
   604   }
       
   605 
       
   606   def asize(a: ARexp) = size(erase(a))
       
   607 
       
   608 
       
   609   // decoding does not work yet
       
   610   def blexing_simp(r: Rexp, s: String) = {
       
   611     //flats_time.clear()
       
   612     //dist_time.clear()
       
   613     flats_time = 0L
       
   614     dist_time = 0L
       
   615     bder_time = 0L
       
   616     bsimp_time = 0L
       
   617     mkepsBC_time = 0L
       
   618     val start = System.nanoTime()
       
   619     val bit_code = blex_simp(internalise(r), s.toList)
       
   620     val end = System.nanoTime()
       
   621     println("total time: "+ (end - start)/1.0e9)
       
   622     println("spent on flats: " + (flats_time/(1.0e9)))
       
   623     println("spent on distinctBy: " + (dist_time/(1.0e9)))
       
   624     println("spent on bder: "+ bder_time/1.0e9)
       
   625     println("spent on bsimp: " + bsimp_time/1.0e9)
       
   626     println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
       
   627     //println(s"The length of the string ${s.length}; length of bit sequence:")
       
   628     //println((bit_code.length))
       
   629     //println(final_derivative)
       
   630     //bit_code
       
   631     //decode(r, bit_code) 
       
   632   }
       
   633 
       
   634 
       
   635 
       
   636 
       
   637 
       
   638   // Lexing Rules for a Small While Language
       
   639 
       
   640   //symbols
       
   641   /*
       
   642   val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
       
   643   
       
   644   //digits
       
   645   val DIGIT = PRED("0123456789".contains(_))
       
   646   //identifiers
       
   647   val ID = SYM ~ (SYM | DIGIT).% 
       
   648   //numbers
       
   649   val NUM = STAR(DIGIT)
       
   650   //keywords
       
   651   val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
       
   652   val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
       
   653   //semicolons
       
   654   val SEMI: Rexp = ";"
       
   655   //operators
       
   656   val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
       
   657   val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
       
   658   //whitespaces
       
   659   val WHITESPACE = PLUS(" " | "\n" | "\t")
       
   660   //parentheses
       
   661   val RPAREN: Rexp = ")"
       
   662   val LPAREN: Rexp = "("
       
   663   val BEGIN: Rexp = "{"
       
   664   val END: Rexp = "}"
       
   665   //strings...but probably needs not
       
   666   val STRING: Rexp = "\"" ~ SYM.% ~ "\""
       
   667 
       
   668 
       
   669 
       
   670   val WHILE_REGS = (("k" $ KEYWORD) | 
       
   671                     ("i" $ ID) | 
       
   672                     ("o" $ OP) | 
       
   673                     ("n" $ NUM) | 
       
   674                     ("s" $ SEMI) | 
       
   675                     ("str" $ STRING) |
       
   676                     ("p" $ (LPAREN | RPAREN)) | 
       
   677                     ("b" $ (BEGIN | END)) | 
       
   678                     ("w" $ WHITESPACE)).%
       
   679 
       
   680   val AWHILE_REGS = (
       
   681     ALTS(
       
   682       List(
       
   683         ("k" $ AKEYWORD), 
       
   684                     ("i" $ ID),
       
   685                     ("o" $ AOP) , 
       
   686                     ("n" $ NUM) ,
       
   687                     ("s" $ SEMI) ,
       
   688                     ("str" $ STRING), 
       
   689                     ("p" $ (LPAREN | RPAREN)), 
       
   690                     ("b" $ (BEGIN | END)), 
       
   691                     ("w" $ WHITESPACE)
       
   692       )
       
   693     )
       
   694   ).%
       
   695 
       
   696 */
       
   697 
       
   698 
       
   699   //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
       
   700   /*
       
   701   // Two Simple While programs
       
   702   //========================
       
   703   println("prog0 test")
       
   704 
       
   705   val prog0 = """read n"""
       
   706   println(env(lexing_simp(WHILE_REGS, prog0)))
       
   707   println(tokenise(WHILE_REGS, prog0))
       
   708 
       
   709   println("prog1 test")
       
   710 
       
   711   val prog1 = """read  n; write (n)"""
       
   712   println(tokenise(WHILE_REGS, prog1))
       
   713 
       
   714   */
       
   715   // Bigger Tests
       
   716   //==============
       
   717 
       
   718   def escape(raw: String): String = {
       
   719     import scala.reflect.runtime.universe._
       
   720     Literal(Constant(raw)).toString
       
   721   }
       
   722 
       
   723   val prog2 = """
       
   724   write "Fib";
       
   725   read n;
       
   726   minus1 := 0;
       
   727   minus2 := 1;
       
   728   while n > 0 do {
       
   729     temp := minus2;
       
   730     minus2 := minus1 + minus2;
       
   731     minus1 := temp;
       
   732     n := n - 1
       
   733   };
       
   734   write "Result";
       
   735   write minus2
       
   736   """
       
   737 
       
   738   val prog3 = """
       
   739   start := 1000;
       
   740   x := start;
       
   741   y := start;
       
   742   z := start;
       
   743   while 0 < x do {
       
   744   while 0 < y do {
       
   745     while 0 < z do {
       
   746       z := z - 1
       
   747     };
       
   748     z := start;
       
   749     y := y - 1
       
   750   };     
       
   751   y := start;
       
   752   x := x - 1
       
   753   }
       
   754   """
       
   755   /*
       
   756   for(i <- 400 to 400 by 1){
       
   757     println(i+":")
       
   758     blexing_simp(WHILE_REGS, prog2 * i)
       
   759   } */
       
   760 
       
   761     /*
       
   762     for (i <- 2 to 5){
       
   763       for(j <- 1 to 3){
       
   764         println(i,j)
       
   765         small_de = i
       
   766         usual_de = i + j
       
   767         big_de = i + 2*j 
       
   768         blexing_simp(AWHILE_REGS, prog2 * 100)
       
   769       }
       
   770     }*/
       
   771 
       
   772   /*
       
   773   println("Tokens of prog2")
       
   774   println(tokenise(WHILE_REGS, prog2).mkString("\n"))
       
   775 
       
   776   val fib_tokens = tokenise(WHILE_REGS, prog2)
       
   777   fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
       
   778 
       
   779 
       
   780   val test_tokens = tokenise(WHILE_REGS, prog3)
       
   781   test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
       
   782   */
       
   783 
       
   784   /*
       
   785   println("time test for blexing_simp")
       
   786   for (i <- 1 to 1 by 1) {
       
   787     lexing_simp(WHILE_REGS, prog2 * i)
       
   788     blexing_simp(WHILE_REGS, prog2 * i)
       
   789     for( j <- 0 to each_simp_timeb.length - 1){
       
   790       if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
       
   791         println(j, each_simp_timeb(j), each_simp_time(j))
       
   792     }
       
   793   }
       
   794   */
       
   795 
       
   796 
       
   797   //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
       
   798 
       
   799 
       
   800 
       
   801   def clear() = {
       
   802     print("")
       
   803     //print("\33[H\33[2J")
       
   804   }
       
   805 
       
   806   //testing the two lexings produce the same value
       
   807   //enumerates strings of length n over alphabet cs
       
   808   def strs(n: Int, cs: String) : Set[String] = {
       
   809     if (n == 0) Set("")
       
   810     else {
       
   811       val ss = strs(n - 1, cs)
       
   812       ss ++
       
   813       (for (s <- ss; c <- cs.toList) yield c + s)
       
   814     }
       
   815   }
       
   816   def enum(n: Int, s: String) : Stream[Rexp] = n match {
       
   817     case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
       
   818     case n => {  
       
   819       val rs = enum(n - 1, s)
       
   820       rs #:::
       
   821       (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
       
   822       (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
       
   823       (for (r1 <- rs) yield STAR(r1))
       
   824     }
       
   825   }
       
   826 
       
   827   //tests blexing and lexing
       
   828   def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
       
   829     clear()
       
   830     //println(s"Testing ${r}")
       
   831     for (s <- ss.par) yield {
       
   832       val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
       
   833       val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
       
   834       if (res1 != res2) println(s"Disagree on ${r} and ${s}")
       
   835       if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       
   836       if (res1 != res2) Some((r, s)) else None
       
   837     }
       
   838   }
       
   839 
       
   840 
       
   841 
       
   842   //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
       
   843   /*
       
   844   def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
       
   845     for (s <- ss){
       
   846       
       
   847       val der_res = bder(c, ar) 
       
   848       val simp_res = bsimp(der_res)
       
   849       println(asize(der_res))
       
   850       println(asize(simp_res))
       
   851       single_expression_explorer(simp_res, (sc - c))
       
   852     }
       
   853   }*/
       
   854 
       
   855   //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
       
   856 
       
   857 
       
   858 }
       
   859 
       
   860 import Rexp.Bits
       
   861 abstract class ARexp 
       
   862 case object AZERO extends ARexp
       
   863 case class AONE(bs: Bits) extends ARexp
       
   864 case class ACHAR(bs: Bits, f: Char) extends ARexp
       
   865 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
       
   866 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
       
   867 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
       
   868 
       
   869 
       
   870 
       
   871 abstract class Val
       
   872 case object Empty extends Val
       
   873 case class Chr(c: Char) extends Val
       
   874 case class Sequ(v1: Val, v2: Val) extends Val
       
   875 case class Left(v: Val) extends Val
       
   876 case class Right(v: Val) extends Val
       
   877 case class Stars(vs: List[Val]) extends Val
       
   878 case class Rec(x: String, v: Val) extends Val
       
   879 //case class Pos(i: Int, v: Val) extends Val
       
   880 case object Prd extends Val