exps/lex_blex_Frankensteined.scala
changeset 310 c090baa7059d
parent 309 a7769a89c529
child 311 8b8db9558ecf
equal deleted inserted replaced
309:a7769a89c529 310:c090baa7059d
     1 package RexpRelated
       
     2 import scala.language.implicitConversions    
       
     3 import scala.language.reflectiveCalls
       
     4 import scala.annotation.tailrec   
       
     5 import scala.util.Try
       
     6 
       
     7 abstract class Bit
       
     8 case object Z extends Bit
       
     9 case object S extends Bit
       
    10 case class C(c: Char) extends Bit
       
    11 
       
    12 
       
    13 abstract class Rexp 
       
    14 case object ZERO extends Rexp
       
    15 case object ONE extends Rexp
       
    16 case class PRED(f: Char => Boolean) extends Rexp
       
    17 case class ALTS(rs: List[Rexp]) extends Rexp 
       
    18 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    19 case class STAR(r: Rexp) extends Rexp 
       
    20 case class RECD(x: String, r: Rexp) extends Rexp
       
    21 
       
    22 
       
    23 
       
    24 object Rexp{
       
    25   type Bits = List[Bit]
       
    26   // abbreviations
       
    27   def CHAR(c: Char) = PRED(_ == c)
       
    28   def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
       
    29   def PLUS(r: Rexp) = SEQ(r, STAR(r))
       
    30   def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
       
    31 
       
    32 
       
    33   def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
       
    34     case Nil => Nil
       
    35     case (x::xs) => {
       
    36       val res = f(x)
       
    37       if (acc.contains(res)) distinctBy(xs, f, acc)  
       
    38       else x::distinctBy(xs, f, res::acc)
       
    39     }
       
    40   } 
       
    41   // some convenience for typing in regular expressions
       
    42   def charlist2rexp(s : List[Char]): Rexp = s match {
       
    43     case Nil => ONE
       
    44     case c::Nil => CHAR(c)
       
    45     case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    46   }
       
    47   implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    48 
       
    49   implicit def RexpOps(r: Rexp) = new {
       
    50     def | (s: Rexp) = ALT(r, s)
       
    51     def % = STAR(r)
       
    52     def ~ (s: Rexp) = SEQ(r, s)
       
    53   }
       
    54 
       
    55   implicit def stringOps(s: String) = new {
       
    56     def | (r: Rexp) = ALT(s, r)
       
    57     def | (r: String) = ALT(s, r)
       
    58     def % = STAR(s)
       
    59     def ~ (r: Rexp) = SEQ(s, r)
       
    60     def ~ (r: String) = SEQ(s, r)
       
    61     def $ (r: Rexp) = RECD(s, r)
       
    62   }
       
    63 
       
    64   // translation into ARexps
       
    65   def fuse(bs: Bits, r: ARexp) : ARexp = r match {
       
    66     case AZERO => AZERO
       
    67     case AONE(cs) => AONE(bs ++ cs)
       
    68     case APRED(cs, f) => APRED(bs ++ cs, f)
       
    69     case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
       
    70     case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
       
    71     case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
       
    72   }
       
    73 
       
    74   def internalise(r: Rexp) : ARexp = r match {
       
    75     case ZERO => AZERO
       
    76     case ONE => AONE(Nil)
       
    77     case PRED(f) => APRED(Nil, f)
       
    78     case ALTS(List(r1, r2)) => 
       
    79       AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
       
    80     case ALTS(r1::rs) => {
       
    81       val AALTS(Nil, rs2) = internalise(ALTS(rs))
       
    82       AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
       
    83     }
       
    84     case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
       
    85     case STAR(r) => ASTAR(Nil, internalise(r))
       
    86     case RECD(x, r) => internalise(r)
       
    87   }
       
    88 
       
    89   internalise(("a" | "ab") ~ ("b" | ""))
       
    90 
       
    91   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
       
    92     case (ONE, bs) => (Empty, bs)
       
    93     case (PRED(f), C(c)::bs) => (Chr(c), bs)
       
    94     case (ALTS(r::Nil), bs) => decode_aux(r, bs)
       
    95     case (ALTS(rs), bs) => bs match {
       
    96       case Z::bs1 => {
       
    97         val (v, bs2) = decode_aux(rs.head, bs1)
       
    98         (Left(v), bs2)
       
    99       }
       
   100       case S::bs1 => {
       
   101         val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
       
   102         (Right(v), bs2)			
       
   103       }
       
   104     }
       
   105     case (SEQ(r1, r2), bs) => {
       
   106       val (v1, bs1) = decode_aux(r1, bs)
       
   107       val (v2, bs2) = decode_aux(r2, bs1)
       
   108       (Sequ(v1, v2), bs2)
       
   109     }
       
   110     case (STAR(r1), S::bs) => {
       
   111       val (v, bs1) = decode_aux(r1, bs)
       
   112       //println(v)
       
   113       val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
       
   114       (Stars(v::vs), bs2)
       
   115     }
       
   116     case (STAR(_), Z::bs) => (Stars(Nil), bs)
       
   117     case (RECD(x, r1), bs) => {
       
   118       val (v, bs1) = decode_aux(r1, bs)
       
   119       (Rec(x, v), bs1)
       
   120     }
       
   121   }
       
   122 
       
   123   def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
       
   124     case (v, Nil) => v
       
   125     case _ => throw new Exception("Not decodable")
       
   126   }
       
   127 
       
   128 
       
   129   //erase function: extracts the regx from Aregex
       
   130   def erase(r:ARexp): Rexp = r match{
       
   131     case AZERO => ZERO
       
   132     case AONE(_) => ONE
       
   133     case APRED(bs, f) => PRED(f)
       
   134     case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
       
   135     case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
       
   136     case ASTAR(cs, r)=> STAR(erase(r))
       
   137   }
       
   138 
       
   139   //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
       
   140   // nullable function: tests whether the regular 
       
   141   // expression can recognise the empty string
       
   142   def nullable (r: Rexp) : Boolean = r match {
       
   143     case ZERO => false
       
   144     case ONE => true
       
   145     case PRED(_) => false
       
   146     case ALTS(rs) => rs.exists(nullable)
       
   147     case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
   148     case STAR(_) => true
       
   149     case RECD(_, r) => nullable(r)
       
   150     //case PLUS(r) => nullable(r)
       
   151   }
       
   152 
       
   153   // derivative of a regular expression w.r.t. a character
       
   154   def der (c: Char, r: Rexp) : Rexp = r match {
       
   155     case ZERO => ZERO
       
   156     case ONE => ZERO
       
   157     case PRED(f) => if (f(c)) ONE else ZERO
       
   158     case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
       
   159     case SEQ(r1, r2) => 
       
   160       if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
       
   161       else SEQ(der(c, r1), r2)
       
   162     case STAR(r) => SEQ(der(c, r), STAR(r))
       
   163     case RECD(_, r1) => der(c, r1)
       
   164     //case PLUS(r) => SEQ(der(c, r), STAR(r))
       
   165   }
       
   166 
       
   167   def flatten(v: Val) : String = v match {
       
   168     case Empty => ""
       
   169     case Chr(c) => c.toString
       
   170     case Left(v) => flatten(v)
       
   171     case Right(v) => flatten(v)
       
   172     case Sequ(v1, v2) => flatten(v1) + flatten(v2)
       
   173     case Stars(vs) => vs.map(flatten).mkString
       
   174     case Rec(_, v) => flatten(v)
       
   175   }
       
   176 
       
   177   // extracts an environment from a value
       
   178   def env(v: Val) : List[(String, String)] = v match {
       
   179     case Empty => Nil
       
   180     case Chr(c) => Nil
       
   181     case Left(v) => env(v)
       
   182     case Right(v) => env(v)
       
   183     case Sequ(v1, v2) => env(v1) ::: env(v2)
       
   184     case Stars(vs) => vs.flatMap(env)
       
   185     case Rec(x, v) => (x, flatten(v))::env(v)
       
   186   }
       
   187 
       
   188 
       
   189   // injection part
       
   190   def mkeps(r: Rexp) : Val = r match {
       
   191     case ONE => Empty
       
   192     case ALTS(List(r1, r2)) => 
       
   193       if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
   194     case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   195     case STAR(r) => Stars(Nil)
       
   196     case RECD(x, r) => Rec(x, mkeps(r))
       
   197     //case PLUS(r) => Stars(List(mkeps(r)))
       
   198   }
       
   199 
       
   200   def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   201     case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   202     case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   203     case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   204     case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   205     case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
       
   206     case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
       
   207     case (PRED(_), Empty) => Chr(c) 
       
   208     case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   209     //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   210   }
       
   211   def lex(r: Rexp, s: List[Char]) : Val = s match {
       
   212     case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   213     case c::cs => inj(r, c, lex(der(c, r), cs))
       
   214   }
       
   215 
       
   216   def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
       
   217 
       
   218   // some "rectification" functions for simplification
       
   219   def F_ID(v: Val): Val = v
       
   220   def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
       
   221   def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
       
   222   def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   223     case Right(v) => Right(f2(v))
       
   224     case Left(v) => Left(f1(v))
       
   225   }
       
   226   def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   227     case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
       
   228   }
       
   229   def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
       
   230     (v:Val) => Sequ(f1(Empty), f2(v))
       
   231   def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
       
   232     (v:Val) => Sequ(f1(v), f2(Empty))
       
   233   def F_RECD(f: Val => Val) = (v:Val) => v match {
       
   234     case Rec(x, v) => Rec(x, f(v))
       
   235   }
       
   236   def F_ERROR(v: Val): Val = throw new Exception("error")
       
   237 
       
   238   // simplification of regular expressions returning also an
       
   239   // rectification function; no simplification under STAR 
       
   240   def simp(r: Rexp): (Rexp, Val => Val) = r match {
       
   241     case ALTS(List(r1, r2)) => {
       
   242       val (r1s, f1s) = simp(r1)
       
   243       val (r2s, f2s) = simp(r2)
       
   244       (r1s, r2s) match {
       
   245         case (ZERO, _) => (r2s, F_RIGHT(f2s))
       
   246         case (_, ZERO) => (r1s, F_LEFT(f1s))
       
   247         case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
       
   248                   else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
       
   249       }
       
   250     }
       
   251     case SEQ(r1, r2) => {
       
   252       val (r1s, f1s) = simp(r1)
       
   253       val (r2s, f2s) = simp(r2)
       
   254       (r1s, r2s) match {
       
   255         case (ZERO, _) => (ZERO, F_ERROR)
       
   256         case (_, ZERO) => (ZERO, F_ERROR)
       
   257         case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
       
   258         case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
       
   259         case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
       
   260       }
       
   261     }
       
   262     case RECD(x, r1) => {
       
   263       val (r1s, f1s) = simp(r1)
       
   264       (RECD(x, r1s), F_RECD(f1s))
       
   265     }
       
   266     case r => (r, F_ID)
       
   267   }
       
   268   /*
       
   269   val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   270   val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   271   */
       
   272   def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   273     case Nil => {
       
   274       if (nullable(r)) {
       
   275         mkeps(r) 
       
   276       }
       
   277       else throw new Exception("Not matched")
       
   278     }
       
   279     case c::cs => {
       
   280       val (r_simp, f_simp) = simp(der(c, r))
       
   281       inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   282     }
       
   283   }
       
   284 
       
   285   def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
       
   286 
       
   287   //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
       
   288 
       
   289   // filters out all white spaces
       
   290   def tokenise(r: Rexp, s: String) = 
       
   291     env(lexing_simp(r, s)).filterNot { _._1 == "w"}
       
   292 
       
   293 
       
   294   //reads the string from a file 
       
   295   def fromFile(name: String) : String = 
       
   296     io.Source.fromFile(name).mkString
       
   297 
       
   298   def tokenise_file(r: Rexp, name: String) = 
       
   299     tokenise(r, fromFile(name))
       
   300   
       
   301   //   Testing
       
   302   //============
       
   303 
       
   304   def time[T](code: => T) = {
       
   305     val start = System.nanoTime()
       
   306     val result = code
       
   307     val end = System.nanoTime()
       
   308     println((end - start)/1.0e9)
       
   309     result
       
   310   }
       
   311 
       
   312   //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
       
   313 
       
   314   // bnullable function: tests whether the aregular 
       
   315   // expression can recognise the empty string
       
   316   def bnullable (r: ARexp) : Boolean = r match {
       
   317     case AZERO => false
       
   318     case AONE(_) => true
       
   319     case APRED(_,_) => false
       
   320     case AALTS(_, rs) => rs.exists(bnullable)
       
   321     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
       
   322     case ASTAR(_, _) => true
       
   323   }
       
   324 
       
   325   def mkepsBC(r: ARexp) : Bits = r match {
       
   326     case AONE(bs) => bs
       
   327     case AALTS(bs, rs) => {
       
   328       val n = rs.indexWhere(bnullable)
       
   329       bs ++ mkepsBC(rs(n))
       
   330     }
       
   331     case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
       
   332     case ASTAR(bs, r) => bs ++ List(Z)
       
   333   }
       
   334 
       
   335   // derivative of a regular expression w.r.t. a character
       
   336   def bder(c: Char, r: ARexp) : ARexp = r match {
       
   337     case AZERO => AZERO
       
   338     case AONE(_) => AZERO
       
   339     case APRED(bs, f) => if (f(c)) AONE(bs:::List(C(c))) else AZERO
       
   340     case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
       
   341     case ASEQ(bs, r1, r2) => 
       
   342       if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
       
   343       else ASEQ(bs, bder(c, r1), r2)
       
   344     case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
       
   345   }
       
   346 
       
   347 
       
   348   def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
   349     case Nil => r
       
   350     case c::s => ders(s, der(c, r))
       
   351   }
       
   352 
       
   353   // derivative w.r.t. a string (iterates bder)
       
   354   @tailrec
       
   355   def bders (s: List[Char], r: ARexp) : ARexp = s match {
       
   356     case Nil => r
       
   357     case c::s => bders(s, bder(c, r))
       
   358   }
       
   359 
       
   360   def flats(rs: List[ARexp]): List[ARexp] = rs match {
       
   361       case Nil => Nil
       
   362       case AZERO :: rs1 => flats(rs1)
       
   363       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
       
   364       case r1 :: rs2 => r1 :: flats(rs2)
       
   365     }
       
   366   //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   367   //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   368   var flats_time = 0L
       
   369   var dist_time = 0L
       
   370   /*
       
   371   def bsimp(r: ARexp, depth: Int): ARexp = 
       
   372   {
       
   373     r match {
       
   374       case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
       
   375           case (AZERO, _) => AZERO
       
   376           case (_, AZERO) => AZERO
       
   377           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   378           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   379       }
       
   380       case AALTS(bs1, rs) => {
       
   381         depth match {
       
   382           case 0 => {
       
   383             flats(distinctBy(rs, erase)) match {
       
   384               case Nil => AZERO
       
   385               case s :: Nil => fuse(bs1, s)
       
   386               case rs => AALTS(bs1, rs) 
       
   387             } 
       
   388           }
       
   389           case n => {
       
   390             val rs_simp = rs.map(bsimp(_, n - 1))
       
   391             val time2 = System.nanoTime()
       
   392             val flat_res = flats(rs_simp)
       
   393             val time3 = System.nanoTime()
       
   394             val dist_res = distinctBy(flat_res, erase)
       
   395             val time4 = System.nanoTime()
       
   396             flats_time = flats_time + time3 - time2
       
   397             dist_time = dist_time + time4 - time3
       
   398             //flats_time += time3 - time2
       
   399             //dist_time += time4 - time3
       
   400             //distinctBy(flats(rs.map(bsimp)), erase) match {
       
   401             dist_res match {
       
   402               case Nil => AZERO
       
   403               case s :: Nil => fuse(bs1, s)
       
   404               case rs => AALTS(bs1, rs)  
       
   405             }
       
   406           }
       
   407         }
       
   408       }
       
   409       case r => r
       
   410     }
       
   411   }
       
   412   */
       
   413   //----------------------------------------------------------------------------This bsimp is the original slow one
       
   414   
       
   415   def bsimp(r: ARexp): ARexp = r match {
       
   416     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
       
   417         case (AZERO, _) => AZERO
       
   418         case (_, AZERO) => AZERO
       
   419         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   420         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   421     }
       
   422     case AALTS(bs1, rs) => {
       
   423       val rs_simp = rs.map(bsimp)
       
   424       val time2 = System.nanoTime()
       
   425       val flat_res = flats(rs_simp)
       
   426       val time3 = System.nanoTime()
       
   427       val dist_res = distinctBy(flat_res, erase)
       
   428       val time4 = System.nanoTime()
       
   429       flats_time = flats_time + time3 - time2
       
   430       dist_time = dist_time + time4 - time3
       
   431       dist_res match {
       
   432         case Nil => AZERO
       
   433         case s :: Nil => fuse(bs1, s)
       
   434         case rs => AALTS(bs1, rs)  
       
   435       }
       
   436     }
       
   437     case r => r
       
   438   }
       
   439   
       
   440   //----------------------------------------------------------------------------experiment bsimp
       
   441   /*
       
   442   def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
       
   443     case Nil => r
       
   444     case c::s => bders_simp(s, bsimp(bder(c, r)))
       
   445   }
       
   446   */
       
   447   /*
       
   448   def time[T](code: => T) = {
       
   449     val start = System.nanoTime()
       
   450     val result = code
       
   451     val end = System.nanoTime()
       
   452     println((end - start)/1.0e9)
       
   453     result
       
   454   }
       
   455   */
       
   456   // main unsimplified lexing function (produces a value)
       
   457   def blex(r: ARexp, s: List[Char]) : Bits = s match {
       
   458     case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
       
   459     case c::cs => {
       
   460       val der_res = bder(c,r)
       
   461       blex(der_res, cs)
       
   462     }
       
   463   }
       
   464 
       
   465   def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
       
   466   //def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
       
   467 
       
   468   var bder_time = 0L
       
   469   var bsimp_time = 0L
       
   470   var mkepsBC_time = 0L
       
   471   var small_de = 2
       
   472   var big_de = 5
       
   473   var usual_de = 3
       
   474   
       
   475   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   476     case Nil => {
       
   477       if (bnullable(r)) {
       
   478         //println(asize(r))
       
   479         val time4 = System.nanoTime()
       
   480         val bits = mkepsBC(r)
       
   481         val time5 = System.nanoTime()
       
   482         mkepsBC_time = time5 - time4
       
   483         bits
       
   484       }
       
   485     else throw new Exception("Not matched")
       
   486     }
       
   487     case c::cs => {
       
   488       val time1 = System.nanoTime()
       
   489       val der_res = bder(c,r)
       
   490       val time2 = System.nanoTime()
       
   491       val simp_res = bsimp(der_res)
       
   492       val time3 = System.nanoTime()  
       
   493       bder_time = bder_time + time2 - time1
       
   494       bsimp_time = bsimp_time + time3 - time2
       
   495       blex_simp(simp_res, cs)      
       
   496     }
       
   497   }
       
   498 
       
   499   //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
       
   500   /*
       
   501   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   502     case Nil => {
       
   503       if (bnullable(r)) {
       
   504         //println(asize(r))
       
   505         mkepsBC(r)
       
   506       }
       
   507     else throw new Exception("Not matched")
       
   508     }
       
   509     case c::cs => {
       
   510       val der_res = bder(c,r)
       
   511       val simp_res = bsimp(der_res)  
       
   512       //val simp_res2 = bsimp(simp_res)  
       
   513       //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
       
   514       blex_simp(simp_res, cs)
       
   515     }
       
   516   }
       
   517   */
       
   518   /*
       
   519   def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   520     case Nil => {
       
   521       if (nullable(r)) {
       
   522         mkeps(r) 
       
   523       }
       
   524       else throw new Exception("Not matched")
       
   525     }
       
   526     case c::cs => {
       
   527       val start = System.nanoTime()
       
   528       val (r_simp, f_simp) = simp(der(c, r))
       
   529       val end = System.nanoTime()
       
   530       println((end - start)/1.0e9)
       
   531       inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   532     }
       
   533   }
       
   534   */
       
   535 
       
   536   //size: of a Aregx for testing purposes 
       
   537   def size(r: Rexp) : Int = r match {
       
   538     case ZERO => 1
       
   539     case ONE => 1
       
   540     case PRED(_) => 1
       
   541     case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
   542     case ALTS(rs) => 1 + rs.map(size).sum
       
   543     case STAR(r) => 1 + size(r)
       
   544   }
       
   545 
       
   546   def asize(a: ARexp) = size(erase(a))
       
   547 
       
   548 
       
   549   // decoding does not work yet
       
   550   def blexing_simp(r: Rexp, s: String) = {
       
   551     //flats_time.clear()
       
   552     //dist_time.clear()
       
   553     flats_time = 0L
       
   554     dist_time = 0L
       
   555     bder_time = 0L
       
   556     bsimp_time = 0L
       
   557     mkepsBC_time = 0L
       
   558     val start = System.nanoTime()
       
   559     val bit_code = blex_simp(internalise(r), s.toList)
       
   560     val end = System.nanoTime()
       
   561     println("total time: "+ (end - start)/1.0e9)
       
   562     println("spent on flats: " + (flats_time/(1.0e9)))
       
   563     println("spent on distinctBy: " + (dist_time/(1.0e9)))
       
   564     println("spent on bder: "+ bder_time/1.0e9)
       
   565     println("spent on bsimp: " + bsimp_time/1.0e9)
       
   566     println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
       
   567     //println(s"The length of the string ${s.length}; length of bit sequence:")
       
   568     //println((bit_code.length))
       
   569     //println(final_derivative)
       
   570     //bit_code
       
   571     //decode(r, bit_code) 
       
   572   }
       
   573 
       
   574 
       
   575 
       
   576 
       
   577 
       
   578   // Lexing Rules for a Small While Language
       
   579 
       
   580   //symbols
       
   581   val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
       
   582   
       
   583   //digits
       
   584   val DIGIT = PRED("0123456789".contains(_))
       
   585   //identifiers
       
   586   val ID = SYM ~ (SYM | DIGIT).% 
       
   587   //numbers
       
   588   val NUM = STAR(DIGIT)
       
   589   //keywords
       
   590   val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
       
   591   val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
       
   592   //semicolons
       
   593   val SEMI: Rexp = ";"
       
   594   //operators
       
   595   val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
       
   596   val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
       
   597   //whitespaces
       
   598   val WHITESPACE = PLUS(" " | "\n" | "\t")
       
   599   //parentheses
       
   600   val RPAREN: Rexp = ")"
       
   601   val LPAREN: Rexp = "("
       
   602   val BEGIN: Rexp = "{"
       
   603   val END: Rexp = "}"
       
   604   //strings...but probably needs not
       
   605   val STRING: Rexp = "\"" ~ SYM.% ~ "\""
       
   606 
       
   607 
       
   608 
       
   609   val WHILE_REGS = (("k" $ KEYWORD) | 
       
   610                     ("i" $ ID) | 
       
   611                     ("o" $ OP) | 
       
   612                     ("n" $ NUM) | 
       
   613                     ("s" $ SEMI) | 
       
   614                     ("str" $ STRING) |
       
   615                     ("p" $ (LPAREN | RPAREN)) | 
       
   616                     ("b" $ (BEGIN | END)) | 
       
   617                     ("w" $ WHITESPACE)).%
       
   618 
       
   619   val AWHILE_REGS = (
       
   620     ALTS(
       
   621       List(
       
   622         ("k" $ AKEYWORD), 
       
   623                     ("i" $ ID),
       
   624                     ("o" $ AOP) , 
       
   625                     ("n" $ NUM) ,
       
   626                     ("s" $ SEMI) ,
       
   627                     ("str" $ STRING), 
       
   628                     ("p" $ (LPAREN | RPAREN)), 
       
   629                     ("b" $ (BEGIN | END)), 
       
   630                     ("w" $ WHITESPACE)
       
   631       )
       
   632     )
       
   633   ).%
       
   634 
       
   635 
       
   636 
       
   637 
       
   638   //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
       
   639   /*
       
   640   // Two Simple While programs
       
   641   //========================
       
   642   println("prog0 test")
       
   643 
       
   644   val prog0 = """read n"""
       
   645   println(env(lexing_simp(WHILE_REGS, prog0)))
       
   646   println(tokenise(WHILE_REGS, prog0))
       
   647 
       
   648   println("prog1 test")
       
   649 
       
   650   val prog1 = """read  n; write (n)"""
       
   651   println(tokenise(WHILE_REGS, prog1))
       
   652 
       
   653   */
       
   654   // Bigger Tests
       
   655   //==============
       
   656 
       
   657   def escape(raw: String): String = {
       
   658     import scala.reflect.runtime.universe._
       
   659     Literal(Constant(raw)).toString
       
   660   }
       
   661 
       
   662   val prog2 = """
       
   663   write "Fib";
       
   664   read n;
       
   665   minus1 := 0;
       
   666   minus2 := 1;
       
   667   while n > 0 do {
       
   668     temp := minus2;
       
   669     minus2 := minus1 + minus2;
       
   670     minus1 := temp;
       
   671     n := n - 1
       
   672   };
       
   673   write "Result";
       
   674   write minus2
       
   675   """
       
   676 
       
   677   val prog3 = """
       
   678   start := 1000;
       
   679   x := start;
       
   680   y := start;
       
   681   z := start;
       
   682   while 0 < x do {
       
   683   while 0 < y do {
       
   684     while 0 < z do {
       
   685       z := z - 1
       
   686     };
       
   687     z := start;
       
   688     y := y - 1
       
   689   };     
       
   690   y := start;
       
   691   x := x - 1
       
   692   }
       
   693   """
       
   694   /*
       
   695   for(i <- 400 to 400 by 1){
       
   696     println(i+":")
       
   697     blexing_simp(WHILE_REGS, prog2 * i)
       
   698   } */
       
   699 
       
   700     /*
       
   701     for (i <- 2 to 5){
       
   702       for(j <- 1 to 3){
       
   703         println(i,j)
       
   704         small_de = i
       
   705         usual_de = i + j
       
   706         big_de = i + 2*j 
       
   707         blexing_simp(AWHILE_REGS, prog2 * 100)
       
   708       }
       
   709     }*/
       
   710 
       
   711   /*
       
   712   println("Tokens of prog2")
       
   713   println(tokenise(WHILE_REGS, prog2).mkString("\n"))
       
   714 
       
   715   val fib_tokens = tokenise(WHILE_REGS, prog2)
       
   716   fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
       
   717 
       
   718 
       
   719   val test_tokens = tokenise(WHILE_REGS, prog3)
       
   720   test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
       
   721   */
       
   722 
       
   723   /*
       
   724   println("time test for blexing_simp")
       
   725   for (i <- 1 to 1 by 1) {
       
   726     lexing_simp(WHILE_REGS, prog2 * i)
       
   727     blexing_simp(WHILE_REGS, prog2 * i)
       
   728     for( j <- 0 to each_simp_timeb.length - 1){
       
   729       if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
       
   730         println(j, each_simp_timeb(j), each_simp_time(j))
       
   731     }
       
   732   }
       
   733   */
       
   734 
       
   735   // Tiger Language
       
   736   //================
       
   737 
       
   738   //symbols
       
   739   val TSYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
       
   740   //digits
       
   741   val TDIGIT = PRED("0123456789".contains(_))
       
   742   //identifiers
       
   743   val TID = TSYM ~ (TSYM | TDIGIT | "_").% 
       
   744   //numbers
       
   745   val TNUM = PLUS(TDIGIT)
       
   746   //keywords
       
   747   val TKEYWORD : Rexp = { "array" | "break" | "do" | "else" | "end" | "for" | 
       
   748                           "function" | "if" | "in" | "let" | "nil" | "of" | "then" | 
       
   749                           "to" | "type" | "var" | "while" }
       
   750 
       
   751   //operators
       
   752   val TOP: Rexp = { "(" | ")" | "[" | "]" | "{" | "}" | ":" | ":=" | "." | "," | 
       
   753         ";" | "/" | "+" | "-" | "=" | "<>" | ">" | "<" | ">=" | "<=" | "&" | "|" }
       
   754 
       
   755   //whitespaces
       
   756   val TSPECIAL : Rexp = PRED((""".:=()\;-""" ++ "\"").contains(_))
       
   757   val TWS : Rexp = " " | "\n" | "\t"
       
   758   //comments...but probably needs not
       
   759   val TCOMMENT: Rexp = """/*""" ~ (TSYM | TWS | TSPECIAL | TDIGIT).% ~ """*/"""
       
   760 
       
   761   val TWHITESPACE : Rexp = PLUS(TWS) | TCOMMENT
       
   762 
       
   763 
       
   764 
       
   765   //strings...but probably needs not
       
   766 
       
   767   val TSTRING: Rexp = "\"" ~ (TSYM | " " | TSPECIAL | TDIGIT).% ~ "\""
       
   768 
       
   769 
       
   770   // for indicating lexing errors
       
   771   val ERROR = PRED((_) => true)
       
   772 
       
   773 
       
   774   val TIGER_REGS = (("k" $ TKEYWORD) | 
       
   775                     ("i" $ TID) | 
       
   776                     ("o" $ TOP) | 
       
   777                     ("n" $ TNUM) | 
       
   778                     ("str" $ TSTRING) |
       
   779                     ("w" $ TWHITESPACE) | 
       
   780                     ("err" $ ERROR)).%
       
   781 
       
   782 
       
   783   //println(tokenise_file(TIGER_REGS, "test.tig").mkString("\n"))
       
   784   //println(tokenise_file(TIGER_REGS, "queens.tig").mkString("\n"))
       
   785 
       
   786   //tokenise(TCOMMENT,"""/**/""")
       
   787   //tokenise(TCOMMENT,"""/*a a a */""")
       
   788   //tokenise(TIGER_REGS,"""/*a a a */""")
       
   789   //tokenise(TCOMMENT,"""/* A program to solve the 8-queens problem */""")
       
   790   //tokenise(TIGER_REGS,"""/* A program to solve the 8-queens problem */""")
       
   791   //tokenise(TCOMMENT,"""/*  for i:= 0 to c do print("."); print("\n"); flush();*/""")
       
   792   //tokenise(TIGER_REGS,"""/*  for i:= 0 to c do print("."); print("\n"); flush();*/""")
       
   793 
       
   794 
       
   795   //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
       
   796 
       
   797 
       
   798 
       
   799   def clear() = {
       
   800     print("")
       
   801     //print("\33[H\33[2J")
       
   802   }
       
   803 
       
   804   //testing the two lexings produce the same value
       
   805   //enumerates strings of length n over alphabet cs
       
   806   def strs(n: Int, cs: String) : Set[String] = {
       
   807     if (n == 0) Set("")
       
   808     else {
       
   809       val ss = strs(n - 1, cs)
       
   810       ss ++
       
   811       (for (s <- ss; c <- cs.toList) yield c + s)
       
   812     }
       
   813   }
       
   814   def enum(n: Int, s: String) : Stream[Rexp] = n match {
       
   815     case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
       
   816     case n => {  
       
   817       val rs = enum(n - 1, s)
       
   818       rs #:::
       
   819       (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
       
   820       (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
       
   821       (for (r1 <- rs) yield STAR(r1))
       
   822     }
       
   823   }
       
   824 
       
   825   //tests blexing and lexing
       
   826   def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
       
   827     clear()
       
   828     //println(s"Testing ${r}")
       
   829     for (s <- ss.par) yield {
       
   830       val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
       
   831       val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
       
   832       if (res1 != res2) println(s"Disagree on ${r} and ${s}")
       
   833       if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       
   834       if (res1 != res2) Some((r, s)) else None
       
   835     }
       
   836   }
       
   837 
       
   838 
       
   839 
       
   840   //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
       
   841   /*
       
   842   def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
       
   843     for (s <- ss){
       
   844       
       
   845       val der_res = bder(c, ar) 
       
   846       val simp_res = bsimp(der_res)
       
   847       println(asize(der_res))
       
   848       println(asize(simp_res))
       
   849       single_expression_explorer(simp_res, (sc - c))
       
   850     }
       
   851   }*/
       
   852 
       
   853   //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
       
   854 
       
   855 
       
   856 }
       
   857 
       
   858 import Rexp.Bits
       
   859 abstract class ARexp 
       
   860 case object AZERO extends ARexp
       
   861 case class AONE(bs: Bits) extends ARexp
       
   862 case class APRED(bs: Bits, f: Char => Boolean) extends ARexp
       
   863 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
       
   864 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
       
   865 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
       
   866 
       
   867 
       
   868 
       
   869 abstract class Val
       
   870 case object Empty extends Val
       
   871 case class Chr(c: Char) extends Val
       
   872 case class Sequ(v1: Val, v2: Val) extends Val
       
   873 case class Left(v: Val) extends Val
       
   874 case class Right(v: Val) extends Val
       
   875 case class Stars(vs: List[Val]) extends Val
       
   876 case class Rec(x: String, v: Val) extends Val
       
   877 //case class Pos(i: Int, v: Val) extends Val
       
   878 case object Prd extends Val