lex_blex_Frankensteined.scala
changeset 5 622ddbb1223a
parent 3 f15dccc42c7b
child 11 9c1ca6d6e190
--- a/lex_blex_Frankensteined.scala	Fri Mar 15 12:27:12 2019 +0000
+++ b/lex_blex_Frankensteined.scala	Sat Mar 16 14:14:42 2019 +0000
@@ -88,10 +88,10 @@
   }
 
   internalise(("a" | "ab") ~ ("b" | ""))
-/*
+
   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
     case (ONE, bs) => (Empty, bs)
-    case (PRED(f), C(c)::bs) => (Chr(c), bs)
+    case (CHAR(f), C(c)::bs) => (Chr(c), bs)
     case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
     case (ALTS(rs), bs) => bs match {
       case Z::bs1 => {
@@ -125,7 +125,7 @@
     case (v, Nil) => v
     case _ => throw new Exception("Not decodable")
   }
-*/
+
 
   //erase function: extracts the regx from Aregex
   def erase(r:ARexp): Rexp = r match{
@@ -370,54 +370,8 @@
     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
     case r1 :: rs2 => r1 :: rflats(rs2)
   }
-  //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
-  //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
   var flats_time = 0L
   var dist_time = 0L
-  /*
-  def bsimp(r: ARexp, depth: Int): ARexp = 
-  {
-    r match {
-      case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
-          case (AZERO, _) => AZERO
-          case (_, AZERO) => AZERO
-          case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
-          case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
-      }
-      case AALTS(bs1, rs) => {
-        depth match {
-          case 0 => {
-            flats(distinctBy(rs, erase)) match {
-              case Nil => AZERO
-              case s :: Nil => fuse(bs1, s)
-              case rs => AALTS(bs1, rs) 
-            } 
-          }
-          case n => {
-            val rs_simp = rs.map(bsimp(_, n - 1))
-            val time2 = System.nanoTime()
-            val flat_res = flats(rs_simp)
-            val time3 = System.nanoTime()
-            val dist_res = distinctBy(flat_res, erase)
-            val time4 = System.nanoTime()
-            flats_time = flats_time + time3 - time2
-            dist_time = dist_time + time4 - time3
-            //flats_time += time3 - time2
-            //dist_time += time4 - time3
-            //distinctBy(flats(rs.map(bsimp)), erase) match {
-            dist_res match {
-              case Nil => AZERO
-              case s :: Nil => fuse(bs1, s)
-              case rs => AALTS(bs1, rs)  
-            }
-          }
-        }
-      }
-      case r => r
-    }
-  }
-  */
-  //----------------------------------------------------------------------------This bsimp is the original slow one
   
   def bsimp(r: ARexp): ARexp = r match {
     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
@@ -428,49 +382,40 @@
     }
     case AALTS(bs1, rs) => {
       val rs_simp = rs.map(bsimp)
-      val time2 = System.nanoTime()
       val flat_res = flats(rs_simp)
-      val time3 = System.nanoTime()
       val dist_res = distinctBy(flat_res, erase)
-      val time4 = System.nanoTime()
-      flats_time = flats_time + time3 - time2
-      dist_time = dist_time + time4 - time3
       dist_res match {
         case Nil => AZERO
         case s :: Nil => fuse(bs1, s)
         case rs => AALTS(bs1, rs)  
       }
     }
-    case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
+    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
     case r => r
   }
-
-  def bsimp_weakened(r: ARexp): ARexp = r match {
-    case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
+  def super_bsimp(r: ARexp): ARexp = r match {
+    case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
         case (AZERO, _) => AZERO
         case (_, AZERO) => AZERO
         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+        case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) ) 
         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
     }
     case AALTS(bs1, rs) => {
-      val rs_simp = rs.map(bsimp_weakened)
-      val time2 = System.nanoTime()
+      val rs_simp = rs.map(super_bsimp)
       val flat_res = flats(rs_simp)
-      val time3 = System.nanoTime()
       val dist_res = distinctBy(flat_res, erase)
-      val time4 = System.nanoTime()
-      flats_time = flats_time + time3 - time2
-      dist_time = dist_time + time4 - time3
       dist_res match {
         case Nil => AZERO
         case s :: Nil => fuse(bs1, s)
         case rs => AALTS(bs1, rs)  
       }
     }
-    case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
+    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
     case r => r
   }
 
+
   def simp_weakened(r: Rexp): Rexp = r match {
     case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
         case (ZERO, _) => ZERO
@@ -532,66 +477,32 @@
     case Nil => {
       if (bnullable(r)) {
         //println(asize(r))
-        val time4 = System.nanoTime()
-        val bits = mkepsBC(r)
-        val time5 = System.nanoTime()
-        mkepsBC_time = time5 - time4
-        bits
+        mkepsBC(r)
       }
     else throw new Exception("Not matched")
     }
     case c::cs => {
-      val time1 = System.nanoTime()
       val der_res = bder(c,r)
-      val time2 = System.nanoTime()
       val simp_res = bsimp(der_res)
-      val time3 = System.nanoTime()  
-      bder_time = bder_time + time2 - time1
-      bsimp_time = bsimp_time + time3 - time2
       blex_simp(simp_res, cs)      
     }
   }
+  def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
+    case Nil => {
+      if (bnullable(r)) {
+        mkepsBC(r)
+      }
+      else throw new Exception("Not matched")
+    }
+    case c::cs => {
+      super_blex_simp(super_bsimp(bder(c,r)), cs)
+    }
+  }
   def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
     case Nil => r
     case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
   }
 
-  //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
-  /*
-  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
-    case Nil => {
-      if (bnullable(r)) {
-        //println(asize(r))
-        mkepsBC(r)
-      }
-    else throw new Exception("Not matched")
-    }
-    case c::cs => {
-      val der_res = bder(c,r)
-      val simp_res = bsimp(der_res)  
-      //val simp_res2 = bsimp(simp_res)  
-      //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
-      blex_simp(simp_res, cs)
-    }
-  }
-  */
-  /*
-  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
-    case Nil => {
-      if (nullable(r)) {
-        mkeps(r) 
-      }
-      else throw new Exception("Not matched")
-    }
-    case c::cs => {
-      val start = System.nanoTime()
-      val (r_simp, f_simp) = simp(der(c, r))
-      val end = System.nanoTime()
-      println((end - start)/1.0e9)
-      inj(r, c, f_simp(lex_simp(r_simp, cs)))
-    }
-  }
-  */
 
   //size: of a Aregx for testing purposes 
   def size(r: Rexp) : Int = r match {
@@ -608,27 +519,11 @@
 
   // decoding does not work yet
   def blexing_simp(r: Rexp, s: String) = {
-    //flats_time.clear()
-    //dist_time.clear()
-    flats_time = 0L
-    dist_time = 0L
-    bder_time = 0L
-    bsimp_time = 0L
-    mkepsBC_time = 0L
-    val start = System.nanoTime()
     val bit_code = blex_simp(internalise(r), s.toList)
-    val end = System.nanoTime()
-    println("total time: "+ (end - start)/1.0e9)
-    println("spent on flats: " + (flats_time/(1.0e9)))
-    println("spent on distinctBy: " + (dist_time/(1.0e9)))
-    println("spent on bder: "+ bder_time/1.0e9)
-    println("spent on bsimp: " + bsimp_time/1.0e9)
-    println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
-    //println(s"The length of the string ${s.length}; length of bit sequence:")
-    //println((bit_code.length))
-    //println(final_derivative)
-    //bit_code
-    //decode(r, bit_code) 
+    decode(r, bit_code) 
+  }
+  def super_blexing_simp(r: Rexp, s: String) = {
+    decode(r, super_blex_simp(internalise(r), s.toList))
   }
 
 
@@ -830,7 +725,7 @@
     //println(s"Testing ${r}")
     for (s <- ss.par) yield {
       val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
-      val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
+      val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
       if (res1 != res2) println(s"Disagree on ${r} and ${s}")
       if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       if (res1 != res2) Some((r, s)) else None
@@ -839,7 +734,7 @@
 
 
 
-  //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
+  
   /*
   def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
     for (s <- ss){