correctness test with enumeration
authorChengsong
Sat, 16 Mar 2019 14:14:42 +0000
changeset 5 622ddbb1223a
parent 4 7a349fe58bf4
child 6 26b40a985622
correctness test with enumeration
Brexp.scala
Spiral.scala
lex_blex_Frankensteined.scala
--- a/Brexp.scala	Fri Mar 15 12:27:12 2019 +0000
+++ b/Brexp.scala	Sat Mar 16 14:14:42 2019 +0000
@@ -125,7 +125,7 @@
         case rs => {BALTS(bs, rs)}
       }
     }
-    //case BSTAR(r) => BSTAR(r)
+    case BSTAR(r) => BSTAR(strong_br_simp(r))
     case r => r
   }
   //we want to bound the size by a function bspill s.t.
--- a/Spiral.scala	Fri Mar 15 12:27:12 2019 +0000
+++ b/Spiral.scala	Sat Mar 16 14:14:42 2019 +0000
@@ -29,6 +29,35 @@
   val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c')
   def bregx_tree(r: BRexp): Element = regx_tree(berase(r))
   def regx_tree(r: Rexp): Element = aregx_tree(internalise(r))
+  def annotated_tree(r: ARexp): Element = {
+    r match {
+      case ACHAR(bs, d) => {
+        //val Some(d) = alphabet.find(f)
+        d match {
+          case '\n' => elem("\\n")
+          case '\t' => elem("\\t")
+          case ' ' => elem("space")
+          case d => elem(d.toString)
+        }   
+      }
+      case AONE(bs) => {
+        elem("ONE")
+      }
+      case AZERO => {
+        elem("ZERO")
+      }
+      case ASEQ(bs, r1, r2) => {
+        binary_print("SEQ", r1, r2)
+      }
+      case AALTS(bs, rs) => {
+        //elem("Awaiting completion")
+        list_print("ALT", rs)
+      }
+      case ASTAR(bs, r) => {
+        list_print("STA", List(r))
+      }
+    } 
+  }
   def aregx_tree(r: ARexp): Element = {
     r match {
       case ACHAR(bs, d) => {
@@ -276,11 +305,11 @@
     //this is for comparison between normal simp and the weakened version of simp
     //normal simp will be performed on syncold
     //weakend simp will be performed on old
-    var syncold = brternalise(r)
+    var syncold = internalise(r)
     val all_chars = s.toList
     for (i <- 0 to s.length - 1){
-      val syncder_res = brder(all_chars(i), syncold)
-      val syncsimp_res = strong_br_simp(syncder_res)
+      val syncder_res = bder(all_chars(i), syncold)
+      val syncsimp_res = super_bsimp(syncder_res)
       //see brder for detailed explanation
       //just changes bit Z to S when deriving an ALTS, 
       //signifying that the structure has been "touched" and
@@ -289,10 +318,14 @@
       val simp_res = br_simp(der_res)
       val anatomy = bspill(simp_res)
       //track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*)
-      if(f(anatomy, pd)  == false || i == 1){
-        println(size(berase(syncsimp_res)))
+      if(f(List(berase(simp_res)), pd)  == false ){
+        println(size(erase(syncsimp_res)))
         println(size(berase(simp_res)))
         println(bregx_tree(simp_res))
+        println(s)
+        println(i)
+        println(r)
+        println()
         println(anatomy.map(size).sum)
         println(pd.map(size).sum)
       }  
@@ -311,22 +344,22 @@
     }
   }
   def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true}
-  def size_expansion_rate(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = if(anatomy.size > (pd.size)*2 ) {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); inclusion_truth(anatomy, pd); false }else {true}
+  def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true}
   def ders_simp(r: ARexp, s: List[Char]): ARexp = {
     s match {
       case Nil => r 
       case c::cs => ders_simp(bsimp(bder(c, r)), cs)
     }
   }
+  val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c')))))))
+  val str_panda = "ccccb"
   def check_all(){
-    for(i <- 1 to 1)
-    {
-        val s = "bb"//rd_string_gen(alphabet_size, 5)//"ac"//rd_string_gen(alphabet_size, 5)
-        val r = STAR(STAR(ALTS(List(SEQ(CHAR('b'),CHAR('b')), ALTS(List(CHAR('a'), CHAR('b')))))))//balanced_struct_gen(4)//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
-        //subset_check(r, s)
-        weak_sub_check(r, s, 5, size_expansion_rate)
-    }
+
+        weak_sub_check(big_panda, str_panda, 6, size_expansion_rate)
+
   }
+  //simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set)
+
   def correctness_proof_convenient_path(){
     for(i <- 1 to 1)
     {
@@ -336,12 +369,18 @@
           val ss = s.slice(0, j+ 1)
           val nangao = ders_simp(r, ss.toList)
           val easy = bsimp(bders(ss.toList, r))
-          if(nangao != easy|| j == 2){
+          if(true){
             println(j)
             if(j == 3) println("not equal")
+            println("string")
             println(ss)
+            println("original regex")
             println(r)
+            println("regex after ders simp")
             println(nangao)
+            println("regex after ders")
+            println(bders(ss.toList, r))
+            println("regex after ders and then a single simp")
             println(easy)
           }
         }
@@ -349,7 +388,8 @@
   }
   def main(args: Array[String]) {
     //check_all()   
-    correctness_proof_convenient_path
+    enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
+    //correctness_proof_convenient_path()
   } 
 }
 
--- a/lex_blex_Frankensteined.scala	Fri Mar 15 12:27:12 2019 +0000
+++ b/lex_blex_Frankensteined.scala	Sat Mar 16 14:14:42 2019 +0000
@@ -88,10 +88,10 @@
   }
 
   internalise(("a" | "ab") ~ ("b" | ""))
-/*
+
   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
     case (ONE, bs) => (Empty, bs)
-    case (PRED(f), C(c)::bs) => (Chr(c), bs)
+    case (CHAR(f), C(c)::bs) => (Chr(c), bs)
     case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
     case (ALTS(rs), bs) => bs match {
       case Z::bs1 => {
@@ -125,7 +125,7 @@
     case (v, Nil) => v
     case _ => throw new Exception("Not decodable")
   }
-*/
+
 
   //erase function: extracts the regx from Aregex
   def erase(r:ARexp): Rexp = r match{
@@ -370,54 +370,8 @@
     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
     case r1 :: rs2 => r1 :: rflats(rs2)
   }
-  //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
-  //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
   var flats_time = 0L
   var dist_time = 0L
-  /*
-  def bsimp(r: ARexp, depth: Int): ARexp = 
-  {
-    r match {
-      case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
-          case (AZERO, _) => AZERO
-          case (_, AZERO) => AZERO
-          case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
-          case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
-      }
-      case AALTS(bs1, rs) => {
-        depth match {
-          case 0 => {
-            flats(distinctBy(rs, erase)) match {
-              case Nil => AZERO
-              case s :: Nil => fuse(bs1, s)
-              case rs => AALTS(bs1, rs) 
-            } 
-          }
-          case n => {
-            val rs_simp = rs.map(bsimp(_, n - 1))
-            val time2 = System.nanoTime()
-            val flat_res = flats(rs_simp)
-            val time3 = System.nanoTime()
-            val dist_res = distinctBy(flat_res, erase)
-            val time4 = System.nanoTime()
-            flats_time = flats_time + time3 - time2
-            dist_time = dist_time + time4 - time3
-            //flats_time += time3 - time2
-            //dist_time += time4 - time3
-            //distinctBy(flats(rs.map(bsimp)), erase) match {
-            dist_res match {
-              case Nil => AZERO
-              case s :: Nil => fuse(bs1, s)
-              case rs => AALTS(bs1, rs)  
-            }
-          }
-        }
-      }
-      case r => r
-    }
-  }
-  */
-  //----------------------------------------------------------------------------This bsimp is the original slow one
   
   def bsimp(r: ARexp): ARexp = r match {
     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
@@ -428,49 +382,40 @@
     }
     case AALTS(bs1, rs) => {
       val rs_simp = rs.map(bsimp)
-      val time2 = System.nanoTime()
       val flat_res = flats(rs_simp)
-      val time3 = System.nanoTime()
       val dist_res = distinctBy(flat_res, erase)
-      val time4 = System.nanoTime()
-      flats_time = flats_time + time3 - time2
-      dist_time = dist_time + time4 - time3
       dist_res match {
         case Nil => AZERO
         case s :: Nil => fuse(bs1, s)
         case rs => AALTS(bs1, rs)  
       }
     }
-    case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
+    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
     case r => r
   }
-
-  def bsimp_weakened(r: ARexp): ARexp = r match {
-    case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
+  def super_bsimp(r: ARexp): ARexp = r match {
+    case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
         case (AZERO, _) => AZERO
         case (_, AZERO) => AZERO
         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+        case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) ) 
         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
     }
     case AALTS(bs1, rs) => {
-      val rs_simp = rs.map(bsimp_weakened)
-      val time2 = System.nanoTime()
+      val rs_simp = rs.map(super_bsimp)
       val flat_res = flats(rs_simp)
-      val time3 = System.nanoTime()
       val dist_res = distinctBy(flat_res, erase)
-      val time4 = System.nanoTime()
-      flats_time = flats_time + time3 - time2
-      dist_time = dist_time + time4 - time3
       dist_res match {
         case Nil => AZERO
         case s :: Nil => fuse(bs1, s)
         case rs => AALTS(bs1, rs)  
       }
     }
-    case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
+    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
     case r => r
   }
 
+
   def simp_weakened(r: Rexp): Rexp = r match {
     case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
         case (ZERO, _) => ZERO
@@ -532,66 +477,32 @@
     case Nil => {
       if (bnullable(r)) {
         //println(asize(r))
-        val time4 = System.nanoTime()
-        val bits = mkepsBC(r)
-        val time5 = System.nanoTime()
-        mkepsBC_time = time5 - time4
-        bits
+        mkepsBC(r)
       }
     else throw new Exception("Not matched")
     }
     case c::cs => {
-      val time1 = System.nanoTime()
       val der_res = bder(c,r)
-      val time2 = System.nanoTime()
       val simp_res = bsimp(der_res)
-      val time3 = System.nanoTime()  
-      bder_time = bder_time + time2 - time1
-      bsimp_time = bsimp_time + time3 - time2
       blex_simp(simp_res, cs)      
     }
   }
+  def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
+    case Nil => {
+      if (bnullable(r)) {
+        mkepsBC(r)
+      }
+      else throw new Exception("Not matched")
+    }
+    case c::cs => {
+      super_blex_simp(super_bsimp(bder(c,r)), cs)
+    }
+  }
   def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
     case Nil => r
     case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
   }
 
-  //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
-  /*
-  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
-    case Nil => {
-      if (bnullable(r)) {
-        //println(asize(r))
-        mkepsBC(r)
-      }
-    else throw new Exception("Not matched")
-    }
-    case c::cs => {
-      val der_res = bder(c,r)
-      val simp_res = bsimp(der_res)  
-      //val simp_res2 = bsimp(simp_res)  
-      //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
-      blex_simp(simp_res, cs)
-    }
-  }
-  */
-  /*
-  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
-    case Nil => {
-      if (nullable(r)) {
-        mkeps(r) 
-      }
-      else throw new Exception("Not matched")
-    }
-    case c::cs => {
-      val start = System.nanoTime()
-      val (r_simp, f_simp) = simp(der(c, r))
-      val end = System.nanoTime()
-      println((end - start)/1.0e9)
-      inj(r, c, f_simp(lex_simp(r_simp, cs)))
-    }
-  }
-  */
 
   //size: of a Aregx for testing purposes 
   def size(r: Rexp) : Int = r match {
@@ -608,27 +519,11 @@
 
   // decoding does not work yet
   def blexing_simp(r: Rexp, s: String) = {
-    //flats_time.clear()
-    //dist_time.clear()
-    flats_time = 0L
-    dist_time = 0L
-    bder_time = 0L
-    bsimp_time = 0L
-    mkepsBC_time = 0L
-    val start = System.nanoTime()
     val bit_code = blex_simp(internalise(r), s.toList)
-    val end = System.nanoTime()
-    println("total time: "+ (end - start)/1.0e9)
-    println("spent on flats: " + (flats_time/(1.0e9)))
-    println("spent on distinctBy: " + (dist_time/(1.0e9)))
-    println("spent on bder: "+ bder_time/1.0e9)
-    println("spent on bsimp: " + bsimp_time/1.0e9)
-    println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
-    //println(s"The length of the string ${s.length}; length of bit sequence:")
-    //println((bit_code.length))
-    //println(final_derivative)
-    //bit_code
-    //decode(r, bit_code) 
+    decode(r, bit_code) 
+  }
+  def super_blexing_simp(r: Rexp, s: String) = {
+    decode(r, super_blex_simp(internalise(r), s.toList))
   }
 
 
@@ -830,7 +725,7 @@
     //println(s"Testing ${r}")
     for (s <- ss.par) yield {
       val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
-      val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
+      val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
       if (res1 != res2) println(s"Disagree on ${r} and ${s}")
       if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       if (res1 != res2) Some((r, s)) else None
@@ -839,7 +734,7 @@
 
 
 
-  //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
+  
   /*
   def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
     for (s <- ss){