thys2/blexer2.sc
changeset 532 cc54ce075db5
parent 530 823d9b19d21c
child 533 6acbc939af6a
--- a/thys2/blexer2.sc	Tue May 31 17:36:45 2022 +0100
+++ b/thys2/blexer2.sc	Fri Jun 03 16:45:30 2022 +0100
@@ -522,6 +522,7 @@
         case (_, AZERO) => AZERO
         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
         case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
+        //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
     }
     case AALTS(bs1, rs) => {
@@ -535,10 +536,78 @@
             case rs => AALTS(bs1, rs)  
           }
     }
+    case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
     case r => r
   }
 }
 
+def distinctWith(rs: List[ARexp], 
+                f: (ARexp, Set[Rexp]) => ARexp, 
+                acc: Set[Rexp] = Set()) : List[ARexp] = 
+  rs match{
+    case Nil => Nil
+    case r :: rs => 
+      if(acc(erase(r)))
+        distinctWith(rs, f, acc)
+      else {
+        val pruned_r = f(r, acc)
+        pruned_r :: distinctWith(rs, f, strongBreakIntoTerms(erase(pruned_r)) ++: acc)
+      }
+  }
+
+//r = r' ~ tail => returns r'
+def removeSeqTail(r: Rexp, tail: Rexp) : Rexp = r match {
+  case SEQ(r1, r2) => 
+    if(r2 == tail) 
+      r1
+    else
+      ZERO
+  case r => ZERO
+}
+
+def prune7(r: ARexp, acc: Set[Rexp]) : ARexp = r match{
+  case AALTS(bs, rs) => rs.map(r => prune7(r, acc)).filter(_ != ZERO) match
+  {
+    case Nil => AZERO
+    case r::Nil => fuse(bs, r)
+    case rs1 => AALTS(bs, rs1)
+  }
+  case ASEQ(bs, r1, r2) => prune7(r1, acc.map(r => removeSeqTail(r, erase(r2)))) match {
+    case AZERO => AZERO
+    case r1p if(isOne(erase(r1p))) => fuse(bs ++ mkepsBC(r1p), r2)
+    case r1p => ASEQ(bs, r1p, r2)
+  }
+  case r => if(acc(erase(r))) AZERO else r
+}
+
+def strongBsimp7(r: ARexp): ARexp =
+{
+  r match {
+    case ASEQ(bs1, r1, r2) => (strongBsimp7(r1), strongBsimp7(r2)) match {
+        case (AZERO, _) => AZERO
+        case (_, AZERO) => AZERO
+        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+        case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
+        //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
+        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+    }
+    case AALTS(bs1, rs) => {
+        // println("alts case")
+          val rs_simp = rs.map(strongBsimp7(_))
+          val flat_res = flats(rs_simp)
+          var dist_res = distinctWith(flat_res, prune7)//distinctBy(flat_res, erase)
+          (dist_res) match {
+            case Nil => AZERO
+            case s :: Nil => fuse(bs1, s)
+            case rs => AALTS(bs1, rs)  
+          }
+    }
+    case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
+    case r => r
+  }
+}
+
+
 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   case Nil => r
   case c::s => bders(s, bder(c, r))
@@ -717,9 +786,28 @@
   }
 }
 
+def atMostEmpty(r: Rexp) : Boolean = r match {
+  case ZERO => true
+  case ONE => true
+  case STAR(r) => atMostEmpty(r)
+  case SEQ(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
+  case ALTS(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
+  case CHAR(_) => false
+}
+
+def isOne(r: Rexp) : Boolean = r match {
+  case ONE => true
+  case SEQ(r1, r2) => isOne(r1) && isOne(r2)
+  case ALTS(r1, r2) => (isOne(r1) || isOne(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
+  case STAR(r0) => atMostEmpty(r0)
+  case CHAR(c) => false
+  case ZERO => false
+
+}
+
 def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match {
-  case SEQ(r1, r2)  => if(nullable(r1)) 
-                          strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: 
+  case SEQ(r1, r2)  => if(isOne(r1)) 
+                          //strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: 
                           strongBreakIntoTerms(r2) 
                        else 
                           strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2))
@@ -734,34 +822,14 @@
 }
 
 def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, subseteqPred: (C, C) => Boolean) : Boolean = {
-  
   subseteqPred(f(a, b), c)
 }
-def rexpListInclusion(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
-  // println("r+ctx---------")
-  // rsprint(rs1)
-  // println("acc---------")
-  // rsprint(rs2)
-  
+def rs1_subseteq_rs2(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
   val res = rs1.forall(rs2.contains)
-  // println(res)
-  // println("end------------------")
   res
 }
 def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
-  // println("pakc--------r")
-  // println(shortRexpOutput(erase(r)))
-  //   println("ctx---------")
-  // rsprint(ctx)
-  // println("pakc-------acc")
-  // rsprint(acc)
-  // println("r+ctx broken down---------")
-  // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
-
-  // rprint(L(erase(r), ctx))
-  //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
-  if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
-    //println("included in acc!!!")
+  if (ABIncludedByC(r, ctx, acc, attachCtx, rs1_subseteq_rs2)) {//acc.flatMap(breakIntoTerms
     AZERO
   }
   else{
@@ -776,23 +844,12 @@
           ASEQ(bs, r1p, r2)
       }
       case AALTS(bs, rs0) => 
-        // println("before pruning")
-        // println(s"ctx is ")
-        // ctx.foreach(r => println(shortRexpOutput(r)))
-        // println(s"rs0 is ")
-        // rs0.foreach(r => println(shortRexpOutput(erase(r))))
-        // println(s"acc is ")
-        // acc.foreach(r => println(shortRexpOutput(r)))
         rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match {
           case Nil => 
-            // println("after pruning Nil")
             AZERO
           case r :: Nil => 
-            // println("after pruning singleton")
-            // println(shortRexpOutput(erase(r)))
             r 
           case rs0p => 
-          // println("after pruning non-singleton")
             AALTS(bs, rs0p)
         }
       case r => r
@@ -913,7 +970,6 @@
 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
   case Nil => {
     if (bnullable(r)) {
-      //println(asize(r))
       val bits = mkepsBC(r)
       bits
     }
@@ -943,6 +999,10 @@
     case Nil => r
     case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
   }
+  def bdersStrong7(s: List[Char], r: ARexp) : ARexp = s match {
+    case Nil => r
+    case c::s => bdersStrong7(s, strongBsimp7(bder(c, r)))
+  }
   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
 
   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
@@ -1189,82 +1249,38 @@
 
 def generator_test() {
 
-  // test(rexp(7), 1000) { (r: Rexp) => 
-  //   val ss = stringsFromRexp(r)
-  //   val boolList = ss.map(s => {
-  //     val simpVal = simpBlexer(r, s)
-  //     val strongVal = strongBlexer(r, s)
-  //     // println(simpVal)
-  //     // println(strongVal)
-  //     (simpVal == strongVal) && (simpVal != None) 
-  //   })
-  //   !boolList.exists(b => b == false)
-  // }
-  // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => 
-  //   val ss = stringsFromRexp(r)
-  //   val boolList = ss.map(s => {
-  //     val bdStrong = bdersStrong(s.toList, internalise(r))
-  //     val bdStrong5 = bdersStrong5(s.toList, internalise(r))
-  //     // println(shortRexpOutput(r))
-  //     // println(s)
-  //     // println(strongVal)
-  //     // println(strongVal5)
-  //     // (strongVal == strongVal5) 
-
-  //     if(asize(bdStrong5) > asize(bdStrong)){
-  //       println(s)
-  //       println(shortRexpOutput(erase(bdStrong5)))
-  //       println(shortRexpOutput(erase(bdStrong)))
-  //     }
-  //     asize(bdStrong5) <= asize(bdStrong)
-  //   })
-  //   !boolList.exists(b => b == false)
-  // }
-  //*** example where bdStrong5 has a smaller size than bdStrong
-  // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => 
-  //*** depth 5 example bdStrong5 larger size than bdStrong
-  // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
- 
- 
- 
-  //sanity check from Christian's request
-  // val r = ("a" | "ab") ~ ("bc" | "c")
-  // val a = internalise(r)
-  // val aval = blexing_simp(r, "abc")
-  // println(aval)
-
-  //sample counterexample:(depth 7)
-  //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
-  //(depth5)
-  //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
-  test(rexp(8), 10000) { (r: Rexp) => 
+  test(rexp(4), 1000000) { (r: Rexp) => 
   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
-    val ss = stringsFromRexp(r)
+    val ss = Set("b")//stringsFromRexp(r)
     val boolList = ss.filter(s => s != "").map(s => {
       //val bdStrong = bdersStrong(s.toList, internalise(r))
-      val bdStrong6 = bdersStrong6(s.toList, internalise(r))
-      val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
-      val pdersSet = pderUNIV(r).flatMap(r => breakIntoTerms(r))
-      // println(s)
-      // println(bdStrong6Set.size, pdersSet.size)
-      bdStrong6Set.size <= pdersSet.size
+      val bdStrong6 = bdersStrong7(s.toList, internalise(r))
+      val bdStrong6Set = strongBreakIntoTerms(erase(bdStrong6))
+      val pdersSet = pderUNIV(r)//.flatMap(r => strongBreakIntoTerms(r))
+      val pdersSetBroken = pdersSet.flatMap(r => strongBreakIntoTerms(r))
+      bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size
     })
-    // println(boolList)
+    //println(boolList)
     //!boolList.exists(b => b == false)
     !boolList.exists(b => b == false)
   }
-
-
 }
 // small()
-generator_test()
-
+//  generator_test()
+  
+  
+  //STAR(STAR(STAR(SEQ(SEQ(ALTS(STAR(ALTS(ONE,CHAR('c'))),
+          //  CHAR('c')),ALTS(CHAR('a'),ALTS(STAR(CHAR('b')),ALTS(CHAR('a'),ZERO)))),
+          //  CHAR('c')))))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
+//counterexample1: STAR(SEQ(ALTS(STAR(ZERO),ALTS(CHAR(a),CHAR(b))),SEQ(ONE,ALTS(CHAR(a),CHAR(b)))))
+//counterexample2: SEQ(ALTS(SEQ(CHAR(a),STAR(ONE)),STAR(ONE)),ALTS(CHAR(a),SEQ(ALTS(CHAR(c),CHAR(a)),CHAR(b))))
 def counterexample_check() {
-  val r = SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
-  val s = "ccc"
-  val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+  val r = SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
+    ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
+  val s = "b"
+  val bdStrong5 = bdersStrong7(s.toList, internalise(r))
   val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
-  val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+  val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => strongBreakIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
   println("original regex ")
   rprint(r)
   println("after strong bsimp")
@@ -1273,8 +1289,10 @@
   rsprint(bdStrong5Set)
   println("after pderUNIV")
   rsprint(pdersSet.toList)
+
 }
-// counterexample_check()
+counterexample_check()
+
 def linform_test() {
   val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) //
   val r_linforms = lf(r)