thys2/blexer2.sc
changeset 532 cc54ce075db5
parent 530 823d9b19d21c
child 533 6acbc939af6a
equal deleted inserted replaced
531:c334f0b3ef52 532:cc54ce075db5
   520     case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
   520     case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
   521         case (AZERO, _) => AZERO
   521         case (AZERO, _) => AZERO
   522         case (_, AZERO) => AZERO
   522         case (_, AZERO) => AZERO
   523         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   523         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   524         case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
   524         case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
       
   525         //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
   525         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   526         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   526     }
   527     }
   527     case AALTS(bs1, rs) => {
   528     case AALTS(bs1, rs) => {
   528         // println("alts case")
   529         // println("alts case")
   529           val rs_simp = rs.map(strongBsimp6(_))
   530           val rs_simp = rs.map(strongBsimp6(_))
   533             case Nil => AZERO
   534             case Nil => AZERO
   534             case s :: Nil => fuse(bs1, s)
   535             case s :: Nil => fuse(bs1, s)
   535             case rs => AALTS(bs1, rs)  
   536             case rs => AALTS(bs1, rs)  
   536           }
   537           }
   537     }
   538     }
       
   539     case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
   538     case r => r
   540     case r => r
   539   }
   541   }
   540 }
   542 }
       
   543 
       
   544 def distinctWith(rs: List[ARexp], 
       
   545                 f: (ARexp, Set[Rexp]) => ARexp, 
       
   546                 acc: Set[Rexp] = Set()) : List[ARexp] = 
       
   547   rs match{
       
   548     case Nil => Nil
       
   549     case r :: rs => 
       
   550       if(acc(erase(r)))
       
   551         distinctWith(rs, f, acc)
       
   552       else {
       
   553         val pruned_r = f(r, acc)
       
   554         pruned_r :: distinctWith(rs, f, strongBreakIntoTerms(erase(pruned_r)) ++: acc)
       
   555       }
       
   556   }
       
   557 
       
   558 //r = r' ~ tail => returns r'
       
   559 def removeSeqTail(r: Rexp, tail: Rexp) : Rexp = r match {
       
   560   case SEQ(r1, r2) => 
       
   561     if(r2 == tail) 
       
   562       r1
       
   563     else
       
   564       ZERO
       
   565   case r => ZERO
       
   566 }
       
   567 
       
   568 def prune7(r: ARexp, acc: Set[Rexp]) : ARexp = r match{
       
   569   case AALTS(bs, rs) => rs.map(r => prune7(r, acc)).filter(_ != ZERO) match
       
   570   {
       
   571     case Nil => AZERO
       
   572     case r::Nil => fuse(bs, r)
       
   573     case rs1 => AALTS(bs, rs1)
       
   574   }
       
   575   case ASEQ(bs, r1, r2) => prune7(r1, acc.map(r => removeSeqTail(r, erase(r2)))) match {
       
   576     case AZERO => AZERO
       
   577     case r1p if(isOne(erase(r1p))) => fuse(bs ++ mkepsBC(r1p), r2)
       
   578     case r1p => ASEQ(bs, r1p, r2)
       
   579   }
       
   580   case r => if(acc(erase(r))) AZERO else r
       
   581 }
       
   582 
       
   583 def strongBsimp7(r: ARexp): ARexp =
       
   584 {
       
   585   r match {
       
   586     case ASEQ(bs1, r1, r2) => (strongBsimp7(r1), strongBsimp7(r2)) match {
       
   587         case (AZERO, _) => AZERO
       
   588         case (_, AZERO) => AZERO
       
   589         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   590         case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
       
   591         //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
       
   592         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   593     }
       
   594     case AALTS(bs1, rs) => {
       
   595         // println("alts case")
       
   596           val rs_simp = rs.map(strongBsimp7(_))
       
   597           val flat_res = flats(rs_simp)
       
   598           var dist_res = distinctWith(flat_res, prune7)//distinctBy(flat_res, erase)
       
   599           (dist_res) match {
       
   600             case Nil => AZERO
       
   601             case s :: Nil => fuse(bs1, s)
       
   602             case rs => AALTS(bs1, rs)  
       
   603           }
       
   604     }
       
   605     case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
       
   606     case r => r
       
   607   }
       
   608 }
       
   609 
   541 
   610 
   542 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   611 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   543   case Nil => r
   612   case Nil => r
   544   case c::s => bders(s, bder(c, r))
   613   case c::s => bders(s, bder(c, r))
   545 }
   614 }
   715       }
   784       }
   716     }
   785     }
   717   }
   786   }
   718 }
   787 }
   719 
   788 
       
   789 def atMostEmpty(r: Rexp) : Boolean = r match {
       
   790   case ZERO => true
       
   791   case ONE => true
       
   792   case STAR(r) => atMostEmpty(r)
       
   793   case SEQ(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
       
   794   case ALTS(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
       
   795   case CHAR(_) => false
       
   796 }
       
   797 
       
   798 def isOne(r: Rexp) : Boolean = r match {
       
   799   case ONE => true
       
   800   case SEQ(r1, r2) => isOne(r1) && isOne(r2)
       
   801   case ALTS(r1, r2) => (isOne(r1) || isOne(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
       
   802   case STAR(r0) => atMostEmpty(r0)
       
   803   case CHAR(c) => false
       
   804   case ZERO => false
       
   805 
       
   806 }
       
   807 
   720 def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match {
   808 def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match {
   721   case SEQ(r1, r2)  => if(nullable(r1)) 
   809   case SEQ(r1, r2)  => if(isOne(r1)) 
   722                           strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: 
   810                           //strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: 
   723                           strongBreakIntoTerms(r2) 
   811                           strongBreakIntoTerms(r2) 
   724                        else 
   812                        else 
   725                           strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2))
   813                           strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2))
   726   case ALTS(r1, r2) => strongBreakIntoTerms(r1) ::: strongBreakIntoTerms(r2)
   814   case ALTS(r1, r2) => strongBreakIntoTerms(r1) ::: strongBreakIntoTerms(r2)
   727   case ZERO => Nil
   815   case ZERO => Nil
   732   val res = strongBreakIntoTerms((L(erase(r), ctx))).map(oneSimp)
   820   val res = strongBreakIntoTerms((L(erase(r), ctx))).map(oneSimp)
   733   res.toSet
   821   res.toSet
   734 }
   822 }
   735 
   823 
   736 def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, subseteqPred: (C, C) => Boolean) : Boolean = {
   824 def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, subseteqPred: (C, C) => Boolean) : Boolean = {
   737   
       
   738   subseteqPred(f(a, b), c)
   825   subseteqPred(f(a, b), c)
   739 }
   826 }
   740 def rexpListInclusion(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
   827 def rs1_subseteq_rs2(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
   741   // println("r+ctx---------")
       
   742   // rsprint(rs1)
       
   743   // println("acc---------")
       
   744   // rsprint(rs2)
       
   745   
       
   746   val res = rs1.forall(rs2.contains)
   828   val res = rs1.forall(rs2.contains)
   747   // println(res)
       
   748   // println("end------------------")
       
   749   res
   829   res
   750 }
   830 }
   751 def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
   831 def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
   752   // println("pakc--------r")
   832   if (ABIncludedByC(r, ctx, acc, attachCtx, rs1_subseteq_rs2)) {//acc.flatMap(breakIntoTerms
   753   // println(shortRexpOutput(erase(r)))
       
   754   //   println("ctx---------")
       
   755   // rsprint(ctx)
       
   756   // println("pakc-------acc")
       
   757   // rsprint(acc)
       
   758   // println("r+ctx broken down---------")
       
   759   // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
       
   760 
       
   761   // rprint(L(erase(r), ctx))
       
   762   //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
       
   763   if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
       
   764     //println("included in acc!!!")
       
   765     AZERO
   833     AZERO
   766   }
   834   }
   767   else{
   835   else{
   768     r match {
   836     r match {
   769       case ASEQ(bs, r1, r2) => 
   837       case ASEQ(bs, r1, r2) => 
   774           fuse(bs1, r2)
   842           fuse(bs1, r2)
   775         case r1p => 
   843         case r1p => 
   776           ASEQ(bs, r1p, r2)
   844           ASEQ(bs, r1p, r2)
   777       }
   845       }
   778       case AALTS(bs, rs0) => 
   846       case AALTS(bs, rs0) => 
   779         // println("before pruning")
       
   780         // println(s"ctx is ")
       
   781         // ctx.foreach(r => println(shortRexpOutput(r)))
       
   782         // println(s"rs0 is ")
       
   783         // rs0.foreach(r => println(shortRexpOutput(erase(r))))
       
   784         // println(s"acc is ")
       
   785         // acc.foreach(r => println(shortRexpOutput(r)))
       
   786         rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match {
   847         rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match {
   787           case Nil => 
   848           case Nil => 
   788             // println("after pruning Nil")
       
   789             AZERO
   849             AZERO
   790           case r :: Nil => 
   850           case r :: Nil => 
   791             // println("after pruning singleton")
       
   792             // println(shortRexpOutput(erase(r)))
       
   793             r 
   851             r 
   794           case rs0p => 
   852           case rs0p => 
   795           // println("after pruning non-singleton")
       
   796             AALTS(bs, rs0p)
   853             AALTS(bs, rs0p)
   797         }
   854         }
   798       case r => r
   855       case r => r
   799     }
   856     }
   800   }
   857   }
   911 }
   968 }
   912 
   969 
   913 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
   970 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
   914   case Nil => {
   971   case Nil => {
   915     if (bnullable(r)) {
   972     if (bnullable(r)) {
   916       //println(asize(r))
       
   917       val bits = mkepsBC(r)
   973       val bits = mkepsBC(r)
   918       bits
   974       bits
   919     }
   975     }
   920     else 
   976     else 
   921       throw new Exception("Not matched")
   977       throw new Exception("Not matched")
   940     case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
   996     case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
   941   }
   997   }
   942   def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
   998   def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
   943     case Nil => r
   999     case Nil => r
   944     case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
  1000     case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
       
  1001   }
       
  1002   def bdersStrong7(s: List[Char], r: ARexp) : ARexp = s match {
       
  1003     case Nil => r
       
  1004     case c::s => bdersStrong7(s, strongBsimp7(bder(c, r)))
   945   }
  1005   }
   946   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
  1006   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
   947 
  1007 
   948   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
  1008   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
   949     case Nil => r 
  1009     case Nil => r 
  1187   }
  1247   }
  1188 }
  1248 }
  1189 
  1249 
  1190 def generator_test() {
  1250 def generator_test() {
  1191 
  1251 
  1192   // test(rexp(7), 1000) { (r: Rexp) => 
  1252   test(rexp(4), 1000000) { (r: Rexp) => 
  1193   //   val ss = stringsFromRexp(r)
       
  1194   //   val boolList = ss.map(s => {
       
  1195   //     val simpVal = simpBlexer(r, s)
       
  1196   //     val strongVal = strongBlexer(r, s)
       
  1197   //     // println(simpVal)
       
  1198   //     // println(strongVal)
       
  1199   //     (simpVal == strongVal) && (simpVal != None) 
       
  1200   //   })
       
  1201   //   !boolList.exists(b => b == false)
       
  1202   // }
       
  1203   // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => 
       
  1204   //   val ss = stringsFromRexp(r)
       
  1205   //   val boolList = ss.map(s => {
       
  1206   //     val bdStrong = bdersStrong(s.toList, internalise(r))
       
  1207   //     val bdStrong5 = bdersStrong5(s.toList, internalise(r))
       
  1208   //     // println(shortRexpOutput(r))
       
  1209   //     // println(s)
       
  1210   //     // println(strongVal)
       
  1211   //     // println(strongVal5)
       
  1212   //     // (strongVal == strongVal5) 
       
  1213 
       
  1214   //     if(asize(bdStrong5) > asize(bdStrong)){
       
  1215   //       println(s)
       
  1216   //       println(shortRexpOutput(erase(bdStrong5)))
       
  1217   //       println(shortRexpOutput(erase(bdStrong)))
       
  1218   //     }
       
  1219   //     asize(bdStrong5) <= asize(bdStrong)
       
  1220   //   })
       
  1221   //   !boolList.exists(b => b == false)
       
  1222   // }
       
  1223   //*** example where bdStrong5 has a smaller size than bdStrong
       
  1224   // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => 
       
  1225   //*** depth 5 example bdStrong5 larger size than bdStrong
       
  1226   // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
       
  1227  
       
  1228  
       
  1229  
       
  1230   //sanity check from Christian's request
       
  1231   // val r = ("a" | "ab") ~ ("bc" | "c")
       
  1232   // val a = internalise(r)
       
  1233   // val aval = blexing_simp(r, "abc")
       
  1234   // println(aval)
       
  1235 
       
  1236   //sample counterexample:(depth 7)
       
  1237   //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
       
  1238   //(depth5)
       
  1239   //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
       
  1240   test(rexp(8), 10000) { (r: Rexp) => 
       
  1241   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
  1253   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
  1242     val ss = stringsFromRexp(r)
  1254     val ss = Set("b")//stringsFromRexp(r)
  1243     val boolList = ss.filter(s => s != "").map(s => {
  1255     val boolList = ss.filter(s => s != "").map(s => {
  1244       //val bdStrong = bdersStrong(s.toList, internalise(r))
  1256       //val bdStrong = bdersStrong(s.toList, internalise(r))
  1245       val bdStrong6 = bdersStrong6(s.toList, internalise(r))
  1257       val bdStrong6 = bdersStrong7(s.toList, internalise(r))
  1246       val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
  1258       val bdStrong6Set = strongBreakIntoTerms(erase(bdStrong6))
  1247       val pdersSet = pderUNIV(r).flatMap(r => breakIntoTerms(r))
  1259       val pdersSet = pderUNIV(r)//.flatMap(r => strongBreakIntoTerms(r))
  1248       // println(s)
  1260       val pdersSetBroken = pdersSet.flatMap(r => strongBreakIntoTerms(r))
  1249       // println(bdStrong6Set.size, pdersSet.size)
  1261       bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size
  1250       bdStrong6Set.size <= pdersSet.size
       
  1251     })
  1262     })
  1252     // println(boolList)
  1263     //println(boolList)
  1253     //!boolList.exists(b => b == false)
  1264     //!boolList.exists(b => b == false)
  1254     !boolList.exists(b => b == false)
  1265     !boolList.exists(b => b == false)
  1255   }
  1266   }
  1256 
       
  1257 
       
  1258 }
  1267 }
  1259 // small()
  1268 // small()
  1260 generator_test()
  1269 //  generator_test()
  1261 
  1270   
       
  1271   
       
  1272   //STAR(STAR(STAR(SEQ(SEQ(ALTS(STAR(ALTS(ONE,CHAR('c'))),
       
  1273           //  CHAR('c')),ALTS(CHAR('a'),ALTS(STAR(CHAR('b')),ALTS(CHAR('a'),ZERO)))),
       
  1274           //  CHAR('c')))))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
       
  1275 //counterexample1: STAR(SEQ(ALTS(STAR(ZERO),ALTS(CHAR(a),CHAR(b))),SEQ(ONE,ALTS(CHAR(a),CHAR(b)))))
       
  1276 //counterexample2: SEQ(ALTS(SEQ(CHAR(a),STAR(ONE)),STAR(ONE)),ALTS(CHAR(a),SEQ(ALTS(CHAR(c),CHAR(a)),CHAR(b))))
  1262 def counterexample_check() {
  1277 def counterexample_check() {
  1263   val r = SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
  1278   val r = SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
  1264   val s = "ccc"
  1279     ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
  1265   val bdStrong5 = bdersStrong6(s.toList, internalise(r))
  1280   val s = "b"
       
  1281   val bdStrong5 = bdersStrong7(s.toList, internalise(r))
  1266   val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
  1282   val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
  1267   val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
  1283   val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => strongBreakIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
  1268   println("original regex ")
  1284   println("original regex ")
  1269   rprint(r)
  1285   rprint(r)
  1270   println("after strong bsimp")
  1286   println("after strong bsimp")
  1271   aprint(bdStrong5)
  1287   aprint(bdStrong5)
  1272   println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   ")
  1288   println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   ")
  1273   rsprint(bdStrong5Set)
  1289   rsprint(bdStrong5Set)
  1274   println("after pderUNIV")
  1290   println("after pderUNIV")
  1275   rsprint(pdersSet.toList)
  1291   rsprint(pdersSet.toList)
  1276 }
  1292 
  1277 // counterexample_check()
  1293 }
       
  1294 counterexample_check()
       
  1295 
  1278 def linform_test() {
  1296 def linform_test() {
  1279   val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) //
  1297   val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) //
  1280   val r_linforms = lf(r)
  1298   val r_linforms = lf(r)
  1281   println(r_linforms.size)
  1299   println(r_linforms.size)
  1282   val pderuniv = pderUNIV(r)
  1300   val pderuniv = pderUNIV(r)