thys2/blexer2.sc
changeset 518 ff7945a988a3
parent 516 6fecb7fe8cd0
child 526 cb702fb4227f
equal deleted inserted replaced
517:3c5e58d08939 518:ff7945a988a3
       
     1 //Strong Bsimp to obtain Antimirov's cubic bound
       
     2 
     1 // A simple lexer inspired by work of Sulzmann & Lu
     3 // A simple lexer inspired by work of Sulzmann & Lu
     2 //==================================================
     4 //==================================================
     3 //
     5 //
     4 // Call the test cases with 
     6 // Call the test cases with 
     5 //
     7 //
   494     case AALTS(bs1, rs) => {
   496     case AALTS(bs1, rs) => {
   495         // println("alts case")
   497         // println("alts case")
   496           val rs_simp = rs.map(strongBsimp5(_))
   498           val rs_simp = rs.map(strongBsimp5(_))
   497           val flat_res = flats(rs_simp)
   499           val flat_res = flats(rs_simp)
   498           var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
   500           var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
   499           var dist2_res = distinctBy5(dist_res)
   501           // var dist2_res = distinctBy5(dist_res)
   500           while(dist_res != dist2_res){
   502           // while(dist_res != dist2_res){
   501             dist_res = dist2_res
   503           //   dist_res = dist2_res
   502             dist2_res = distinctBy5(dist_res)
   504           //   dist2_res = distinctBy5(dist_res)
       
   505           // }
       
   506           (dist_res) match {
       
   507             case Nil => AZERO
       
   508             case s :: Nil => fuse(bs1, s)
       
   509             case rs => AALTS(bs1, rs)  
   503           }
   510           }
   504           (dist2_res) match {
   511     }
       
   512     case r => r
       
   513   }
       
   514 }
       
   515 
       
   516 def strongBsimp6(r: ARexp): ARexp =
       
   517 {
       
   518   // println("was this called?")
       
   519   r match {
       
   520     case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
       
   521         case (AZERO, _) => AZERO
       
   522         case (_, AZERO) => AZERO
       
   523         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   524         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   525     }
       
   526     case AALTS(bs1, rs) => {
       
   527         // println("alts case")
       
   528           val rs_simp = rs.map(strongBsimp6(_))
       
   529           val flat_res = flats(rs_simp)
       
   530           var dist_res = distinctBy6(flat_res)//distinctBy(flat_res, erase)
       
   531           (dist_res) match {
   505             case Nil => AZERO
   532             case Nil => AZERO
   506             case s :: Nil => fuse(bs1, s)
   533             case s :: Nil => fuse(bs1, s)
   507             case rs => AALTS(bs1, rs)  
   534             case rs => AALTS(bs1, rs)  
   508           }
   535           }
   509     }
   536     }
   686       }
   713       }
   687     }
   714     }
   688   }
   715   }
   689 }
   716 }
   690 
   717 
       
   718 def attachCtx(r: ARexp, ctx: List[Rexp]) : List[Rexp] = {
       
   719   val res = breakIntoTerms(oneSimp(L(erase(r), ctx))).map(oneSimp)
       
   720   res
       
   721 }
       
   722 
       
   723 def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
       
   724   inclusionPred(f(a, b), c)
       
   725 }
       
   726 def rexpListInclusion(rs1: List[Rexp], rs2: List[Rexp]) : Boolean = {
       
   727   rs1.forall(rs2.contains)
       
   728 }
       
   729 def pAKC6(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
       
   730   // println("pakc")
       
   731   // println(shortRexpOutput(erase(r)))
       
   732   // println("acc")
       
   733   // rsprint(acc)
       
   734   // println("ctx---------")
       
   735   // rsprint(ctx)
       
   736   // println("ctx---------end")
       
   737   // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
       
   738 
       
   739   // rprint(L(erase(r), ctx))
       
   740   //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
       
   741   if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
       
   742     //println("included in acc!!!")
       
   743     AZERO
       
   744   }
       
   745   else{
       
   746     r match {
       
   747       case ASEQ(bs, r1, r2) => 
       
   748       (pAKC6(acc, r1, erase(r2) :: ctx)) match{
       
   749         case AZERO => 
       
   750           AZERO
       
   751         case AONE(bs1) => 
       
   752           fuse(bs1, r2)
       
   753         case r1p => 
       
   754           ASEQ(bs, r1p, r2)
       
   755       }
       
   756       case AALTS(bs, rs0) => 
       
   757         // println("before pruning")
       
   758         // println(s"ctx is ")
       
   759         // ctx.foreach(r => println(shortRexpOutput(r)))
       
   760         // println(s"rs0 is ")
       
   761         // rs0.foreach(r => println(shortRexpOutput(erase(r))))
       
   762         // println(s"acc is ")
       
   763         // acc.foreach(r => println(shortRexpOutput(r)))
       
   764         rs0.map(r => pAKC6(acc, r, ctx)).filter(_ != AZERO) match {
       
   765           case Nil => 
       
   766             // println("after pruning Nil")
       
   767             AZERO
       
   768           case r :: Nil => 
       
   769             // println("after pruning singleton")
       
   770             // println(shortRexpOutput(erase(r)))
       
   771             r 
       
   772           case rs0p => 
       
   773           // println("after pruning non-singleton")
       
   774             AALTS(bs, rs0p)
       
   775         }
       
   776       case r => r
       
   777     }
       
   778   }
       
   779 }
       
   780 
       
   781 
       
   782 def distinctBy6(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
       
   783   case Nil => 
       
   784     Nil
       
   785   case x :: xs => {
       
   786     val erased = erase(x)
       
   787     if(acc.contains(erased)){
       
   788       distinctBy6(xs, acc)
       
   789     }
       
   790     else{
       
   791       val pruned = pAKC6(acc, x, Nil)
       
   792       val newTerms = breakIntoTerms(erase(pruned))
       
   793       pruned match {
       
   794         case AZERO => 
       
   795           distinctBy6(xs, acc)
       
   796         case xPrime => 
       
   797           xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   798       }
       
   799     }
       
   800   }
       
   801 }
   691 
   802 
   692 def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
   803 def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
   693   case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
   804   case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
   694   case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
   805   case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
   695   case ZERO => Nil
   806   case ZERO => Nil
   804   
   915   
   805   def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
   916   def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
   806     case Nil => r
   917     case Nil => r
   807     case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
   918     case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
   808   }
   919   }
   809 
   920   def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
       
   921     case Nil => r
       
   922     case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
       
   923   }
   810   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
   924   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
   811 
   925 
   812   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
   926   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
   813     case Nil => r 
   927     case Nil => r 
   814     case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
   928     case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
  1022 
  1136 
  1023 def small() = {
  1137 def small() = {
  1024   //val pderSTAR = pderUNIV(STARREG)
  1138   //val pderSTAR = pderUNIV(STARREG)
  1025 
  1139 
  1026   //val refSize = pderSTAR.map(size(_)).sum
  1140   //val refSize = pderSTAR.map(size(_)).sum
  1027   for(n <- 6 to 6){
  1141   for(n <- 5 to 5){
  1028     val STARREG = n_astar(n)
  1142     val STARREG = n_astar(n)
  1029     val iMax = (lcm((1 to n).toList))
  1143     val iMax = (lcm((1 to n).toList))
  1030     for(i <- 1 to iMax + 50){// 100, 400, 800, 840, 841, 900 
  1144     for(i <- 1 to iMax + 2){// 100, 400, 800, 840, 841, 900 
  1031       val prog0 = "a" * i
  1145       val prog0 = "a" * i
  1032       //println(s"test: $prog0")
  1146       //println(s"test: $prog0")
  1033       print(n)
  1147       print(i)
  1034       print(" ")
  1148       print(" ")
  1035       // print(i)
  1149       // print(i)
  1036       // print(" ")
  1150       // print(" ")
  1037       println(asize(bders_simp(prog0.toList, internalise(STARREG))))
  1151       println(asize(bders_simp(prog0.toList, internalise(STARREG))))
  1038     }
  1152     }
  1087 
  1201 
  1088   //sample counterexample:(depth 7)
  1202   //sample counterexample:(depth 7)
  1089   //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
  1203   //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
  1090   //(depth5)
  1204   //(depth5)
  1091   //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
  1205   //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
  1092   test(rexp(4), 10000000) { (r: Rexp) => 
  1206   test(rexp(4), 100000) { (r: Rexp) => 
  1093   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
  1207   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
  1094     val ss = stringsFromRexp(r)
  1208     val ss = stringsFromRexp(r)
  1095     val boolList = ss.map(s => {
  1209     val boolList = ss.filter(s => s != "").map(s => {
  1096       val bdStrong = bdersStrong(s.toList, internalise(r))
  1210       //val bdStrong = bdersStrong(s.toList, internalise(r))
  1097       val bdStrong5 = bdersStrong5(s.toList, internalise(r))
  1211       val bdStrong6 = bdersStrong6(s.toList, internalise(r))
  1098       val bdFurther5 = strongBsimp5(bdStrong5)
  1212       val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
  1099       val sizeFurther5 = asize(bdFurther5)
  1213       val pdersSet = pderUNIV(r).flatMap(r => breakIntoTerms(r))
  1100       val pdersSet = pderUNIV(r)
       
  1101       val size5 = asize(bdStrong5)
       
  1102       val size0 = asize(bdStrong)
       
  1103       // println(s)
  1214       // println(s)
  1104       // println("pdersSet size")
  1215       // println(bdStrong6Set.size, pdersSet.size)
  1105       // println(pdersSet.size)
  1216       bdStrong6Set.size <= pdersSet.size
  1106       // pdersSet.foreach(r => println(shortRexpOutput(r)))
       
  1107       // println("after bdStrong5")
       
  1108       
       
  1109       // println(shortRexpOutput(erase(bdStrong5)))
       
  1110       // println(breakIntoTerms(erase(bdStrong5)).size)
       
  1111       
       
  1112       // println("after bdStrong")
       
  1113       // println(shortRexpOutput(erase(bdStrong)))
       
  1114       // println(breakIntoTerms(erase(bdStrong)).size)
       
  1115       // println(size5, size0, sizeFurther5)
       
  1116       // aprint(strongBsimp5(bdStrong))
       
  1117       // println(asize(strongBsimp5(bdStrong5)))
       
  1118       // println(s)
       
  1119       size5 <= size0 
       
  1120     })
  1217     })
  1121     // println(boolList)
  1218     // println(boolList)
  1122     //!boolList.exists(b => b == false)
  1219     //!boolList.exists(b => b == false)
  1123     !boolList.exists(b => b == false)
  1220     !boolList.exists(b => b == false)
  1124   }
  1221   }
  1125 
  1222 
  1126 
  1223 
  1127 }
  1224 }
  1128 small()
  1225 // small()
  1129 // generator_test()
  1226 generator_test()
  1130 
  1227 
       
  1228 def counterexample_check() {
       
  1229   val r = STAR(SEQ(ALTS(ALTS(CHAR('b'),CHAR('c')),
       
  1230     SEQ(CHAR('b'),CHAR('b'))),ALTS(SEQ(ONE,CHAR('b')),CHAR('a'))))
       
  1231   val s = "bbbb"
       
  1232   val bdStrong5 = bdersStrong6(s.toList, internalise(r))
       
  1233   val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
       
  1234   val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
       
  1235   println("original regex ")
       
  1236   rprint(r)
       
  1237   println("after strong bsimp")
       
  1238   aprint(bdStrong5)
       
  1239   println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   ")
       
  1240   rsprint(bdStrong5Set)
       
  1241   println("after pderUNIV")
       
  1242   rsprint(pdersSet.toList)
       
  1243 }
       
  1244 // counterexample_check()
  1131 // 1
  1245 // 1
  1132 
  1246 def newStrong_test() {
       
  1247   val r2 = (CHAR('b') | ONE)
       
  1248   val r0 = CHAR('d')
       
  1249   val r1 = (ONE | CHAR('c'))
       
  1250   val expRexp = (SEQ(r2, r0) | SEQ(SEQ(r1, r2), r0))
       
  1251   println(s"original regex is: ")
       
  1252   rprint(expRexp)
       
  1253   val expSimp5 = strongBsimp5(internalise(expRexp))
       
  1254   val expSimp6 = strongBsimp6(internalise(expRexp))
       
  1255   aprint(expSimp5)
       
  1256   aprint(expSimp6)
       
  1257 }
       
  1258 // newStrong_test()
  1133 // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
  1259 // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
  1134 // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
  1260 // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
  1135 // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
  1261 // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
  1136 // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
  1262 // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
  1137 
  1263