thys2/blexer2.sc
changeset 526 cb702fb4227f
parent 518 ff7945a988a3
child 530 823d9b19d21c
equal deleted inserted replaced
525:d8740017324c 526:cb702fb4227f
   639     // else
   639     // else
   640       L(SEQ(acc, r), rs1)
   640       L(SEQ(acc, r), rs1)
   641 }
   641 }
   642 
   642 
   643 def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
   643 def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
   644 def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
   644 def rsprint(rs: Iterable[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
       
   645 
   645 def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
   646 def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
   646 def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
   647 def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
   647 
   648 
   648 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
   649 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
   649   // println("pakc")
   650   // println("pakc")
   713       }
   714       }
   714     }
   715     }
   715   }
   716   }
   716 }
   717 }
   717 
   718 
   718 def attachCtx(r: ARexp, ctx: List[Rexp]) : List[Rexp] = {
   719 def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match {
   719   val res = breakIntoTerms(oneSimp(L(erase(r), ctx))).map(oneSimp)
   720   case SEQ(r1, r2)  => if(nullable(r1)) 
       
   721                           strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: 
       
   722                           strongBreakIntoTerms(r2) 
       
   723                        else 
       
   724                           strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2))
       
   725   case ALTS(r1, r2) => strongBreakIntoTerms(r1) ::: strongBreakIntoTerms(r2)
       
   726   case ZERO => Nil
       
   727   case _ => r :: Nil
       
   728 }
       
   729 
       
   730 def attachCtx(r: ARexp, ctx: List[Rexp]) : Set[Rexp] = {
       
   731   val res = strongBreakIntoTerms((L(erase(r), ctx))).map(oneSimp)
       
   732   res.toSet
       
   733 }
       
   734 
       
   735 def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
       
   736   
       
   737   inclusionPred(f(a, b), c)
       
   738 }
       
   739 def rexpListInclusion(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
       
   740   // println("r+ctx---------")
       
   741   // rsprint(rs1)
       
   742   // println("acc---------")
       
   743   // rsprint(rs2)
       
   744   
       
   745   val res = rs1.forall(rs2.contains)
       
   746   // println(res)
       
   747   // println("end------------------")
   720   res
   748   res
   721 }
   749 }
   722 
   750 def pAKC6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
   723 def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
   751   // println("pakc--------r")
   724   inclusionPred(f(a, b), c)
       
   725 }
       
   726 def rexpListInclusion(rs1: List[Rexp], rs2: List[Rexp]) : Boolean = {
       
   727   rs1.forall(rs2.contains)
       
   728 }
       
   729 def pAKC6(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
       
   730   // println("pakc")
       
   731   // println(shortRexpOutput(erase(r)))
   752   // println(shortRexpOutput(erase(r)))
   732   // println("acc")
   753   //   println("ctx---------")
       
   754   // rsprint(ctx)
       
   755   // println("pakc-------acc")
   733   // rsprint(acc)
   756   // rsprint(acc)
   734   // println("ctx---------")
   757   // println("r+ctx broken down---------")
   735   // rsprint(ctx)
       
   736   // println("ctx---------end")
       
   737   // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
   758   // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
   738 
   759 
   739   // rprint(L(erase(r), ctx))
   760   // rprint(L(erase(r), ctx))
   740   //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
   761   //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
   741   if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
   762   if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
   777     }
   798     }
   778   }
   799   }
   779 }
   800 }
   780 
   801 
   781 
   802 
   782 def distinctBy6(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
   803 def distinctBy6(xs: List[ARexp], acc: Set[Rexp] = Set()) : List[ARexp] = xs match {
   783   case Nil => 
   804   case Nil => 
   784     Nil
   805     Nil
   785   case x :: xs => {
   806   case x :: xs => {
   786     val erased = erase(x)
   807     val erased = erase(x)
   787     if(acc.contains(erased)){
   808     if(acc.contains(erased)){
   788       distinctBy6(xs, acc)
   809       distinctBy6(xs, acc)
   789     }
   810     }
   790     else{
   811     else{
   791       val pruned = pAKC6(acc, x, Nil)
   812       val pruned = pAKC6(acc, x, Nil)
   792       val newTerms = breakIntoTerms(erase(pruned))
   813       val newTerms = strongBreakIntoTerms(erase(pruned))
   793       pruned match {
   814       pruned match {
   794         case AZERO => 
   815         case AZERO => 
   795           distinctBy6(xs, acc)
   816           distinctBy6(xs, acc)
   796         case xPrime => 
   817         case xPrime => 
   797           xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
   818           xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
   798       }
   819       }
   799     }
   820     }
   800   }
   821   }
   801 }
   822 }
   802 
   823 
  1011     case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
  1032     case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
  1012   }
  1033   }
  1013   def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
  1034   def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
  1014     case ZERO => Set()
  1035     case ZERO => Set()
  1015     case ONE => l
  1036     case ONE => l
  1016     case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
  1037     case t => l.map( m => m._2 match 
       
  1038       {
       
  1039         case ZERO => m 
       
  1040         case ONE => (m._1, t) 
       
  1041         case p => (m._1, SEQ(p, t)) 
       
  1042       }  
       
  1043     
       
  1044     )
  1017   }
  1045   }
  1018   def lf(r: Rexp): Lin = r match {
  1046   def lf(r: Rexp): Lin = r match {
  1019     case ZERO => Set()
  1047     case ZERO => Set()
  1020     case ONE => Set()
  1048     case ONE => Set()
  1021     case CHAR(f) => {
  1049     case CHAR(f) => {
  1047   }
  1075   }
  1048   def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
  1076   def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
  1049     case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
  1077     case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
  1050     case Nil => ts 
  1078     case Nil => ts 
  1051   }
  1079   }
  1052   def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
  1080   def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = 
       
  1081     ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
  1053   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
  1082   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
  1054   //all implementation of partial derivatives that involve set union are potentially buggy
  1083   //all implementation of partial derivatives that involve set union are potentially buggy
  1055   //because they don't include the original regular term before they are pdered.
  1084   //because they don't include the original regular term before they are pdered.
  1056   //now only pderas is fixed.
  1085   //now only pderas is fixed.
  1057   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
  1086   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = 
       
  1087     if(d > 0) 
       
  1088       pderas(lfs(t).map(mon => mon._2), d - 1) ++ t 
       
  1089     else 
       
  1090       lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
  1058   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
  1091   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
  1059   def awidth(r: Rexp) : Int = r match {
  1092   def awidth(r: Rexp) : Int = r match {
  1060     case CHAR(c) => 1
  1093     case CHAR(c) => 1
  1061     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
  1094     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
  1062     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
  1095     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
  1221   }
  1254   }
  1222 
  1255 
  1223 
  1256 
  1224 }
  1257 }
  1225 // small()
  1258 // small()
  1226 generator_test()
  1259 // generator_test()
  1227 
  1260 
  1228 def counterexample_check() {
  1261 def counterexample_check() {
  1229   val r = STAR(SEQ(ALTS(ALTS(CHAR('b'),CHAR('c')),
  1262   val r = SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
  1230     SEQ(CHAR('b'),CHAR('b'))),ALTS(SEQ(ONE,CHAR('b')),CHAR('a'))))
  1263   val s = "ccc"
  1231   val s = "bbbb"
       
  1232   val bdStrong5 = bdersStrong6(s.toList, internalise(r))
  1264   val bdStrong5 = bdersStrong6(s.toList, internalise(r))
  1233   val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
  1265   val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
  1234   val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
  1266   val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
  1235   println("original regex ")
  1267   println("original regex ")
  1236   rprint(r)
  1268   rprint(r)
  1240   rsprint(bdStrong5Set)
  1272   rsprint(bdStrong5Set)
  1241   println("after pderUNIV")
  1273   println("after pderUNIV")
  1242   rsprint(pdersSet.toList)
  1274   rsprint(pdersSet.toList)
  1243 }
  1275 }
  1244 // counterexample_check()
  1276 // counterexample_check()
       
  1277 def linform_test() {
       
  1278   val r = STAR(SEQ(STAR(CHAR('c')), ONE))
       
  1279   val r_linforms = lf(r)
       
  1280   println(r_linforms.size)
       
  1281 }
       
  1282 linform_test()
  1245 // 1
  1283 // 1
  1246 def newStrong_test() {
  1284 def newStrong_test() {
  1247   val r2 = (CHAR('b') | ONE)
  1285   val r2 = (CHAR('b') | ONE)
  1248   val r0 = CHAR('d')
  1286   val r0 = CHAR('d')
  1249   val r1 = (ONE | CHAR('c'))
  1287   val r1 = (ONE | CHAR('c'))