thys2/blexer1.sc
changeset 414 1234e6bd4fd1
parent 412 48876e1092f1
child 415 5c96fe5306a7
equal deleted inserted replaced
413:b85f8e28fbd8 414:1234e6bd4fd1
   440               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
   440               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
   441             }
   441             }
   442           case x => x::distinctBy2(xs, res::acc)
   442           case x => x::distinctBy2(xs, res::acc)
   443         }
   443         }
   444     }
   444     }
       
   445   }
       
   446 
       
   447 
       
   448 
       
   449   def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
       
   450     case Nil => Nil
       
   451     case (x::xs) => {
       
   452       val res = erase(x)
       
   453       if(acc.contains(res))
       
   454         distinctBy3(xs, acc)
       
   455       else
       
   456         x match {
       
   457           case ASEQ(bs0, AALTS(bs1, rs), r2) => 
       
   458             val newTerms =  distinctBy3(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
       
   459             val rsPrime = projectFirstChild(newTerms)
       
   460             newTerms match {
       
   461               case Nil => distinctBy3(xs, acc)
       
   462               case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
       
   463               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
       
   464             }
       
   465           case x => x::distinctBy3(xs, res::acc)
       
   466         }
       
   467     }
       
   468   }
       
   469 
       
   470   def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
       
   471     case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
       
   472     case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
       
   473     case _ => r::Nil
   445   } 
   474   } 
   446 
   475 
   447   def prettyRexp(r: Rexp) : String = r match {
   476   def prettyRexp(r: Rexp) : String = r match {
   448       case STAR(r0) => s"${prettyRexp(r0)}*"
   477       case STAR(r0) => s"${prettyRexp(r0)}*"
   449       case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
   478       case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
   495 }
   524 }
   496 
   525 
   497 def blexing_simp(r: Rexp, s: String) : Val = {
   526 def blexing_simp(r: Rexp, s: String) : Val = {
   498     val bit_code = blex_simp(internalise(r), s.toList)
   527     val bit_code = blex_simp(internalise(r), s.toList)
   499     decode(r, bit_code)
   528     decode(r, bit_code)
       
   529   }
       
   530 
       
   531   def strong_blexing_simp(r: Rexp, s: String) : Val = {
       
   532     decode(r, strong_blex_simp(internalise(r), s.toList))
       
   533   }
       
   534 
       
   535   def strong_blex_simp(r: ARexp, s: List[Char]) :Bits = s match {
       
   536     case Nil => {
       
   537       if (bnullable(r)) {
       
   538         //println(asize(r))
       
   539         val bits = mkepsBC(r)
       
   540         bits
       
   541       }
       
   542       else 
       
   543         throw new Exception("Not matched")
       
   544     }
       
   545     case c::cs => {
       
   546       val der_res = bder(c,r)
       
   547       val simp_res = strongBsimp(der_res)  
       
   548       strong_blex_simp(simp_res, cs)      
       
   549     }
   500   }
   550   }
   501 
   551 
   502 
   552 
   503 
   553 
   504   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
   554   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
   625                     }
   675                     }
   626                 case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)    
   676                 case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)    
   627             }
   677             }
   628                 
   678                 
   629         }
   679         }
   630     
   680   
   631 }
   681 }
       
   682 
       
   683 def allCharSeq(r: Rexp) : Boolean = r match {
       
   684   case CHAR(c) => true
       
   685   case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
       
   686   case _ => false
       
   687 }
       
   688 
       
   689 def flattenSeq(r: Rexp) : String = r match {
       
   690   case CHAR(c) => c.toString
       
   691   case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
       
   692   case _ => throw new Error("flatten unflattenable rexp")
       
   693 } 
   632 
   694 
   633 def shortRexpOutput(r: Rexp) : String = r match {
   695 def shortRexpOutput(r: Rexp) : String = r match {
   634     case CHAR(c) => c.toString
   696     case CHAR(c) => c.toString
   635     case ONE => "1"
   697     case ONE => "1"
   636     case ZERO => "0"
   698     case ZERO => "0"
       
   699     case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
   637     case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
   700     case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
   638     case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
   701     case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
   639     case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
   702     case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
   640     //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
   703     //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
   641     //case RTOP => "RTOP"
   704     //case RTOP => "RTOP"
   647       if (bnullable(r)) {
   710       if (bnullable(r)) {
   648         //println(asize(r))
   711         //println(asize(r))
   649         val bits = mkepsBC(r)
   712         val bits = mkepsBC(r)
   650         bits
   713         bits
   651       }
   714       }
   652     else throw new Exception("Not matched")
   715       else 
       
   716         throw new Exception("Not matched")
   653     }
   717     }
   654     case c::cs => {
   718     case c::cs => {
   655       val der_res = bder(c,r)
   719       val der_res = bder(c,r)
   656       val simp_res = bsimp(der_res)  
   720       val simp_res = bsimp(der_res)  
   657       blex_simp(simp_res, cs)      
   721       blex_simp(simp_res, cs)      
   722   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
   786   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
   723   //all implementation of partial derivatives that involve set union are potentially buggy
   787   //all implementation of partial derivatives that involve set union are potentially buggy
   724   //because they don't include the original regular term before they are pdered.
   788   //because they don't include the original regular term before they are pdered.
   725   //now only pderas is fixed.
   789   //now only pderas is fixed.
   726   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
   790   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
   727   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r))
   791   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
   728   def awidth(r: Rexp) : Int = r match {
   792   def awidth(r: Rexp) : Int = r match {
   729     case CHAR(c) => 1
   793     case CHAR(c) => 1
   730     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
   794     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
   731     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
   795     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
   732     case ONE => 0
   796     case ONE => 0
   752   def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
   816   def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
   753 
   817 
   754 
   818 
   755 
   819 
   756 // @arg(doc = "small tests")
   820 // @arg(doc = "small tests")
   757 val STARREG = (((STAR("a") | (STAR("aa")) | STAR(STAR("aaa") | STAR("aaaa")) | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%).%).%
   821 val STARREG = (((STAR("a") | (STAR("aa")) | STAR("aaa") | STAR("aaaa") | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%))
   758 //(STAR("a") | ( STAR("aaa")) | STAR("aaaa") | STAR("aaaaa")).%.%.%
   822 //(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa")).%).%).%
   759 
   823 
   760 @main
   824 // @main
   761 def small() = {
   825 def small() = {
   762 
   826 
   763 
   827 
   764 //   println(lexing_simp(NOTREG, prog0))
   828 //   println(lexing_simp(NOTREG, prog0))
   765 //   val v = lex_simp(NOTREG, prog0.toList)
   829 //   val v = lex_simp(NOTREG, prog0.toList)
   766 //   println(v)
   830 //   println(v)
   767 
   831 
   768 //   val d =  (lex_simp(NOTREG, prog0.toList))
   832 //   val d =  (lex_simp(NOTREG, prog0.toList))
   769 //   println(d)
   833 //   println(d)
   770   val pderSTAR = pderUNIV(STARREG)
   834   val pderSTAR = pderUNIV(STARREG)
       
   835 
   771   val refSize = pderSTAR.map(size(_)).sum
   836   val refSize = pderSTAR.map(size(_)).sum
   772   println(refSize)
   837   println("different partial derivative terms:")
   773   for(i <- 10 to 100){
   838   pderSTAR.foreach(r => r match {
       
   839       
       
   840         case SEQ(head, rstar) =>
       
   841           println(shortRexpOutput(head) ++ "~STARREG")
       
   842         case STAR(rstar) =>
       
   843           println("STARREG")
       
   844       
       
   845     }
       
   846     )
       
   847   println("the total number of terms is")
       
   848   //println(refSize)
       
   849   println(pderSTAR.size)
       
   850   for(i <- List(1, 10, 100, 400, 800, 840, 900) ){
   774     val prog0 = "a" * i
   851     val prog0 = "a" * i
   775     println(s"test: $prog0")
   852     //println(s"test: $prog0")
       
   853     println(s"testing with $i a's" )
   776     val bd = bdersSimp(prog0, STARREG)//DB
   854     val bd = bdersSimp(prog0, STARREG)//DB
   777     val sbd = bdersSimpS(prog0, STARREG)//strongDB
   855     val sbd = bdersSimpS(prog0, STARREG)//strongDB
       
   856     val subTerms = breakIntoTerms(erase(sbd))
       
   857     val subTermsLarge = breakIntoTerms(erase(bd))
       
   858     
       
   859     println(s"subterms of regex with strongDB: ${subTerms.length}, standard DB: ${subTermsLarge.length}")
       
   860     println("the number of distinct subterms for bsimp with strongDB and standardDB")
       
   861     println(subTerms.distinct.size)
       
   862     println(subTermsLarge.distinct.size)
   778 
   863 
   779 
   864 
   780     // println(shortRexpOutput(erase(sbd)))
   865     // println(shortRexpOutput(erase(sbd)))
   781     // println(shortRexpOutput(erase(bd)))
   866     // println(shortRexpOutput(erase(bd)))
   782     println("pdersize, original, strongSimp, simp")
   867     
   783     println(refSize, size(STARREG), asize(sbd), asize(bd))
   868     println("pdersize, original, strongSimp")
   784 
   869     println(refSize, size(STARREG),  asize(sbd), asize(bd))
   785     val vres = blexing_simp( STARREG, prog0)
   870 
   786     println(vres)
   871     // val vres = strong_blexing_simp( STARREG, prog0)
       
   872     // println(vres)
   787   }
   873   }
   788 //   println(vs.length)
   874 //   println(vs.length)
   789 //   println(vs)
   875 //   println(vs)
   790    
   876    
   791 
   877 
   792   // val prog1 = """read  n; write n"""  
   878   // val prog1 = """read  n; write n"""  
   793   // println(s"test: $prog1")
   879   // println(s"test: $prog1")
   794   // println(lexing_simp(WHILE_REGS, prog1))
   880   // println(lexing_simp(WHILE_REGS, prog1))
   795 }
   881 }
       
   882 
       
   883 small()
   796 
   884 
   797 // // Bigger Tests
   885 // // Bigger Tests
   798 // //==============
   886 // //==============
   799 
   887 
   800 // // escapes strings and prints them out as "", "\n" and so on
   888 // // escapes strings and prints them out as "", "\n" and so on