lex_blex_Frankensteined.scala
changeset 5 622ddbb1223a
parent 3 f15dccc42c7b
child 11 9c1ca6d6e190
equal deleted inserted replaced
4:7a349fe58bf4 5:622ddbb1223a
    86     case STAR(r) => ASTAR(Nil, internalise(r))
    86     case STAR(r) => ASTAR(Nil, internalise(r))
    87     case RECD(x, r) => internalise(r)
    87     case RECD(x, r) => internalise(r)
    88   }
    88   }
    89 
    89 
    90   internalise(("a" | "ab") ~ ("b" | ""))
    90   internalise(("a" | "ab") ~ ("b" | ""))
    91 /*
    91 
    92   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
    92   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
    93     case (ONE, bs) => (Empty, bs)
    93     case (ONE, bs) => (Empty, bs)
    94     case (PRED(f), C(c)::bs) => (Chr(c), bs)
    94     case (CHAR(f), C(c)::bs) => (Chr(c), bs)
    95     case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
    95     case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
    96     case (ALTS(rs), bs) => bs match {
    96     case (ALTS(rs), bs) => bs match {
    97       case Z::bs1 => {
    97       case Z::bs1 => {
    98         val (v, bs2) = decode_aux(rs.head, bs1)
    98         val (v, bs2) = decode_aux(rs.head, bs1)
    99         (Left(v), bs2)
    99         (Left(v), bs2)
   123 
   123 
   124   def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
   124   def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
   125     case (v, Nil) => v
   125     case (v, Nil) => v
   126     case _ => throw new Exception("Not decodable")
   126     case _ => throw new Exception("Not decodable")
   127   }
   127   }
   128 */
   128 
   129 
   129 
   130   //erase function: extracts the regx from Aregex
   130   //erase function: extracts the regx from Aregex
   131   def erase(r:ARexp): Rexp = r match{
   131   def erase(r:ARexp): Rexp = r match{
   132     case AZERO => ZERO
   132     case AZERO => ZERO
   133     case AONE(_) => ONE
   133     case AONE(_) => ONE
   368     case Nil => Nil
   368     case Nil => Nil
   369     case ZERO :: rs1 => rflats(rs1)
   369     case ZERO :: rs1 => rflats(rs1)
   370     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
   370     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
   371     case r1 :: rs2 => r1 :: rflats(rs2)
   371     case r1 :: rs2 => r1 :: rflats(rs2)
   372   }
   372   }
   373   //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   374   //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
       
   375   var flats_time = 0L
   373   var flats_time = 0L
   376   var dist_time = 0L
   374   var dist_time = 0L
   377   /*
       
   378   def bsimp(r: ARexp, depth: Int): ARexp = 
       
   379   {
       
   380     r match {
       
   381       case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
       
   382           case (AZERO, _) => AZERO
       
   383           case (_, AZERO) => AZERO
       
   384           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   385           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   386       }
       
   387       case AALTS(bs1, rs) => {
       
   388         depth match {
       
   389           case 0 => {
       
   390             flats(distinctBy(rs, erase)) match {
       
   391               case Nil => AZERO
       
   392               case s :: Nil => fuse(bs1, s)
       
   393               case rs => AALTS(bs1, rs) 
       
   394             } 
       
   395           }
       
   396           case n => {
       
   397             val rs_simp = rs.map(bsimp(_, n - 1))
       
   398             val time2 = System.nanoTime()
       
   399             val flat_res = flats(rs_simp)
       
   400             val time3 = System.nanoTime()
       
   401             val dist_res = distinctBy(flat_res, erase)
       
   402             val time4 = System.nanoTime()
       
   403             flats_time = flats_time + time3 - time2
       
   404             dist_time = dist_time + time4 - time3
       
   405             //flats_time += time3 - time2
       
   406             //dist_time += time4 - time3
       
   407             //distinctBy(flats(rs.map(bsimp)), erase) match {
       
   408             dist_res match {
       
   409               case Nil => AZERO
       
   410               case s :: Nil => fuse(bs1, s)
       
   411               case rs => AALTS(bs1, rs)  
       
   412             }
       
   413           }
       
   414         }
       
   415       }
       
   416       case r => r
       
   417     }
       
   418   }
       
   419   */
       
   420   //----------------------------------------------------------------------------This bsimp is the original slow one
       
   421   
   375   
   422   def bsimp(r: ARexp): ARexp = r match {
   376   def bsimp(r: ARexp): ARexp = r match {
   423     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   377     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   424         case (AZERO, _) => AZERO
   378         case (AZERO, _) => AZERO
   425         case (_, AZERO) => AZERO
   379         case (_, AZERO) => AZERO
   426         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   380         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   427         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   381         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   428     }
   382     }
   429     case AALTS(bs1, rs) => {
   383     case AALTS(bs1, rs) => {
   430       val rs_simp = rs.map(bsimp)
   384       val rs_simp = rs.map(bsimp)
   431       val time2 = System.nanoTime()
       
   432       val flat_res = flats(rs_simp)
   385       val flat_res = flats(rs_simp)
   433       val time3 = System.nanoTime()
       
   434       val dist_res = distinctBy(flat_res, erase)
   386       val dist_res = distinctBy(flat_res, erase)
   435       val time4 = System.nanoTime()
       
   436       flats_time = flats_time + time3 - time2
       
   437       dist_time = dist_time + time4 - time3
       
   438       dist_res match {
   387       dist_res match {
   439         case Nil => AZERO
   388         case Nil => AZERO
   440         case s :: Nil => fuse(bs1, s)
   389         case s :: Nil => fuse(bs1, s)
   441         case rs => AALTS(bs1, rs)  
   390         case rs => AALTS(bs1, rs)  
   442       }
   391       }
   443     }
   392     }
   444     case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   393     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   445     case r => r
   394     case r => r
   446   }
   395   }
   447 
   396   def super_bsimp(r: ARexp): ARexp = r match {
   448   def bsimp_weakened(r: ARexp): ARexp = r match {
   397     case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
   449     case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
       
   450         case (AZERO, _) => AZERO
   398         case (AZERO, _) => AZERO
   451         case (_, AZERO) => AZERO
   399         case (_, AZERO) => AZERO
   452         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   400         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   401         case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) ) 
   453         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   402         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   454     }
   403     }
   455     case AALTS(bs1, rs) => {
   404     case AALTS(bs1, rs) => {
   456       val rs_simp = rs.map(bsimp_weakened)
   405       val rs_simp = rs.map(super_bsimp)
   457       val time2 = System.nanoTime()
       
   458       val flat_res = flats(rs_simp)
   406       val flat_res = flats(rs_simp)
   459       val time3 = System.nanoTime()
       
   460       val dist_res = distinctBy(flat_res, erase)
   407       val dist_res = distinctBy(flat_res, erase)
   461       val time4 = System.nanoTime()
       
   462       flats_time = flats_time + time3 - time2
       
   463       dist_time = dist_time + time4 - time3
       
   464       dist_res match {
   408       dist_res match {
   465         case Nil => AZERO
   409         case Nil => AZERO
   466         case s :: Nil => fuse(bs1, s)
   410         case s :: Nil => fuse(bs1, s)
   467         case rs => AALTS(bs1, rs)  
   411         case rs => AALTS(bs1, rs)  
   468       }
   412       }
   469     }
   413     }
   470     case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
   414     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   471     case r => r
   415     case r => r
   472   }
   416   }
       
   417 
   473 
   418 
   474   def simp_weakened(r: Rexp): Rexp = r match {
   419   def simp_weakened(r: Rexp): Rexp = r match {
   475     case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
   420     case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
   476         case (ZERO, _) => ZERO
   421         case (ZERO, _) => ZERO
   477         case (_, ZERO) => ZERO
   422         case (_, ZERO) => ZERO
   530   
   475   
   531   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   476   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   532     case Nil => {
   477     case Nil => {
   533       if (bnullable(r)) {
   478       if (bnullable(r)) {
   534         //println(asize(r))
   479         //println(asize(r))
   535         val time4 = System.nanoTime()
   480         mkepsBC(r)
   536         val bits = mkepsBC(r)
       
   537         val time5 = System.nanoTime()
       
   538         mkepsBC_time = time5 - time4
       
   539         bits
       
   540       }
   481       }
   541     else throw new Exception("Not matched")
   482     else throw new Exception("Not matched")
   542     }
   483     }
   543     case c::cs => {
   484     case c::cs => {
   544       val time1 = System.nanoTime()
       
   545       val der_res = bder(c,r)
   485       val der_res = bder(c,r)
   546       val time2 = System.nanoTime()
       
   547       val simp_res = bsimp(der_res)
   486       val simp_res = bsimp(der_res)
   548       val time3 = System.nanoTime()  
       
   549       bder_time = bder_time + time2 - time1
       
   550       bsimp_time = bsimp_time + time3 - time2
       
   551       blex_simp(simp_res, cs)      
   487       blex_simp(simp_res, cs)      
       
   488     }
       
   489   }
       
   490   def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
       
   491     case Nil => {
       
   492       if (bnullable(r)) {
       
   493         mkepsBC(r)
       
   494       }
       
   495       else throw new Exception("Not matched")
       
   496     }
       
   497     case c::cs => {
       
   498       super_blex_simp(super_bsimp(bder(c,r)), cs)
   552     }
   499     }
   553   }
   500   }
   554   def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
   501   def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
   555     case Nil => r
   502     case Nil => r
   556     case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
   503     case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
   557   }
   504   }
   558 
   505 
   559   //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
       
   560   /*
       
   561   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   562     case Nil => {
       
   563       if (bnullable(r)) {
       
   564         //println(asize(r))
       
   565         mkepsBC(r)
       
   566       }
       
   567     else throw new Exception("Not matched")
       
   568     }
       
   569     case c::cs => {
       
   570       val der_res = bder(c,r)
       
   571       val simp_res = bsimp(der_res)  
       
   572       //val simp_res2 = bsimp(simp_res)  
       
   573       //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
       
   574       blex_simp(simp_res, cs)
       
   575     }
       
   576   }
       
   577   */
       
   578   /*
       
   579   def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   580     case Nil => {
       
   581       if (nullable(r)) {
       
   582         mkeps(r) 
       
   583       }
       
   584       else throw new Exception("Not matched")
       
   585     }
       
   586     case c::cs => {
       
   587       val start = System.nanoTime()
       
   588       val (r_simp, f_simp) = simp(der(c, r))
       
   589       val end = System.nanoTime()
       
   590       println((end - start)/1.0e9)
       
   591       inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   592     }
       
   593   }
       
   594   */
       
   595 
   506 
   596   //size: of a Aregx for testing purposes 
   507   //size: of a Aregx for testing purposes 
   597   def size(r: Rexp) : Int = r match {
   508   def size(r: Rexp) : Int = r match {
   598     case ZERO => 1
   509     case ZERO => 1
   599     case ONE => 1
   510     case ONE => 1
   606   def asize(a: ARexp) = size(erase(a))
   517   def asize(a: ARexp) = size(erase(a))
   607 
   518 
   608 
   519 
   609   // decoding does not work yet
   520   // decoding does not work yet
   610   def blexing_simp(r: Rexp, s: String) = {
   521   def blexing_simp(r: Rexp, s: String) = {
   611     //flats_time.clear()
       
   612     //dist_time.clear()
       
   613     flats_time = 0L
       
   614     dist_time = 0L
       
   615     bder_time = 0L
       
   616     bsimp_time = 0L
       
   617     mkepsBC_time = 0L
       
   618     val start = System.nanoTime()
       
   619     val bit_code = blex_simp(internalise(r), s.toList)
   522     val bit_code = blex_simp(internalise(r), s.toList)
   620     val end = System.nanoTime()
   523     decode(r, bit_code) 
   621     println("total time: "+ (end - start)/1.0e9)
   524   }
   622     println("spent on flats: " + (flats_time/(1.0e9)))
   525   def super_blexing_simp(r: Rexp, s: String) = {
   623     println("spent on distinctBy: " + (dist_time/(1.0e9)))
   526     decode(r, super_blex_simp(internalise(r), s.toList))
   624     println("spent on bder: "+ bder_time/1.0e9)
       
   625     println("spent on bsimp: " + bsimp_time/1.0e9)
       
   626     println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
       
   627     //println(s"The length of the string ${s.length}; length of bit sequence:")
       
   628     //println((bit_code.length))
       
   629     //println(final_derivative)
       
   630     //bit_code
       
   631     //decode(r, bit_code) 
       
   632   }
   527   }
   633 
   528 
   634 
   529 
   635 
   530 
   636 
   531 
   828   def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
   723   def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
   829     clear()
   724     clear()
   830     //println(s"Testing ${r}")
   725     //println(s"Testing ${r}")
   831     for (s <- ss.par) yield {
   726     for (s <- ss.par) yield {
   832       val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
   727       val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
   833       val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
   728       val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
   834       if (res1 != res2) println(s"Disagree on ${r} and ${s}")
   729       if (res1 != res2) println(s"Disagree on ${r} and ${s}")
   835       if (res1 != res2) println(s"   ${res1} !=  ${res2}")
   730       if (res1 != res2) println(s"   ${res1} !=  ${res2}")
   836       if (res1 != res2) Some((r, s)) else None
   731       if (res1 != res2) Some((r, s)) else None
   837     }
   732     }
   838   }
   733   }
   839 
   734 
   840 
   735 
   841 
   736 
   842   //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
   737   
   843   /*
   738   /*
   844   def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
   739   def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
   845     for (s <- ss){
   740     for (s <- ss){
   846       
   741       
   847       val der_res = bder(c, ar) 
   742       val der_res = bder(c, ar)