thys2/blexer1.sc
changeset 412 48876e1092f1
parent 409 f71df68776bb
child 414 1234e6bd4fd1
equal deleted inserted replaced
409:f71df68776bb 412:48876e1092f1
   382           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   382           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   383       }
   383       }
   384       case AALTS(bs1, rs) => {
   384       case AALTS(bs1, rs) => {
   385             val rs_simp = rs.map(strongBsimp(_))
   385             val rs_simp = rs.map(strongBsimp(_))
   386             val flat_res = flats(rs_simp)
   386             val flat_res = flats(rs_simp)
   387             val dist_res = strongDB(flat_res)//distinctBy(flat_res, erase)
   387             val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase)
   388             dist_res match {
   388             dist_res match {
   389               case Nil => AZERO
   389               case Nil => AZERO
   390               case s :: Nil => fuse(bs1, s)
   390               case s :: Nil => fuse(bs1, s)
   391               case rs => AALTS(bs1, rs)  
   391               case rs => AALTS(bs1, rs)  
   392             }
   392             }
   415       if (acc.contains(res)) distinctBy(xs, f, acc)  
   415       if (acc.contains(res)) distinctBy(xs, f, acc)  
   416       else x::distinctBy(xs, f, res::acc)
   416       else x::distinctBy(xs, f, res::acc)
   417     }
   417     }
   418   } 
   418   } 
   419 
   419 
       
   420   def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
       
   421     case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
       
   422     case Nil => Nil
       
   423   }
       
   424 
       
   425 
       
   426   def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
       
   427     case Nil => Nil
       
   428     case (x::xs) => {
       
   429       val res = erase(x)
       
   430       if(acc.contains(res))
       
   431         distinctBy2(xs, acc)
       
   432       else
       
   433         x match {
       
   434           case ASEQ(bs0, AALTS(bs1, rs), r2) => 
       
   435             val newTerms =  distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
       
   436             val rsPrime = projectFirstChild(newTerms)
       
   437             newTerms match {
       
   438               case Nil => distinctBy2(xs, acc)
       
   439               case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
       
   440               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
       
   441             }
       
   442           case x => x::distinctBy2(xs, res::acc)
       
   443         }
       
   444     }
       
   445   } 
       
   446 
   420   def prettyRexp(r: Rexp) : String = r match {
   447   def prettyRexp(r: Rexp) : String = r match {
   421       case STAR(r0) => s"${prettyRexp(r0)}*"
   448       case STAR(r0) => s"${prettyRexp(r0)}*"
   422       case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
   449       case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
   423       case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
   450       case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
   424       case CHAR(c) => c.toString
   451       case CHAR(c) => c.toString
   479     case c::s => bders_simp(s, bsimp(bder(c, r)))
   506     case c::s => bders_simp(s, bsimp(bder(c, r)))
   480   }
   507   }
   481   
   508   
   482   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
   509   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
   483 
   510 
       
   511   def bders_simpS(s: List[Char], r: ARexp) : ARexp = s match {
       
   512     case Nil => r 
       
   513     case c::s => bders_simpS(s, strongBsimp(bder(c, r)))
       
   514   }
       
   515 
       
   516   def bdersSimpS(s: String, r: Rexp) : ARexp = bders_simpS(s.toList, internalise(r))
   484 
   517 
   485   def erase(r:ARexp): Rexp = r match{
   518   def erase(r:ARexp): Rexp = r match{
   486     case AZERO => ZERO
   519     case AZERO => ZERO
   487     case AONE(_) => ONE
   520     case AONE(_) => ONE
   488     case ACHAR(bs, c) => CHAR(c)
   521     case ACHAR(bs, c) => CHAR(c)
   508       if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB)  
   541       if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB)  
   509       else distinctByWithAcc(xs, f, res::acc, x::accB)
   542       else distinctByWithAcc(xs, f, res::acc, x::accB)
   510     }
   543     }
   511   } 
   544   } 
   512 
   545 
   513 
   546 //(r1+r2)r + (r2'+r3)r' ~> (r1+r2)r + (r3)r' (erase r = erase r') (erase r2 = erase r2')
       
   547 //s = s1@s2 \in L R  s1 \in L r2 s2 \in L r
       
   548 //s = s3@s4  s3 \in L r1 s4 \in L r |s3| < |s1| \nexists s3' s4' s.t. s3'@s4' = s and s3' \in L r1 s4' \in L r
       
   549 //(r1+r4)r ~> r1r +_usedToBeSeq r4r 
       
   550 // | ss7:  "erase a01 = erase a02 ∧ (distinctBy as2 erase (set (map erase as1)) = as2p)  ⟹ 
       
   551 //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc)  s↝
       
   552 //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
       
   553 // | ss8:  "erase a01 = erase a02 ∧ (distinctBy [a2] erase (set (map erase as1)) = [])  ⟹ 
       
   554 //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs a2 (ASTAR bs02 a02)]@rsc) s↝                                                
       
   555 //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@rsc)"
       
   556 // | ss9:  "erase a01 = erase a02 ∧ (distinctBy as2 erase {erase a1} = as2p)  ⟹ 
       
   557 //         (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) s↝ 
       
   558 //         (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
       
   559 
       
   560 
       
   561 //# of top-level terms
       
   562 //      r1r2 -> r11r2 + r12 -> r21r2 + r22 + r12' -> 4 terms -> 5 terms -> 6..........
       
   563 // if string long enough --> #| r1r2 \s s | > unbounded? No ->  #| r1r2 \s s | <  #| pders UNIV r1| + #|pders UNIV r2| <= awidth r1r2
       
   564 //r* -> r1r* -> r21r* + r22r* -> 4 terms -> 8 terms..............
   514   def strongDB(xs: List[ARexp], 
   565   def strongDB(xs: List[ARexp], 
   515                        acc1: List[Rexp] = Nil, 
   566                        acc1: List[Rexp] = Nil, 
   516                        acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
   567                        acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
   517     case Nil => Nil
   568     case Nil => Nil
   518     case (x::xs) => 
   569     case (x::xs) => 
   536                                 distinctByWithAcc(headList, erase, headListAlready._1)
   587                                 distinctByWithAcc(headList, erase, headListAlready._1)
   537                         newHeads match{
   588                         newHeads match{
   538                             case newHead::Nil =>
   589                             case newHead::Nil =>
   539                                 ASTAR(bs0, r0) :: 
   590                                 ASTAR(bs0, r0) :: 
   540                                 strongDB(xs, erase(x)::acc1, 
   591                                 strongDB(xs, erase(x)::acc1, 
   541                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
   592                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )
   542                             case Nil =>
   593                             case Nil =>
   543                                 strongDB(xs, erase(x)::acc1, 
   594                                 strongDB(xs, erase(x)::acc1, 
   544                                 acc2)
   595                                 acc2)
   545                         }
   596                         }
   546                     }                
   597                     }                
   577                 
   628                 
   578         }
   629         }
   579     
   630     
   580 }
   631 }
   581 
   632 
       
   633 def shortRexpOutput(r: Rexp) : String = r match {
       
   634     case CHAR(c) => c.toString
       
   635     case ONE => "1"
       
   636     case ZERO => "0"
       
   637     case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
       
   638     case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
       
   639     case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
       
   640     //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
       
   641     //case RTOP => "RTOP"
       
   642   }
       
   643 
   582 
   644 
   583 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   645 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   584     case Nil => {
   646     case Nil => {
   585       if (bnullable(r)) {
   647       if (bnullable(r)) {
   586         //println(asize(r))
   648         //println(asize(r))
   606     case STAR(r) => 1 + size(r)
   668     case STAR(r) => 1 + size(r)
   607   }
   669   }
   608 
   670 
   609   def asize(a: ARexp) = size(erase(a))
   671   def asize(a: ARexp) = size(erase(a))
   610 
   672 
       
   673 //pder related
       
   674 type Mon = (Char, Rexp)
       
   675 type Lin = Set[Mon]
       
   676 
       
   677 def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
       
   678     case ZERO => Set()
       
   679     case ONE => rs
       
   680     case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
       
   681   }
       
   682   def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
       
   683     case ZERO => Set()
       
   684     case ONE => l
       
   685     case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
       
   686   }
       
   687   def lf(r: Rexp): Lin = r match {
       
   688     case ZERO => Set()
       
   689     case ONE => Set()
       
   690     case CHAR(f) => {
       
   691       //val Some(c) = alphabet.find(f) 
       
   692       Set((f, ONE))
       
   693     }
       
   694     case ALTS(r1, r2) => {
       
   695       lf(r1 ) ++ lf(r2)
       
   696     }
       
   697     case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
       
   698     case SEQ(r1, r2) =>{
       
   699       if (nullable(r1))
       
   700         cir_prod(lf(r1), r2) ++ lf(r2)
       
   701       else
       
   702         cir_prod(lf(r1), r2) 
       
   703     }
       
   704   }
       
   705   def lfs(r: Set[Rexp]): Lin = {
       
   706     r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
       
   707   }
       
   708 
       
   709   def pder(x: Char, t: Rexp): Set[Rexp] = {
       
   710     val lft = lf(t)
       
   711     (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
       
   712   }
       
   713   def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
       
   714     case x::xs => pders(xs, pder(x, t))
       
   715     case Nil => Set(t)
       
   716   }
       
   717   def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
       
   718     case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
       
   719     case Nil => ts 
       
   720   }
       
   721   def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
       
   722   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
       
   723   //all implementation of partial derivatives that involve set union are potentially buggy
       
   724   //because they don't include the original regular term before they are pdered.
       
   725   //now only pderas is fixed.
       
   726   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
       
   727   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r))
       
   728   def awidth(r: Rexp) : Int = r match {
       
   729     case CHAR(c) => 1
       
   730     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
       
   731     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
       
   732     case ONE => 0
       
   733     case ZERO => 0
       
   734     case STAR(r) => awidth(r)
       
   735   }
       
   736   //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
       
   737   //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
       
   738   def o(r: Rexp) = if (nullable(r)) ONE else ZERO
       
   739   //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
       
   740   def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
       
   741     case ZERO => Set[Rexp]()
       
   742     case ONE => Set[Rexp]()
       
   743     case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
       
   744     case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
       
   745     case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
       
   746     case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
       
   747   }
       
   748   def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
       
   749     case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
       
   750     case Nil => ts   
       
   751   }
       
   752   def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
       
   753 
       
   754 
   611 
   755 
   612 // @arg(doc = "small tests")
   756 // @arg(doc = "small tests")
   613 val STARREG = ((STAR("a") | STAR("aa") ).%).%
   757 val STARREG = (((STAR("a") | (STAR("aa")) | STAR(STAR("aaa") | STAR("aaaa")) | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%).%).%
       
   758 //(STAR("a") | ( STAR("aaa")) | STAR("aaaa") | STAR("aaaaa")).%.%.%
   614 
   759 
   615 @main
   760 @main
   616 def small() = {
   761 def small() = {
   617 
   762 
   618   val prog0 = """aaaaaaaaa"""
   763 
   619   println(s"test: $prog0")
       
   620 //   println(lexing_simp(NOTREG, prog0))
   764 //   println(lexing_simp(NOTREG, prog0))
   621 //   val v = lex_simp(NOTREG, prog0.toList)
   765 //   val v = lex_simp(NOTREG, prog0.toList)
   622 //   println(v)
   766 //   println(v)
   623 
   767 
   624 //   val d =  (lex_simp(NOTREG, prog0.toList))
   768 //   val d =  (lex_simp(NOTREG, prog0.toList))
   625 //   println(d)
   769 //   println(d)
   626 
   770   val pderSTAR = pderUNIV(STARREG)
   627   val bd = bdersSimp(prog0, STARREG)
   771   val refSize = pderSTAR.map(size(_)).sum
   628     println(erase(bd))
   772   println(refSize)
   629     println(asize(bd))
   773   for(i <- 10 to 100){
       
   774     val prog0 = "a" * i
       
   775     println(s"test: $prog0")
       
   776     val bd = bdersSimp(prog0, STARREG)//DB
       
   777     val sbd = bdersSimpS(prog0, STARREG)//strongDB
       
   778 
       
   779 
       
   780     // println(shortRexpOutput(erase(sbd)))
       
   781     // println(shortRexpOutput(erase(bd)))
       
   782     println("pdersize, original, strongSimp, simp")
       
   783     println(refSize, size(STARREG), asize(sbd), asize(bd))
   630 
   784 
   631     val vres = blexing_simp( STARREG, prog0)
   785     val vres = blexing_simp( STARREG, prog0)
   632     println(vres)
   786     println(vres)
       
   787   }
   633 //   println(vs.length)
   788 //   println(vs.length)
   634 //   println(vs)
   789 //   println(vs)
   635    
   790    
   636 
   791 
   637   // val prog1 = """read  n; write n"""  
   792   // val prog1 = """read  n; write n"""