progs/scala/re-bit2.scala
changeset 436 222333d2bdc2
parent 416 57182b36ec01
equal deleted inserted replaced
434:0cce1aee0fb2 436:222333d2bdc2
       
     1 
       
     2 
       
     3 
       
     4 
     1 import scala.language.implicitConversions    
     5 import scala.language.implicitConversions    
     2 import scala.language.reflectiveCalls
     6 import scala.language.reflectiveCalls
     3 import scala.annotation.tailrec   
     7 import scala.annotation.tailrec   
     4 
     8 
     5 // standard regular expressions
     9 // standard regular expressions
   223       case AZERO :: rs1 => flats(rs1)
   227       case AZERO :: rs1 => flats(rs1)
   224       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   228       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   225       case r1 :: rs2 => r1 :: flats(rs2)
   229       case r1 :: rs2 => r1 :: flats(rs2)
   226     }
   230     }
   227 
   231 
   228 def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
       
   229     case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
       
   230     case Nil => Nil
       
   231   }
       
   232 
       
   233 
       
   234 
       
   235 def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
       
   236     case Nil => Nil
       
   237     case (x::xs) => {
       
   238       val res = erase(x)
       
   239       if(acc.contains(res))
       
   240         distinctBy2(xs, acc)
       
   241       else
       
   242         x match {
       
   243           case ASEQ(bs0, AALTS(bs1, rs), r2) => 
       
   244             val newTerms =  distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
       
   245             val rsPrime = projectFirstChild(newTerms)
       
   246             newTerms match {
       
   247               case Nil => distinctBy2(xs, acc)
       
   248               case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
       
   249               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
       
   250             }
       
   251           case x => x::distinctBy2(xs, res::acc)
       
   252         }
       
   253     }
       
   254   } 
       
   255 
       
   256 /*
       
   257 def strongBsimp(r: ARexp): ARexp =
       
   258   {
       
   259     r match {
       
   260       case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
       
   261           case (AZERO, _) => AZERO
       
   262           case (_, AZERO) => AZERO
       
   263           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   264           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   265       }
       
   266       case AALTS(bs1, rs) => {
       
   267             val rs_simp = rs.map(strongBsimp(_))
       
   268             val flat_res = flats(rs_simp)
       
   269             val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase)
       
   270             dist_res match {
       
   271               case Nil => AZERO
       
   272               case s :: Nil => fuse(bs1, s)
       
   273               case rs => AALTS(bs1, rs)  
       
   274             }
       
   275           
       
   276       }
       
   277       case r => r
       
   278     }
       
   279   }
       
   280 */
       
   281 
       
   282 def bsimp(r: ARexp): ARexp = r match {
   232 def bsimp(r: ARexp): ARexp = r match {
   283   case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   233   case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   284       case (AZERO, _) => AZERO
   234       case (AZERO, _) => AZERO
   285       case (_, AZERO) => AZERO
   235       case (_, AZERO) => AZERO
   286       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   236       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   287       //case (AALTS(bs, rs), r) => 
       
   288       //   AALTS(Nil, rs.map(ASEQ(bs1 ++ bs, _, r)))
       
   289       //case (ASEQ(bs2, r1, ASTAR(bs3, r2)), ASTAR(bs4, r3)) if erase(r2) == erase(r3) =>
       
   290       //  ASEQ(bs1 ++ bs2, r1, ASTAR(bs3, r2))
       
   291       //case (ASEQ(bs2, r1, r2), r3)  =>
       
   292       //      ASEQ(bs1 ++ bs2, r1, ASEQ(Nil, r2, r3))
       
   293       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   237       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   294   }
   238   }
   295   case AALTS(bs1, rs) => (flts(rs.map(bsimp))).distinctBy(erase) match {
   239   case AALTS(bs1, rs) => (flts(rs.map(bsimp))).distinctBy(erase) match {
   296       case Nil => AZERO
   240       case Nil => AZERO
   297       case r::Nil => fuse(bs1, r)
   241       case r::Nil => fuse(bs1, r)
   298       case rs => AALTS(bs1, rs)
   242       case rs => AALTS(bs1, rs)
   299   }
   243   }
   300   //case (ASTAR(bs1, ASTAR(bs2, r))) => bsimp(ASTAR(bs1 ++ bs2, r))
       
   301   case r => r
   244   case r => r
   302 }
   245 }
   303 
   246 
   304 def bders_simp(r: ARexp, s: List[Char]) : ARexp = s match {
   247 def bders_simp(r: ARexp, s: List[Char]) : ARexp = s match {
   305   case Nil => r
   248   case Nil => r
   308 
   251 
   309 
   252 
   310 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   253 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   311   case Nil => if (bnullable(r)) bmkeps(r) 
   254   case Nil => if (bnullable(r)) bmkeps(r) 
   312               else throw new Exception("Not matched")
   255               else throw new Exception("Not matched")
   313   case c::cs => blex(bsimp(bder(c, r)), cs)
   256   case c::cs => blex_simp(bsimp(bder(c, r)), cs)
   314 }
   257 }
   315 
   258 
   316 def blexing_simp(r: Rexp, s: String) : Val = 
   259 def blexing_simp(r: Rexp, s: String) : Val = 
   317   decode(r, blex_simp(internalise(r), s.toList))
   260   decode(r, blex_simp(internalise(r), s.toList))
       
   261 
       
   262 //////////////////////
       
   263 // new simplification
       
   264 
       
   265 // collects first components of sequences.
       
   266 def coll(r: Rexp, rs: List[Rexp]) : List[Rexp] = rs match {
       
   267  case Nil => Nil
       
   268  case SEQ(r1, r2) :: rs => 
       
   269    if (r == r2) r1 :: coll(r, rs) else coll(r, rs)
       
   270  case r1 :: rs => coll(r, rs)
       
   271 }
       
   272 
       
   273 def bsimp_ASEQ1(bs: Bits, r1: ARexp, r2: ARexp) : ARexp = r1 match {
       
   274   case AZERO => AZERO
       
   275   case AONE(bs1) => fuse(bs ::: bs1, r2)
       
   276   case _ => ASEQ(bs, r1, r2)
       
   277 }
       
   278 
       
   279 def bsimp_AALTs(bs: Bits, rs: List[ARexp]) : ARexp = rs match {
       
   280   case Nil => AZERO
       
   281   case r::Nil => fuse(bs, r)
       
   282   case _ => AALTS(bs, rs)
       
   283 }
       
   284 
       
   285 def prune(r: ARexp, all: List[Rexp]) : ARexp = r match {
       
   286   case ASEQ(bs, r1, r2) => {
       
   287     val termsTruncated = coll(erase(r2), all)
       
   288     val pruned = prune(r1, termsTruncated) 
       
   289     bsimp_ASEQ1(bs, pruned, r2)
       
   290   } 
       
   291   case AALTS(bs, rs) => {
       
   292     val rsp = rs.map(prune(_, all)).filter(_ != AZERO)
       
   293     bsimp_AALTs(bs, rsp)
       
   294   }
       
   295   case r => 
       
   296     if (all.contains(erase(r))) r else AZERO 
       
   297 }
       
   298 
       
   299 def oneSimp(r: Rexp) : Rexp = r match {
       
   300     case SEQ(ONE, r) => r
       
   301     case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
       
   302     case r => r
       
   303 }
       
   304 
       
   305 def breakup(r: Rexp) : List[Rexp] = r match {
       
   306     case SEQ(r1, r2) => breakup(r1).map(SEQ(_, r2))
       
   307     case ALT(r1, r2) => breakup(r1) ::: breakup(r2)
       
   308     case _ => r::Nil
       
   309 } 
       
   310 
       
   311 def addToAcc(r: ARexp, acc: List[Rexp]) : List[Rexp] =
       
   312   breakup(erase(r)).filterNot(r => acc.contains(oneSimp(r)))
       
   313 
       
   314 def dBStrong(rs: List[ARexp], acc: List[Rexp]) : List[ARexp] = rs match {
       
   315   case Nil => Nil
       
   316   case r::rs => if (acc.contains(erase(r))) dBStrong(rs, acc)
       
   317                  else prune(r, addToAcc(r, acc)) match { 
       
   318                    case AZERO => dBStrong(rs, addToAcc(r, acc) ::: acc)
       
   319                    case r1 => r1 :: dBStrong(rs, addToAcc(r, acc) ::: acc)
       
   320                  }
       
   321 }
       
   322 
       
   323 def bsimp_ASEQ(bs: Bits, r1: ARexp, r2: ARexp) : ARexp = (r1, r2) match {
       
   324   case (AZERO, _) => AZERO
       
   325   case (_, AZERO) => AZERO  
       
   326   case (AONE(bs1), r2) => fuse(bs ::: bs1, r2)
       
   327   case _ => ASEQ(bs, r1, r2)
       
   328 }
       
   329 
       
   330 def bsimp2(r: ARexp): ARexp = r match {
       
   331   case ASEQ(bs1, r1, r2) => bsimp_ASEQ(bs1, bsimp2(r1), bsimp2(r2))
       
   332   case AALTS(bs1, rs) => bsimp_AALTs(bs1, dBStrong(flts(rs.map(bsimp2(_))), Nil))
       
   333   case r => r
       
   334 }
       
   335 
       
   336 def bders_simp2(r: ARexp, s: List[Char]) : ARexp = s match {
       
   337   case Nil => r
       
   338   case c::cs => bders_simp2(bsimp2(bder(c, r)), cs)
       
   339 }
       
   340 
       
   341 
       
   342 def blex_simp2(r: ARexp, s: List[Char]) : Bits = s match {
       
   343   case Nil => if (bnullable(r)) bmkeps(r) 
       
   344               else throw new Exception("Not matched")
       
   345   case c::cs => blex_simp2(bsimp2(bder(c, r)), cs)
       
   346 }
       
   347 
       
   348 def blexing_simp2(r: Rexp, s: String) : Val = 
       
   349   decode(r, blex_simp2(internalise(r), s.toList))
   318 
   350 
   319 //println(blexing_simp(reg, "aab"))
   351 //println(blexing_simp(reg, "aab"))
   320 
   352 
   321 
   353 
   322 // extracts a string from value
   354 // extracts a string from value
   375   case ALT(r1, r2) => 1 + size(r1) + size (r2)
   407   case ALT(r1, r2) => 1 + size(r1) + size (r2)
   376   case SEQ(r1, r2) => 1 + size(r1) + size (r2)
   408   case SEQ(r1, r2) => 1 + size(r1) + size (r2)
   377   case STAR(r1) => 1 + size(r1)
   409   case STAR(r1) => 1 + size(r1)
   378 }
   410 }
   379 
   411 
       
   412 def asize(r: ARexp) : Int = size(erase(r)) 
       
   413 def psize(rs: Set[Rexp]) : Int = rs.map(size(_)).sum
       
   414 
   380 def pp(r: ARexp): String = r match {
   415 def pp(r: ARexp): String = r match {
   381   case ASEQ(_, ACHAR(_, a1),ASEQ(_, r1, r2)) => s"${a1}${pp(r1)}${pp(r2)}"
   416   case ASEQ(_, ACHAR(_, a1),ASEQ(_, r1, r2)) => s"${a1}${pp(r1)}${pp(r2)}"
   382   case ASEQ(_, ACHAR(_, a1),ACHAR(_, a2)) => s"${a1}${a2}"
   417   case ASEQ(_, ACHAR(_, a1),ACHAR(_, a2)) => s"${a1}${a2}"
   383   case AZERO => "0"
   418   case AZERO => "0"
   384   case AONE(_) => "1"
   419   case AONE(_) => "1"
   387   case ASEQ(_, r1, r2) => s"SEQ(${pp(r1)}, ${pp(r2)})"
   422   case ASEQ(_, r1, r2) => s"SEQ(${pp(r1)}, ${pp(r2)})"
   388   case ASTAR(_, r1) => s"(${pp(r1)})*"
   423   case ASTAR(_, r1) => s"(${pp(r1)})*"
   389 }
   424 }
   390 
   425 
   391 
   426 
       
   427 val TEST = STAR("a" | "aa")
       
   428 println(asize(bders(("a" * 0).toList, internalise(TEST))))
       
   429 println(asize(bders(("a" * 1).toList, internalise(TEST))))
       
   430 println(asize(bders(("a" * 2).toList, internalise(TEST))))
       
   431 println(asize(bders(("a" * 3).toList, internalise(TEST))))
       
   432 println(asize(bders(("a" * 4).toList, internalise(TEST))))
       
   433 
       
   434 println(asize(bders_simp(internalise(TEST), ("a" * 0).toList)))
       
   435 println(asize(bders_simp(internalise(TEST), ("a" * 1).toList)))
       
   436 println(asize(bders_simp(internalise(TEST), ("a" * 2).toList)))
       
   437 println(asize(bders_simp(internalise(TEST), ("a" * 3).toList)))
       
   438 println(asize(bders_simp(internalise(TEST), ("a" * 4).toList)))
       
   439 
       
   440 
       
   441 println(asize(bders_simp2(internalise(TEST), ("a" * 0).toList)))
       
   442 println(asize(bders_simp2(internalise(TEST), ("a" * 1).toList)))
       
   443 println(asize(bders_simp2(internalise(TEST), ("a" * 2).toList)))
       
   444 println(asize(bders_simp2(internalise(TEST), ("a" * 3).toList)))
       
   445 println(asize(bders_simp2(internalise(TEST), ("a" * 4).toList)))
       
   446 println(asize(bders_simp2(internalise(TEST), ("a" * 5).toList)))
       
   447 
   392 
   448 
   393 // Some Tests
   449 // Some Tests
   394 //============
   450 //============
   395 val ST = STAR(STAR("a"))
   451 val ST = STAR(STAR("a"))
   396 println(pp(internalise(ST)))
   452 println(pp(internalise(ST)))
   417 
   473 
   418 val STARREG = ((STAR("a") | STAR("aaa")) | STAR("aaaaa")).%
   474 val STARREG = ((STAR("a") | STAR("aaa")) | STAR("aaaaa")).%
   419 
   475 
   420 println(blexing(STARREG, "a" * 3))
   476 println(blexing(STARREG, "a" * 3))
   421 println(blexing_simp(STARREG, "a" * 3))
   477 println(blexing_simp(STARREG, "a" * 3))
       
   478 println(pders(List(STARREG), "a" * 3))
   422 
   479 
   423 
   480 
   424 size(STARREG)
   481 size(STARREG)
   425 size(erase(bders_simp(internalise(STARREG), ("a" * 1).toList)))
   482 size(erase(bders_simp(internalise(STARREG), ("a" * 1).toList)))
   426 size(erase(bders_simp(internalise(STARREG), ("a" * 2).toList)))
   483 size(erase(bders_simp(internalise(STARREG), ("a" * 2).toList)))
   433 size(erase(bders_simp(internalise(STARREG), ("a" * 9).toList)))
   490 size(erase(bders_simp(internalise(STARREG), ("a" * 9).toList)))
   434 size(erase(bders_simp(internalise(STARREG), ("a" * 100).toList)))
   491 size(erase(bders_simp(internalise(STARREG), ("a" * 100).toList)))
   435 size(erase(bders_simp(internalise(STARREG), ("a" * 101).toList)))
   492 size(erase(bders_simp(internalise(STARREG), ("a" * 101).toList)))
   436 size(erase(bders_simp(internalise(STARREG), ("a" * 102).toList)))
   493 size(erase(bders_simp(internalise(STARREG), ("a" * 102).toList)))
   437 size(erase(bders_simp(internalise(STARREG), ("a" * 103).toList)))
   494 size(erase(bders_simp(internalise(STARREG), ("a" * 103).toList)))
       
   495 
       
   496 size(erase(bders_simp2(internalise(STARREG), ("a" * 103).toList)))
       
   497 psize(pders(("a" * 103).toList, Set(STARREG)))
   438 
   498 
   439 println(bders_simp(internalise(STARREG), ("a" * 1).toList))
   499 println(bders_simp(internalise(STARREG), ("a" * 1).toList))
   440 println(bders_simp(internalise(STARREG), ("a" * 2).toList))
   500 println(bders_simp(internalise(STARREG), ("a" * 2).toList))
   441 println(bders_simp(internalise(STARREG), ("a" * 3).toList))
   501 println(bders_simp(internalise(STARREG), ("a" * 3).toList))
   442 println(bders_simp(internalise(STARREG), ("a" * 4).toList))
   502 println(bders_simp(internalise(STARREG), ("a" * 4).toList))
   707 decode(dr, res2)     // gives the value for original value
   767 decode(dr, res2)     // gives the value for original value
   708 
   768 
   709 encode(inj(dr, 'a', decode(dr_der, res1)))
   769 encode(inj(dr, 'a', decode(dr_der, res1)))
   710 
   770 
   711 */
   771 */
       
   772 
       
   773 
       
   774 
       
   775 
       
   776 
       
   777 
       
   778 
       
   779 /*
       
   780 def star(n: Long) = if ((n & 1L) == 1L) "*" else " "
       
   781 def stars(n: Long): String = if (n == 0L) "" else star(n) + " " + stars(n >> 1)
       
   782 def spaces(n: Int) = " " * n
       
   783 
       
   784 
       
   785 def sierpinski(n: Int) {  
       
   786   ((1 << n) - 1 to 0 by -1).foldLeft(1L) {
       
   787     case (bitmap, remainingLines) =>
       
   788       println(spaces(remainingLines) + stars(bitmap))
       
   789       (bitmap << 1) ^ bitmap
       
   790   }
       
   791 }
       
   792 
       
   793 */