progs/scala/re-bit2.scala
changeset 416 57182b36ec01
parent 360 e752d84225ec
child 436 222333d2bdc2
--- a/progs/scala/re-bit2.scala	Sat Feb 05 18:24:37 2022 +0000
+++ b/progs/scala/re-bit2.scala	Sun Feb 06 00:02:04 2022 +0000
@@ -73,6 +73,7 @@
   case AALTS(bs, r::Nil) => erase(r)
   case AALTS(bs, r::rs) => ALT(erase(r), erase(AALTS(bs, rs)))
   case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+  case ASTAR(cs, ASTAR(ds, r))=> STAR(erase(r))
   case ASTAR(cs, r)=> STAR(erase(r))
 }
 
@@ -217,13 +218,78 @@
   }
 } 
 
+def flats(rs: List[ARexp]): List[ARexp] = rs match {
+      case Nil => Nil
+      case AZERO :: rs1 => flats(rs1)
+      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+      case r1 :: rs2 => r1 :: flats(rs2)
+    }
+
+def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
+    case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
+    case Nil => Nil
+  }
+
+
+
+def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+    case Nil => Nil
+    case (x::xs) => {
+      val res = erase(x)
+      if(acc.contains(res))
+        distinctBy2(xs, acc)
+      else
+        x match {
+          case ASEQ(bs0, AALTS(bs1, rs), r2) => 
+            val newTerms =  distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+            val rsPrime = projectFirstChild(newTerms)
+            newTerms match {
+              case Nil => distinctBy2(xs, acc)
+              case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
+              case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
+            }
+          case x => x::distinctBy2(xs, res::acc)
+        }
+    }
+  } 
+
+/*
+def strongBsimp(r: ARexp): ARexp =
+  {
+    r match {
+      case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+          case (AZERO, _) => AZERO
+          case (_, AZERO) => AZERO
+          case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+          case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+      }
+      case AALTS(bs1, rs) => {
+            val rs_simp = rs.map(strongBsimp(_))
+            val flat_res = flats(rs_simp)
+            val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase)
+            dist_res match {
+              case Nil => AZERO
+              case s :: Nil => fuse(bs1, s)
+              case rs => AALTS(bs1, rs)  
+            }
+          
+      }
+      case r => r
+    }
+  }
+*/
 
 def bsimp(r: ARexp): ARexp = r match {
   case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
       case (AZERO, _) => AZERO
       case (_, AZERO) => AZERO
       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
-      //case (AALTS(bs, rs), r2) => AALTS(bs, rs.map(ASEQ(Nil, _, r2)))
+      //case (AALTS(bs, rs), r) => 
+      //   AALTS(Nil, rs.map(ASEQ(bs1 ++ bs, _, r)))
+      //case (ASEQ(bs2, r1, ASTAR(bs3, r2)), ASTAR(bs4, r3)) if erase(r2) == erase(r3) =>
+      //  ASEQ(bs1 ++ bs2, r1, ASTAR(bs3, r2))
+      //case (ASEQ(bs2, r1, r2), r3)  =>
+      //      ASEQ(bs1 ++ bs2, r1, ASEQ(Nil, r2, r3))
       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   }
   case AALTS(bs1, rs) => (flts(rs.map(bsimp))).distinctBy(erase) match {
@@ -231,9 +297,16 @@
       case r::Nil => fuse(bs1, r)
       case rs => AALTS(bs1, rs)
   }
+  //case (ASTAR(bs1, ASTAR(bs2, r))) => bsimp(ASTAR(bs1 ++ bs2, r))
   case r => r
 }
 
+def bders_simp(r: ARexp, s: List[Char]) : ARexp = s match {
+  case Nil => r
+  case c::cs => bders_simp(bsimp(bder(c, r)), cs)
+}
+
+
 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   case Nil => if (bnullable(r)) bmkeps(r) 
               else throw new Exception("Not matched")
@@ -266,8 +339,151 @@
   case Stars(vs) => vs.flatMap(env)
 }
 
+def nullable(r: Rexp) : Boolean = r match {
+  case ZERO => false
+  case ONE => true
+  case CHAR(_) => false
+  case ALT(r1, r2) => nullable(r1) || nullable(r2)
+  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+  case STAR(_) => true
+}
+
+
+def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
+  case ZERO => Set()
+  case ONE => Set()
+  case CHAR(d) => if (c == d) Set(ONE) else Set()
+  case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2)
+  case SEQ(r1, r2) => {
+    (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++
+    (if (nullable(r1)) pder(c, r2) else Set())
+  }  
+  case STAR(r1) => {
+    for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1))
+  }
+}
+
+def pders(s: List[Char], rs: Set[Rexp]) : Set[Rexp] = s match {
+  case Nil => rs
+  case c::s => pders(s, rs.flatMap(pder(c, _)))
+}
+
+def size(r: Rexp): Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHAR(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size (r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size (r2)
+  case STAR(r1) => 1 + size(r1)
+}
+
+def pp(r: ARexp): String = r match {
+  case ASEQ(_, ACHAR(_, a1),ASEQ(_, r1, r2)) => s"${a1}${pp(r1)}${pp(r2)}"
+  case ASEQ(_, ACHAR(_, a1),ACHAR(_, a2)) => s"${a1}${a2}"
+  case AZERO => "0"
+  case AONE(_) => "1"
+  case ACHAR(_, a) => a.toString
+  case AALTS(_, rs) => s"ALTs(${rs.map(pp(_)).mkString(",")})"
+  case ASEQ(_, r1, r2) => s"SEQ(${pp(r1)}, ${pp(r2)})"
+  case ASTAR(_, r1) => s"(${pp(r1)})*"
+}
+
+
+
 // Some Tests
 //============
+val ST = STAR(STAR("a"))
+println(pp(internalise(ST)))
+println(bders(("a" * 1).toList, internalise(ST)))
+println(bders_simp(internalise(ST), ("a" * 1).toList))
+println(pp(bders(("a" * 1).toList, internalise(ST))))
+println(pp(bders_simp(internalise(ST), ("a" * 1).toList)))
+
+
+
+println(pp(bders_simp(internalise(ST), ("a" * 1).toList)))
+println(pp(bders_simp(internalise(ST), ("a" * 2).toList)))
+println(pp(bders_simp(internalise(ST), ("a" * 3).toList)))
+println(pp(bders_simp(internalise(ST), ("a" * 4).toList)))
+
+println(blexing(ST, "a" * 1))
+println(blexing_simp(ST, "a" * 1))
+println(blexing(ST, "a" * 2))
+println(blexing_simp(ST, "a" * 2))
+println(blexing(ST, "a" * 3))
+println(blexing_simp(ST, "a" * 3))
+println(blexing(ST, "a" * 4))
+println(blexing_simp(ST, "a" * 4))
+
+val STARREG = ((STAR("a") | STAR("aaa")) | STAR("aaaaa")).%
+
+println(blexing(STARREG, "a" * 3))
+println(blexing_simp(STARREG, "a" * 3))
+
+
+size(STARREG)
+size(erase(bders_simp(internalise(STARREG), ("a" * 1).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 2).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 3).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 4).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 5).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 6).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 7).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 8).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 9).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 100).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 101).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 102).toList)))
+size(erase(bders_simp(internalise(STARREG), ("a" * 103).toList)))
+
+println(bders_simp(internalise(STARREG), ("a" * 1).toList))
+println(bders_simp(internalise(STARREG), ("a" * 2).toList))
+println(bders_simp(internalise(STARREG), ("a" * 3).toList))
+println(bders_simp(internalise(STARREG), ("a" * 4).toList))
+
+println(erase(bders_simp(internalise(STARREG), ("a" * 1).toList)))
+println(erase(bders_simp(internalise(STARREG), ("a" * 2).toList)))
+println(erase(bders_simp(internalise(STARREG), ("a" * 3).toList)))
+println(erase(bders_simp(internalise(STARREG), ("a" * 4).toList)))
+
+println(pp(internalise(STARREG)))
+println(pp(bders_simp(internalise(STARREG), ("a" * 1).toList)))
+println(pp(bders_simp(internalise(STARREG), ("a" * 2).toList)))
+println(pp(bders_simp(internalise(STARREG), ("a" * 3).toList)))
+println(pp(bders_simp(internalise(STARREG), ("a" * 4).toList)))
+
+val STARR = (STAR("a") | STAR("aa") | 
+             STAR("aaa") | STAR("aaaa") | 
+             STAR("aaaaa") | STAR("aaaaaa") | 
+             STAR("aaaaaaa") | STAR("aaaaaaaa")).%
+
+size(STARR)
+size(erase(bders_simp(internalise(STARR), ("a" * 1).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 2).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 3).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 4).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 5).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 6).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 7).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 8).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 9).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 1000).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 1001).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 1002).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 1010).toList)))
+size(erase(bders_simp(internalise(STARR), ("a" * 1010 ++ "b").toList)))
+
+val r0 = ("a" | "ab") ~ ("b" | "")
+println(pre_blexing(internalise(r0), "ab"))
+println(blexing(r0, "ab"))
+println(blexing_simp(r0, "ab"))
+
+println(pders("a".toList, Set(r0)))
+println(pders("ab".toList, Set(r0)))
+
+val r00 = ("a" ~ ("b" | "")) | ("ab" ~ ("b" | ""))
+
+
 
 /*
 def time_needed[T](i: Int, code: => T) = {