--- a/thys2/blexer1.sc Sat Feb 05 15:30:45 2022 +0000
+++ b/thys2/blexer1.sc Sat Feb 05 18:23:16 2022 +0000
@@ -442,6 +442,35 @@
case x => x::distinctBy2(xs, res::acc)
}
}
+ }
+
+
+
+ def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+ case Nil => Nil
+ case (x::xs) => {
+ val res = erase(x)
+ if(acc.contains(res))
+ distinctBy3(xs, acc)
+ else
+ x match {
+ case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+ val newTerms = distinctBy3(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+ val rsPrime = projectFirstChild(newTerms)
+ newTerms match {
+ case Nil => distinctBy3(xs, acc)
+ case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
+ case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
+ }
+ case x => x::distinctBy3(xs, res::acc)
+ }
+ }
+ }
+
+ def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+ case _ => r::Nil
}
def prettyRexp(r: Rexp) : String = r match {
@@ -499,6 +528,27 @@
decode(r, bit_code)
}
+ def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ decode(r, strong_blex_simp(internalise(r), s.toList))
+ }
+
+ def strong_blex_simp(r: ARexp, s: List[Char]) :Bits = s match {
+ case Nil => {
+ if (bnullable(r)) {
+ //println(asize(r))
+ val bits = mkepsBC(r)
+ bits
+ }
+ else
+ throw new Exception("Not matched")
+ }
+ case c::cs => {
+ val der_res = bder(c,r)
+ val simp_res = strongBsimp(der_res)
+ strong_blex_simp(simp_res, cs)
+ }
+ }
+
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
@@ -627,13 +677,26 @@
}
}
-
+
}
+def allCharSeq(r: Rexp) : Boolean = r match {
+ case CHAR(c) => true
+ case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ case _ => false
+}
+
+def flattenSeq(r: Rexp) : String = r match {
+ case CHAR(c) => c.toString
+ case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ case _ => throw new Error("flatten unflattenable rexp")
+}
+
def shortRexpOutput(r: Rexp) : String = r match {
case CHAR(c) => c.toString
case ONE => "1"
case ZERO => "0"
+ case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
@@ -649,7 +712,8 @@
val bits = mkepsBC(r)
bits
}
- else throw new Exception("Not matched")
+ else
+ throw new Exception("Not matched")
}
case c::cs => {
val der_res = bder(c,r)
@@ -724,7 +788,7 @@
//because they don't include the original regular term before they are pdered.
//now only pderas is fixed.
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
- def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r))
+ def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
def awidth(r: Rexp) : Int = r match {
case CHAR(c) => 1
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
@@ -754,10 +818,10 @@
// @arg(doc = "small tests")
-val STARREG = (((STAR("a") | (STAR("aa")) | STAR(STAR("aaa") | STAR("aaaa")) | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%).%).%
-//(STAR("a") | ( STAR("aaa")) | STAR("aaaa") | STAR("aaaaa")).%.%.%
+val STARREG = (((STAR("a") | (STAR("aa")) | STAR("aaa") | STAR("aaaa") | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%))
+//(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa")).%).%).%
-@main
+// @main
def small() = {
@@ -768,22 +832,44 @@
// val d = (lex_simp(NOTREG, prog0.toList))
// println(d)
val pderSTAR = pderUNIV(STARREG)
+
val refSize = pderSTAR.map(size(_)).sum
- println(refSize)
- for(i <- 10 to 100){
+ println("different partial derivative terms:")
+ pderSTAR.foreach(r => r match {
+
+ case SEQ(head, rstar) =>
+ println(shortRexpOutput(head) ++ "~STARREG")
+ case STAR(rstar) =>
+ println("STARREG")
+
+ }
+ )
+ println("the total number of terms is")
+ //println(refSize)
+ println(pderSTAR.size)
+ for(i <- List(1, 10, 100, 400, 800, 840, 900) ){
val prog0 = "a" * i
- println(s"test: $prog0")
+ //println(s"test: $prog0")
+ println(s"testing with $i a's" )
val bd = bdersSimp(prog0, STARREG)//DB
val sbd = bdersSimpS(prog0, STARREG)//strongDB
+ val subTerms = breakIntoTerms(erase(sbd))
+ val subTermsLarge = breakIntoTerms(erase(bd))
+
+ println(s"subterms of regex with strongDB: ${subTerms.length}, standard DB: ${subTermsLarge.length}")
+ println("the number of distinct subterms for bsimp with strongDB and standardDB")
+ println(subTerms.distinct.size)
+ println(subTermsLarge.distinct.size)
// println(shortRexpOutput(erase(sbd)))
// println(shortRexpOutput(erase(bd)))
- println("pdersize, original, strongSimp, simp")
- println(refSize, size(STARREG), asize(sbd), asize(bd))
+
+ println("pdersize, original, strongSimp")
+ println(refSize, size(STARREG), asize(sbd), asize(bd))
- val vres = blexing_simp( STARREG, prog0)
- println(vres)
+ // val vres = strong_blexing_simp( STARREG, prog0)
+ // println(vres)
}
// println(vs.length)
// println(vs)
@@ -794,6 +880,8 @@
// println(lexing_simp(WHILE_REGS, prog1))
}
+small()
+
// // Bigger Tests
// //==============