# HG changeset patch
# User Chengsong
# Date 1644085396 0
# Node ID 1234e6bd4fd16a690b49765260d0db228d3a83db
# Parent  b85f8e28fbd8b0b0f5319d9a546627243a57dfd7
blexernew

diff -r b85f8e28fbd8 -r 1234e6bd4fd1 thys2/blexer1.sc
--- a/thys2/blexer1.sc  Sat Feb 05 15:30:45 2022 +0000
+++ b/thys2/blexer1.sc  Sat Feb 05 18:23:16 2022 +0000
@@ -442,6 +442,35 @@
         case x => x::distinctBy2(xs, res::acc)
       }
     }
+  }
+
+
+
+  def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+    case Nil => Nil
+    case (x::xs) => {
+      val res = erase(x)
+      if (acc.contains(res))
+        distinctBy3(xs, acc)
+      else
+        x match {
+          case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+            val newTerms = distinctBy3(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+            val rsPrime = projectFirstChild(newTerms)
+            newTerms match {
+              case Nil => distinctBy3(xs, acc)
+              case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
+              case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
+            }
+          case x => x::distinctBy3(xs, res::acc)
+        }
+    }
+  }
+
+  def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+    case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+    case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+    case _ => r::Nil
   }
 
   def prettyRexp(r: Rexp) : String = r match {
@@ -499,6 +528,27 @@
     decode(r, bit_code)
   }
 
+  def strong_blexing_simp(r: Rexp, s: String) : Val = {
+    decode(r, strong_blex_simp(internalise(r), s.toList))
+  }
+
+  def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+    case Nil => {
+      if (bnullable(r)) {
+        //println(asize(r))
+        val bits = mkepsBC(r)
+        bits
+      }
+      else
+        throw new Exception("Not matched")
+    }
+    case c::cs => {
+      val der_res = bder(c,r)
+      val simp_res = strongBsimp(der_res)
+      strong_blex_simp(simp_res, cs)
+    }
+  }
+
 
 
   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
@@ -627,13 +677,26 @@
     }
   }
 
-  
+  }
+def allCharSeq(r: Rexp) : Boolean = r match {
+  case CHAR(c) => true
+  case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+  case _ => false
+}
+
+def flattenSeq(r: Rexp) : String = r match {
+  case CHAR(c) => c.toString
+  case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+  case _ => throw new Error("flatten unflattenable rexp")
+}
+
 
 
 def shortRexpOutput(r: Rexp) : String = r match {
     case CHAR(c) => c.toString
     case ONE => "1"
     case ZERO => "0"
+    case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
     case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
     case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
     case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
@@ -649,7 +712,8 @@
         val bits = mkepsBC(r)
         bits
       }
-      else throw new Exception("Not matched")
+      else
+        throw new Exception("Not matched")
     }
     case c::cs => {
       val der_res = bder(c,r)
@@ -724,7 +788,7 @@
   //because they don't include the original regular term before they are pdered.
   //now only pderas is fixed.
   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
-  def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r))
+  def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
   def awidth(r: Rexp) : Int = r match {
     case CHAR(c) => 1
     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
@@ -754,10 +818,10 @@
 
 
 // @arg(doc = "small tests")
-val STARREG = (((STAR("a") | (STAR("aa")) | STAR(STAR("aaa") | STAR("aaaa")) | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%).%).%
-//(STAR("a") | ( STAR("aaa")) | STAR("aaaa") | STAR("aaaaa")).%.%.%
+val STARREG = (((STAR("a") | (STAR("aa")) | STAR("aaa") | STAR("aaaa") | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%))
+//(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa")).%).%).%
 
-@main
+// @main
 def small() = {
 
 
@@ -768,22 +832,44 @@
   // val d = (lex_simp(NOTREG, prog0.toList))
   // println(d)
   val pderSTAR = pderUNIV(STARREG)
+
   val refSize = pderSTAR.map(size(_)).sum
-  println(refSize)
-  for(i <- 10 to 100){
+  println("different partial derivative terms:")
+  pderSTAR.foreach(r => r match {
+
+      case SEQ(head, rstar) =>
+        println(shortRexpOutput(head) ++ "~STARREG")
+      case STAR(rstar) =>
+        println("STARREG")
+
+    }
+  )
+  println("the total number of terms is")
+  //println(refSize)
+  println(pderSTAR.size)
+  for(i <- List(1, 10, 100, 400, 800, 840, 900) ){
     val prog0 = "a" * i
-    println(s"test: $prog0")
+    //println(s"test: $prog0")
+    println(s"testing with $i a's" )
     val bd = bdersSimp(prog0, STARREG)//DB
     val sbd = bdersSimpS(prog0, STARREG)//strongDB
+    val subTerms = breakIntoTerms(erase(sbd))
+    val subTermsLarge = breakIntoTerms(erase(bd))
+
+    println(s"subterms of regex with strongDB: ${subTerms.length}, standard DB: ${subTermsLarge.length}")
+    println("the number of distinct subterms for bsimp with strongDB and standardDB")
+    println(subTerms.distinct.size)
+    println(subTermsLarge.distinct.size)
     // println(shortRexpOutput(erase(sbd)))
     // println(shortRexpOutput(erase(bd)))
-    println("pdersize, original, strongSimp, simp")
-    println(refSize, size(STARREG), asize(sbd), asize(bd))
+
+    println("pdersize, original, strongSimp")
+    println(refSize, size(STARREG), asize(sbd), asize(bd))
 
-    val vres = blexing_simp( STARREG, prog0)
-    println(vres)
+    // val vres = strong_blexing_simp( STARREG, prog0)
+    // println(vres)
   }
   // println(vs.length)
   // println(vs)
@@ -794,6 +880,8 @@
   // println(lexing_simp(WHILE_REGS, prog1))
 }
 
+small()
+
 //
 // Bigger Tests
 //==============
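A minimal usage sketch of the entry points this patch adds, kept outside the diff itself: it assumes the definitions in thys2/blexer1.sc are loaded (e.g. in an Ammonite session) and only reuses names defined in that file (STARREG, bdersSimp, bdersSimpS, asize, erase, breakIntoTerms, strong_blexing_simp); the val names below are illustrative.

  // sketch, not part of the changeset: compare standard vs strong simplification on "a"*100
  val s   = "a" * 100
  val bd  = bdersSimp(s, STARREG)    // derivatives simplified with the standard bsimp
  val sbd = bdersSimpS(s, STARREG)   // derivatives simplified with the new strongBsimp/distinctBy3
  println((asize(bd), asize(sbd)))                     // sizes of the two simplified annotated regexes
  println(breakIntoTerms(erase(sbd)).distinct.size)    // distinct subterms left after strong simplification
  // println(strong_blexing_simp(STARREG, s))          // lexing value via the new strong_blex_simp pipeline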