# HG changeset patch # User Chengsong # Date 1646263399 0 # Node ID a5376206fd529454803327ef6bde31b110b6eb50 # Parent a73b2e553804bb8dadd6e1337ca4e82063da1907# Parent 43b87bab0dac3ff7fbb64d4e9bc31d382bff2b76 merged diff -r a73b2e553804 -r a5376206fd52 progs/scala/re-bit2.scala --- a/progs/scala/re-bit2.scala Wed Mar 02 23:13:59 2022 +0000 +++ b/progs/scala/re-bit2.scala Wed Mar 02 23:23:19 2022 +0000 @@ -1,3 +1,7 @@ + + + + import scala.language.implicitConversions import scala.language.reflectiveCalls import scala.annotation.tailrec @@ -225,71 +229,11 @@ case r1 :: rs2 => r1 :: flats(rs2) } -def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match { - case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1) - case Nil => Nil - } - - - -def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match { - case Nil => Nil - case (x::xs) => { - val res = erase(x) - if(acc.contains(res)) - distinctBy2(xs, acc) - else - x match { - case ASEQ(bs0, AALTS(bs1, rs), r2) => - val newTerms = distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc) - val rsPrime = projectFirstChild(newTerms) - newTerms match { - case Nil => distinctBy2(xs, acc) - case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc) - case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc) - } - case x => x::distinctBy2(xs, res::acc) - } - } - } - -/* -def strongBsimp(r: ARexp): ARexp = - { - r match { - case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match { - case (AZERO, _) => AZERO - case (_, AZERO) => AZERO - case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) - case (r1s, r2s) => ASEQ(bs1, r1s, r2s) - } - case AALTS(bs1, rs) => { - val rs_simp = rs.map(strongBsimp(_)) - val flat_res = flats(rs_simp) - val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase) - dist_res match { - case Nil => AZERO - case s :: Nil => fuse(bs1, s) - case rs => AALTS(bs1, rs) - } - - } - case r => r - } - } -*/ - def bsimp(r: ARexp): ARexp = r match { case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { case (AZERO, _) => AZERO case (_, AZERO) => AZERO case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) - //case (AALTS(bs, rs), r) => - // AALTS(Nil, rs.map(ASEQ(bs1 ++ bs, _, r))) - //case (ASEQ(bs2, r1, ASTAR(bs3, r2)), ASTAR(bs4, r3)) if erase(r2) == erase(r3) => - // ASEQ(bs1 ++ bs2, r1, ASTAR(bs3, r2)) - //case (ASEQ(bs2, r1, r2), r3) => - // ASEQ(bs1 ++ bs2, r1, ASEQ(Nil, r2, r3)) case (r1s, r2s) => ASEQ(bs1, r1s, r2s) } case AALTS(bs1, rs) => (flts(rs.map(bsimp))).distinctBy(erase) match { @@ -297,7 +241,6 @@ case r::Nil => fuse(bs1, r) case rs => AALTS(bs1, rs) } - //case (ASTAR(bs1, ASTAR(bs2, r))) => bsimp(ASTAR(bs1 ++ bs2, r)) case r => r } @@ -310,12 +253,101 @@ def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { case Nil => if (bnullable(r)) bmkeps(r) else throw new Exception("Not matched") - case c::cs => blex(bsimp(bder(c, r)), cs) + case c::cs => blex_simp(bsimp(bder(c, r)), cs) } def blexing_simp(r: Rexp, s: String) : Val = decode(r, blex_simp(internalise(r), s.toList)) +////////////////////// +// new simplification + +// collects first components of sequences. +def coll(r: Rexp, rs: List[Rexp]) : List[Rexp] = rs match { + case Nil => Nil + case SEQ(r1, r2) :: rs => + if (r == r2) r1 :: coll(r, rs) else coll(r, rs) + case r1 :: rs => coll(r, rs) +} + +def bsimp_ASEQ1(bs: Bits, r1: ARexp, r2: ARexp) : ARexp = r1 match { + case AZERO => AZERO + case AONE(bs1) => fuse(bs ::: bs1, r2) + case _ => ASEQ(bs, r1, r2) +} + +def bsimp_AALTs(bs: Bits, rs: List[ARexp]) : ARexp = rs match { + case Nil => AZERO + case r::Nil => fuse(bs, r) + case _ => AALTS(bs, rs) +} + +def prune(r: ARexp, all: List[Rexp]) : ARexp = r match { + case ASEQ(bs, r1, r2) => { + val termsTruncated = coll(erase(r2), all) + val pruned = prune(r1, termsTruncated) + bsimp_ASEQ1(bs, pruned, r2) + } + case AALTS(bs, rs) => { + val rsp = rs.map(prune(_, all)).filter(_ != AZERO) + bsimp_AALTs(bs, rsp) + } + case r => + if (all.contains(erase(r))) r else AZERO +} + +def oneSimp(r: Rexp) : Rexp = r match { + case SEQ(ONE, r) => r + case SEQ(r1, r2) => SEQ(oneSimp(r1), r2) + case r => r +} + +def breakup(r: Rexp) : List[Rexp] = r match { + case SEQ(r1, r2) => breakup(r1).map(SEQ(_, r2)) + case ALT(r1, r2) => breakup(r1) ::: breakup(r2) + case _ => r::Nil +} + +def addToAcc(r: ARexp, acc: List[Rexp]) : List[Rexp] = + breakup(erase(r)).filterNot(r => acc.contains(oneSimp(r))) + +def dBStrong(rs: List[ARexp], acc: List[Rexp]) : List[ARexp] = rs match { + case Nil => Nil + case r::rs => if (acc.contains(erase(r))) dBStrong(rs, acc) + else prune(r, addToAcc(r, acc)) match { + case AZERO => dBStrong(rs, addToAcc(r, acc) ::: acc) + case r1 => r1 :: dBStrong(rs, addToAcc(r, acc) ::: acc) + } +} + +def bsimp_ASEQ(bs: Bits, r1: ARexp, r2: ARexp) : ARexp = (r1, r2) match { + case (AZERO, _) => AZERO + case (_, AZERO) => AZERO + case (AONE(bs1), r2) => fuse(bs ::: bs1, r2) + case _ => ASEQ(bs, r1, r2) +} + +def bsimp2(r: ARexp): ARexp = r match { + case ASEQ(bs1, r1, r2) => bsimp_ASEQ(bs1, bsimp2(r1), bsimp2(r2)) + case AALTS(bs1, rs) => bsimp_AALTs(bs1, dBStrong(flts(rs.map(bsimp2(_))), Nil)) + case r => r +} + +def bders_simp2(r: ARexp, s: List[Char]) : ARexp = s match { + case Nil => r + case c::cs => bders_simp2(bsimp2(bder(c, r)), cs) +} + + +def blex_simp2(r: ARexp, s: List[Char]) : Bits = s match { + case Nil => if (bnullable(r)) bmkeps(r) + else throw new Exception("Not matched") + case c::cs => blex_simp2(bsimp2(bder(c, r)), cs) +} + +def blexing_simp2(r: Rexp, s: String) : Val = + decode(r, blex_simp2(internalise(r), s.toList)) + //println(blexing_simp(reg, "aab")) @@ -377,6 +409,9 @@ case STAR(r1) => 1 + size(r1) } +def asize(r: ARexp) : Int = size(erase(r)) +def psize(rs: Set[Rexp]) : Int = rs.map(size(_)).sum + def pp(r: ARexp): String = r match { case ASEQ(_, ACHAR(_, a1),ASEQ(_, r1, r2)) => s"${a1}${pp(r1)}${pp(r2)}" case ASEQ(_, ACHAR(_, a1),ACHAR(_, a2)) => s"${a1}${a2}" @@ -389,6 +424,27 @@ } +val TEST = STAR("a" | "aa") +println(asize(bders(("a" * 0).toList, internalise(TEST)))) +println(asize(bders(("a" * 1).toList, internalise(TEST)))) +println(asize(bders(("a" * 2).toList, internalise(TEST)))) +println(asize(bders(("a" * 3).toList, internalise(TEST)))) +println(asize(bders(("a" * 4).toList, internalise(TEST)))) + +println(asize(bders_simp(internalise(TEST), ("a" * 0).toList))) +println(asize(bders_simp(internalise(TEST), ("a" * 1).toList))) +println(asize(bders_simp(internalise(TEST), ("a" * 2).toList))) +println(asize(bders_simp(internalise(TEST), ("a" * 3).toList))) +println(asize(bders_simp(internalise(TEST), ("a" * 4).toList))) + + +println(asize(bders_simp2(internalise(TEST), ("a" * 0).toList))) +println(asize(bders_simp2(internalise(TEST), ("a" * 1).toList))) +println(asize(bders_simp2(internalise(TEST), ("a" * 2).toList))) +println(asize(bders_simp2(internalise(TEST), ("a" * 3).toList))) +println(asize(bders_simp2(internalise(TEST), ("a" * 4).toList))) +println(asize(bders_simp2(internalise(TEST), ("a" * 5).toList))) + // Some Tests //============ @@ -419,6 +475,7 @@ println(blexing(STARREG, "a" * 3)) println(blexing_simp(STARREG, "a" * 3)) +println(pders(List(STARREG), "a" * 3)) size(STARREG) @@ -436,6 +493,9 @@ size(erase(bders_simp(internalise(STARREG), ("a" * 102).toList))) size(erase(bders_simp(internalise(STARREG), ("a" * 103).toList))) +size(erase(bders_simp2(internalise(STARREG), ("a" * 103).toList))) +psize(pders(("a" * 103).toList, Set(STARREG))) + println(bders_simp(internalise(STARREG), ("a" * 1).toList)) println(bders_simp(internalise(STARREG), ("a" * 2).toList)) println(bders_simp(internalise(STARREG), ("a" * 3).toList)) @@ -709,3 +769,25 @@ encode(inj(dr, 'a', decode(dr_der, res1))) */ + + + + + + + +/* +def star(n: Long) = if ((n & 1L) == 1L) "*" else " " +def stars(n: Long): String = if (n == 0L) "" else star(n) + " " + stars(n >> 1) +def spaces(n: Int) = " " * n + + +def sierpinski(n: Int) { + ((1 << n) - 1 to 0 by -1).foldLeft(1L) { + case (bitmap, remainingLines) => + println(spaces(remainingLines) + stars(bitmap)) + (bitmap << 1) ^ bitmap + } +} + +*/ \ No newline at end of file diff -r a73b2e553804 -r a5376206fd52 thys2/PDerivs.thy --- a/thys2/PDerivs.thy Wed Mar 02 23:13:59 2022 +0000 +++ b/thys2/PDerivs.thy Wed Mar 02 23:23:19 2022 +0000 @@ -400,8 +400,9 @@ apply(rule subset_trans) thm pders_STAR apply(rule pders_STAR) - apply(simp) - apply(auto simp add: pders_Set_def)[1] + apply(simp) + apply(auto simp add: pders_Set_def)[1] +(* rest of SEQ case *) apply(simp) apply(rule conjI) apply blast diff -r a73b2e553804 -r a5376206fd52 thys2/Paper/Paper.thy --- a/thys2/Paper/Paper.thy Wed Mar 02 23:13:59 2022 +0000 +++ b/thys2/Paper/Paper.thy Wed Mar 02 23:23:19 2022 +0000 @@ -675,7 +675,7 @@ transforms a bitcoded regular expression into a (standard) regular expression by just erasing the annotated bitsequences. We omit the straightforward definition. For defining the algorithm, we also need - the functions \textit{bnullable} and \textit{bmkeps}, which are the + the functions \textit{bnullable} and \textit{bmkeps}(\textit{s}), which are the ``lifted'' versions of \textit{nullable} and \textit{mkeps} acting on bitcoded regular expressions. % @@ -695,14 +695,16 @@ & \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} $\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$\\ - $\textit{bmkeps}\,(\textit{ALTs}\,bs\,r\!::\!\rs)$ & $\dn$ & - $\textit{if}\;\textit{bnullable}\,r$\\ - & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,r$\\ - & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,\rs$\\ + $\textit{bmkeps}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ & + $bs\,@\,\textit{bmkepss}\,\rs$\\ $\textit{bmkeps}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &\\ \multicolumn{3}{r}{$bs \,@\,\textit{bmkeps}\,r_1\,@\, \textit{bmkeps}\,r_2$}\\ $\textit{bmkeps}\,(\textit{STAR}\,bs\,r)$ & $\dn$ & - $bs \,@\, [\S]$ + $bs \,@\, [\S]$\\ + $\textit{bmkepss}\,(r\!::\!\rs)$ & $\dn$ & + $\textit{if}\;\textit{bnullable}\,r$\\ + & &$\textit{then}\;\textit{bmkeps}\,r$\\ + & &$\textit{else}\;\textit{bmkepss}\,\rs$ \end{tabular} \end{tabular} \end{center} @@ -1040,7 +1042,7 @@ \noindent where we scan the list from left to right (because we have to remove later copies). In @{text distinctBy}, @{text f} is a function and @{text acc} is an accumulator for regular - expressions---essentially a set of regular expression that we have already seen + expressions---essentially a set of regular expressions that we have already seen while scanning the list. Therefore we delete an element, say @{text x}, from the list provided @{text "f x"} is already in the accumulator; otherwise we keep @{text x} and scan the rest of the list but @@ -1182,12 +1184,12 @@ simplified regular expressions in small steps (unlike the @{text bsimp}-function which does the same in a big step), and show that each of the small steps preserves the bitcodes that lead to the final POSIX value. - The rewrite system is organised such that $\leadsto$ is for bitcode regular + The rewrite system is organised such that $\leadsto$ is for bitcoded regular expressions and $\stackrel{s}{\leadsto}$ for lists of bitcoded regular expressions. The former essentially implements the simplifications of @{text "bsimpSEQ"} and @{text flts}; while the latter implements the simplifications in @{text "bsimpALTs"}. We can show that any bitcoded - regular expression reduces in one or more steps to the simplified + regular expression reduces in zero or more steps to the simplified regular expression generated by @{text bsimp}: \begin{lemma}\label{lemone} @@ -1317,9 +1319,9 @@ text {* -In this section let us sketch our argument on why the size of the simplified +In this section let us sketch our argument for why the size of the simplified derivatives with the aggressive simplification function is finite. Suppose -we have a size functions for bitcoded regular expressions, written +we have a size function for bitcoded regular expressions, written @{text "|r|"}, which counts the number of nodes if we regard $r$ as a tree (we omit the precise definition). For this we show that for every $r$ there exists a bound $N$ @@ -1418,7 +1420,7 @@ \cite[Page 14]{Sulzmann2014}. Given the growth of the derivatives in some cases even after aggressive simplification, this - is a hard to believe fact. A similar claim of about a theoretical runtime + is a hard to believe fact. A similar claim about a theoretical runtime of @{text "O(n\<^sup>2)"} is made for the Verbatim lexer, which calculates POSIX matches and is based on derivatives \cite{verbatim}. In this case derivatives are not simplified. Clearly our result of having finite diff -r a73b2e553804 -r a5376206fd52 thys2/SizeBound4.thy --- a/thys2/SizeBound4.thy Wed Mar 02 23:13:59 2022 +0000 +++ b/thys2/SizeBound4.thy Wed Mar 02 23:23:19 2022 +0000 @@ -1019,7 +1019,7 @@ apply(simp only: map_append) apply(simp only: map_single) apply(rule rs_in_rstar) - thm rrewrite_srewrite.intros + thm rrewrite_srewrite.intros apply(rule rrewrite_srewrite.ss6) using as apply(auto simp add: der_correctness Der_def) diff -r a73b2e553804 -r a5376206fd52 thys2/SizeBound6CT.thy diff -r a73b2e553804 -r a5376206fd52 thys2/SizeBoundStrong.thy --- a/thys2/SizeBoundStrong.thy Wed Mar 02 23:13:59 2022 +0000 +++ b/thys2/SizeBoundStrong.thy Wed Mar 02 23:23:19 2022 +0000 @@ -585,17 +585,20 @@ | "bsimp_ASEQ1 bs r1 r2 = ASEQ bs r1 r2" -fun collect where +fun collect :: "rexp \ rexp list \ rexp list" where \collect _ [] = []\ -| \collect erasedR2 ((SEQ r1 r2) # rs) = (if r2 = erasedR2 then r1 # (collect erasedR2 rs) - else collect erasedR2 rs)\ +| \collect erasedR2 ((SEQ r1 r2) # rs) = + (if r2 = erasedR2 then r1 # (collect erasedR2 rs) else collect erasedR2 rs)\ | \collect erasedR2 (r # rs) = collect erasedR2 rs\ fun pruneRexp where - \pruneRexp (ASEQ bs r1 r2) allowableTerms = -( let termsTruncated = (collect (erase r2) allowableTerms) in (let pruned = pruneRexp r1 termsTruncated in (bsimp_ASEQ1 bs pruned r2)) )\ -| \pruneRexp (AALTs bs rs) allowableTerms = (let rsp = (filter (\r. r \ AZERO) (map (\r. pruneRexp r allowableTerms) rs) ) in bsimp_AALTs bs rsp ) + "pruneRexp (ASEQ bs r1 r2) allowableTerms = + (let termsTruncated = (collect (erase r2) allowableTerms) in + (let pruned = pruneRexp r1 termsTruncated in + (bsimp_ASEQ1 bs pruned r2)))" +| \pruneRexp (AALTs bs rs) allowableTerms = + (let rsp = (filter (\r. r \ AZERO) (map (\r. pruneRexp r allowableTerms) rs) ) in bsimp_AALTs bs rsp ) \ | \pruneRexp r allowableTerms = (if (erase r) \ (set allowableTerms) then r else AZERO)\ diff -r a73b2e553804 -r a5376206fd52 thys2/paper.pdf Binary file thys2/paper.pdf has changed