# HG changeset patch # User Chengsong # Date 1661946713 -3600 # Node ID 86e0203db2da4f84a81864f04fc5719a1b3d712b # Parent 80e1114d6421ce73f791bc3513e3043d7b1dfc08 chap4 finished diff -r 80e1114d6421 -r 86e0203db2da ChengsongTanPhdThesis/Chapters/Bitcoded2.tex --- a/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex Tue Aug 30 12:41:52 2022 +0100 +++ b/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex Wed Aug 31 12:51:53 2022 +0100 @@ -942,47 +942,74 @@ As a corollary, we link this result with the lemma we proved earlier that \begin{center} - $(r, s) \rightarrow v \implies \blexer \; r \; s = v$ + $(r, s) \rightarrow v \;\; \textit{iff}\;\; \blexer \; r \; s = v$ \end{center} and obtain the corollary that the bit-coded lexer with simplification is indeed correctly outputting POSIX lexing result, if such a result exists. \begin{corollary} - $(r, s) \rightarrow v \implies \blexersimp{r}{s}$ + $(r, s) \rightarrow v \;\; \textit{iff} \;\; \blexersimp \; r\; s $ \end{corollary} \subsection{Comments on the Proof Techniques Used} -The non-trivial part of proving the correctness of the algorithm with simplification -compared with not having simplification is that we can no longer use the argument -in \cref{flex_retrieve}. -The function \retrieve needs the cumbersome structure of the (umsimplified) -annotated regular expression to -agree with the structure of the value, but simplification will always mess with the -structure. - -We also tried to prove $\bsimp{\bderssimp{a}{s}} = \bsimp{a\backslash s}$, -but this turns out to be not true, A counterexample of this being -\[ r = [(1+c)\cdot [aa \cdot (1+c)]] \land s = aa -\] +Straightforward and simple as the proof may seem, +the efforts we spent obtaining it was far from trivial.\\ +We initially attempted to re-use the argument +in \cref{flex_retrieve}. +The problem was that both functions $\inj$ and $\retrieve$ require +that the annotated regular expressions stay unsimplified, +so that one can +correctly compare $v_{i+1}$ and $r_i$ and $v_i$ +in diagram \ref{graph:inj} and +``fit the key into the lock hole''. -Then we would have $\bsimp{a \backslash s}$ being -$_{[]}(_{ZZ}\ONE + _{ZS}c ) $ -whereas $\bsimp{\bderssimp{a}{s}}$ would be -$_{Z}(_{Z} \ONE + _{S} c)$. -Unfortunately if we apply $\textit{bsimp}$ at different -stages we will always have this discrepancy, due to -whether the $\map \; (\fuse\; bs) \; as$ operation in $\textit{bsimp}$ -is taken at some points will be entirely dependant on when the simplification -take place whether there is a larger alternative structure surrounding the -alternative being simplified. -The good thing about $\stackrel{*}{\rightsquigarrow} $ is that it allows -us not specify how exactly the "atomic" simplification steps $\rightsquigarrow$ -are taken, but simply say that they can be taken to make two similar -regular expressions equal, and can be done after interleaving derivatives -and simplifications. +\noindent +We also tried to prove +\begin{center} +$\textit{bsimp} \;\; (\bderssimp{a}{s}) = +\textit{bsimp} \;\; (a\backslash s)$, +\end{center} +but this turns out to be not true. +A counterexample would be +\[ a = [(_{Z}1+_{S}c)\cdot [bb \cdot (_{Z}1+_{S}c)]] \;\; + \text{and} \;\; s = bb. +\] +\noindent +Then we would have +\begin{center} + $\textit{bsimp}\;\; ( a \backslash s )$ = + $_{[]}(_{ZZ}\ONE + _{ZS}c ) $ +\end{center} +\noindent +whereas +\begin{center} + $\textit{bsimp} \;\;( \bderssimp{a}{s} )$ = + $_{Z}(_{Z} \ONE + _{S} c)$. +\end{center} +Unfortunately, +if we apply $\textit{bsimp}$ differently +we will always have this discrepancy. +This is due to +the $\map \; (\fuse\; bs) \; as$ operation +happening at different locations in the regular expression.\\ +The rewriting relation +$\rightsquigarrow^*$ +allows us to ignore this discrepancy +and view the expressions +\begin{center} + $_{[]}(_{ZZ}\ONE + _{ZS}c ) $\\ + and\\ + $_{Z}(_{Z} \ONE + _{S} c)$ - -Having correctness property is good. But we would also like the lexer to be efficient in -some sense, for exampe, not grinding to a halt at certain cases. -In the next chapter we shall prove that for a given $r$, the internal derivative size is always +\end{center} +as equal, because they were both re-written +from the same expression.\\ +Having correctness property is good. +But we would also a guarantee that the lexer is not slow in +some sense, for exampe, not grinding to a halt regardless of the input. +As we have already seen, Sulzmann and Lu's simplification function +$\simpsulz$ cannot achieve this, because their claim that +the regular expression size does not grow arbitrary large +was not true. +In the next chapter we shall prove that with our $\simp$, +for a given $r$, the internal derivative size is always finitely bounded by a constant. -we would expect in the diff -r 80e1114d6421 -r 86e0203db2da thys4/posix/FBound.thy --- a/thys4/posix/FBound.thy Tue Aug 30 12:41:52 2022 +0100 +++ b/thys4/posix/FBound.thy Wed Aug 31 12:51:53 2022 +0100 @@ -184,23 +184,8 @@ thm bsimp_rerase -lemma cant1: - shows "\ bsimp a = b; rerase a = rerase b; a = ASEQ bs r1 r2 \ \ - \bs' r1' r2'. b = ASEQ bs' r1' r2' \ rerase r1' = rerase r1 \ rerase r2' = rerase r2" - sorry - -(*"part is less than whole" thm for rrexp, since rrexp is always finite*) -lemma rrexp_finite1: - shows "\ bsimp_ASEQ bs1 (bsimp ra1) (bsimp ra2) = ASEQ bs2 rb1 rb2; ra1 ~1 rb1; ra2 ~1 rb2 \ \ rerase ra1 = rerase (bsimp ra1) " - apply(case_tac ra1 ) - apply simp+ - apply(case_tac rb1) - apply simp+ - - sorry - lemma unsure_unchanging: assumes "bsimp a = bsimp b" and "a ~1 b" @@ -257,33 +242,115 @@ | "AZERO \1 ASEQ bs r AZERO" | "fuse (bs @ bs1) r2 \1 ASEQ bs (AONE bs1) r2" | " AALTs bs (rs1 @ rs) \1 AALTs bs (rs1 @( AZERO # rs))" -| "r2 \1 r1 \ AALTs bs (rs1 @ r2 # rs) \1 AALTs bs (rs1 @ r1 # rs)" | "AALTs bs1 (rsa @ (map (fuse bs1) rs1) @ rsb) \1 AALTs bs1 (rsa @ (AALTs bs1 rs1) # rsb)" | "rerase a1 = rerase a2 \ AALTs bs (rsa @ [a1] @ rsb @ rsc) \1 AALTs bs (rsa @ [a1] @ rsb @ [a2] @ rsc) " | "r1 \1 r2 \ r1 \1 ASEQ bs (AONE bs1) r2" +| "r2 \1 r1 \ AALTs bs (rs1 @ r2 # rs) \1 AALTs bs (rs1 @ r1 # rs)" +| "r2 \1 r1 \ ASEQ bs r r2 \1 ASEQ bs r r1" +| "r2 \1 r1 \ ASEQ bs r2 r \1 ASEQ bs r1 r" +| "r \1 r' \ ASTAR bs r \1 ASTAR bs r'" +| "AZERO \1 AALTs bs []" +| "fuse bs r \1 AALTs bs [r]" +| "\r1' \1 r1; r2' \1 r2\ \ bsimp_ASEQ bs1 r1' r2' \1 ASEQ bs1 r1 r2" + +lemma stupid_leq1_1: + shows " rerase r2 \ RSEQ r (RSEQ RONE (rerase r2))" + apply(induct r2) + apply simp+ + done + +lemma leq1_size: + shows "r1 \1 r2 \ asize r1 \ asize r2" + apply (induct rule: leq1.induct) + apply simp+ + apply (metis asize_rsize le_SucI le_add2 plus_1_eq_Suc rerase_fuse) + apply simp + apply simp + + apply (metis (mono_tags, lifting) asize_rsize comp_apply dual_order.eq_iff le_SucI map_eq_conv rerase_fuse) + apply simp+ + apply (metis Suc_n_not_le_n asize_rsize linorder_le_cases rerase_fuse) + apply(case_tac "r1' = AZERO") + apply simp + apply(case_tac "\bs1. r1' = AONE bs1") + apply(erule exE) + apply simp + apply (metis asize_rsize le_SucI rerase_fuse trans_le_add2) + by (smt (verit, best) Suc_eq_plus1 ab_semigroup_add_class.add_ac(1) add.commute add.right_neutral add_cancel_right_right add_mono_thms_linordered_semiring(1) asize.simps(5) asize_rsize nat_add_left_cancel_le order.trans order_trans plus_1_eq_Suc rSEQ_mono rerase_bsimp_ASEQ rsize.simps(5)) + + + +lemma size_deciding_equality: + shows "asize r1 \ asize r2 \ r1 \ r2 " + apply auto + done + +lemma size_deciding_equality2: + shows "rerase r1 = rerase r2 \ asize r1 = asize r2" + by (metis asize_rsize) + +lemma size_deciding_equality3: + shows "asize r1 \ asize r2 \ rerase r1 \ rerase r2" + by (metis asize_rsize) + +lemma size_deciding_equality4: + shows "rerase a1 = r2 \ asize a1 = rsize r2" + by (metis asize_rsize) + +lemma size_deciding_equality5: + shows "asize a1 \ rsize r2 \rerase a1 \ r2" + by (metis asize_rsize) + +lemma leq1_trans1: + shows " r1 \1 r2 \ rerase r1 \ RSEQ r (rerase r2)" + apply(induct rule: leq1.induct) + apply simp+ + using rerase_fuse stupid_leq1_1 apply presburger + apply simp+ + apply(subgoal_tac "asize r1 \ rsize (RSEQ r (RSEQ RONE (rerase r2)))") + using size_deciding_equality5 apply blast + using asize_rsize leq1_size apply fastforce + apply simp+ + apply(subgoal_tac "rsize (rerase (fuse bs ra)) \ rsize (RSEQ r (RALTS [rerase ra]))") + + apply force + apply simp + apply(simp add: asize_rsize) + by (simp add: rerase_fuse size_deciding_equality4) + + + + lemma leq1_less_or_equal: shows "r1 \1 r2 \ r1 = r2 \ rerase r1 \ rerase r2" apply(induct rule: leq1.induct) - apply simp+ - sorry - -lemma bsimp_leq1: - shows "bsimp r \1 r" - - sorry + apply simp + apply simp + apply simp + apply (simp add: rerase_fuse) + apply simp + apply simp + using r_finite1 rerase_fuse apply force + apply simp + apply simp + apply(case_tac "r1 = r2") + apply simp + apply simp + + using leq1_trans1 apply presburger + apply simp + apply simp + apply simp + apply simp + apply simp + apply simp + using r_finite1 rerase_fuse by auto -lemma arexpfiniteaux4_aux: - shows" \rerase (bsimp_AALTs bs1 (distinctWith (flts (map bsimp rs)) eq1 {})) = RALTS (map rerase rs) \ - \ map rerase (map bsimp rs) = map rerase rs" - apply(induct rs) - apply simp - apply simp - apply auto - prefer 2 + - sorry + lemma arexpfiniteaux4: shows" @@ -348,6 +415,15 @@ +lemma bsimp_reduces: + shows "bsimp r \1 r" + apply(induct rule: bsimp.induct) + apply simp + + sorry + + + lemma bitcodes_unchanging: shows "\bsimp a = b; rerase a = rerase b \ \ a = b" apply(induction a arbitrary: b) @@ -376,6 +452,7 @@ shows "bsimp (bsimp a) = bsimp a" using bitcodes_unchanging bsimp_rerase rsimp_idem by auto + unused_thms end