lexing: changeset 589:86e0203db2da

--- a/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex	Tue Aug 30 12:41:52 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex	Wed Aug 31 12:51:53 2022 +0100
@@ -942,47 +942,74 @@
 As a corollary,
 we link this result with the lemma we proved earlier that 
 \begin{center}
-	$(r, s) \rightarrow v \implies \blexer \; r \; s = v$
+	$(r, s) \rightarrow v \;\; \textit{iff}\;\; \blexer \; r \; s = v$
 \end{center}
 and obtain the corollary that the bit-coded lexer with simplification is
 indeed correctly outputting POSIX lexing result, if such a result exists.
 \begin{corollary}
-	$(r, s) \rightarrow v \implies \blexersimp{r}{s}$
+	$(r, s) \rightarrow v \;\; \textit{iff} \;\; \blexersimp \; r\; s $
 \end{corollary}
 
 \subsection{Comments on the Proof Techniques Used}
-The non-trivial part of proving the correctness of the algorithm with simplification
-compared with not having simplification is that we can no longer use the argument 
-in \cref{flex_retrieve}.
-The function \retrieve needs the cumbersome structure of the (umsimplified)
-annotated regular expression to 
-agree with the structure of the value, but simplification will always mess with the 
-structure.
-
-We also tried to prove $\bsimp{\bderssimp{a}{s}} = \bsimp{a\backslash s}$,
-but this turns out to be not true, A counterexample of this being
-\[ r = [(1+c)\cdot [aa \cdot (1+c)]] \land s = aa
-\]
+Straightforward and simple as the proof may seem,
+the efforts we spent obtaining it was far from trivial.\\
+We initially attempted to re-use the argument 
+in \cref{flex_retrieve}. 
+The problem was that both functions $\inj$ and $\retrieve$ require 
+that the annotated regular expressions stay unsimplified, 
+so that one can 
+correctly compare $v_{i+1}$ and $r_i$  and $v_i$ 
+in diagram \ref{graph:inj} and 
+``fit the key into the lock hole''.
 
-Then we would have $\bsimp{a \backslash s}$ being 
-$_{[]}(_{ZZ}\ONE +  _{ZS}c ) $
-whereas $\bsimp{\bderssimp{a}{s}}$ would be 
-$_{Z}(_{Z} \ONE + _{S} c)$.
-Unfortunately if we apply $\textit{bsimp}$ at different
-stages we will always have this discrepancy, due to 
-whether the $\map \; (\fuse\; bs) \; as$ operation in $\textit{bsimp}$
-is taken at some points will be entirely dependant on when the simplification 
-take place whether there is a larger alternative structure surrounding the 
-alternative being simplified.
-The good thing about $\stackrel{*}{\rightsquigarrow} $ is that it allows
-us not specify how exactly the "atomic" simplification steps $\rightsquigarrow$
-are taken, but simply say that they can be taken to make two similar 
-regular expressions equal, and can be done after interleaving derivatives
-and simplifications.
+\noindent
+We also tried to prove 
+\begin{center}
+$\textit{bsimp} \;\; (\bderssimp{a}{s}) = 
+\textit{bsimp} \;\;  (a\backslash s)$,
+\end{center}
+but this turns out to be not true.
+A counterexample would be
+\[ a = [(_{Z}1+_{S}c)\cdot [bb \cdot (_{Z}1+_{S}c)]] \;\; 
+	\text{and} \;\; s = bb.
+\]
+\noindent
+Then we would have 
+\begin{center}
+	$\textit{bsimp}\;\; ( a \backslash s )$ =
+	$_{[]}(_{ZZ}\ONE +  _{ZS}c ) $
+\end{center}
+\noindent
+whereas 
+\begin{center}
+	$\textit{bsimp} \;\;( \bderssimp{a}{s} )$ =  
+	$_{Z}(_{Z} \ONE + _{S} c)$.
+\end{center}
+Unfortunately, 
+if we apply $\textit{bsimp}$ differently
+we will always have this discrepancy. 
+This is due to 
+the $\map \; (\fuse\; bs) \; as$ operation 
+happening at different locations in the regular expression.\\
+The rewriting relation 
+$\rightsquigarrow^*$ 
+allows us to ignore this discrepancy
+and view the expressions 
+\begin{center}
+	$_{[]}(_{ZZ}\ONE +  _{ZS}c ) $\\
+	and\\
+	$_{Z}(_{Z} \ONE + _{S} c)$
 
-
-Having correctness property is good. But we would also like the lexer to be efficient in 
-some sense, for exampe, not grinding to a halt at certain cases.
-In the next chapter we shall prove that for a given $r$, the internal derivative size is always
+\end{center}
+as equal, because they were both re-written
+from the same expression.\\
+Having correctness property is good. 
+But we would also a guarantee that the lexer is not slow in 
+some sense, for exampe, not grinding to a halt regardless of the input.
+As we have already seen, Sulzmann and Lu's simplification function
+$\simpsulz$ cannot achieve this, because their claim that
+the regular expression size does not grow arbitrary large
+was not true. 
+In the next chapter we shall prove that with our $\simp$, 
+for a given $r$, the internal derivative size is always
 finitely bounded by a constant.
-we would expect in the

--- a/thys4/posix/FBound.thy	Tue Aug 30 12:41:52 2022 +0100
+++ b/thys4/posix/FBound.thy	Wed Aug 31 12:51:53 2022 +0100
@@ -184,23 +184,8 @@
 
 thm bsimp_rerase
 
-lemma cant1:
-  shows "\<lbrakk> bsimp a = b; rerase a = rerase b; a = ASEQ bs r1 r2 \<rbrakk> \<Longrightarrow>
-        \<exists>bs' r1' r2'. b = ASEQ bs' r1' r2' \<and> rerase r1' = rerase r1 \<and> rerase r2' = rerase r2"
-  sorry
 
 
-
-(*"part is less than whole" thm for rrexp, since rrexp is always finite*)
-lemma rrexp_finite1:
-  shows "\<lbrakk> bsimp_ASEQ bs1 (bsimp ra1) (bsimp ra2) = ASEQ bs2 rb1 rb2; ra1 ~1 rb1; ra2 ~1 rb2 \<rbrakk> \<Longrightarrow> rerase ra1 = rerase (bsimp ra1) "
-  apply(case_tac ra1 )
-        apply simp+
-     apply(case_tac rb1)
-           apply simp+
-
-  sorry
-
 lemma unsure_unchanging:
   assumes "bsimp a = bsimp b"
 and "a ~1 b"
@@ -257,33 +242,115 @@
 | "AZERO \<le>1 ASEQ bs r AZERO"
 | "fuse (bs @ bs1) r2 \<le>1 ASEQ bs (AONE bs1) r2"
 | " AALTs bs (rs1 @ rs) \<le>1 AALTs bs (rs1 @( AZERO # rs))"
-| "r2 \<le>1 r1 \<Longrightarrow> AALTs bs (rs1 @ r2 # rs) \<le>1 AALTs bs (rs1 @ r1 # rs)"
 | "AALTs bs1 (rsa @ (map (fuse bs1) rs1) @ rsb) \<le>1 AALTs bs1 (rsa @ (AALTs bs1 rs1) # rsb)"
 | "rerase a1 = rerase a2 \<Longrightarrow> AALTs bs (rsa @ [a1] @ rsb @ rsc) \<le>1 AALTs bs (rsa @ [a1] @ rsb @ [a2] @ rsc) "
 | "r1 \<le>1 r2 \<Longrightarrow> r1  \<le>1 ASEQ bs (AONE bs1) r2"
+| "r2 \<le>1 r1 \<Longrightarrow> AALTs bs (rs1 @ r2 # rs) \<le>1 AALTs bs (rs1 @ r1 # rs)"
+| "r2 \<le>1 r1 \<Longrightarrow> ASEQ bs r  r2 \<le>1 ASEQ bs r r1"
+| "r2 \<le>1 r1 \<Longrightarrow> ASEQ bs r2 r  \<le>1 ASEQ bs r1 r"
+| "r \<le>1 r' \<Longrightarrow> ASTAR bs r \<le>1 ASTAR bs r'"
+| "AZERO \<le>1 AALTs bs []"
+| "fuse bs r \<le>1 AALTs bs [r]"
+| "\<lbrakk>r1' \<le>1 r1;  r2' \<le>1 r2\<rbrakk> \<Longrightarrow> bsimp_ASEQ bs1 r1' r2' \<le>1 ASEQ bs1 r1 r2"
+
+lemma stupid_leq1_1:
+  shows " rerase  r2 \<noteq> RSEQ r (RSEQ RONE (rerase r2))"
+  apply(induct r2)
+        apply simp+
+  done
+
+lemma leq1_size:
+  shows "r1 \<le>1 r2 \<Longrightarrow> asize r1 \<le> asize r2"
+  apply (induct rule: leq1.induct)
+               apply simp+
+  apply (metis asize_rsize le_SucI le_add2 plus_1_eq_Suc rerase_fuse)
+           apply simp
+          apply simp
+  
+          apply (metis (mono_tags, lifting) asize_rsize comp_apply dual_order.eq_iff le_SucI map_eq_conv rerase_fuse)
+  apply simp+
+  apply (metis Suc_n_not_le_n asize_rsize linorder_le_cases rerase_fuse)
+  apply(case_tac "r1' = AZERO")
+   apply simp
+  apply(case_tac "\<exists>bs1. r1' = AONE bs1")
+   apply(erule exE)
+   apply simp
+  apply (metis asize_rsize le_SucI rerase_fuse trans_le_add2)
+  by (smt (verit, best) Suc_eq_plus1 ab_semigroup_add_class.add_ac(1) add.commute add.right_neutral add_cancel_right_right add_mono_thms_linordered_semiring(1) asize.simps(5) asize_rsize nat_add_left_cancel_le order.trans order_trans plus_1_eq_Suc rSEQ_mono rerase_bsimp_ASEQ rsize.simps(5))
+
+
+
+lemma size_deciding_equality:
+  shows "asize r1 \<noteq> asize r2 \<Longrightarrow> r1 \<noteq> r2 "
+  apply auto
+  done
+
+lemma size_deciding_equality2:
+  shows "rerase r1 = rerase r2 \<Longrightarrow> asize r1 = asize r2"
+  by (metis asize_rsize)
+
+lemma size_deciding_equality3:
+  shows "asize r1 \<noteq> asize r2 \<Longrightarrow> rerase r1 \<noteq> rerase r2"
+  by (metis asize_rsize)
+
+lemma size_deciding_equality4:
+  shows "rerase a1 = r2 \<Longrightarrow> asize a1 = rsize r2"
+  by (metis asize_rsize)
+
+lemma size_deciding_equality5:
+  shows "asize a1 \<noteq> rsize r2 \<Longrightarrow>rerase a1 \<noteq> r2"
+  by (metis asize_rsize)
+
+lemma leq1_trans1:
+  shows " r1 \<le>1 r2 \<Longrightarrow>  rerase r1 \<noteq> RSEQ r (rerase r2)"
+  apply(induct rule: leq1.induct)
+               apply simp+
+  using rerase_fuse stupid_leq1_1 apply presburger
+           apply simp+
+        apply(subgoal_tac "asize r1 \<noteq> rsize (RSEQ r (RSEQ RONE (rerase r2)))")
+  using size_deciding_equality5 apply blast
+  using asize_rsize leq1_size apply fastforce
+       apply simp+
+  apply(subgoal_tac "rsize (rerase (fuse bs ra)) \<noteq> rsize (RSEQ r (RALTS [rerase ra]))")
+  
+  apply force
+  apply simp
+  apply(simp add: asize_rsize)
+  by (simp add: rerase_fuse size_deciding_equality4)
+
+  
+
+  
 
 lemma leq1_less_or_equal: shows
 "r1 \<le>1 r2 \<Longrightarrow> r1 = r2 \<or> rerase r1 \<noteq> rerase r2"
   apply(induct rule: leq1.induct)
-          apply simp+
-  sorry
-
-lemma bsimp_leq1:
-  shows "bsimp r \<le>1 r"
-
-  sorry
+               apply simp
+              apply simp
+  apply simp
+            apply (simp add: rerase_fuse)
+           apply simp
+  apply simp
+  using r_finite1 rerase_fuse apply force
+         apply simp
+        apply simp
+        apply(case_tac "r1 = r2")
+         apply simp
+        apply simp
+  
+  using leq1_trans1 apply presburger
+       apply simp
+      apply simp
+     apply simp
+    apply simp
+   apply simp
+  apply simp
+  using r_finite1 rerase_fuse by auto
 
 
-lemma arexpfiniteaux4_aux:
-  shows" \<lbrakk>rerase (bsimp_AALTs bs1 (distinctWith (flts (map bsimp rs)) eq1 {})) = RALTS (map rerase rs) \<rbrakk>
-       \<Longrightarrow> map rerase (map bsimp rs) = map rerase rs"
-  apply(induct rs)
-   apply simp
-  apply simp
-  apply auto
-   prefer 2
+
 
-  sorry
+
 
 lemma arexpfiniteaux4:
   shows"
@@ -348,6 +415,15 @@
 
 
 
+lemma bsimp_reduces:
+  shows "bsimp r \<le>1 r"
+  apply(induct rule: bsimp.induct)
+        apply simp
+  
+  sorry
+
+
+
 lemma bitcodes_unchanging:
   shows "\<lbrakk>bsimp a = b; rerase a = rerase b \<rbrakk> \<Longrightarrow> a = b"
   apply(induction a arbitrary: b)
@@ -376,6 +452,7 @@
   shows "bsimp (bsimp a) = bsimp a"
   using bitcodes_unchanging bsimp_rerase rsimp_idem by auto
 
+
 unused_thms
 
 end

author	Chengsong
	Wed, 31 Aug 2022 12:51:53 +0100
changeset 589	86e0203db2da
parent 588	80e1114d6421
child 590	988e92a70704

ChengsongTanPhdThesis/Chapters/Bitcoded2.tex		file \| annotate \| diff \| comparison \| revisions
thys4/posix/FBound.thy		file \| annotate \| diff \| comparison \| revisions