lexing: changeset 180:42ffaca7c85e

--- a/thys/Paper/Paper.thy	Mon May 16 15:20:23 2016 +0100
+++ b/thys/Paper/Paper.thy	Tue May 17 03:47:33 2016 +0100
@@ -54,7 +54,7 @@
   F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and
   simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and
   simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and
-  slexer ("lexer\<^sup>+ _ _" [78,78] 77) and
+  slexer ("lexer\<^sup>+" 1000) and
 
   ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and
   ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77)
@@ -653,7 +653,7 @@
    We can prove that given a string @{term s} and regular expression @{term
    r}, the POSIX value @{term v} is uniquely determined by @{term "s \<in> r \<rightarrow> v"}.
 
-  \begin{theorem}
+  \begin{theorem}\label{posixdeterm}
   @{thm[mode=IfThen] Posix_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]}
   \end{theorem}
 
@@ -778,7 +778,7 @@
   the null value @{term "None"} iff the string is not in the language of the regular expression,
   and returning a unique POSIX value iff the string \emph{is} in the language):
 
-  \begin{theorem}\mbox{}\smallskip\\
+  \begin{theorem}\mbox{}\smallskip\\\label{lexercorrect}
   \begin{tabular}{ll}
   (1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\
   (2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\
@@ -928,26 +928,61 @@
   is then recursively called with the simplified derivative, but before
   we inject the character @{term c} into the value @{term v}, we need to rectify
   @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness
-  of @{const "slexer"}, we need to show that simplification preserves the language
-  and the relation between simplification and our posix definition:
+  of @{term "slexer"}, we need to show that simplification preserves the language
+  and simplification preserves our POSIX relation once the value is rectified
+  (recall @{const "simp"} generates a regular expression, rectification function pair):
 
-  \begin{lemma}\mbox{}\smallskip\\
+  \begin{lemma}\mbox{}\smallskip\\\label{slexeraux}
   \begin{tabular}{ll}
   (1) & @{thm L_fst_simp[symmetric]}\\
   (2) & @{thm[mode=IfThen] Posix_simp}
   \end{tabular}
   \end{lemma}
 
-  \noindent
-  We can now prove that
+  \begin{proof}
+  Both are by induction on @{text r}. There is no interesting case for the
+  first statement. For the second statement of interest are the @{term "r = SEQ r\<^sub>1 r\<^sub>2"}
+  and @{term "r = ALT r\<^sub>1 r\<^sub>2"} cases.
+  
+  \end{proof}
+
+  \noindent We can now prove relatively straightforwardly that the
+  optimised lexer produce the expected result:
 
   \begin{theorem}
   @{thm slexer_correctness}
   \end{theorem}
 
-  \noindent
-  holds but for lack of space refer the reader to our mechanisation for details.
+  \begin{proof} By induction on @{term s} generalising over @{term
+  r}. The case @{term "[]"} is trivial. For the cons-case suppose the
+  string is of the form @{term "c # s"}. By induction hypothesis we
+  know @{term "slexer r s = lexer r s"} holds for all @{term r} (in
+  particular for @{term "r"} being the derivative @{term "der c
+  r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, @{term
+  "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification
+  function, @{term "snd (simp (der c r))"}.  We distinguish the cases
+  whether (*) @{term "s \<in> L (der c r)"} or not. In the first case we
+  have by Thm~\ref{lexercorrect}(2) a value @{term "v"} so that @{term
+  "lexer (der c r) s = Some v"} and @{term "s \<in> der c r \<rightarrow> v"} hold.
+  By Lem~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s
+  \<in> L r\<^sub>s"} holds.  Hence we know by Thm~\ref{lexercorrect}(2) that
+  there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and
+  @{term "s \<in> r\<^sub>s \<rightarrow> v'"}. From the latter we know by
+  Lem~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds.
+  By the uniqueness of the POSIX relation (Thm~\ref{posixdeterm}) we
+  can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the 
+  rectification function applied to @{term "v'"}
+  produces the original @{term "v"}.  Now the case follows by the
+  definitions of @{const lexer} and @{const slexer}.
 
+  In the second case where @{term "s \<notin> L (der c r)"} we have that
+  @{term "lexer (der c r) s = None"} by Thm~\ref{lexercorrect}(1).  We
+  also know by Lem~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence
+  @{term "lexer r\<^sub>s s = None"} by Thm~\ref{lexercorrect}(1) and
+  by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can
+  conclude in this case too.\qed   
+
+  \end{proof} 
 *}
 
 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}

--- a/thys/Simplifying.thy	Mon May 16 15:20:23 2016 +0100
+++ b/thys/Simplifying.thy	Tue May 17 03:47:33 2016 +0100
@@ -82,29 +82,118 @@
   assumes "s \<in> (fst (simp r)) \<rightarrow> v" 
   shows "s \<in> r \<rightarrow> ((snd (simp r)) v)"
 using assms
-apply(induct r arbitrary: s v rule: simp.induct)
-apply(simp_all)
-apply(auto)[1]
-using Posix_elims(1) apply blast
-apply (simp add: Posix_ALT1)
-apply (metis L.simps(1) L_fst_simp Posix_ALT2 empty_iff)
-apply (smt F_ALT.simps(1) F_ALT.simps(2) L_fst_simp Posix_ALT1 Posix_ALT2 Posix_elims(4))
-apply(auto)[1]
-apply (metis (no_types, lifting) Nil_is_append_conv Posix_SEQ Posix_elims(2))
-apply(subst append_Nil2[symmetric])
-apply(rule Posix_SEQ)
-apply(simp)
-using Posix_ONE apply blast
-apply blast
-apply(subst append_Nil[symmetric])
-apply(rule Posix_SEQ)
-using Posix_ONE apply blast
-apply blast
-apply(auto)[1]
-apply (metis L.simps(2) L_fst_simp ex_in_conv insert_iff)
-apply(erule Posix_elims)
-apply(auto)
-using L_fst_simp Posix_SEQ by auto
+proof(induct r arbitrary: s v rule: rexp.induct)
+  case (ALT r1 r2 s v)
+  have IH1: "\<And>s v. s \<in> fst (simp r1) \<rightarrow> v \<Longrightarrow> s \<in> r1 \<rightarrow> snd (simp r1) v" by fact
+  have IH2: "\<And>s v. s \<in> fst (simp r2) \<rightarrow> v \<Longrightarrow> s \<in> r2 \<rightarrow> snd (simp r2) v" by fact
+  have as: "s \<in> fst (simp (ALT r1 r2)) \<rightarrow> v" by fact
+  consider (ZERO_ZERO) "fst (simp r1) = ZERO" "fst (simp r2) = ZERO"
+         | (ZERO_NZERO) "fst (simp r1) = ZERO" "fst (simp r2) \<noteq> ZERO"
+         | (NZERO_ZERO) "fst (simp r1) \<noteq> ZERO" "fst (simp r2) = ZERO"
+         | (NZERO_NZERO) "fst (simp r1) \<noteq> ZERO" "fst (simp r2) \<noteq> ZERO" by auto
+  then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v" 
+    proof(cases)
+      case (ZERO_ZERO)
+      with as have "s \<in> ZERO \<rightarrow> v" by simp 
+      then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v" by (rule Posix_elims(1))
+    next
+      case (ZERO_NZERO)
+      with as have "s \<in> fst (simp r2) \<rightarrow> v" by simp
+      with IH2 have "s \<in> r2 \<rightarrow> snd (simp r2) v" by simp
+      moreover
+      from ZERO_NZERO have "fst (simp r1) = ZERO" by simp
+      then have "L (fst (simp r1)) = {}" by simp
+      then have "L r1 = {}" using L_fst_simp by simp
+      then have "s \<notin> L r1" by simp 
+      ultimately have "s \<in> ALT r1 r2 \<rightarrow> Right (snd (simp r2) v)" by (rule Posix_ALT2)
+      then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v"
+      using ZERO_NZERO by simp
+    next
+      case (NZERO_ZERO)
+      with as have "s \<in> fst (simp r1) \<rightarrow> v" by simp
+      with IH1 have "s \<in> r1 \<rightarrow> snd (simp r1) v" by simp
+      then have "s \<in> ALT r1 r2 \<rightarrow> Left (snd (simp r1) v)" by (rule Posix_ALT1) 
+      then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v" using NZERO_ZERO by simp
+    next
+      case (NZERO_NZERO)
+      with as have "s \<in> ALT (fst (simp r1)) (fst (simp r2)) \<rightarrow> v" by simp
+      then consider (Left) v1 where "v = Left v1" "s \<in> (fst (simp r1)) \<rightarrow> v1"
+                  | (Right) v2 where "v = Right v2" "s \<in> (fst (simp r2)) \<rightarrow> v2" "s \<notin> L (fst (simp r1))"
+                  by (erule_tac Posix_elims(4)) 
+      then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v"
+      proof(cases)
+        case (Left)
+        then have "v = Left v1" "s \<in> r1 \<rightarrow> (snd (simp r1) v1)" using IH1 by simp_all
+        then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v" using NZERO_NZERO
+          by (simp_all add: Posix_ALT1)
+      next 
+        case (Right)
+        then have "v = Right v2" "s \<in> r2 \<rightarrow> (snd (simp r2) v2)" "s \<notin> L r1" using IH2 L_fst_simp by simp_all
+        then show "s \<in> ALT r1 r2 \<rightarrow> snd (simp (ALT r1 r2)) v" using NZERO_NZERO
+          by (simp_all add: Posix_ALT2)
+      qed
+    qed
+next
+  case (SEQ r1 r2 s v)
+  have IH1: "\<And>s v. s \<in> fst (simp r1) \<rightarrow> v \<Longrightarrow> s \<in> r1 \<rightarrow> snd (simp r1) v" by fact
+  have IH2: "\<And>s v. s \<in> fst (simp r2) \<rightarrow> v \<Longrightarrow> s \<in> r2 \<rightarrow> snd (simp r2) v" by fact
+  have as: "s \<in> fst (simp (SEQ r1 r2)) \<rightarrow> v" by fact
+  consider (ONE_ONE) "fst (simp r1) = ONE" "fst (simp r2) = ONE"
+         | (ONE_NONE) "fst (simp r1) = ONE" "fst (simp r2) \<noteq> ONE"
+         | (NONE_ONE) "fst (simp r1) \<noteq> ONE" "fst (simp r2) = ONE"
+         | (NONE_NONE) "fst (simp r1) \<noteq> ONE" "fst (simp r2) \<noteq> ONE" by auto
+  then show "s \<in> SEQ r1 r2 \<rightarrow> snd (simp (SEQ r1 r2)) v" 
+  proof(cases)
+      case (ONE_ONE)
+      with as have b: "s \<in> ONE \<rightarrow> v" by simp 
+      from b have "s \<in> r1 \<rightarrow> snd (simp r1) v" using IH1 ONE_ONE by simp
+      moreover
+      from b have c: "s = []" "v = Void" using Posix_elims(2) by auto
+      moreover
+      have "[] \<in> ONE \<rightarrow> Void" by (simp add: Posix_ONE)
+      then have "[] \<in> fst (simp r2) \<rightarrow> Void" using ONE_ONE by simp
+      then have "[] \<in> r2 \<rightarrow> snd (simp r2) Void" using IH2 by simp
+      ultimately have "([] @ []) \<in> SEQ r1 r2 \<rightarrow> Seq (snd (simp r1) Void) (snd (simp r2) Void)"
+        using Posix_SEQ by blast 
+      then show "s \<in> SEQ r1 r2 \<rightarrow> snd (simp (SEQ r1 r2)) v" using c ONE_ONE by simp
+    next
+      case (ONE_NONE)
+      with as have b: "s \<in> fst (simp r2) \<rightarrow> v" by simp 
+      from b have "s \<in> r2 \<rightarrow> snd (simp r2) v" using IH2 ONE_NONE by simp
+      moreover
+      have "[] \<in> ONE \<rightarrow> Void" by (simp add: Posix_ONE)
+      then have "[] \<in> fst (simp r1) \<rightarrow> Void" using ONE_NONE by simp
+      then have "[] \<in> r1 \<rightarrow> snd (simp r1) Void" using IH1 by simp
+      moreover
+      from ONE_NONE(1) have "L (fst (simp r1)) = {[]}" by simp
+      then have "L r1 = {[]}" by (simp add: L_fst_simp[symmetric])
+      ultimately have "([] @ s) \<in> SEQ r1 r2 \<rightarrow> Seq (snd (simp r1) Void) (snd (simp r2) v)"
+        by(rule_tac Posix_SEQ) auto
+      then show "s \<in> SEQ r1 r2 \<rightarrow> snd (simp (SEQ r1 r2)) v" using ONE_NONE by simp
+    next
+      case (NONE_ONE)
+        with as have "s \<in> fst (simp r1) \<rightarrow> v" by simp
+        with IH1 have "s \<in> r1 \<rightarrow> snd (simp r1) v" by simp
+      moreover
+        have "[] \<in> ONE \<rightarrow> Void" by (simp add: Posix_ONE)
+        then have "[] \<in> fst (simp r2) \<rightarrow> Void" using NONE_ONE by simp
+        then have "[] \<in> r2 \<rightarrow> snd (simp r2) Void" using IH2 by simp
+      ultimately have "(s @ []) \<in> SEQ r1 r2 \<rightarrow> Seq (snd (simp r1) v) (snd (simp r2) Void)"
+        by(rule_tac Posix_SEQ) auto
+      then show "s \<in> SEQ r1 r2 \<rightarrow> snd (simp (SEQ r1 r2)) v" using NONE_ONE by simp
+    next
+      case (NONE_NONE)
+      with as have "s \<in> SEQ (fst (simp r1)) (fst (simp r2)) \<rightarrow> v" by simp
+      then obtain s1 s2 v1 v2 where eqs: "s = s1 @ s2" "v = Seq v1 v2"
+                     "s1 \<in> (fst (simp r1)) \<rightarrow> v1" "s2 \<in> (fst (simp r2)) \<rightarrow> v2"
+                     "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)"
+                     by (erule_tac Posix_elims(5)) (auto simp add: L_fst_simp[symmetric]) 
+      then have "s1 \<in> r1 \<rightarrow> (snd (simp r1) v1)" "s2 \<in> r2 \<rightarrow> (snd (simp r2) v2)"
+        using IH1 IH2 by auto             
+      then show "s \<in> SEQ r1 r2 \<rightarrow> snd (simp (SEQ r1 r2)) v" using eqs NONE_NONE
+        by(auto intro: Posix_SEQ)
+    qed
+qed (simp_all)
 
 
 lemma slexer_correctness:

Binary file thys/paper.pdf has changed

author	Christian Urban <christian dot urban at kcl dot ac dot uk>
	Tue, 17 May 2016 03:47:33 +0100
changeset 180	42ffaca7c85e
parent 179	85766e408c66
child 181	162f112b814b

thys/Paper/Paper.thy		file \| annotate \| diff \| comparison \| revisions
thys/Simplifying.thy		file \| annotate \| diff \| comparison \| revisions
thys/paper.pdf		file \| annotate \| diff \| comparison \| revisions