lexing: changeset 124:5378ddbd1381

--- a/thys/Paper/Paper.thy	Mon Mar 07 22:20:15 2016 +0000
+++ b/thys/Paper/Paper.thy	Tue Mar 08 00:34:15 2016 +0000
@@ -36,7 +36,7 @@
   Sequ ("_ @ _" [78,77] 63) and
   injval ("inj _ _ _" [79,77,79] 76) and 
   mkeps ("mkeps _" [79] 76) and 
-  projval ("proj _ _ _" [1000,77,1000] 77) and 
+  (*projval ("proj _ _ _" [1000,77,1000] 77) and*) 
   length ("len _" [78] 73) and
   matcher ("lexer _ _" [78,78] 77) and
 
@@ -293,7 +293,7 @@
   \noindent Given the equations in \eqref{SemDer}, it is a relatively easy
   exercise in mechanical reasoning to establish that
 
-  \begin{proposition}\mbox{}\\ 
+  \begin{proposition}\label{derprop}\mbox{}\\ 
   \begin{tabular}{ll}
   @{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if
   @{thm (rhs) nullable_correctness}, and \\ 
@@ -644,7 +644,7 @@
   analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}.\qed
   \end{proof}
 
-  \begin{lemma}
+  \begin{lemma}\label{lemmkeps}
   @{thm[mode=IfThen] PMatch_mkeps}
   \end{lemma}
 
@@ -652,16 +652,89 @@
   By routine induction on @{term r}.\qed 
   \end{proof}
 
-  \begin{lemma}
+  \noindent
+  The central lemma for our POSIX relation is that the @{text inj}-function
+  preserves POSIX values.
+
+  \begin{lemma}\label{PMatch2}
   @{thm[mode=IfThen] PMatch2_roy_version}
   \end{lemma}
 
+  \begin{proof}
+
+  By induction on @{text r}. Suppose @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are
+  two subcases, namely @{text "(a)"} \mbox{@{term "v = Left v'"}} and @{term
+  "s \<in> der c r\<^sub>1 \<rightarrow> v'"}; and @{text "(b)"} @{term "v = Right v'"}, @{term
+  "s \<notin> L (der c r\<^sub>1)"} and @{term "s \<in> der c r\<^sub>2 \<rightarrow> v'"}. In @{text "(a)"} we
+  know @{term "s \<in> der c r\<^sub>1 \<rightarrow> v'"}, from which we can infer @{term "(c # s)
+  \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v'"} by induction hypothesis and hence @{term "(c #
+  s) \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> injval (ALT r\<^sub>1 r\<^sub>2) c (Left v')"} as needed. Similarly
+  in subcase @{text "(b)"} where, however, in addition we have to use
+  Prop~\ref{derprop}(2) in order to infer @{term "c # s \<notin> L r\<^sub>1"} from @{term
+  "s \<notin> L (der c r\<^sub>1)"}.
+
+  Suppose @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases:
+  
+  \begin{quote}
+  \begin{description}
+  \item[@{text "(a)"}] @{term "v = Left (Seq v\<^sub>1 v\<^sub>2)"} and @{term "nullable r\<^sub>1"} 
+  \item[@{text "(b)"}] @{term "v = Right v\<^sub>1"} and @{term "nullable r\<^sub>1"} 
+  \item[@{text "(c)"}] @{term "v = Seq v\<^sub>1 v\<^sub>2"} and @{term "\<not> nullable r\<^sub>1"} 
+  \end{description}
+  \end{quote}
+
+  \noindent For @{text "(a)"} we know @{term "s\<^sub>1 \<in> der c r\<^sub>1 \<rightarrow> v\<^sub>1"} and
+  @{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} as well as
+  
+  \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
+
+  \noindent From the latter we can infer by Prop~\ref{derprop}(2):
+
+  \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
+
+  \noindent We can use the induction hypothesis for @{text "r\<^sub>1"} to obtain
+  @{term "(c # s\<^sub>1) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"}. This allows us to infer
+  @{term "((c # s\<^sub>1) @ s\<^sub>2) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"}. The case @{text "(c)"}
+  is similarly.
+
+  For @{text "(b)"} we know @{term "s \<in> der c r\<^sub>2 \<rightarrow> v\<^sub>1"} and 
+  @{term "s\<^sub>1 @ s\<^sub>2 \<notin> L (SEQ (der c r\<^sub>1) r\<^sub>2)"}. From the former
+  we have @{term "(c # s) \<in> r\<^sub>2 \<rightarrow> (injval r\<^sub>2 c v\<^sub>1)"} by induction hypothesis
+  for @{term "r\<^sub>2"}. From the latter we can infer
+
+  \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = c # s \<and> s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
+
+  \noindent By Lem.~\ref{lemmkeps} we know @{term "[] \<in> r\<^sub>1 \<rightarrow> (mkeps r\<^sub>1)"}
+  holds. Putting this all together, we can conclude with @{term "(c #
+  s) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (mkeps r\<^sub>1) (injval r\<^sub>2 c v\<^sub>1)"}.
+
+  Finally suppose @{term "r = STAR r\<^sub>1"}. This case is very similar to the
+  sequence case, except that we need to ensure that @{term "flat (injval r\<^sub>1
+  c v\<^sub>1) \<noteq> []"}. This follows by Lem.~\ref{posixbasic} from @{term "(c # s\<^sub>1)
+  \<in> r' \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} (which in turn follows from @{term "s\<^sub>1 \<in> der c
+  r\<^sub>1 \<rightarrow> v\<^sub>1"} and the induction hypothesis).\qed
+  \end{proof}
+
+  \noindent
+  With Lem.~\ref{PMatch2} in place, it is completely routine to establish
+  that the Sulzmann and Lu lexer satisfies its specification (returning
+  an ``error'' iff the string is not in the language of the regular expression,
+  and returning a unique POSIX value iff the string \emph{is} in the language):
+
   \begin{theorem}\mbox{}\smallskip\\
   \begin{tabular}{ll}
   (1) & @{thm (lhs) lex_correct1a} if and only if @{thm (rhs) lex_correct1a}\\
-  (2) & @{thm (lhs) lex_correct3a} if and only if @{thm (rhs) lex_correct3a}\\
+  (2) & @{thm (lhs) lex_correct3b} if and only if @{thm (rhs) lex_correct3b}\\
   \end{tabular}
   \end{theorem}
+
+  \begin{proof}
+  By induction on @{term s}.\qed  
+  \end{proof}
+
+  This concludes our correctness proof. Note that we have not changed the
+  algorithm by Sulzmann and Lu, but introduced our own specification for
+  what a correct result---a POSIX value---should be.
 *}
 
 section {* The Argument by Sulzmmann and Lu *}
@@ -681,7 +754,6 @@
   \noindent
   We have also introduced a slightly restricted version of this relation
   where the last rule is restricted so that @{term "flat v \<noteq> []"}.
-  This relation for \emph{non-problematic} is written @{term "\<Turnstile> v : r"}.
   \bigskip
 
 
@@ -691,46 +763,7 @@
   \noindent
   Our version of Sulzmann's ordering relation
 
-  \begin{center}
-  \begin{tabular}{c}
-  @{thm[mode=Rule] ValOrd.intros(2)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'"]} \qquad
-  @{thm[mode=Rule] ValOrd.intros(1)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "v\<^sub>1" "r\<^sub>1"]}\medskip\\
-  @{thm[mode=Rule] ValOrd.intros(3)[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]} \qquad
-  @{thm[mode=Rule] ValOrd.intros(4)[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ 
-  @{thm[mode=Rule] ValOrd.intros(5)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "r\<^sub>1"]} \qquad
-  @{thm[mode=Rule] ValOrd.intros(6)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'"  "r\<^sub>2"]} \medskip\\
-  @{thm[mode=Axiom] ValOrd.intros(7)}\qquad
-  @{thm[mode=Axiom] ValOrd.intros(8)[of "c"]}\medskip\\
-  @{thm[mode=Rule] ValOrd.intros(9)[of "v" "vs" "r"]}\qquad
-  @{thm[mode=Rule] ValOrd.intros(10)[of "v" "vs" "r"]}\medskip\\
-  @{thm[mode=Rule] ValOrd.intros(11)[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\medskip\\
-  @{thm[mode=Rule] ValOrd.intros(12)[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\qquad
-  @{thm[mode=Axiom] ValOrd.intros(13)[of "r"]}\medskip\\
-  \end{tabular}
-  \end{center}
 
-  \noindent
-  A prefix of a string s
-
-  \begin{center}
-  \begin{tabular}{c}
-  @{thm prefix_def[of "s\<^sub>1" "s\<^sub>2"]}
-  \end{tabular}
-  \end{center}
-
-  \noindent
-  Values and non-problematic values
-
-  \begin{center}
-  \begin{tabular}{c}
-  @{thm Values_def}\medskip\\
-  \end{tabular}
-  \end{center}
-
-
-  \noindent
-  The point is that for a given @{text s} and @{text r} there are only finitely many
-  non-problematic values.
 *} 
 
 text {* 
@@ -739,8 +772,6 @@
   
   @{thm L_flat_Prf}
 
-  @{thm L_flat_NPrf}
-
   @{thm[mode=IfThen] mkeps_nullable}
 
   @{thm[mode=IfThen] mkeps_flat}
@@ -753,18 +784,8 @@
   
   @{thm[mode=IfThen] PMatch1(2)}
 
-  @{thm[mode=IfThen] PMatch1N}
-
   @{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]}
 
-  \medskip
-  \noindent
-  This is the main theorem that lets us prove that the algorithm is correct according to
-  @{term "s \<in> r \<rightarrow> v"}:
-
-  @{thm[mode=IfThen] PMatch2}
-
-  \mbox{}\bigskip
   
   \noindent {\bf Proof} The proof is by induction on the definition of
   @{const der}. Other inductions would go through as well. The
@@ -845,41 +866,6 @@
   \bibliographystyle{plain}
   \bibliography{root}
 
-  \section{Roy's Rules}
-
-   \newcommand{\abs}[1]{\mid\!\! #1\!\! \mid}
-   %%\newcommand{\mts}{\textit{``''}
-   \newcommand{\tl}{\ \triangleleft\ }
-   $$\inferrule[]{Void \tl \epsilon}{}
-            \quad\quad
-     \inferrule[]{Char\ c \tl Lit\ c}{}
-   $$
-   $$\inferrule
-       {v_1 \tl r_1}
-       {Left\ v_1 \tl r_1 + r_2}
-   \quad\quad
-     \inferrule[]
-       { v_2 \tl r_2 \\ \abs{v_2}\ \not\in\ L(r_1)}
-       {Right\ v_2 \tl r_1 + r_2}
-   $$
-   $$
-   \inferrule
-       {v_1 \tl r_1\\
-        v_2 \tl r_2\\
-        s \in\  L(r_1\backslash\! \abs{v_1}) \ \land\
-        \abs{v_2}\!\backslash s\ \epsilon\ L(r_2)
-        \ \Rightarrow\ s = []
-       }
-       {(v_1, v_2) \tl r_1 \cdot r_2}
-   $$
-   $$\inferrule
-         { v \tl r \\ vs \tl r^* \\ \abs{v}\ \not=\ []} 
-         { (v :: vs) \tl r^* }
-   \quad\quad
-       \inferrule{}
-         { []  \tl r^* }       
-   $$
-
 *}

--- a/thys/ReStar.thy	Mon Mar 07 22:20:15 2016 +0000
+++ b/thys/ReStar.thy	Tue Mar 08 00:34:15 2016 +0000
@@ -759,6 +759,24 @@
 apply(simp add: der_correctness Der_def)
 by (simp add: lex_correct1a)
 
+lemma lex_correct3b:
+  shows "s \<in> L r \<longleftrightarrow> (\<exists>!v. matcher r s = Some(v) \<and> s \<in> r \<rightarrow> v)"
+using assms
+apply(induct s arbitrary: r)
+apply(simp)
+apply (metis PMatch_mkeps nullable_correctness)
+apply(drule_tac x="der a r" in meta_spec)
+apply(simp add: der_correctness Der_def)
+apply(case_tac "matcher (der a r) s = None")
+apply(simp)
+apply(simp)
+apply(clarify)
+apply(rule iffI)
+apply(auto)
+apply(rule PMatch2_roy_version)
+apply(simp)
+using PMatch1(1) by auto
+
 lemma lex_correct5:
   assumes "s \<in> L r"
   shows "s \<in> r \<rightarrow> (matcher2 r s)"

Binary file thys/paper.pdf has changed

author	Christian Urban <christian dot urban at kcl dot ac dot uk>
	Tue, 08 Mar 2016 00:34:15 +0000
changeset 124	5378ddbd1381
parent 123	1bee7006b92b
child 125	ff0844860981

thys/Paper/Paper.thy		file \| annotate \| diff \| comparison \| revisions
thys/ReStar.thy		file \| annotate \| diff \| comparison \| revisions
thys/paper.pdf		file \| annotate \| diff \| comparison \| revisions