lexing: comparison thys/Paper/Paper.thy

equal deleted inserted replaced

-:1bee7006b92b
+:5378ddbd1381
 ders_syn ("_\\_" [79, 1000] 76) and
 flat ("|_|" [75] 74) and
 Sequ ("_ @ _" [78,77] 63) and
 injval ("inj _ _ _" [79,77,79] 76) and
 mkeps ("mkeps _" [79] 76) and
-projval ("proj _ _ _" [1000,77,1000] 77) and
+(*projval ("proj _ _ _" [1000,77,1000] 77) and*)
 length ("len _" [78] 73) and
 matcher ("lexer _ _" [78,78] 77) and
 Prf ("_ : _" [75,75] 75) and
 PMatch ("'(_, _') \<rightarrow> _" [63,75,75] 75)
 \end{center}
 \noindent Given the equations in \eqref{SemDer}, it is a relatively easy
 exercise in mechanical reasoning to establish that
-\begin{proposition}\mbox{}\\
+\begin{proposition}\label{derprop}\mbox{}\\
 \begin{tabular}{ll}
 @{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if
 @{thm (rhs) nullable_correctness}, and \\
 @{text "(2)"} & @{thm[mode=IfThen] der_correctness}.
 \end{tabular}
 \begin{proof}
 By induction on the definition of @{term "s \<in> r \<rightarrow> v\<^sub>1"} and a case
 analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}.\qed
 \end{proof}
-\begin{lemma}
+\begin{lemma}\label{lemmkeps}
 @{thm[mode=IfThen] PMatch_mkeps}
 \end{lemma}
 \begin{proof}
 By routine induction on @{term r}.\qed
 \end{proof}
-\begin{lemma}
+\noindent
+The central lemma for our POSIX relation is that the @{text inj}-function
+preserves POSIX values.
+\begin{lemma}\label{PMatch2}
 @{thm[mode=IfThen] PMatch2_roy_version}
 \end{lemma}
+\begin{proof}
+By induction on @{text r}. Suppose @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are
+two subcases, namely @{text "(a)"} \mbox{@{term "v = Left v'"}} and @{term
+"s \<in> der c r\<^sub>1 \<rightarrow> v'"}; and @{text "(b)"} @{term "v = Right v'"}, @{term
+"s \<notin> L (der c r\<^sub>1)"} and @{term "s \<in> der c r\<^sub>2 \<rightarrow> v'"}. In @{text "(a)"} we
+know @{term "s \<in> der c r\<^sub>1 \<rightarrow> v'"}, from which we can infer @{term "(c # s)
+\<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v'"} by induction hypothesis and hence @{term "(c #
+s) \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> injval (ALT r\<^sub>1 r\<^sub>2) c (Left v')"} as needed. Similarly
+in subcase @{text "(b)"} where, however, in addition we have to use
+Prop~\ref{derprop}(2) in order to infer @{term "c # s \<notin> L r\<^sub>1"} from @{term
+"s \<notin> L (der c r\<^sub>1)"}.
+Suppose @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases:
+\begin{quote}
+\begin{description}
+\item[@{text "(a)"}] @{term "v = Left (Seq v\<^sub>1 v\<^sub>2)"} and @{term "nullable r\<^sub>1"}
+\item[@{text "(b)"}] @{term "v = Right v\<^sub>1"} and @{term "nullable r\<^sub>1"}
+\item[@{text "(c)"}] @{term "v = Seq v\<^sub>1 v\<^sub>2"} and @{term "\<not> nullable r\<^sub>1"}
+\end{description}
+\end{quote}
+\noindent For @{text "(a)"} we know @{term "s\<^sub>1 \<in> der c r\<^sub>1 \<rightarrow> v\<^sub>1"} and
+@{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} as well as
+\[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
+\noindent From the latter we can infer by Prop~\ref{derprop}(2):
+\[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
+\noindent We can use the induction hypothesis for @{text "r\<^sub>1"} to obtain
+@{term "(c # s\<^sub>1) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"}. This allows us to infer
+@{term "((c # s\<^sub>1) @ s\<^sub>2) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"}. The case @{text "(c)"}
+is similarly.
+For @{text "(b)"} we know @{term "s \<in> der c r\<^sub>2 \<rightarrow> v\<^sub>1"} and
+@{term "s\<^sub>1 @ s\<^sub>2 \<notin> L (SEQ (der c r\<^sub>1) r\<^sub>2)"}. From the former
+we have @{term "(c # s) \<in> r\<^sub>2 \<rightarrow> (injval r\<^sub>2 c v\<^sub>1)"} by induction hypothesis
+for @{term "r\<^sub>2"}. From the latter we can infer
+\[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = c # s \<and> s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
+\noindent By Lem.~\ref{lemmkeps} we know @{term "[] \<in> r\<^sub>1 \<rightarrow> (mkeps r\<^sub>1)"}
+holds. Putting this all together, we can conclude with @{term "(c #
+s) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (mkeps r\<^sub>1) (injval r\<^sub>2 c v\<^sub>1)"}.
+Finally suppose @{term "r = STAR r\<^sub>1"}. This case is very similar to the
+sequence case, except that we need to ensure that @{term "flat (injval r\<^sub>1
+c v\<^sub>1) \<noteq> []"}. This follows by Lem.~\ref{posixbasic} from @{term "(c # s\<^sub>1)
+\<in> r' \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} (which in turn follows from @{term "s\<^sub>1 \<in> der c
+r\<^sub>1 \<rightarrow> v\<^sub>1"} and the induction hypothesis).\qed
+\end{proof}
+\noindent
+With Lem.~\ref{PMatch2} in place, it is completely routine to establish
+that the Sulzmann and Lu lexer satisfies its specification (returning
+an ``error'' iff the string is not in the language of the regular expression,
+and returning a unique POSIX value iff the string \emph{is} in the language):
 \begin{theorem}\mbox{}\smallskip\\
 \begin{tabular}{ll}
 (1) & @{thm (lhs) lex_correct1a} if and only if @{thm (rhs) lex_correct1a}\\
-(2) & @{thm (lhs) lex_correct3a} if and only if @{thm (rhs) lex_correct3a}\\
+(2) & @{thm (lhs) lex_correct3b} if and only if @{thm (rhs) lex_correct3b}\\
 \end{tabular}
 \end{theorem}
+\begin{proof}
+By induction on @{term s}.\qed
+\end{proof}
+This concludes our correctness proof. Note that we have not changed the
+algorithm by Sulzmann and Lu, but introduced our own specification for
+what a correct result---a POSIX value---should be.
 *}
 section {* The Argument by Sulzmmann and Lu *}
 section {* Conclusion *}
 \noindent
 We have also introduced a slightly restricted version of this relation
 where the last rule is restricted so that @{term "flat v \<noteq> []"}.
-This relation for \emph{non-problematic} is written @{term "\<Turnstile> v : r"}.
 \bigskip
 \noindent
 \noindent
 Our version of Sulzmann's ordering relation
-\begin{center}
-\begin{tabular}{c}
-@{thm[mode=Rule] ValOrd.intros(2)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'"]} \qquad
-@{thm[mode=Rule] ValOrd.intros(1)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "v\<^sub>1" "r\<^sub>1"]}\medskip\\
-@{thm[mode=Rule] ValOrd.intros(3)[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]} \qquad
-@{thm[mode=Rule] ValOrd.intros(4)[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\medskip\\
-@{thm[mode=Rule] ValOrd.intros(5)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "r\<^sub>1"]} \qquad
-@{thm[mode=Rule] ValOrd.intros(6)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'"  "r\<^sub>2"]} \medskip\\
-@{thm[mode=Axiom] ValOrd.intros(7)}\qquad
-@{thm[mode=Axiom] ValOrd.intros(8)[of "c"]}\medskip\\
-@{thm[mode=Rule] ValOrd.intros(9)[of "v" "vs" "r"]}\qquad
-@{thm[mode=Rule] ValOrd.intros(10)[of "v" "vs" "r"]}\medskip\\
-@{thm[mode=Rule] ValOrd.intros(11)[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\medskip\\
-@{thm[mode=Rule] ValOrd.intros(12)[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\qquad
-@{thm[mode=Axiom] ValOrd.intros(13)[of "r"]}\medskip\\
-\end{tabular}
-\end{center}
-\noindent
-A prefix of a string s
-\begin{center}
-\begin{tabular}{c}
-@{thm prefix_def[of "s\<^sub>1" "s\<^sub>2"]}
-\end{tabular}
-\end{center}
-\noindent
-Values and non-problematic values
-\begin{center}
-\begin{tabular}{c}
-@{thm Values_def}\medskip\\
-\end{tabular}
-\end{center}
-\noindent
-The point is that for a given @{text s} and @{text r} there are only finitely many
-non-problematic values.
 *}
 text {*
 \noindent
 Some lemmas we have proved:\bigskip
 @{thm L_flat_Prf}
-@{thm L_flat_NPrf}
 @{thm[mode=IfThen] mkeps_nullable}
 @{thm[mode=IfThen] mkeps_flat}
 @{thm[mode=IfThen] Prf_injval}
 @{thm[mode=IfThen] PMatch_mkeps}
 @{thm[mode=IfThen] PMatch1(2)}
-@{thm[mode=IfThen] PMatch1N}
 @{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]}
-\medskip
-\noindent
-This is the main theorem that lets us prove that the algorithm is correct according to
-@{term "s \<in> r \<rightarrow> v"}:
-@{thm[mode=IfThen] PMatch2}
-\mbox{}\bigskip
 \noindent {\bf Proof} The proof is by induction on the definition of
 @{const der}. Other inductions would go through as well. The
 interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the
 case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis
 %referees.
 \bibliographystyle{plain}
 \bibliography{root}
-\section{Roy's Rules}
-\newcommand{\abs}[1]{\mid\!\! #1\!\! \mid}
-%%\newcommand{\mts}{\textit{``''}
-\newcommand{\tl}{\ \triangleleft\ }
-$$\inferrule[]{Void \tl \epsilon}{}
-\quad\quad
-\inferrule[]{Char\ c \tl Lit\ c}{}
-$$
-$$\inferrule
-{v_1 \tl r_1}
-{Left\ v_1 \tl r_1 + r_2}
-\quad\quad
-\inferrule[]
-{ v_2 \tl r_2 \\ \abs{v_2}\ \not\in\ L(r_1)}
-{Right\ v_2 \tl r_1 + r_2}
-$$
-$$
-\inferrule
-{v_1 \tl r_1\\
-v_2 \tl r_2\\
-s \in\  L(r_1\backslash\! \abs{v_1}) \ \land\
-\abs{v_2}\!\backslash s\ \epsilon\ L(r_2)
-\ \Rightarrow\ s = []
-}
-{(v_1, v_2) \tl r_1 \cdot r_2}
-$$
-$$\inferrule
-{ v \tl r \\ vs \tl r^* \\ \abs{v}\ \not=\ []}
-{ (v :: vs) \tl r^* }
-\quad\quad
-\inferrule{}
-{ []  \tl r^* }
-$$
 *}
 (*<*)
 end

changeset 124	5378ddbd1381
parent 123	1bee7006b92b
child 125	ff0844860981