--- a/thys/Journal/Paper.thy Fri Aug 25 15:05:20 2017 +0200
+++ b/thys/Journal/Paper.thy Fri Aug 25 23:52:49 2017 +0200
@@ -1152,56 +1152,130 @@
@{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
\end{theorem}
- \begin{proof} By induction on our POSIX rules. It is clear that
- @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same underlying
- string.
+ \begin{proof} By induction on our POSIX rules. By
+ Thm~\ref{posixdeterm} and the definition of @{const LV}, it is clear
+ that @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same
+ underlying string @{term s}. The three base cases are
+ straightforward: for example for @{term "v\<^sub>1 = Void"}, we have
+ that @{term "v\<^sub>2 \<in> LV ONE []"} must also be of the form
+ \mbox{@{term "v\<^sub>2 = Void"}}. Therefore we have @{term
+ "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}. The inductive cases for
+ @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ r\<^sub>1
+ r\<^sub>2"} are as follows:
- The two base cases are straightforward: for example for @{term
- "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \<in> LV ONE
- []"} must also be of the form \mbox{@{term "v\<^sub>2 =
- Void"}}. Therefore we have @{term "v\<^sub>1 :\<sqsubseteq>val
- v\<^sub>2"}. The inductive cases are as follows:
+
+ \begin{itemize}
- \begin{itemize} \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1
- r\<^sub>2) \<rightarrow> (Left w\<^sub>1)"}: In this case @{term
- "v\<^sub>1 = Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is
- either of the form @{term "Left w\<^sub>2"} or @{term "Right
- w\<^sub>2"}. In the latter case we can immediately conclude with
- @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} since a @{text
- Left}-value with the same underlying string @{text s} is always
- smaller or equal than a @{text Right}-value. In the former case we
- have @{term "w\<^sub>2 \<in> LV r\<^sub>1 s"} and can use the
- induction hypothesis to infer @{term "w\<^sub>1 :\<sqsubseteq>val
- w\<^sub>2"}. Because @{term "w\<^sub>1"} and @{term "w\<^sub>2"}
- have the same underlying string @{text s}, we can conclude with
- @{term "Left w\<^sub>1 :\<sqsubseteq>val Left w\<^sub>2"}.\smallskip
+ \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
+ \<rightarrow> (Left w\<^sub>1)"}: In this case @{term "v\<^sub>1 =
+ Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either of the
+ form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the
+ latter case we can immediately conclude with @{term "v\<^sub>1
+ :\<sqsubseteq>val v\<^sub>2"} since a @{text Left}-value with the
+ same underlying string @{text s} is always smaller or equal than a
+ @{text Right}-value. In the former case we have @{term "w\<^sub>2
+ \<in> LV r\<^sub>1 s"} and can use the induction hypothesis to infer
+ @{term "w\<^sub>1 :\<sqsubseteq>val w\<^sub>2"}. Because @{term
+ "w\<^sub>1"} and @{term "w\<^sub>2"} have the same underlying string
+ @{text s}, we can conclude with @{term "Left w\<^sub>1
+ :\<sqsubseteq>val Left w\<^sub>2"} by Prop ???.\smallskip
\item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
\<rightarrow> (Right w\<^sub>1)"}: This case similar as the previous
case, except that we know that @{term "s \<notin> L
- r\<^sub>1"}. This is needed when @{term "v\<^sub>2 = Left
- w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat w\<^sub>2"}
- @{text "= s"}} and @{term "\<Turnstile> w\<^sub>2 : r\<^sub>1"}, we
- can derive a contradiction using Prop.~\ref{inhabs}. So also in this
- case \mbox{@{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}}.\smallskip
+ r\<^sub>1"}. This is needed when @{term "v\<^sub>2"} is of the form
+ @{term "Left w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat
+ w\<^sub>2"} @{text "= s"}} and @{term "\<Turnstile> w\<^sub>2 :
+ r\<^sub>1"}, we can derive a contradiction using
+ Prop.~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1
+ :\<sqsubseteq>val v\<^sub>2"}}.\smallskip
- \item[$\bullet$] Case @{term "(s\<^sub>1 @ s\<^sub>2) \<in> (SEQ r\<^sub>1 r\<^sub>2)
- \<rightarrow> (Seq w\<^sub>1 w\<^sub>2)"}: Assume @{term "v\<^sub>2 =
- Seq (u\<^sub>1) (u\<^sub>2)"} with @{term "\<Turnstile> u\<^sub>1 : r\<^sub>1"}
- and \mbox{@{term "\<Turnstile> u\<^sub>2 : r\<^sub>2"}}. We have
-
+ \item[$\bullet$] Case @{term "(s\<^sub>1 @ s\<^sub>2) \<in> (SEQ
+ r\<^sub>1 r\<^sub>2) \<rightarrow> (Seq w\<^sub>1 w\<^sub>2)"}: We
+ can assume @{term "v\<^sub>2 = Seq (u\<^sub>1) (u\<^sub>2)"} with
+ @{term "\<Turnstile> u\<^sub>1 : r\<^sub>1"} and \mbox{@{term
+ "\<Turnstile> u\<^sub>2 : r\<^sub>2"}}. We have @{term "s\<^sub>1 @
+ s\<^sub>2 = (flat u\<^sub>1) @ (flat u\<^sub>2)"}. By the
+ side-condition on out @{text PS}-rule we know that either @{term
+ "s\<^sub>1 = flat u\<^sub>1"} or @{term "flat u\<^sub>1"} is a
+ strict prefix ??? of @{term "s\<^sub>1"}. In the latter case we can
+ infer @{term "w\<^sub>1 :\<sqsubset>val u\<^sub>1"} by ??? and from
+ this @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} by ???. In the
+ former case we know @{term "u\<^sub>1 \<in> LV r\<^sub>1 s\<^sub>1"}
+ and @{term "u\<^sub>2 \<in> LV r\<^sub>2 s\<^sub>2"}. With this we
+ can use the induction hypotheses to infer @{term "w\<^sub>1
+ :\<sqsubseteq>val u\<^sub>1"} and @{term "w\<^sub>2
+ :\<sqsubseteq>val u\<^sub>2"}. By ??? we can again infer @{term
+ "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} and are done.
+
\end{itemize}
+
+ \noindent The case for @{text "P\<star>"} is similar to the @{text PS}-case.\qed
\end{proof}
- Given a lexical value @{text "v\<^sub>1"}, say, in @{term "LV r s"} for which there does
- not exists any smaller value in @{term "LV r s"}, then this value is our POSIX value:
+ \noindent This theorem shows that our posix value @{text
+ "v\<^sub>1"} with @{term "s \<in> r \<rightarrow> v\<^sub>1"} is a
+ minimal element for the values in @{text "LV r s"}. By ??? we also
+ know that any value in @{text "LV s' r"}, with @{term "s'"} being a
+ prefix, cannot be smaller than @{text "v\<^sub>1"}. The next theorem
+ shows the opposite---namely any minimal element must be a @{text
+ POSIX}-value. Given a lexical value @{text "v\<^sub>1"}, say, in
+ @{term "LV r s"}, for which there does not exists any smaller value
+ in @{term "LV r s"}, then this value is our @{text POSIX}-value:
\begin{theorem}
@{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
\end{theorem}
- \begin{proof}
- By induction on @{text r}.
+ \begin{proof} By induction on @{text r}. The tree base cases are
+ again straightforward. For example if @{term "v\<^sub>1 \<in> LV
+ ONE s"} then @{term "v\<^sub>1 = Void"} and @{term "s = []"}. We
+ know that @{term "[] \<in> ONE \<rightarrow> Void"} holds. In the
+ cases for @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ
+ r\<^sub>1 r\<^sub>2"} we reason as follows:
+
+ \begin{itemize}
+
+ \item[$\bullet$] Case @{term "v\<^sub>1 \<in> LV (ALT r\<^sub>1
+ r\<^sub>2) s"} with @{term "v\<^sub>1 = Left w\<^sub>1"} and @{term
+ "w\<^sub>1 \<in> LV r\<^sub>1 s"}: In order to use the induction
+ hypothesis we need to infer
+
+ \begin{center}
+ @{term "\<forall>v'
+ \<in> LV (ALT r\<^sub>1 r\<^sub>2) s. \<not> (v' :\<sqsubset>val
+ Left w\<^sub>1)"}
+ implies
+ @{term "\<forall>v' \<in> LV r\<^sub>1
+ s. \<not> (v' :\<sqsubset>val w\<^sub>1)"}
+ \end{center}
+
+ \noindent This can be done because of ?? we can infer that for every
+ @{text v'} in @{term "LV r\<^sub>1 s"} and @{term "v'
+ :\<sqsubset>val w\<^sub>1"} also @{term "Left v' :\<sqsubset>val
+ Left w\<^sub>1"} holds. This gives a contradiction. Consequently, we
+ can use the induction hypothesis to obtain @{term "s \<in> r\<^sub>1
+ \<rightarrow> w\<^sub>1"} and then conclude this case with @{term "s
+ \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> v\<^sub>1"}.\smallskip
+
+ \item[$\bullet$] Case @{term "v\<^sub>1 \<in> LV (ALT r\<^sub>1
+ r\<^sub>2) s"} with @{term "v\<^sub>1 = Right w\<^sub>1"} and @{term
+ "w\<^sub>1 \<in> LV r\<^sub>2 s"}: Like above we can use the
+ induction hypothesis in order to infer @{term "s \<in> r\<^sub>2
+ \<rightarrow> w\<^sub>1"}. In order to conclude we still need to
+ obtain @{term "s \<notin> L r\<^sub>1"}. Let us suppose the opposite.
+ Then there is a @{term v'} such that @{term "v' \<in> LV r\<^sub>1 s"}
+ by Prop ??? and hence @{term "Left v' \<in> LV (ALT r\<^sub>1 r\<^sub>2) s"}.
+ But then we can use the second assumption of the theorem to infer that
+ @{term "\<not>(Left v' :\<sqsubset>val Right w\<^sub>1)"}, which cannot be the case.
+ Therefore @{term "s \<notin> L r\<^sub>1"} must hold and we can also conclude in this
+ case.
+
+
+
+
+ \end{itemize}
+
\end{proof}
*}
--- a/thys/Journal/document/root.tex Fri Aug 25 15:05:20 2017 +0200
+++ b/thys/Journal/document/root.tex Fri Aug 25 23:52:49 2017 +0200
@@ -72,18 +72,17 @@
in order to deal with POSIX matching, which is the underlying
disambiguation strategy for regular expressions needed in lexers.
Their algorithm generates POSIX values which encode the information of
-\emph{how} a regular expression matched a string---that is which part
+\emph{how} a regular expression matched a string---that is, which part
of the string is matched by which part of the regular expression. In
-the first part of this paper we give our inductive definition of what
-a POSIX value is and show $(i)$ that such a value is unique (for given
-regular expression and string being matched) and $(ii)$ that Sulzmann
-and Lu's algorithm always generates such a value (provided that the
-regular expression matches the string). We also prove the correctness
-of an optimised version of the POSIX matching algorithm. In the
-second part we show that $(iii)$ our inductive definition of a POSIX
-value is equivalent to an alternative definition by Okui and Suzuki
-which identifies POSIX values as least elements according to an
-ordering of values. \smallskip
+this paper we give our inductive definition of what a POSIX value is
+and show $(i)$ that such a value is unique (for given regular
+expression and string being matched) and $(ii)$ that Sulzmann and Lu's
+algorithm always generates such a value (provided that the regular
+expression matches the string). We show that $(iii)$ our inductive
+definition of a POSIX value is equivalent to an alternative definition
+by Okui and Suzuki which identifies POSIX values as least elements
+according to an ordering of values. We also prove the correctness of
+an optimised version of the POSIX matching algorithm. \smallskip
{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions,
Isabelle/HOL
--- a/thys/Positions.thy Fri Aug 25 15:05:20 2017 +0200
+++ b/thys/Positions.thy Fri Aug 25 23:52:49 2017 +0200
@@ -671,6 +671,7 @@
by (auto simp add: LV_def)
then have "v1 :\<sqsubset>val v3a \<or> (v1 :\<sqsubseteq>val v3a \<and> v2 :\<sqsubseteq>val v3b)" using IH1 IH2 by blast
then have "Seq v1 v2 :\<sqsubseteq>val Seq v3a v3b" using eqs q2 as1
+ thm PosOrd_SeqI1 PosOrd_SeqI2
unfolding PosOrd_ex_eq_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2)
then show "Seq v1 v2 :\<sqsubseteq>val v3" unfolding eqs by blast
next
@@ -690,11 +691,10 @@
unfolding LV_def
apply(auto)
apply(erule Prf.cases)
- apply(simp_all)
- apply(auto)[1]
+ apply(auto)
apply(case_tac vs)
- apply(auto)
- using Prf.intros(6) by blast
+ apply(auto intro: Prf.intros)
+ done
then show "Stars (v # vs) :\<sqsubseteq>val v3"
proof (cases)
case (NonEmpty v3a vs3)
@@ -821,8 +821,10 @@
next
case (ONE s v1)
have "v1 \<in> LV ONE s" by fact
- then show "s \<in> ONE \<rightarrow> v1" unfolding LV_def
- by(auto elim!: Prf.cases intro: Posix.intros)
+ then have "v1 = Void" "s = []" unfolding LV_def
+ by(auto elim: Prf.cases)
+ then show "s \<in> ONE \<rightarrow> v1"
+ by(auto intro: Posix.intros)
next
case (CHAR c s v1)
have "v1 \<in> LV (CHAR c) s" by fact
@@ -832,7 +834,7 @@
case (ALT r1 r2 s v1)
have IH1: "\<And>s v1. \<lbrakk>v1 \<in> LV r1 s; \<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r1 \<rightarrow> v1" by fact
have IH2: "\<And>s v1. \<lbrakk>v1 \<in> LV r2 s; \<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r2 \<rightarrow> v1" by fact
- have as1: "\<forall>v2\<in>LV (ALT r1 r2) s. \<not> v2 :\<sqsubset>val v1" by fact
+ have as1: "\<forall>v2 \<in> LV (ALT r1 r2) s. \<not> v2 :\<sqsubset>val v1" by fact
have as2: "v1 \<in> LV (ALT r1 r2) s" by fact
then consider
(Left) v1' where
@@ -845,18 +847,17 @@
then show "s \<in> ALT r1 r2 \<rightarrow> v1"
proof (cases)
case (Left v1')
- have "v1' \<in> LV r1 s" using as2
- unfolding LV_def Left by (auto elim: Prf.cases)
+ have "v1' \<in> LV r1 s" using Left(3) .
moreover
have "\<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1'" using as1
- unfolding LV_def Left using Prf.intros(2) PosOrd_Left_eq by force
+ unfolding Left(1,2) unfolding LV_def
+ using Prf.intros(2) PosOrd_Left_eq by force
ultimately have "s \<in> r1 \<rightarrow> v1'" using IH1 by simp
then have "s \<in> ALT r1 r2 \<rightarrow> Left v1'" by (rule Posix.intros)
then show "s \<in> ALT r1 r2 \<rightarrow> v1" using Left by simp
next
case (Right v1')
- have "v1' \<in> LV r2 s" using as2
- unfolding LV_def Right by (auto elim: Prf.cases)
+ have "v1' \<in> LV r2 s" using Right(3) .
moreover
have "\<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1'" using as1
unfolding LV_def Right using Prf.intros(3) PosOrd_RightI by force