# HG changeset patch # User Christian Urban # Date 1503697969 -7200 # Node ID 462d893ecb3d795c0c150b8d168e824d7f482ab2 # Parent 12772d537b713e31dcff1fc0dbeb7f724cea853f updated diff -r 12772d537b71 -r 462d893ecb3d thys/Journal/Paper.thy --- a/thys/Journal/Paper.thy Fri Aug 25 15:05:20 2017 +0200 +++ b/thys/Journal/Paper.thy Fri Aug 25 23:52:49 2017 +0200 @@ -1152,56 +1152,130 @@ @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \end{theorem} - \begin{proof} By induction on our POSIX rules. It is clear that - @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same underlying - string. + \begin{proof} By induction on our POSIX rules. By + Thm~\ref{posixdeterm} and the definition of @{const LV}, it is clear + that @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same + underlying string @{term s}. The three base cases are + straightforward: for example for @{term "v\<^sub>1 = Void"}, we have + that @{term "v\<^sub>2 \ LV ONE []"} must also be of the form + \mbox{@{term "v\<^sub>2 = Void"}}. Therefore we have @{term + "v\<^sub>1 :\val v\<^sub>2"}. The inductive cases for + @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ r\<^sub>1 + r\<^sub>2"} are as follows: - The two base cases are straightforward: for example for @{term - "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \ LV ONE - []"} must also be of the form \mbox{@{term "v\<^sub>2 = - Void"}}. Therefore we have @{term "v\<^sub>1 :\val - v\<^sub>2"}. The inductive cases are as follows: + + \begin{itemize} - \begin{itemize} \item[$\bullet$] Case @{term "s \ (ALT r\<^sub>1 - r\<^sub>2) \ (Left w\<^sub>1)"}: In this case @{term - "v\<^sub>1 = Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is - either of the form @{term "Left w\<^sub>2"} or @{term "Right - w\<^sub>2"}. In the latter case we can immediately conclude with - @{term "v\<^sub>1 :\val v\<^sub>2"} since a @{text - Left}-value with the same underlying string @{text s} is always - smaller or equal than a @{text Right}-value. In the former case we - have @{term "w\<^sub>2 \ LV r\<^sub>1 s"} and can use the - induction hypothesis to infer @{term "w\<^sub>1 :\val - w\<^sub>2"}. Because @{term "w\<^sub>1"} and @{term "w\<^sub>2"} - have the same underlying string @{text s}, we can conclude with - @{term "Left w\<^sub>1 :\val Left w\<^sub>2"}.\smallskip + \item[$\bullet$] Case @{term "s \ (ALT r\<^sub>1 r\<^sub>2) + \ (Left w\<^sub>1)"}: In this case @{term "v\<^sub>1 = + Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either of the + form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the + latter case we can immediately conclude with @{term "v\<^sub>1 + :\val v\<^sub>2"} since a @{text Left}-value with the + same underlying string @{text s} is always smaller or equal than a + @{text Right}-value. In the former case we have @{term "w\<^sub>2 + \ LV r\<^sub>1 s"} and can use the induction hypothesis to infer + @{term "w\<^sub>1 :\val w\<^sub>2"}. Because @{term + "w\<^sub>1"} and @{term "w\<^sub>2"} have the same underlying string + @{text s}, we can conclude with @{term "Left w\<^sub>1 + :\val Left w\<^sub>2"} by Prop ???.\smallskip \item[$\bullet$] Case @{term "s \ (ALT r\<^sub>1 r\<^sub>2) \ (Right w\<^sub>1)"}: This case similar as the previous case, except that we know that @{term "s \ L - r\<^sub>1"}. This is needed when @{term "v\<^sub>2 = Left - w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat w\<^sub>2"} - @{text "= s"}} and @{term "\ w\<^sub>2 : r\<^sub>1"}, we - can derive a contradiction using Prop.~\ref{inhabs}. So also in this - case \mbox{@{term "v\<^sub>1 :\val v\<^sub>2"}}.\smallskip + r\<^sub>1"}. This is needed when @{term "v\<^sub>2"} is of the form + @{term "Left w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat + w\<^sub>2"} @{text "= s"}} and @{term "\ w\<^sub>2 : + r\<^sub>1"}, we can derive a contradiction using + Prop.~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1 + :\val v\<^sub>2"}}.\smallskip - \item[$\bullet$] Case @{term "(s\<^sub>1 @ s\<^sub>2) \ (SEQ r\<^sub>1 r\<^sub>2) - \ (Seq w\<^sub>1 w\<^sub>2)"}: Assume @{term "v\<^sub>2 = - Seq (u\<^sub>1) (u\<^sub>2)"} with @{term "\ u\<^sub>1 : r\<^sub>1"} - and \mbox{@{term "\ u\<^sub>2 : r\<^sub>2"}}. We have - + \item[$\bullet$] Case @{term "(s\<^sub>1 @ s\<^sub>2) \ (SEQ + r\<^sub>1 r\<^sub>2) \ (Seq w\<^sub>1 w\<^sub>2)"}: We + can assume @{term "v\<^sub>2 = Seq (u\<^sub>1) (u\<^sub>2)"} with + @{term "\ u\<^sub>1 : r\<^sub>1"} and \mbox{@{term + "\ u\<^sub>2 : r\<^sub>2"}}. We have @{term "s\<^sub>1 @ + s\<^sub>2 = (flat u\<^sub>1) @ (flat u\<^sub>2)"}. By the + side-condition on out @{text PS}-rule we know that either @{term + "s\<^sub>1 = flat u\<^sub>1"} or @{term "flat u\<^sub>1"} is a + strict prefix ??? of @{term "s\<^sub>1"}. In the latter case we can + infer @{term "w\<^sub>1 :\val u\<^sub>1"} by ??? and from + this @{term "v\<^sub>1 :\val v\<^sub>2"} by ???. In the + former case we know @{term "u\<^sub>1 \ LV r\<^sub>1 s\<^sub>1"} + and @{term "u\<^sub>2 \ LV r\<^sub>2 s\<^sub>2"}. With this we + can use the induction hypotheses to infer @{term "w\<^sub>1 + :\val u\<^sub>1"} and @{term "w\<^sub>2 + :\val u\<^sub>2"}. By ??? we can again infer @{term + "v\<^sub>1 :\val v\<^sub>2"} and are done. + \end{itemize} + + \noindent The case for @{text "P\"} is similar to the @{text PS}-case.\qed \end{proof} - Given a lexical value @{text "v\<^sub>1"}, say, in @{term "LV r s"} for which there does - not exists any smaller value in @{term "LV r s"}, then this value is our POSIX value: + \noindent This theorem shows that our posix value @{text + "v\<^sub>1"} with @{term "s \ r \ v\<^sub>1"} is a + minimal element for the values in @{text "LV r s"}. By ??? we also + know that any value in @{text "LV s' r"}, with @{term "s'"} being a + prefix, cannot be smaller than @{text "v\<^sub>1"}. The next theorem + shows the opposite---namely any minimal element must be a @{text + POSIX}-value. Given a lexical value @{text "v\<^sub>1"}, say, in + @{term "LV r s"}, for which there does not exists any smaller value + in @{term "LV r s"}, then this value is our @{text POSIX}-value: \begin{theorem} @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} \end{theorem} - \begin{proof} - By induction on @{text r}. + \begin{proof} By induction on @{text r}. The tree base cases are + again straightforward. For example if @{term "v\<^sub>1 \ LV + ONE s"} then @{term "v\<^sub>1 = Void"} and @{term "s = []"}. We + know that @{term "[] \ ONE \ Void"} holds. In the + cases for @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ + r\<^sub>1 r\<^sub>2"} we reason as follows: + + \begin{itemize} + + \item[$\bullet$] Case @{term "v\<^sub>1 \ LV (ALT r\<^sub>1 + r\<^sub>2) s"} with @{term "v\<^sub>1 = Left w\<^sub>1"} and @{term + "w\<^sub>1 \ LV r\<^sub>1 s"}: In order to use the induction + hypothesis we need to infer + + \begin{center} + @{term "\v' + \ LV (ALT r\<^sub>1 r\<^sub>2) s. \ (v' :\val + Left w\<^sub>1)"} + implies + @{term "\v' \ LV r\<^sub>1 + s. \ (v' :\val w\<^sub>1)"} + \end{center} + + \noindent This can be done because of ?? we can infer that for every + @{text v'} in @{term "LV r\<^sub>1 s"} and @{term "v' + :\val w\<^sub>1"} also @{term "Left v' :\val + Left w\<^sub>1"} holds. This gives a contradiction. Consequently, we + can use the induction hypothesis to obtain @{term "s \ r\<^sub>1 + \ w\<^sub>1"} and then conclude this case with @{term "s + \ ALT r\<^sub>1 r\<^sub>2 \ v\<^sub>1"}.\smallskip + + \item[$\bullet$] Case @{term "v\<^sub>1 \ LV (ALT r\<^sub>1 + r\<^sub>2) s"} with @{term "v\<^sub>1 = Right w\<^sub>1"} and @{term + "w\<^sub>1 \ LV r\<^sub>2 s"}: Like above we can use the + induction hypothesis in order to infer @{term "s \ r\<^sub>2 + \ w\<^sub>1"}. In order to conclude we still need to + obtain @{term "s \ L r\<^sub>1"}. Let us suppose the opposite. + Then there is a @{term v'} such that @{term "v' \ LV r\<^sub>1 s"} + by Prop ??? and hence @{term "Left v' \ LV (ALT r\<^sub>1 r\<^sub>2) s"}. + But then we can use the second assumption of the theorem to infer that + @{term "\(Left v' :\val Right w\<^sub>1)"}, which cannot be the case. + Therefore @{term "s \ L r\<^sub>1"} must hold and we can also conclude in this + case. + + + + + \end{itemize} + \end{proof} *} diff -r 12772d537b71 -r 462d893ecb3d thys/Journal/document/root.tex --- a/thys/Journal/document/root.tex Fri Aug 25 15:05:20 2017 +0200 +++ b/thys/Journal/document/root.tex Fri Aug 25 23:52:49 2017 +0200 @@ -72,18 +72,17 @@ in order to deal with POSIX matching, which is the underlying disambiguation strategy for regular expressions needed in lexers. Their algorithm generates POSIX values which encode the information of -\emph{how} a regular expression matched a string---that is which part +\emph{how} a regular expression matched a string---that is, which part of the string is matched by which part of the regular expression. In -the first part of this paper we give our inductive definition of what -a POSIX value is and show $(i)$ that such a value is unique (for given -regular expression and string being matched) and $(ii)$ that Sulzmann -and Lu's algorithm always generates such a value (provided that the -regular expression matches the string). We also prove the correctness -of an optimised version of the POSIX matching algorithm. In the -second part we show that $(iii)$ our inductive definition of a POSIX -value is equivalent to an alternative definition by Okui and Suzuki -which identifies POSIX values as least elements according to an -ordering of values. \smallskip +this paper we give our inductive definition of what a POSIX value is +and show $(i)$ that such a value is unique (for given regular +expression and string being matched) and $(ii)$ that Sulzmann and Lu's +algorithm always generates such a value (provided that the regular +expression matches the string). We show that $(iii)$ our inductive +definition of a POSIX value is equivalent to an alternative definition +by Okui and Suzuki which identifies POSIX values as least elements +according to an ordering of values. We also prove the correctness of +an optimised version of the POSIX matching algorithm. \smallskip {\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL diff -r 12772d537b71 -r 462d893ecb3d thys/Positions.thy --- a/thys/Positions.thy Fri Aug 25 15:05:20 2017 +0200 +++ b/thys/Positions.thy Fri Aug 25 23:52:49 2017 +0200 @@ -671,6 +671,7 @@ by (auto simp add: LV_def) then have "v1 :\val v3a \ (v1 :\val v3a \ v2 :\val v3b)" using IH1 IH2 by blast then have "Seq v1 v2 :\val Seq v3a v3b" using eqs q2 as1 + thm PosOrd_SeqI1 PosOrd_SeqI2 unfolding PosOrd_ex_eq_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) then show "Seq v1 v2 :\val v3" unfolding eqs by blast next @@ -690,11 +691,10 @@ unfolding LV_def apply(auto) apply(erule Prf.cases) - apply(simp_all) - apply(auto)[1] + apply(auto) apply(case_tac vs) - apply(auto) - using Prf.intros(6) by blast + apply(auto intro: Prf.intros) + done then show "Stars (v # vs) :\val v3" proof (cases) case (NonEmpty v3a vs3) @@ -821,8 +821,10 @@ next case (ONE s v1) have "v1 \ LV ONE s" by fact - then show "s \ ONE \ v1" unfolding LV_def - by(auto elim!: Prf.cases intro: Posix.intros) + then have "v1 = Void" "s = []" unfolding LV_def + by(auto elim: Prf.cases) + then show "s \ ONE \ v1" + by(auto intro: Posix.intros) next case (CHAR c s v1) have "v1 \ LV (CHAR c) s" by fact @@ -832,7 +834,7 @@ case (ALT r1 r2 s v1) have IH1: "\s v1. \v1 \ LV r1 s; \v2 \ LV r1 s. \ v2 :\val v1\ \ s \ r1 \ v1" by fact have IH2: "\s v1. \v1 \ LV r2 s; \v2 \ LV r2 s. \ v2 :\val v1\ \ s \ r2 \ v1" by fact - have as1: "\v2\LV (ALT r1 r2) s. \ v2 :\val v1" by fact + have as1: "\v2 \ LV (ALT r1 r2) s. \ v2 :\val v1" by fact have as2: "v1 \ LV (ALT r1 r2) s" by fact then consider (Left) v1' where @@ -845,18 +847,17 @@ then show "s \ ALT r1 r2 \ v1" proof (cases) case (Left v1') - have "v1' \ LV r1 s" using as2 - unfolding LV_def Left by (auto elim: Prf.cases) + have "v1' \ LV r1 s" using Left(3) . moreover have "\v2 \ LV r1 s. \ v2 :\val v1'" using as1 - unfolding LV_def Left using Prf.intros(2) PosOrd_Left_eq by force + unfolding Left(1,2) unfolding LV_def + using Prf.intros(2) PosOrd_Left_eq by force ultimately have "s \ r1 \ v1'" using IH1 by simp then have "s \ ALT r1 r2 \ Left v1'" by (rule Posix.intros) then show "s \ ALT r1 r2 \ v1" using Left by simp next case (Right v1') - have "v1' \ LV r2 s" using as2 - unfolding LV_def Right by (auto elim: Prf.cases) + have "v1' \ LV r2 s" using Right(3) . moreover have "\v2 \ LV r2 s. \ v2 :\val v1'" using as1 unfolding LV_def Right using Prf.intros(3) PosOrd_RightI by force