# HG changeset patch # User Christian Urban # Date 1500399560 -3600 # Node ID d36be1e356c0ee01b9885e6fbf5f394af9c1e101 # Parent e2828c4a1e2318e76d8966d3bdc2d64cf2fdff5a changed definitions of PRF diff -r e2828c4a1e23 -r d36be1e356c0 Literature/alternative-posix-conf.pdf Binary file Literature/alternative-posix-conf.pdf has changed diff -r e2828c4a1e23 -r d36be1e356c0 thys/Journal/Paper.thy --- a/thys/Journal/Paper.thy Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Journal/Paper.thy Tue Jul 18 18:39:20 2017 +0100 @@ -3,17 +3,28 @@ imports "../Lexer" "../Simplifying" - "../Sulzmann" + "../Positions" "~~/src/HOL/Library/LaTeXsugar" begin +lemma Suc_0_fold: + "Suc 0 = 1" +by simp + + + declare [[show_question_marks = false]] abbreviation - "der_syn r c \ der c r" + "der_syn r c \ der c r" abbreviation - "ders_syn r s \ ders s r" + "ders_syn r s \ ders s r" + + +abbreviation + "nprec v1 v2 \ \(v1 :\val v2)" + notation (latex output) If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and @@ -26,7 +37,7 @@ SEQ ("_ \ _" [77,77] 78) and STAR ("_\<^sup>\" [1000] 78) and - val.Void ("'(')" 1000) and + val.Void ("Empty" 78) and val.Char ("Char _" [1000] 78) and val.Left ("Left _" [79] 78) and val.Right ("Right _" [1000] 78) and @@ -56,8 +67,18 @@ simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and slexer ("lexer\<^sup>+" 1000) and + at ("_\<^raw:\mbox{$\downharpoonleft$}>\<^bsub>_\<^esub>") and + lex_list ("_ \\<^bsub>lex\<^esub> _") and + PosOrd ("_ \\<^bsub>_\<^esub> _" [77,77,77] 77) and + PosOrd_ex ("_ \ _" [77,77] 77) and + PosOrd_ex1 ("_ \<^raw:\mbox{$\preccurlyeq$}> _" [77,77] 77) and + pflat_len ("\_\\<^bsub>_\<^esub>") and + nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) + + (* ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and ValOrdEq ("_ \\<^bsub>_\<^esub> _" [77,77,77] 77) + *) definition "match r s \ nullable (ders s r)" @@ -99,7 +120,7 @@ one way of how the string is matched. There are two commonly used disambiguation strategies to generate a unique answer: one is called GREEDY matching \cite{Frisch2004} and the other is POSIX -matching~\cite{Kuklewicz,Sulzmann2014,Vansummeren2006}. For example consider +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2013,Sulzmann2014,Vansummeren2006}. For example consider the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be matched in two `iterations' by the single letter-regular expressions @{term x} and @{term y}, or directly @@ -114,18 +135,24 @@ what programmers consider basic syntactic building blocks in their programs. These building blocks are often specified by some regular expressions, say @{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and -identifiers, respectively. There are two underlying (informal) rules behind -tokenising a string in a POSIX fashion according to a collection of regular +identifiers, respectively. 
There are a few underlying (informal) rules behind +tokenising a string in a POSIX \cite{POSIX} fashion according to a collection of regular expressions: \begin{itemize} -\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``maximal munch rule''}): +\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}): The longest initial substring matched by any regular expression is taken as next token.\smallskip \item[$\bullet$] \emph{Priority Rule:} -For a particular longest initial substring, the first regular expression -that can match determines the token. +For a particular longest initial substring, the first (leftmost) regular expression +that can match determines the token.\smallskip + +\item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall +not match an empty string unless this is the only match for the repetition.\smallskip + +\item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to +be longer than no match at all. \end{itemize} \noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords @@ -157,7 +184,7 @@ algorithm. This proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY regular expression matching algorithm. However, we were not able to establish transitivity and -totality for the ``order relation'' by Sulzmann and Lu. In Section +totality for the ``order relation'' by Sulzmann and Lu. ??In Section \ref{argu} we identify some inherent problems with their approach (of which some of the proofs are not published in \cite{Sulzmann2014}); perhaps more importantly, we give a simple inductive (and @@ -1008,6 +1035,104 @@ \end{proof} *} +section {* Ordering of Values according to Okui and Suzuki*} + +text {* + + Positions are lists of natural numbers. 
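+
+  For illustration only (this small example is not part of the formal
+  development), consider the value @{term "Seq (Char a) (Left (Char b))"};
+  the definitions below assign to it the following positions and subvalues:
+
+  \begin{center}
+  \begin{tabular}{lcl}
+  positions           & : & $\{[],\;[0],\;[1],\;[1,0]\}$\\
+  subvalue at $[0]$   & : & @{term "Char a"}\\
+  subvalue at $[1]$   & : & @{term "Left (Char b)"}\\
+  subvalue at $[1,0]$ & : & @{term "Char b"}\\
+  \end{tabular}
+  \end{center}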
+ + The subvalue at position @{text p}, written @{term "at v p"}, is defined + + + \begin{center} + \begin{tabular}{r@ {\hspace{0mm}}lcl} + @{term v} & @{text "\\<^bsub>[]\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(1)}\\ + @{term "Left v"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(2)}\\ + @{term "Right v"} & @{text "\\<^bsub>1::ps\<^esub>"} & @{text "\"} & + @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"} & + @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>1::ps\<^esub>"} + & @{text "\"} & + @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\ + @{term "Stars vs"} & @{text "\\<^bsub>n::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(6)}\\ + \end{tabular} + \end{center} + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) Pos.simps(1)} & @{text "\"} & @{thm (rhs) Pos.simps(1)}\\ + @{thm (lhs) Pos.simps(2)} & @{text "\"} & @{thm (rhs) Pos.simps(2)}\\ + @{thm (lhs) Pos.simps(3)} & @{text "\"} & @{thm (rhs) Pos.simps(3)}\\ + @{thm (lhs) Pos.simps(4)} & @{text "\"} & @{thm (rhs) Pos.simps(4)}\\ + @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + & @{text "\"} + & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + @{thm (lhs) Pos_stars} & @{text "\"} & @{thm (rhs) Pos_stars}\\ + \end{tabular} + \end{center} + + @{thm pflat_len_def} + + + \begin{center} + \begin{tabular}{ccc} + @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and + ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]} + \end{tabular} + \end{center} + + + Main definition by Okui and Suzuki. + + \begin{center} + \begin{tabular}{ccl} + @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} & + @{text "\"} & + @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} and\\ + & & @{term "(\q \ Pos v\<^sub>1 \ Pos v\<^sub>2. 
q \lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"} + \end{tabular} + \end{center} + + @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + @{thm PosOrd_ex1_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Lemma 1 + + @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + but in the other direction only + + @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Lemma Transitivity: + + @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} + + +*} + +text {* + + Theorems: + + @{thm [mode=IfThen] Posix_CPT} + + @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Corrollary from the last one + + @{thm [mode=IfThen] Posix_PosOrd_stronger[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Theorem + + @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} +*} + + section {* The Correctness Argument by Sulzmann and Lu\label{argu} *} text {* @@ -1022,24 +1147,7 @@ Their central definition is an ``ordering relation'' defined by the rules (slightly adapted to fit our notation): -\begin{center} -\begin{tabular}{@ {}c@ {\hspace{4mm}}c@ {}} -@{thm[mode=Rule] C2[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1\" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\"]}\,(C2) & -@{thm[mode=Rule] C1[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\" "v\<^sub>1" "r\<^sub>1"]}\,(C1)\smallskip\\ - -@{thm[mode=Rule] A1[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\,(A1) & -@{thm[mode=Rule] A2[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\,(A2)\smallskip\\ - -@{thm[mode=Rule] A3[of "v\<^sub>1" "r\<^sub>2" "v\<^sub>2" "r\<^sub>1"]}\,(A3) & -@{thm[mode=Rule] A4[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\,(A4)\smallskip\\ - -@{thm[mode=Rule] K1[of "v" "vs" "r"]}\,(K1) & -@{thm[mode=Rule] K2[of "v" "vs" "r"]}\,(K2)\smallskip\\ - -@{thm[mode=Rule] K3[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\,(K3) & -@{thm[mode=Rule] K4[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\,(K4) -\end{tabular} -\end{center} + ?? \noindent The idea behind the rules (A1) and (A2), for example, is that a @{text Left}-value is bigger than a @{text Right}-value, if the underlying @@ -1062,143 +1170,8 @@ What is interesting for our purposes is that the properties reflexivity, totality and transitivity for this GREEDY ordering can be proved relatively easily by induction. - - These properties of GREEDY, however, do not transfer to the POSIX - ``ordering'' by Sulzmann and Lu, which they define as @{text "v\<^sub>1 \\<^sub>r v\<^sub>2"}. - To start with, @{text "v\<^sub>1 \\<^sub>r v\<^sub>2"} is - not defined inductively, but as $($@{term "v\<^sub>1 = v\<^sub>2"}$)$ $\vee$ $($@{term "(v\<^sub>1 >r - v\<^sub>2) \ (flat v\<^sub>1 = flat (v\<^sub>2::val))"}$)$. This means that @{term "v\<^sub>1 - >(r::rexp) (v\<^sub>2::val)"} does not necessarily imply @{term "v\<^sub>1 \(r::rexp) - (v\<^sub>2::val)"}. Moreover, transitivity does not hold in the ``usual'' - formulation, for example: - - \begin{falsehood} - Suppose @{term "\ v\<^sub>1 : r"}, @{term "\ v\<^sub>2 : r"} and @{term "\ v\<^sub>3 : r"}. - If @{term "v\<^sub>1 >(r::rexp) (v\<^sub>2::val)"} and @{term "v\<^sub>2 >(r::rexp) (v\<^sub>3::val)"} - then @{term "v\<^sub>1 >(r::rexp) (v\<^sub>3::val)"}. 
- \end{falsehood} - - \noindent If formulated in this way, then there are various counter - examples: For example let @{term r} be @{text "a + ((a + a)\(a + \))"} - then the @{term "v\<^sub>1"}, @{term "v\<^sub>2"} and @{term "v\<^sub>3"} below are values - of @{term r}: - - \begin{center} - \begin{tabular}{lcl} - @{term "v\<^sub>1"} & $=$ & @{term "Left(Char a)"}\\ - @{term "v\<^sub>2"} & $=$ & @{term "Right(Seq (Left(Char a)) (Right Void))"}\\ - @{term "v\<^sub>3"} & $=$ & @{term "Right(Seq (Right(Char a)) (Left(Char a)))"} - \end{tabular} - \end{center} - - \noindent Moreover @{term "v\<^sub>1 >(r::rexp) v\<^sub>2"} and @{term "v\<^sub>2 >(r::rexp) - v\<^sub>3"}, but \emph{not} @{term "v\<^sub>1 >(r::rexp) v\<^sub>3"}! The reason is that - although @{term "v\<^sub>3"} is a @{text "Right"}-value, it can match a longer - string, namely @{term "flat v\<^sub>3 = [a,a]"}, while @{term "flat v\<^sub>1"} (and - @{term "flat v\<^sub>2"}) matches only @{term "[a]"}. So transitivity in this - formulation does not hold---in this example actually @{term "v\<^sub>3 - >(r::rexp) v\<^sub>1"}! - - Sulzmann and Lu ``fix'' this problem by weakening the transitivity - property. They require in addition that the underlying strings are of the - same length. This excludes the counter example above and any - counter-example we were able to find (by hand and by machine). Thus the - transitivity lemma should be formulated as: - - \begin{conject} - Suppose @{term "\ v\<^sub>1 : r"}, @{term "\ v\<^sub>2 : r"} and @{term "\ v\<^sub>3 : r"}, - and also @{text "|v\<^sub>1| = |v\<^sub>2| = |v\<^sub>3|"}.\\ - If @{term "v\<^sub>1 >(r::rexp) (v\<^sub>2::val)"} and @{term "v\<^sub>2 >(r::rexp) (v\<^sub>3::val)"} - then @{term "v\<^sub>1 >(r::rexp) (v\<^sub>3::val)"}. - \end{conject} - - \noindent While we agree with Sulzmann and Lu that this property - probably(!) holds, proving it seems not so straightforward: although one - begins with the assumption that the values have the same flattening, this - cannot be maintained as one descends into the induction. This is a problem - that occurs in a number of places in the proofs by Sulzmann and Lu. - - Although they do not give an explicit proof of the transitivity property, - they give a closely related property about the existence of maximal - elements. They state that this can be verified by an induction on @{term r}. We - disagree with this as we shall show next in case of transitivity. The case - where the reasoning breaks down is the sequence case, say @{term "SEQ r\<^sub>1 r\<^sub>2"}. 
- The induction hypotheses in this case are +*} -\begin{center} -\begin{tabular}{@ {}c@ {\hspace{10mm}}c@ {}} -\begin{tabular}{@ {}l@ {\hspace{-7mm}}l@ {}} -IH @{term "r\<^sub>1"}:\\ -@{text "\ v\<^sub>1, v\<^sub>2, v\<^sub>3."} \\ - & @{term "\ v\<^sub>1 : r\<^sub>1"}\;@{text "\"} - @{term "\ v\<^sub>2 : r\<^sub>1"}\;@{text "\"} - @{term "\ v\<^sub>3 : r\<^sub>1"}\\ - & @{text "\"} @{text "|v\<^sub>1| = |v\<^sub>2| = |v\<^sub>3|"}\\ - & @{text "\"} @{term "v\<^sub>1 >(r\<^sub>1::rexp) v\<^sub>2 \ v\<^sub>2 >(r\<^sub>1::rexp) v\<^sub>3"}\medskip\\ - & $\Rightarrow$ @{term "v\<^sub>1 >(r\<^sub>1::rexp) v\<^sub>3"} -\end{tabular} & -\begin{tabular}{@ {}l@ {\hspace{-7mm}}l@ {}} -IH @{term "r\<^sub>2"}:\\ -@{text "\ v\<^sub>1, v\<^sub>2, v\<^sub>3."}\\ - & @{term "\ v\<^sub>1 : r\<^sub>2"}\;@{text "\"} - @{term "\ v\<^sub>2 : r\<^sub>2"}\;@{text "\"} - @{term "\ v\<^sub>3 : r\<^sub>2"}\\ - & @{text "\"} @{text "|v\<^sub>1| = |v\<^sub>2| = |v\<^sub>3|"}\\ - & @{text "\"} @{term "v\<^sub>1 >(r\<^sub>2::rexp) v\<^sub>2 \ v\<^sub>2 >(r\<^sub>2::rexp) v\<^sub>3"}\medskip\\ - & $\Rightarrow$ @{term "v\<^sub>1 >(r\<^sub>2::rexp) v\<^sub>3"} -\end{tabular} -\end{tabular} -\end{center} - - \noindent We can assume that - % - \begin{equation} - @{term "(Seq (v\<^sub>1\<^sub>l) (v\<^sub>1\<^sub>r)) >(SEQ r\<^sub>1 r\<^sub>2) (Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r))"} - \qquad\textrm{and}\qquad - @{term "(Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r)) >(SEQ r\<^sub>1 r\<^sub>2) (Seq (v\<^sub>3\<^sub>l) (v\<^sub>3\<^sub>r))"} - \label{assms} - \end{equation} - - \noindent hold, and furthermore that the values have equal length, namely: - % - \begin{equation} - @{term "flat (Seq (v\<^sub>1\<^sub>l) (v\<^sub>1\<^sub>r)) = flat (Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r))"} - \qquad\textrm{and}\qquad - @{term "flat (Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r)) = flat (Seq (v\<^sub>3\<^sub>l) (v\<^sub>3\<^sub>r))"} - \label{lens} - \end{equation} - - \noindent We need to show that @{term "(Seq (v\<^sub>1\<^sub>l) (v\<^sub>1\<^sub>r)) >(SEQ r\<^sub>1 r\<^sub>2) - (Seq (v\<^sub>3\<^sub>l) (v\<^sub>3\<^sub>r))"} holds. We can proceed by analysing how the - assumptions in \eqref{assms} have arisen. There are four cases. Let us - assume we are in the case where we know - - \[ - @{term "v\<^sub>1\<^sub>l >(r\<^sub>1::rexp) v\<^sub>2\<^sub>l"} - \qquad\textrm{and}\qquad - @{term "v\<^sub>2\<^sub>l >(r\<^sub>1::rexp) v\<^sub>3\<^sub>l"} - \] - - \noindent and also know the corresponding inhabitation judgements. This is - exactly a case where we would like to apply the induction hypothesis - IH~$r_1$. But we cannot! We still need to show that @{term "flat (v\<^sub>1\<^sub>l) = - flat(v\<^sub>2\<^sub>l)"} and @{term "flat(v\<^sub>2\<^sub>l) = flat(v\<^sub>3\<^sub>l)"}. We know from - \eqref{lens} that the lengths of the sequence values are equal, but from - this we cannot infer anything about the lengths of the component values. - Indeed in general they will be unequal, that is - - \[ - @{term "flat(v\<^sub>1\<^sub>l) \ flat(v\<^sub>2\<^sub>l)"} - \qquad\textrm{and}\qquad - @{term "flat(v\<^sub>1\<^sub>r) \ flat(v\<^sub>2\<^sub>r)"} - \] - - \noindent but still \eqref{lens} will hold. Now we are stuck, since the IH - does not apply. As said, this problem where the induction hypothesis does - not apply arises in several places in the proof of Sulzmann and Lu, not - just for proving transitivity. 
- -*} section {* Conclusion *} diff -r e2828c4a1e23 -r d36be1e356c0 thys/Journal/document/root.bib --- a/thys/Journal/document/root.bib Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Journal/document/root.bib Tue Jul 18 18:39:20 2017 +0100 @@ -1,5 +1,25 @@ + + + +@Misc{POSIX, + title = {{T}he {O}pen {G}roup {B}ase {S}pecification {I}ssue 6 {IEEE} {S}td 1003.1 2004 {E}dition}, + year = {2004}, + note = {\url{http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html}} +} + + +@InProceedings{AusafDyckhoffUrban2016, + author = {F.~Ausaf and R.~Dyckhoff and C.~Urban}, + title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions ({P}roof {P}earl)}, + year = {2016}, + booktitle = {Proc.~of the 7th International Conference on Interactive Theorem Proving (ITP)}, + volume = {9807}, + series = {LNCS}, + pages = {69--86} +} + @article{aduAFP16, - author = {Fahad Ausaf and Roy Dyckhoff and Christian Urban}, + author = {F.~Ausaf and R.~Dyckhoff and C.~Urban}, title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions}, journal = {Archive of Formal Proofs}, year = 2016, @@ -255,13 +275,6 @@ series = {LNCS} } -@book{Michaelson, - title={An introduction to functional programming through lambda calculus}, - author={Michaelson, Greg}, - year={2011}, - publisher={Courier Corporation} -} - @article{Owens2008, author = {S.~Owens and K.~Slind}, title = {{A}dapting {F}unctional {P}rograms to {H}igher {O}rder {L}ogic}, @@ -273,92 +286,8 @@ } -@book{Velleman, - title={How to prove it: a structured approach}, - author={Velleman, Daniel J}, - year={2006}, - publisher={Cambridge University Press} -} - -@book{Jones, - title={Implementing functional languages:[a tutorial]}, - author={Jones, Simon L Peyton and Lester, David R}, - volume={124}, - year={1992}, - publisher={Prentice Hall Reading} -} - -@article{Cardelli, - title={Type systems}, - author={Cardelli, Luca}, - journal={ACM Computing Surveys}, - volume={28}, - number={1}, - pages={263--264}, - year={1996} -} - -@article{Morrisett, - author = {J. Gregory Morrisett and - Karl Crary and - Neal Glew and - David Walker}, - title = {Stack-based typed assembly language}, - journal = {J. Funct. Program.}, - volume = {13}, - number = {5}, - pages = {957--959}, - year = {2003}, - url = {http://dx.doi.org/10.1017/S0956796802004446}, - doi = {10.1017/S0956796802004446}, - timestamp = {Fri, 19 Mar 2004 13:48:27 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/jfp/MorrisettCGW03}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@book{Nipkow, - title={Concrete Semantics: With Isabelle/HOL}, - author={Nipkow, Tobias and Klein, Gerwin}, - year={2014}, - publisher={Springer} -} - -@article{Dube, - author = {Danny Dub{\'{e}} and - Marc Feeley}, - title = {Efficiently building a parse tree from a regular expression}, - journal = {Acta Inf.}, - volume = {37}, - number = {2}, - pages = {121--144}, - year = {2000}, - url = {http://link.springer.de/link/service/journals/00236/bibs/0037002/00370121.htm}, - timestamp = {Tue, 25 Nov 2003 14:51:21 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/acta/DubeF00}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{Morrisett2, - author = {J. Gregory Morrisett and - David Walker and - Karl Crary and - Neal Glew}, - title = {From system {F} to typed assembly language}, - journal = {{ACM} Trans. Program. Lang. 
Syst.}, - volume = {21}, - number = {3}, - pages = {527--568}, - year = {1999}, - url = {http://doi.acm.org/10.1145/319301.319345}, - doi = {10.1145/319301.319345}, - timestamp = {Wed, 26 Nov 2003 14:26:46 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/toplas/MorrisettWCG99}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - @article{Owens2, - author = {Scott Owens and - Konrad Slind}, + author = {S.~Owens and K.~Slind}, title = {Adapting functional programs to higher order logic}, journal = {Higher-Order and Symbolic Computation}, volume = {21}, @@ -379,3 +308,15 @@ +@InProceedings{OkuiSuzuki2013, + author = {S.~Okui and T.~Suzuki}, + title = {{D}isambiguation in {R}egular {E}xpression {M}atching via + {P}osition {A}utomata with {A}ugmented {T}ransitions}, + booktitle = {Proc.~of the 15th International Conference on Implementation + and Application of Automata (CIAA)}, + year = {2010}, + volume = {6482}, + series = {LNCS}, + pages = {231--240} +} + diff -r e2828c4a1e23 -r d36be1e356c0 thys/Journal/document/root.tex --- a/thys/Journal/document/root.tex Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Journal/document/root.tex Tue Jul 18 18:39:20 2017 +0100 @@ -33,8 +33,17 @@ \newtheorem{conject}{Conjecture} \begin{document} +\renewcommand{\thefootnote}{$\star$} \footnotetext[1]{This paper is a + revised and expanded version of \cite{AusafDyckhoffUrban2016}. + Compared with that paper we give a second definition for POSIX + values and prove that it is equivalent to the original one. This + definition is based on an ordering of values and very similar to the + definition given by Sulzmann and Lu~\cite{Sulzmann2014}. We also + extend our results to additional constructors of regular + expressions.} \renewcommand{\thefootnote}{\arabic{footnote}} -\title{POSIX {L}exing with {D}erivatives of {R}egular {E}xpressions (Proof Pearl)} + +\title{POSIX {L}exing with {D}erivatives of {R}egular {E}xpressions} \author{Fahad Ausaf\inst{1} \and Roy Dyckhoff\inst{2} \and Christian Urban\inst{3}} \institute{King's College London\\ \email{fahad.ausaf@icloud.com} @@ -50,20 +59,19 @@ expressions. They can be used for a very simple regular expression matching algorithm. Sulzmann and Lu cleverly extended this algorithm in order to deal with POSIX matching, which is the underlying -disambiguation strategy for regular expressions needed in lexers. -Sulzmann and Lu have made available on-line what they call a -``rigorous proof'' of the correctness of their algorithm w.r.t.\ their -specification; regrettably, it appears to us to have unfillable gaps. -In the first part of this paper we give our inductive definition of -what a POSIX value is and show $(i)$ that such a value is unique (for -given regular expression and string being matched) and $(ii)$ that -Sulzmann and Lu's algorithm always generates such a value (provided -that the regular expression matches the string). We also prove the -correctness of an optimised version of the POSIX matching -algorithm. Our definitions and proof are much simpler than those by -Sulzmann and Lu and can be easily formalised in Isabelle/HOL. In the -second part we analyse the correctness argument by Sulzmann and Lu and -explain why the gaps in this argument cannot be filled easily.\smallskip +disambiguation strategy for regular expressions needed in lexers. 
In +the first part of this paper we give our inductive definition of what +a POSIX value is and show $(i)$ that such a value is unique (for given +regular expression and string being matched) and $(ii)$ that Sulzmann +and Lu's algorithm always generates such a value (provided that the +regular expression matches the string). We also prove the correctness +of an optimised version of the POSIX matching algorithm. In the +second part we show that $(iii)$ our inductive definition of a POSIX +value is equivalent to an alternative definition by Okui and Suzuki +which identifies a POSIX value as least element according to an +ordering of values. The advantage of the definition based on the +ordering is that it implements more directly the POSIX +longest-leftmost matching semantics.\smallskip {\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL diff -r e2828c4a1e23 -r d36be1e356c0 thys/Lexer.thy --- a/thys/Lexer.thy Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Lexer.thy Tue Jul 18 18:39:20 2017 +0100 @@ -79,17 +79,20 @@ by (auto) (metis Star.simps) lemma Star_decomp: - assumes a: "c # x \ A\" - shows "\a b. x = a @ b \ c # a \ A \ b \ A\" -using a + assumes "c # x \ A\" + shows "\s1 s2. x = s1 @ s2 \ c # s1 \ A \ s2 \ A\" +using assms by (induct x\"c # x" rule: Star.induct) (auto simp add: append_eq_Cons_conv) lemma Star_Der_Sequ: shows "Der c (A\) \ (Der c A) ;; A\" unfolding Der_def -by(auto simp add: Der_def Sequ_def Star_decomp) - +apply(rule subsetI) +apply(simp) +unfolding Sequ_def +apply(simp) +by(auto simp add: Sequ_def Star_decomp) lemma Der_star [simp]: @@ -245,8 +248,7 @@ | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" | "\ Void : ONE" | "\ Char c : CHAR c" -| "\ Stars [] : STAR r" -| "\\ v : r; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" +| "\v \ set vs. \ v : r \ \ Stars vs : STAR r" inductive_cases Prf_elims: "\ v : ZERO" @@ -254,31 +256,25 @@ "\ v : ALT r1 r2" "\ v : ONE" "\ v : CHAR c" -(* "\ vs : STAR r"*) + "\ vs : STAR r" + +lemma Star_concat: + assumes "\s \ set ss. s \ A" + shows "concat ss \ A\" +using assms by (induct ss) (auto) + lemma Prf_flat_L: assumes "\ v : r" shows "flat v \ L r" using assms -by(induct v r rule: Prf.induct) - (auto simp add: Sequ_def) - -lemma Prf_Stars: - assumes "\v \ set vs. \ v : r" - shows "\ Stars vs : STAR r" -using assms -by(induct vs) (auto intro: Prf.intros) +by (induct v r rule: Prf.induct) + (auto simp add: Sequ_def Star_concat) lemma Prf_Stars_append: assumes "\ Stars vs1 : STAR r" "\ Stars vs2 : STAR r" shows "\ Stars (vs1 @ vs2) : STAR r" using assms -apply(induct vs1 arbitrary: vs2) -apply(auto intro: Prf.intros) -apply(erule Prf.cases) -apply(simp_all) -apply(auto intro: Prf.intros) -done - +by (auto intro!: Prf.intros elim!: Prf_elims) lemma Star_string: assumes "s \ A\" @@ -306,40 +302,45 @@ assumes "\ v : r" shows "flat v \ L r" using assms -by (induct)(auto simp add: Sequ_def) +by (induct) (auto simp add: Sequ_def Star_concat) lemma L_flat_Prf2: assumes "s \ L r" shows "\v. \ v : r \ flat v = s" using assms -apply(induct r arbitrary: s) -apply(auto simp add: Sequ_def intro: Prf.intros) -using Prf.intros(1) flat.simps(5) apply blast -using Prf.intros(2) flat.simps(3) apply blast -using Prf.intros(3) flat.simps(4) apply blast -apply(subgoal_tac "\vs::val list. concat (map flat vs) = s \ (\v \ set vs. 
\ v : r)") -apply(auto)[1] -apply(rule_tac x="Stars vs" in exI) -apply(simp) -apply (simp add: Prf_Stars) -apply(drule Star_string) -apply(auto) -apply(rule Star_val) -apply(auto) -done +proof(induct r arbitrary: s) + case (STAR r s) + have IH: "\s. s \ L r \ \v. \ v : r \ flat v = s" by fact + have "s \ L (STAR r)" by fact + then obtain ss where "concat ss = s" "\s \ set ss. s \ L r" + using Star_string by auto + then obtain vs where "concat (map flat vs) = s" "\v\set vs. \ v : r" + using IH Star_val by blast + then show "\v. \ v : STAR r \ flat v = s" + using Prf.intros(6) flat_Stars by blast +next + case (SEQ r1 r2 s) + then show "\v. \ v : SEQ r1 r2 \ flat v = s" + unfolding Sequ_def L.simps by (fastforce intro: Prf.intros) +next + case (ALT r1 r2 s) + then show "\v. \ v : ALT r1 r2 \ flat v = s" + unfolding L.simps by (fastforce intro: Prf.intros) +qed (auto intro: Prf.intros) lemma L_flat_Prf: "L(r) = {flat v | v. \ v : r}" using L_flat_Prf1 L_flat_Prf2 by blast +(* lemma Star_values_exists: assumes "s \ (L r)\" shows "\vs. concat (map flat vs) = s \ \ Stars vs : STAR r" using assms apply(drule_tac Star_string) apply(auto) -by (metis L_flat_Prf2 Prf_Stars Star_val) - +by (metis L_flat_Prf2 Prf.intros(6) Star_val) +*) section {* Sulzmann and Lu functions *} @@ -385,24 +386,14 @@ using assms apply(induct r arbitrary: c v rule: rexp.induct) apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) -(* STAR *) -apply(rotate_tac 2) -apply(erule Prf.cases) -apply(simp_all)[7] -apply(auto) -apply (metis Prf.intros(6) Prf.intros(7)) -by (metis Prf.intros(7)) +done lemma Prf_injval_flat: assumes "\ v : der c r" shows "flat (injval r c v) = c # (flat v)" using assms apply(induct arbitrary: v rule: der.induct) -apply(auto elim!: Prf_elims split: if_splits) -apply(metis mkeps_flat) -apply(rotate_tac 2) -apply(erule Prf.cases) -apply(simp_all)[7] +apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits) done @@ -444,7 +435,9 @@ assumes "s \ r \ v" shows "\ v : r" using assms -by (induct s r v rule: Posix.induct)(auto intro: Prf.intros) +apply(induct s r v rule: Posix.induct) +apply(auto intro!: Prf.intros elim!: Prf_elims) +done lemma Posix_mkeps: diff -r e2828c4a1e23 -r d36be1e356c0 thys/Positions.thy --- a/thys/Positions.thy Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Positions.thy Tue Jul 18 18:39:20 2017 +0100 @@ -3,8 +3,7 @@ imports "Lexer" begin - -section {* Position in values *} +section {* Positions in Values *} fun at :: "val \ nat list \ val" @@ -16,6 +15,8 @@ | "at (Seq v1 v2) (Suc 0#ps)= at v2 ps" | "at (Stars vs) (n#ps)= at (nth vs n) ps" + + fun Pos :: "val \ (nat list) set" where "Pos (Void) = {[]}" @@ -26,15 +27,13 @@ | "Pos (Stars []) = {[]}" | "Pos (Stars (v#vs)) = {[]} \ {0#ps | ps. ps \ Pos v} \ {Suc n#ps | n ps. n#ps \ Pos (Stars vs)}" + lemma Pos_stars: "Pos (Stars vs) = {[]} \ (\n < length vs. {n#ps | ps. ps \ Pos (vs ! 
n)})" apply(induct vs) apply(simp) -apply(simp) apply(simp add: insert_ident) apply(rule subset_antisym) -apply(auto) -apply(auto)[1] using less_Suc_eq_0_disj by auto lemma Pos_empty: @@ -44,7 +43,7 @@ fun intlen :: "'a list \ int" where "intlen [] = 0" -| "intlen (x#xs) = 1 + intlen xs" +| "intlen (x # xs) = 1 + intlen xs" lemma intlen_bigger: shows "0 \ intlen xs" @@ -62,6 +61,15 @@ apply (smt Suc_lessI intlen.simps(2) length_Suc_conv nat_neq_iff) by (smt Suc_lessE intlen.simps(2) length_Suc_conv) +lemma intlen_length_eq: + shows "intlen xs = intlen ys \ length xs = length ys" +apply(induct xs arbitrary: ys) +apply (auto simp add: intlen_bigger not_less) +apply(case_tac ys) +apply(simp_all) +apply (smt intlen_bigger) +apply (smt intlen.elims intlen_bigger length_Suc_conv) +by (metis intlen.simps(2) length_Suc_conv) definition pflat_len :: "val \ nat list => int" where @@ -87,9 +95,9 @@ apply(auto simp add: less_Suc_eq_0_disj pflat_len_simps) done -lemma outside_lemma: - assumes "p \ Pos v1 \ Pos v2" - shows "pflat_len v1 p = pflat_len v2 p" +lemma pflat_len_outside: + assumes "p \ Pos v1" + shows "pflat_len v1 p = -1 " using assms by (auto simp add: pflat_len_def) @@ -121,10 +129,8 @@ fixes xs ys :: "nat list" shows "[] \lex ys \ ys \ []" and "xs \lex [] \ False" - and "(x # xs) \lex (y # ys) \ (x < y \ (\ y < x \ xs \lex ys))" -apply(auto elim: lex_list.cases intro: lex_list.intros) -apply(auto simp add: neq_Nil_conv not_less_iff_gr_or_eq intro: lex_list.intros) -done + and "(x # xs) \lex (y # ys) \ (x < y \ (x = y \ xs \lex ys))" +by (auto simp add: neq_Nil_conv elim: lex_list.cases intro: lex_list.intros) lemma lex_trans: fixes ps1 ps2 ps3 :: "nat list" @@ -139,7 +145,7 @@ fixes p q :: "nat list" shows "p = q \ p \lex q \ q \lex p" apply(induct p arbitrary: q) -apply(auto) +apply(auto elim: lex_list.cases) apply(case_tac q) apply(auto) done @@ -147,7 +153,7 @@ -section {* Ordering of values according to Okui & Suzuki *} +section {* POSIX Ordering of Values According to Okui & Suzuki *} definition PosOrd:: "val \ nat list \ val \ bool" ("_ \val _ _" [60, 60, 59] 60) @@ -155,111 +161,95 @@ "v1 \val p v2 \ pflat_len v1 p > pflat_len v2 p \ (\q \ Pos v1 \ Pos v2. q \lex p \ pflat_len v1 q = pflat_len v2 q)" -definition PosOrd2:: "val \ nat list \ val \ bool" ("_ \val2 _ _" [60, 60, 59] 60) -where - "v1 \val2 p v2 \ p \ Pos v1 \ - pflat_len v1 p > pflat_len v2 p \ - (\q \ Pos v1 \ Pos v2. q \lex p \ pflat_len v1 q = pflat_len v2 q)" + -lemma XXX: - "v1 \val p v2 \ v1 \val2 p v2" -apply(auto simp add: PosOrd_def PosOrd2_def) -apply(auto simp add: pflat_len_def split: if_splits) -by (smt intlen_bigger) +definition pflat_len2 :: "val \ nat list => (bool * nat)" +where + "pflat_len2 v p \ (if p \ Pos v then (True, length (flat (at v p))) else (False, 0))" + +instance prod :: (ord, ord) ord + by (rule Orderings.class.Orderings.ord.of_class.intro) -(* some tests *) +lemma "(0, 0) < (3::nat, 2::nat)" -definition ValFlat_ord:: "val \ nat list \ val \ bool" ("_ \fval _ _") -where - "v1 \fval p v2 \ p \ Pos v1 \ - (p \ Pos v2 \ flat (at v2 p) \spre flat (at v1 p)) \ - (\q \ Pos v1 \ Pos v2. 
q \lex p \ (pflat_len v1 q = pflat_len v2 q))" + + -lemma - assumes "v1 \fval p v2" - shows "v1 \val p v2" -using assms -unfolding ValFlat_ord_def PosOrd_def -apply(clarify) -apply(simp add: pflat_len_def) -apply(auto)[1] -apply(smt intlen_bigger) -apply(simp add: sprefix_list_def prefix_list_def) -apply(auto)[1] -apply(drule sym) -apply(simp add: intlen_append) -apply (metis intlen.simps(1) intlen_length length_greater_0_conv list.size(3)) -apply(smt intlen_bigger) -done - -lemma - assumes "v1 \val p v2" "flat (at v2 p) \spre flat (at v1 p)" - shows "v1 \fval p v2" -using assms -unfolding ValFlat_ord_def PosOrd_def -apply(clarify) -oops +definition PosOrd2:: "val \ nat list \ val \ bool" ("_ \val2 _ _" [60, 60, 59] 60) +where + "v1 \val2 p v2 \ (fst (pflat_len2 v1 p) > fst (pflat_len2 v2 p) \ + snd (pflat_len2 v1 p) > fst (pflat_len2 v2 p)) \ + (\q \ Pos v1 \ Pos v2. q \lex p \ pflat_len v1 q = pflat_len v2 q)" definition PosOrd_ex:: "val \ val \ bool" ("_ :\val _" [60, 59] 60) where - "v1 :\val v2 \ (\p. v1 \val p v2)" + "v1 :\val v2 \ \p. v1 \val p v2" -definition PosOrd_ex1:: "val \ val \ bool" ("_ :\val _" [60, 59] 60) +definition PosOrd_ex_eq:: "val \ val \ bool" ("_ :\val _" [60, 59] 60) where "v1 :\val v2 \ v1 :\val v2 \ v1 = v2" + lemma PosOrd_shorterE: assumes "v1 :\val v2" shows "length (flat v2) \ length (flat v1)" using assms -apply(auto simp add: PosOrd_ex_def PosOrd_def) +apply(auto simp add: pflat_len_simps PosOrd_ex_def PosOrd_def) apply(case_tac p) -apply(simp add: pflat_len_simps) -apply(simp add: intlen_length) +apply(simp add: pflat_len_simps intlen_length) apply(simp) apply(drule_tac x="[]" in bspec) apply(simp add: Pos_empty) -apply(simp add: pflat_len_simps) -by (metis intlen_length le_less less_irrefl linear) - +apply(simp add: pflat_len_simps le_less intlen_length_eq) +done lemma PosOrd_shorterI: - assumes "length (flat v') < length (flat v)" - shows "v :\val v'" + assumes "length (flat v2) < length (flat v1)" + shows "v1 :\val v2" using assms unfolding PosOrd_ex_def by (metis intlen_length lex_simps(2) pflat_len_simps(9) PosOrd_def) lemma PosOrd_spreI: - assumes "(flat v') \spre (flat v)" + assumes "flat v' \spre flat v" shows "v :\val v'" using assms apply(rule_tac PosOrd_shorterI) by (metis append_eq_conv_conj le_less_linear prefix_list_def sprefix_list_def take_all) + lemma PosOrd_Left_Right: assumes "flat v1 = flat v2" shows "Left v1 :\val Right v2" unfolding PosOrd_ex_def apply(rule_tac x="[0]" in exI) -using assms +using assms apply(auto simp add: PosOrd_def pflat_len_simps) apply(smt intlen_bigger) done -lemma PosOrd_LeftI: - assumes "v :\val v'" "flat v = flat v'" - shows "(Left v) :\val (Left v')" -using assms(1) +lemma PosOrd_Left_eq: + assumes "flat v = flat v'" + shows "Left v :\val Left v' \ v :\val v'" +using assms unfolding PosOrd_ex_def apply(auto) +apply(case_tac p) +apply(simp add: PosOrd_def pflat_len_simps) +apply(case_tac a) +apply(simp add: PosOrd_def pflat_len_simps) +apply(rule_tac x="list" in exI) +apply(auto simp add: PosOrd_def pflat_len_simps)[1] +apply (smt Un_def lex_list.intros(2) mem_Collect_eq pflat_len_simps(3)) +apply (smt Collect_disj_eq lex_list.intros(2) mem_Collect_eq pflat_len_simps(3)) +apply(auto simp add: PosOrd_def pflat_len_outside)[1] apply(rule_tac x="0#p" in exI) -using assms(2) apply(auto simp add: PosOrd_def pflat_len_simps) done + lemma PosOrd_RightI: assumes "v :\val v'" "flat v = flat v'" shows "(Right v) :\val (Right v')" @@ -271,31 +261,6 @@ apply(auto simp add: PosOrd_def pflat_len_simps) done -lemma 
PosOrd_LeftE: - assumes "(Left v1) :\val (Left v2)" - shows "v1 :\val v2" -using assms -apply(simp add: PosOrd_ex_def) -apply(erule exE) -apply(case_tac p) -apply(simp add: PosOrd_def) -apply(auto simp add: pflat_len_simps) -apply(rule_tac x="[]" in exI) -apply(simp add: Pos_empty pflat_len_simps) -apply(auto simp add: pflat_len_simps PosOrd_def) -apply(case_tac a) -apply(auto simp add: pflat_len_simps)[1] -apply(rule_tac x="list" in exI) -apply(auto) -apply(drule_tac x="0#q" in bspec) -apply(simp) -apply(simp add: pflat_len_simps) -apply(drule_tac x="0#q" in bspec) -apply(simp) -apply(simp add: pflat_len_simps) -apply(simp add: pflat_len_def) -done - lemma PosOrd_RightE: assumes "(Right v1) :\val (Right v2)" shows "v1 :\val v2" @@ -517,14 +482,14 @@ by (rule lex_trichotomous) moreover { assume "p = p'" - with as have "v1 \val p v3" unfolding PosOrd_def - by (smt outside_lemma) + with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def + by fastforce then have " v1 :\val v3" unfolding PosOrd_ex_def by blast } moreover { assume "p \lex p'" - with as have "v1 \val p v3" unfolding PosOrd_def - by (metis lex_trans outside_lemma) + with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def + by (smt Un_iff lex_trans) then have " v1 :\val v3" unfolding PosOrd_ex_def by blast } moreover @@ -560,26 +525,47 @@ apply(auto simp add: PosOrd_ex_def PosOrd_def) using assms PosOrd_irrefl PosOrd_trans by blast -lemma WW2: - assumes "\(v1 :\val v2)" - shows "v1 = v2 \ v2 :\val v1" -using assms -oops - lemma PosOrd_SeqE2: - assumes "(Seq v1 v2) :\val (Seq v1' v2')" - shows "v1 :\val v1' \ (v1 = v1' \ v2 :\val v2')" + assumes "(Seq v1 v2) :\val (Seq v1' v2')" "flat (Seq v1 v2) = flat (Seq v1' v2')" + shows "v1 :\val v1' \ (intlen (flat v1) = intlen (flat v1') \ v2 :\val v2')" using assms apply(frule_tac PosOrd_SeqE) apply(erule disjE) apply(simp) -apply(auto) apply(case_tac "v1 :\val v1'") apply(simp) +apply(rule disjI2) +apply(rule conjI) +prefer 2 +apply(simp) +apply(auto) apply(auto simp add: PosOrd_ex_def) -apply(case_tac "v1 = v1'") +apply(auto simp add: PosOrd_def pflat_len_simps) +apply(case_tac p) +apply(auto simp add: PosOrd_def pflat_len_simps) +apply(case_tac a) +apply(auto simp add: PosOrd_def pflat_len_simps) +apply (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def WW1 assms(1) assms(2)) +by (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def WW1 assms(1) assms(2)) + +lemma PosOrd_SeqE4: + assumes "(Seq v1 v2) :\val (Seq v1' v2')" "flat (Seq v1 v2) = flat (Seq v1' v2')" + shows "v1 :\val v1' \ (flat v1 = flat v1' \ v2 :\val v2')" +using assms +apply(frule_tac PosOrd_SeqE) +apply(erule disjE) apply(simp) -oops +apply(case_tac "v1 :\val v1'") +apply(simp) +apply(rule disjI2) +apply(rule conjI) +prefer 2 +apply(simp) +apply(auto) +apply(case_tac "length (flat v1') < length (flat v1)") +using PosOrd_shorterI apply blast +by (metis PosOrd_SeqI1 PosOrd_shorterI WW1 antisym_conv3 append_eq_append_conv assms(2)) + section {* CPT and CPTpre *} @@ -592,84 +578,19 @@ | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" | "\ Void : ONE" | "\ Char c : CHAR c" -| "\ Stars [] : STAR r" -| "\\ v : r; flat v \ []; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" +| "\v \ set vs. \ v : r \ flat v \ [] \ \ Stars vs : STAR r" lemma Prf_CPrf: assumes "\ v : r" shows "\ v : r" using assms -by (induct) (auto intro: Prf.intros) - -lemma pflat_len_equal_iff: - assumes "\ v1 : r" "\ v2 : r" - and "\p. 
pflat_len v1 p = pflat_len v2 p" - shows "v1 = v2" -using assms -apply(induct v1 r arbitrary: v2 rule: CPrf.induct) -apply(rotate_tac 4) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis pflat_len_simps(1) pflat_len_simps(2)) -apply(rotate_tac 2) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis pflat_len_simps(3)) -apply (metis intlen.simps(1) intlen_length length_greater_0_conv list.size(3) neg_0_le_iff_le not_less not_less_iff_gr_or_eq not_one_le_zero pflat_len_simps(3) pflat_len_simps(6) pflat_len_simps(9)) -apply(rotate_tac 2) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis intlen.simps(1) intlen_length length_greater_0_conv list.size(3) neg_0_le_iff_le not_less not_less_iff_gr_or_eq not_one_le_zero pflat_len_simps(3) pflat_len_simps(6) pflat_len_simps(9)) -apply (metis pflat_len_simps(5)) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply(erule CPrf.cases) -apply(simp_all)[7] -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis append_Cons flat.simps(6) flat.simps(7) intlen_length length_greater_0_conv neq_Nil_conv not_less_iff_gr_or_eq pflat_len_simps(9)) -apply(rotate_tac 5) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis append_Cons flat.simps(6) flat.simps(7) intlen_length length_greater_0_conv list.distinct(1) list.exhaust not_less_iff_gr_or_eq pflat_len_simps(9)) -apply(auto) -apply (metis pflat_len_simps(8)) -apply(subgoal_tac "v = va") -prefer 2 -apply (metis pflat_len_simps(8)) -apply(simp) -apply(rotate_tac 3) -apply(drule_tac x="Stars vsa" in meta_spec) -apply(simp) -apply(drule_tac meta_mp) -apply(rule allI) -apply(case_tac p) -apply(simp add: pflat_len_simps) -apply(drule_tac x="[]" in spec) -apply(simp add: pflat_len_simps intlen_append) -apply(drule_tac x="Suc a#list" in spec) -apply(simp add: pflat_len_simps intlen_append) -apply(simp) -done - -lemma PosOrd_trichotomous_stronger: - assumes "\ v1 : r" "\ v2 : r" - shows "v1 :\val v2 \ v2 :\val v1 \ (v1 = v2)" -oops +by (induct)(auto intro: Prf.intros) lemma CPrf_stars: assumes "\ Stars vs : STAR r" shows "\v \ set vs. flat v \ [] \ \ v : r" using assms -apply(induct vs) -apply(auto) -apply(erule CPrf.cases) -apply(simp_all) -apply(erule CPrf.cases) -apply(simp_all) -apply(erule CPrf.cases) -apply(simp_all) -apply(erule CPrf.cases) +apply(erule_tac CPrf.cases) apply(simp_all) done @@ -677,11 +598,8 @@ assumes "\ Stars (vs1 @ vs2) : STAR r" shows "\ Stars vs1 : STAR r \ \ Stars vs2 : STAR r" using assms -apply(induct vs1 arbitrary: vs2) -apply(auto intro: CPrf.intros)[1] -apply(erule CPrf.cases) -apply(simp_all) -apply(auto intro: CPrf.intros) +apply(erule_tac CPrf.cases) +apply(auto intro: CPrf.intros elim: Prf.cases) done definition PT :: "rexp \ string \ val set" @@ -697,6 +615,74 @@ shows "CPT r s \ CPTpre r s" by(auto simp add: CPT_def CPTpre_def) +lemma CPT_simps: + shows "CPT ZERO s = {}" + and "CPT ONE s = (if s = [] then {Void} else {})" + and "CPT (CHAR c) s = (if s = [c] then {Char c} else {})" + and "CPT (ALT r1 r2) s = Left ` CPT r1 s \ Right ` CPT r2 s" + and "CPT (SEQ r1 r2) s = + {Seq v1 v2 | v1 v2. flat v1 @ flat v2 = s \ v1 \ CPT r1 (flat v1) \ v2 \ CPT r2 (flat v2)}" + and "CPT (STAR r) s = + Stars ` {vs. concat (map flat vs) = s \ (\v \ set vs. 
v \ CPT r (flat v) \ flat v \ [])}" +apply - +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +(* STAR case *) +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +done + +(* +lemma CPTpre_STAR_finite: + assumes "\s. finite (CPT r s)" + shows "finite (CPT (STAR r) s)" +apply(induct s rule: length_induct) +apply(case_tac xs) +apply(simp) +apply(simp add: CPT_simps) +apply(auto) +apply(rule finite_imageI) +using assms +thm finite_Un +prefer 2 +apply(simp add: CPT_simps) +apply(rule finite_imageI) +apply(rule finite_subset) +apply(rule CPTpre_subsets) +apply(simp) +apply(rule_tac B="(\(v, vs). Stars (v#vs)) ` {(v, vs). v \ CPTpre r (a#list) \ flat v \ [] \ Stars vs \ CPTpre (STAR r) (drop (length (flat v)) (a#list))}" in finite_subset) +apply(auto)[1] +apply(rule finite_imageI) +apply(simp add: Collect_case_prod_Sigma) +apply(rule finite_SigmaI) +apply(rule assms) +apply(case_tac "flat v = []") +apply(simp) +apply(drule_tac x="drop (length (flat v)) (a # list)" in spec) +apply(simp) +apply(auto)[1] +apply(rule test) +apply(simp) +done + lemma CPTpre_subsets: "CPTpre ZERO s = {}" "CPTpre ONE s \ {Void}" @@ -717,6 +703,7 @@ apply(simp_all) apply(erule CPrf.cases) apply(simp_all) + apply(erule CPrf.cases) apply(simp_all) apply(erule CPrf.cases) @@ -845,13 +832,21 @@ apply(rule CPT_CPTpre_subset) apply(rule CPTpre_finite) done +*) lemma Posix_CPT: assumes "s \ r \ v" shows "v \ CPT r s" using assms -by (induct rule: Posix.induct) - (auto simp add: CPT_def intro: CPrf.intros) +apply(induct rule: Posix.induct) +apply(auto simp add: CPT_def intro: CPrf.intros elim: CPrf.cases) +apply(rotate_tac 5) +apply(erule CPrf.cases) +apply(simp_all) +apply(rule CPrf.intros) +apply(simp_all) +done + section {* The Posix Value is smaller than any other Value *} @@ -866,14 +861,14 @@ then have "v = Void" by (simp add: CPT_simps) then show "Void :\val v" - by (simp add: PosOrd_ex1_def) + by (simp add: PosOrd_ex_eq_def) next case (Posix_CHAR c v) have "v \ CPT (CHAR c) [c]" by fact then have "v = Char c" by (simp add: CPT_simps) then show "Char c :\val v" - by (simp add: PosOrd_ex1_def) + by (simp add: PosOrd_ex_eq_def) next case (Posix_ALT1 s r1 v r2 v2) have as1: "s \ r1 \ v" by fact @@ -895,14 +890,14 @@ have "flat v3 = flat v" using as1 Left(3) by (simp add: Posix1(2)) ultimately have "Left v :\val Left v3" - by (auto simp add: PosOrd_ex1_def PosOrd_LeftI) + by (simp add: PosOrd_ex_eq_def PosOrd_Left_eq) then show "Left v :\val v2" unfolding Left . next case (Right v3) have "flat v3 = flat v" using as1 Right(3) by (simp add: Posix1(2)) then have "Left v :\val Right v3" using Right(3) as1 - by (auto simp add: PosOrd_ex1_def PosOrd_Left_Right) + by (auto simp add: PosOrd_ex_eq_def PosOrd_Left_Right) then show "Left v :\val v2" unfolding Right . 
qed next @@ -927,7 +922,7 @@ have "flat v3 = flat v" using as1 Right(3) by (simp add: Posix1(2)) ultimately have "Right v :\val Right v3" - by (auto simp add: PosOrd_ex1_def PosOrd_RightI) + by (auto simp add: PosOrd_ex_eq_def PosOrd_RightI) then show "Right v :\val v2" unfolding Right . next case (Left v3) @@ -961,7 +956,7 @@ by (auto simp add: CPT_def) then have "v1 :\val v3a \ (v1 :\val v3a \ v2 :\val v3b)" using IH1 IH2 by blast then have "Seq v1 v2 :\val Seq v3a v3b" using eqs q2 as1 - unfolding PosOrd_ex1_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) + unfolding PosOrd_ex_eq_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) then show "Seq v1 v2 :\val v3" unfolding eqs by blast next case (Posix_STAR1 s1 r v s2 vs v3) @@ -972,13 +967,20 @@ have cond: "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" by fact have cond2: "flat v \ []" by fact have "v3 \ CPT (STAR r) (s1 @ s2)" by fact - then consider - (NonEmpty) v3a vs3 where - "v3 = Stars (v3a # vs3)" "\ v3a : r" "\ Stars vs3 : STAR r" + then consider + (NonEmpty) v3a vs3 where "v3 = Stars (v3a # vs3)" + "\ v3a : r" "\ Stars vs3 : STAR r" "flat (Stars (v3a # vs3)) = s1 @ s2" | (Empty) "v3 = Stars []" - by (force simp add: CPT_def elim: CPrf.cases) - then show "Stars (v # vs) :\val v3" + unfolding CPT_def + apply(auto) + apply(erule CPrf.cases) + apply(simp_all) + apply(auto)[1] + apply(case_tac vs) + apply(auto) + using CPrf.intros(6) by blast + then show "Stars (v # vs) :\val v3" (* HERE *) proof (cases) case (NonEmpty v3a vs3) have "flat (Stars (v3a # vs3)) = s1 @ s2" using NonEmpty(4) . @@ -993,16 +995,16 @@ using NonEmpty(2,3) by (auto simp add: CPT_def) then have "v :\val v3a \ (v :\val v3a \ Stars vs :\val Stars vs3)" using IH1 IH2 by blast then have "v :\val v3a \ (v = v3a \ Stars vs :\val Stars vs3)" - unfolding PosOrd_ex1_def by auto + unfolding PosOrd_ex_eq_def by auto then have "Stars (v # vs) :\val Stars (v3a # vs3)" using NonEmpty(4) q2 as1 - unfolding PosOrd_ex1_def + unfolding PosOrd_ex_eq_def by (metis PosOrd_StarsI PosOrd_StarsI2 flat.simps(7) val.inject(5)) then show "Stars (v # vs) :\val v3" unfolding NonEmpty by blast next case Empty have "v3 = Stars []" by fact then show "Stars (v # vs) :\val v3" - unfolding PosOrd_ex1_def using cond2 + unfolding PosOrd_ex_eq_def using cond2 by (simp add: PosOrd_shorterI) qed next @@ -1011,7 +1013,7 @@ then have "v2 = Stars []" unfolding CPT_def by (auto elim: CPrf.cases) then show "Stars [] :\val v2" - by (simp add: PosOrd_ex1_def) + by (simp add: PosOrd_ex_eq_def) qed lemma Posix_PosOrd_stronger: @@ -1030,7 +1032,7 @@ then have "flat v2 \spre flat v1" using assms(1) using Posix1(2) by blast then have "v1 :\val v2" - by (simp add: PosOrd_ex1_def PosOrd_spreI) + by (simp add: PosOrd_ex_eq_def PosOrd_spreI) } ultimately show "v1 :\val v2" by blast qed @@ -1040,9 +1042,9 @@ shows "\(\v2 \ CPTpre r s. v2 :\val v1)" using assms by (metis Posix_PosOrd_stronger less_irrefl PosOrd_def - PosOrd_ex1_def PosOrd_ex_def PosOrd_trans) + PosOrd_ex_eq_def PosOrd_ex_def PosOrd_trans) - +(* lemma PosOrd_Posix_Stars: assumes "(Stars vs) \ CPT (STAR r) (flat (Stars vs))" "\v \ set vs. flat v \ r \ v" and "\(\vs2 \ PT (STAR r) (flat (Stars vs)). 
vs2 :\val (Stars vs))" @@ -1115,12 +1117,93 @@ apply(simp) apply (metis flat.simps(7) flat_Stars PosOrd_StarsI2 PosOrd_ex_def) by (metis PosOrd_StarsI PosOrd_ex_def PosOrd_spreI append_assoc append_self_conv flat.simps(7) flat_Stars prefix_list_def sprefix_list_def) +*) + + +lemma test2: + assumes "\v \ set vs. flat v \ r \ v \ flat v \ []" + shows "(Stars vs) \ CPT (STAR r) (flat (Stars vs))" +using assms +apply(induct vs) +apply(auto simp add: CPT_def) +apply(rule CPrf.intros) +apply(simp) +apply(rule CPrf.intros) +apply(simp_all) +by (metis (no_types, lifting) CPT_def Posix_CPT mem_Collect_eq) + + +lemma PosOrd_Posix_Stars: + assumes "\v \ set vs. flat v \ r \ v \ flat v \ []" + and "\(\vs2 \ PT (STAR r) (flat (Stars vs)). vs2 :\val (Stars vs))" + shows "(flat (Stars vs)) \ (STAR r) \ Stars vs" +using assms +proof(induct vs) + case Nil + show "flat (Stars []) \ STAR r \ Stars []" + by(simp add: Posix.intros) +next + case (Cons v vs) + have IH: "\\v\set vs. flat v \ r \ v \ flat v \ []; + \ (\vs2\PT (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)\ + \ flat (Stars vs) \ STAR r \ Stars vs" by fact + have as2: "\v\set (v # vs). flat v \ r \ v \ flat v \ []" by fact + have as3: "\ (\vs2\PT (STAR r) (flat (Stars (v # vs))). vs2 :\val Stars (v # vs))" by fact + have "flat v \ r \ v" using as2 by simp + moreover + have "flat (Stars vs) \ STAR r \ Stars vs" + proof (rule IH) + show "\v\set vs. flat v \ r \ v \ flat v \ []" using as2 by simp + next + show "\ (\vs2\PT (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)" using as3 + apply(auto) + apply(subst (asm) (2) PT_def) + apply(auto) + apply(erule Prf.cases) + apply(simp_all) + apply(drule_tac x="Stars (v # vs)" in bspec) + apply(simp add: PT_def CPT_def) + using Posix1a Prf.intros(6) calculation + apply(rule_tac Prf.intros) + apply(simp add:) + apply (simp add: PosOrd_StarsI2) + done + qed + moreover + have "flat v \ []" using as2 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = flat (Stars vs) \ flat v @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" + using as3 + apply(auto) + apply(drule L_flat_Prf2) + apply(erule exE) + apply(simp only: L.simps[symmetric]) + apply(drule L_flat_Prf2) + apply(erule exE) + apply(clarify) + apply(rotate_tac 5) + apply(erule Prf.cases) + apply(simp_all) + apply(clarify) + apply(drule_tac x="Stars (va#vs)" in bspec) + apply(auto simp add: PT_def)[1] + apply(rule Prf.intros) + apply(simp) + by (simp add: PosOrd_StarsI PosOrd_shorterI) + ultimately show "flat (Stars (v # vs)) \ STAR r \ Stars (v # vs)" + by (simp add: Posix.intros) +qed + section {* The Smallest Value is indeed the Posix Value *} +text {* + The next lemma seems to require PT instead of CPT in the Star-case. +*} + lemma PosOrd_Posix: - assumes "v1 \ CPT r s" "\v2 \ PT r s. \ v2 :\val v1" + assumes "v1 \ CPT r s" "\v\<^sub>2 \ PT r s. \ v\<^sub>2 :\val v1" shows "s \ r \ v1" using assms proof(induct r arbitrary: s v1) @@ -1159,7 +1242,7 @@ unfolding CPT_def Left by (auto elim: CPrf.cases) moreover have "\v2 \ PT r1 s. \ v2 :\val v1'" using as1 - unfolding PT_def Left using Prf.intros(2) PosOrd_LeftI by force + unfolding PT_def Left using Prf.intros(2) PosOrd_Left_eq by force ultimately have "s \ r1 \ v1'" using IH1 by simp then have "s \ ALT r1 r2 \ Left v1'" by (rule Posix.intros) then show "s \ ALT r1 r2 \ v1" using Left by simp @@ -1248,10 +1331,7 @@ "v1 = Stars vs" "s = flat (Stars vs)" "\v \ set vs. 
v \ CPT r (flat v)" unfolding CPT_def by (auto elim: CPrf.cases dest!: CPrf_stars) - have "Stars vs \ CPT (STAR r) (flat (Stars vs))" - using as2 unfolding eqs . - moreover - have "\v\set vs. flat v \ r \ v" + have "\v\set vs. flat v \ r \ v \ flat v \ []" proof fix v assume a: "v \ set vs" @@ -1266,7 +1346,7 @@ assume "v2 \ PT r (flat v)" then have "Stars (pre @ [v2] @ post) \ PT (STAR r) s" using as2 unfolding e eqs - apply(auto simp add: CPT_def PT_def intro!: Prf_Stars)[1] + apply(auto simp add: CPT_def PT_def intro!: Prf.intros)[1] using CPrf_Stars_appendE CPrf_stars Prf_CPrf apply blast by (meson CPrf_Stars_appendE CPrf_stars Prf_CPrf list.set_intros(2)) then have "\ Stars (pre @ [v2] @ post) :\val Stars (pre @ [v] @ post)" @@ -1276,8 +1356,8 @@ PosOrd_StarsI PosOrd_Stars_appendI by auto qed with IH - show "flat v \ r \ v" using a as2 unfolding eqs - using eqs(3) by blast + show "flat v \ r \ v \ flat v \ []" using a as2 unfolding eqs + using eqs(3) by (smt CPT_def CPrf_stars mem_Collect_eq) qed moreover have "\ (\vs2\PT (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)" @@ -1289,12 +1369,6 @@ apply(auto elim: Prf.cases) apply(erule Prf.cases) apply(auto intro: Prf.intros) - apply(drule_tac x="[]" in meta_spec) - apply(simp) - apply(drule meta_mp) - apply(auto intro: Prf.intros) - apply(drule_tac x="(v#vsa)" in meta_spec) - apply(auto intro: Prf.intros) done then show "False" using as1 unfolding eqs apply - @@ -1302,7 +1376,8 @@ apply(auto simp add: PT_def) done qed - ultimately have "flat (Stars vs) \ STAR r \ Stars vs" + ultimately have "flat (Stars vs) \ STAR r \ Stars vs" + thm PosOrd_Posix_Stars by (rule PosOrd_Posix_Stars) then show "s \ STAR r \ v1" unfolding eqs . qed diff -r e2828c4a1e23 -r d36be1e356c0 thys/README --- a/thys/README Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/README Tue Jul 18 18:39:20 2017 +0100 @@ -12,10 +12,11 @@ isabelle build -c -v -d . Journal -Othe directories are: +Other directories are: ===================== Paper + Journal Literature