# HG changeset patch # User Christian Urban # Date 1500399560 -3600 # Node ID d36be1e356c0ee01b9885e6fbf5f394af9c1e101 # Parent e2828c4a1e2318e76d8966d3bdc2d64cf2fdff5a changed definitions of PRF diff -r e2828c4a1e23 -r d36be1e356c0 Literature/alternative-posix-conf.pdf Binary file Literature/alternative-posix-conf.pdf has changed diff -r e2828c4a1e23 -r d36be1e356c0 thys/Journal/Paper.thy --- a/thys/Journal/Paper.thy Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Journal/Paper.thy Tue Jul 18 18:39:20 2017 +0100 @@ -3,17 +3,28 @@ imports "../Lexer" "../Simplifying" - "../Sulzmann" + "../Positions" "~~/src/HOL/Library/LaTeXsugar" begin +lemma Suc_0_fold: + "Suc 0 = 1" +by simp + + + declare [[show_question_marks = false]] abbreviation - "der_syn r c \ der c r" + "der_syn r c \ der c r" abbreviation - "ders_syn r s \ ders s r" + "ders_syn r s \ ders s r" + + +abbreviation + "nprec v1 v2 \ \(v1 :\val v2)" + notation (latex output) If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and @@ -26,7 +37,7 @@ SEQ ("_ \ _" [77,77] 78) and STAR ("_\<^sup>\" [1000] 78) and - val.Void ("'(')" 1000) and + val.Void ("Empty" 78) and val.Char ("Char _" [1000] 78) and val.Left ("Left _" [79] 78) and val.Right ("Right _" [1000] 78) and @@ -56,8 +67,18 @@ simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and slexer ("lexer\<^sup>+" 1000) and + at ("_\<^raw:\mbox{$\downharpoonleft$}>\<^bsub>_\<^esub>") and + lex_list ("_ \\<^bsub>lex\<^esub> _") and + PosOrd ("_ \\<^bsub>_\<^esub> _" [77,77,77] 77) and + PosOrd_ex ("_ \ _" [77,77] 77) and + PosOrd_ex1 ("_ \<^raw:\mbox{$\preccurlyeq$}> _" [77,77] 77) and + pflat_len ("\_\\<^bsub>_\<^esub>") and + nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) + + (* ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and ValOrdEq ("_ \\<^bsub>_\<^esub> _" [77,77,77] 77) + *) definition "match r s \ nullable (ders s r)" @@ -99,7 +120,7 @@ one way of how the string is matched. There are two commonly used disambiguation strategies to generate a unique answer: one is called GREEDY matching \cite{Frisch2004} and the other is POSIX -matching~\cite{Kuklewicz,Sulzmann2014,Vansummeren2006}. For example consider +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2013,Sulzmann2014,Vansummeren2006}. For example consider the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be matched in two `iterations' by the single letter-regular expressions @{term x} and @{term y}, or directly @@ -114,18 +135,24 @@ what programmers consider basic syntactic building blocks in their programs. These building blocks are often specified by some regular expressions, say @{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and -identifiers, respectively. There are two underlying (informal) rules behind -tokenising a string in a POSIX fashion according to a collection of regular +identifiers, respectively. 
There are a few underlying (informal) rules behind +tokenising a string in a POSIX \cite{POSIX} fashion according to a collection of regular expressions: \begin{itemize} -\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``maximal munch rule''}): +\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}): The longest initial substring matched by any regular expression is taken as next token.\smallskip \item[$\bullet$] \emph{Priority Rule:} -For a particular longest initial substring, the first regular expression -that can match determines the token. +For a particular longest initial substring, the first (leftmost) regular expression +that can match determines the token.\smallskip + +\item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall +not match an empty string unless this is the only match for the repetition.\smallskip + +\item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to +be longer than no match at all. \end{itemize} \noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords @@ -157,7 +184,7 @@ algorithm. This proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY regular expression matching algorithm. However, we were not able to establish transitivity and -totality for the ``order relation'' by Sulzmann and Lu. In Section +totality for the ``order relation'' by Sulzmann and Lu. ??In Section \ref{argu} we identify some inherent problems with their approach (of which some of the proofs are not published in \cite{Sulzmann2014}); perhaps more importantly, we give a simple inductive (and @@ -1008,6 +1035,104 @@ \end{proof} *} +section {* Ordering of Values according to Okui and Suzuki*} + +text {* + + Positions are lists of natural numbers. 
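+
+  For illustration only (this small example is not part of the formal
+  development), consider the value @{term "Seq (Char a) (Left (Char b))"};
+  the definitions below assign to it the following positions and subvalues:
+
+  \begin{center}
+  \begin{tabular}{lcl}
+  positions           & : & $\{[],\;[0],\;[1],\;[1,0]\}$\\
+  subvalue at $[0]$   & : & @{term "Char a"}\\
+  subvalue at $[1]$   & : & @{term "Left (Char b)"}\\
+  subvalue at $[1,0]$ & : & @{term "Char b"}\\
+  \end{tabular}
+  \end{center}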
+ + The subvalue at position @{text p}, written @{term "at v p"}, is defined + + + \begin{center} + \begin{tabular}{r@ {\hspace{0mm}}lcl} + @{term v} & @{text "\\<^bsub>[]\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(1)}\\ + @{term "Left v"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(2)}\\ + @{term "Right v"} & @{text "\\<^bsub>1::ps\<^esub>"} & @{text "\"} & + @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"} & + @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>1::ps\<^esub>"} + & @{text "\"} & + @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\ + @{term "Stars vs"} & @{text "\\<^bsub>n::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(6)}\\ + \end{tabular} + \end{center} + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) Pos.simps(1)} & @{text "\"} & @{thm (rhs) Pos.simps(1)}\\ + @{thm (lhs) Pos.simps(2)} & @{text "\"} & @{thm (rhs) Pos.simps(2)}\\ + @{thm (lhs) Pos.simps(3)} & @{text "\"} & @{thm (rhs) Pos.simps(3)}\\ + @{thm (lhs) Pos.simps(4)} & @{text "\"} & @{thm (rhs) Pos.simps(4)}\\ + @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + & @{text "\"} + & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + @{thm (lhs) Pos_stars} & @{text "\"} & @{thm (rhs) Pos_stars}\\ + \end{tabular} + \end{center} + + @{thm pflat_len_def} + + + \begin{center} + \begin{tabular}{ccc} + @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and + ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]} + \end{tabular} + \end{center} + + + Main definition by Okui and Suzuki. + + \begin{center} + \begin{tabular}{ccl} + @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} & + @{text "\"} & + @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} and\\ + & & @{term "(\q \ Pos v\<^sub>1 \ Pos v\<^sub>2. 
q \lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"} + \end{tabular} + \end{center} + + @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + @{thm PosOrd_ex1_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Lemma 1 + + @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + but in the other direction only + + @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Lemma Transitivity: + + @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} + + +*} + +text {* + + Theorems: + + @{thm [mode=IfThen] Posix_CPT} + + @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Corrollary from the last one + + @{thm [mode=IfThen] Posix_PosOrd_stronger[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + Theorem + + @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} +*} + + section {* The Correctness Argument by Sulzmann and Lu\label{argu} *} text {* @@ -1022,24 +1147,7 @@ Their central definition is an ``ordering relation'' defined by the rules (slightly adapted to fit our notation): -\begin{center} -\begin{tabular}{@ {}c@ {\hspace{4mm}}c@ {}} -@{thm[mode=Rule] C2[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1\" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\"]}\,(C2) & -@{thm[mode=Rule] C1[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\" "v\<^sub>1" "r\<^sub>1"]}\,(C1)\smallskip\\ - -@{thm[mode=Rule] A1[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\,(A1) & -@{thm[mode=Rule] A2[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\,(A2)\smallskip\\ - -@{thm[mode=Rule] A3[of "v\<^sub>1" "r\<^sub>2" "v\<^sub>2" "r\<^sub>1"]}\,(A3) & -@{thm[mode=Rule] A4[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\,(A4)\smallskip\\ - -@{thm[mode=Rule] K1[of "v" "vs" "r"]}\,(K1) & -@{thm[mode=Rule] K2[of "v" "vs" "r"]}\,(K2)\smallskip\\ - -@{thm[mode=Rule] K3[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\,(K3) & -@{thm[mode=Rule] K4[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\,(K4) -\end{tabular} -\end{center} + ?? \noindent The idea behind the rules (A1) and (A2), for example, is that a @{text Left}-value is bigger than a @{text Right}-value, if the underlying @@ -1062,143 +1170,8 @@ What is interesting for our purposes is that the properties reflexivity, totality and transitivity for this GREEDY ordering can be proved relatively easily by induction. - - These properties of GREEDY, however, do not transfer to the POSIX - ``ordering'' by Sulzmann and Lu, which they define as @{text "v\<^sub>1 \\<^sub>r v\<^sub>2"}. - To start with, @{text "v\<^sub>1 \\<^sub>r v\<^sub>2"} is - not defined inductively, but as $($@{term "v\<^sub>1 = v\<^sub>2"}$)$ $\vee$ $($@{term "(v\<^sub>1 >r - v\<^sub>2) \ (flat v\<^sub>1 = flat (v\<^sub>2::val))"}$)$. This means that @{term "v\<^sub>1 - >(r::rexp) (v\<^sub>2::val)"} does not necessarily imply @{term "v\<^sub>1 \(r::rexp) - (v\<^sub>2::val)"}. Moreover, transitivity does not hold in the ``usual'' - formulation, for example: - - \begin{falsehood} - Suppose @{term "\ v\<^sub>1 : r"}, @{term "\ v\<^sub>2 : r"} and @{term "\ v\<^sub>3 : r"}. - If @{term "v\<^sub>1 >(r::rexp) (v\<^sub>2::val)"} and @{term "v\<^sub>2 >(r::rexp) (v\<^sub>3::val)"} - then @{term "v\<^sub>1 >(r::rexp) (v\<^sub>3::val)"}. 
- \end{falsehood} - - \noindent If formulated in this way, then there are various counter - examples: For example let @{term r} be @{text "a + ((a + a)\(a + \))"} - then the @{term "v\<^sub>1"}, @{term "v\<^sub>2"} and @{term "v\<^sub>3"} below are values - of @{term r}: - - \begin{center} - \begin{tabular}{lcl} - @{term "v\<^sub>1"} & $=$ & @{term "Left(Char a)"}\\ - @{term "v\<^sub>2"} & $=$ & @{term "Right(Seq (Left(Char a)) (Right Void))"}\\ - @{term "v\<^sub>3"} & $=$ & @{term "Right(Seq (Right(Char a)) (Left(Char a)))"} - \end{tabular} - \end{center} - - \noindent Moreover @{term "v\<^sub>1 >(r::rexp) v\<^sub>2"} and @{term "v\<^sub>2 >(r::rexp) - v\<^sub>3"}, but \emph{not} @{term "v\<^sub>1 >(r::rexp) v\<^sub>3"}! The reason is that - although @{term "v\<^sub>3"} is a @{text "Right"}-value, it can match a longer - string, namely @{term "flat v\<^sub>3 = [a,a]"}, while @{term "flat v\<^sub>1"} (and - @{term "flat v\<^sub>2"}) matches only @{term "[a]"}. So transitivity in this - formulation does not hold---in this example actually @{term "v\<^sub>3 - >(r::rexp) v\<^sub>1"}! - - Sulzmann and Lu ``fix'' this problem by weakening the transitivity - property. They require in addition that the underlying strings are of the - same length. This excludes the counter example above and any - counter-example we were able to find (by hand and by machine). Thus the - transitivity lemma should be formulated as: - - \begin{conject} - Suppose @{term "\ v\<^sub>1 : r"}, @{term "\ v\<^sub>2 : r"} and @{term "\ v\<^sub>3 : r"}, - and also @{text "|v\<^sub>1| = |v\<^sub>2| = |v\<^sub>3|"}.\\ - If @{term "v\<^sub>1 >(r::rexp) (v\<^sub>2::val)"} and @{term "v\<^sub>2 >(r::rexp) (v\<^sub>3::val)"} - then @{term "v\<^sub>1 >(r::rexp) (v\<^sub>3::val)"}. - \end{conject} - - \noindent While we agree with Sulzmann and Lu that this property - probably(!) holds, proving it seems not so straightforward: although one - begins with the assumption that the values have the same flattening, this - cannot be maintained as one descends into the induction. This is a problem - that occurs in a number of places in the proofs by Sulzmann and Lu. - - Although they do not give an explicit proof of the transitivity property, - they give a closely related property about the existence of maximal - elements. They state that this can be verified by an induction on @{term r}. We - disagree with this as we shall show next in case of transitivity. The case - where the reasoning breaks down is the sequence case, say @{term "SEQ r\<^sub>1 r\<^sub>2"}. 
- The induction hypotheses in this case are +*} -\begin{center} -\begin{tabular}{@ {}c@ {\hspace{10mm}}c@ {}} -\begin{tabular}{@ {}l@ {\hspace{-7mm}}l@ {}} -IH @{term "r\<^sub>1"}:\\ -@{text "\ v\<^sub>1, v\<^sub>2, v\<^sub>3."} \\ - & @{term "\ v\<^sub>1 : r\<^sub>1"}\;@{text "\"} - @{term "\ v\<^sub>2 : r\<^sub>1"}\;@{text "\"} - @{term "\ v\<^sub>3 : r\<^sub>1"}\\ - & @{text "\"} @{text "|v\<^sub>1| = |v\<^sub>2| = |v\<^sub>3|"}\\ - & @{text "\"} @{term "v\<^sub>1 >(r\<^sub>1::rexp) v\<^sub>2 \ v\<^sub>2 >(r\<^sub>1::rexp) v\<^sub>3"}\medskip\\ - & $\Rightarrow$ @{term "v\<^sub>1 >(r\<^sub>1::rexp) v\<^sub>3"} -\end{tabular} & -\begin{tabular}{@ {}l@ {\hspace{-7mm}}l@ {}} -IH @{term "r\<^sub>2"}:\\ -@{text "\ v\<^sub>1, v\<^sub>2, v\<^sub>3."}\\ - & @{term "\ v\<^sub>1 : r\<^sub>2"}\;@{text "\"} - @{term "\ v\<^sub>2 : r\<^sub>2"}\;@{text "\"} - @{term "\ v\<^sub>3 : r\<^sub>2"}\\ - & @{text "\"} @{text "|v\<^sub>1| = |v\<^sub>2| = |v\<^sub>3|"}\\ - & @{text "\"} @{term "v\<^sub>1 >(r\<^sub>2::rexp) v\<^sub>2 \ v\<^sub>2 >(r\<^sub>2::rexp) v\<^sub>3"}\medskip\\ - & $\Rightarrow$ @{term "v\<^sub>1 >(r\<^sub>2::rexp) v\<^sub>3"} -\end{tabular} -\end{tabular} -\end{center} - - \noindent We can assume that - % - \begin{equation} - @{term "(Seq (v\<^sub>1\<^sub>l) (v\<^sub>1\<^sub>r)) >(SEQ r\<^sub>1 r\<^sub>2) (Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r))"} - \qquad\textrm{and}\qquad - @{term "(Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r)) >(SEQ r\<^sub>1 r\<^sub>2) (Seq (v\<^sub>3\<^sub>l) (v\<^sub>3\<^sub>r))"} - \label{assms} - \end{equation} - - \noindent hold, and furthermore that the values have equal length, namely: - % - \begin{equation} - @{term "flat (Seq (v\<^sub>1\<^sub>l) (v\<^sub>1\<^sub>r)) = flat (Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r))"} - \qquad\textrm{and}\qquad - @{term "flat (Seq (v\<^sub>2\<^sub>l) (v\<^sub>2\<^sub>r)) = flat (Seq (v\<^sub>3\<^sub>l) (v\<^sub>3\<^sub>r))"} - \label{lens} - \end{equation} - - \noindent We need to show that @{term "(Seq (v\<^sub>1\<^sub>l) (v\<^sub>1\<^sub>r)) >(SEQ r\<^sub>1 r\<^sub>2) - (Seq (v\<^sub>3\<^sub>l) (v\<^sub>3\<^sub>r))"} holds. We can proceed by analysing how the - assumptions in \eqref{assms} have arisen. There are four cases. Let us - assume we are in the case where we know - - \[ - @{term "v\<^sub>1\<^sub>l >(r\<^sub>1::rexp) v\<^sub>2\<^sub>l"} - \qquad\textrm{and}\qquad - @{term "v\<^sub>2\<^sub>l >(r\<^sub>1::rexp) v\<^sub>3\<^sub>l"} - \] - - \noindent and also know the corresponding inhabitation judgements. This is - exactly a case where we would like to apply the induction hypothesis - IH~$r_1$. But we cannot! We still need to show that @{term "flat (v\<^sub>1\<^sub>l) = - flat(v\<^sub>2\<^sub>l)"} and @{term "flat(v\<^sub>2\<^sub>l) = flat(v\<^sub>3\<^sub>l)"}. We know from - \eqref{lens} that the lengths of the sequence values are equal, but from - this we cannot infer anything about the lengths of the component values. - Indeed in general they will be unequal, that is - - \[ - @{term "flat(v\<^sub>1\<^sub>l) \ flat(v\<^sub>2\<^sub>l)"} - \qquad\textrm{and}\qquad - @{term "flat(v\<^sub>1\<^sub>r) \ flat(v\<^sub>2\<^sub>r)"} - \] - - \noindent but still \eqref{lens} will hold. Now we are stuck, since the IH - does not apply. As said, this problem where the induction hypothesis does - not apply arises in several places in the proof of Sulzmann and Lu, not - just for proving transitivity. 
- -*} section {* Conclusion *} diff -r e2828c4a1e23 -r d36be1e356c0 thys/Journal/document/root.bib --- a/thys/Journal/document/root.bib Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Journal/document/root.bib Tue Jul 18 18:39:20 2017 +0100 @@ -1,5 +1,25 @@ + + + +@Misc{POSIX, + title = {{T}he {O}pen {G}roup {B}ase {S}pecification {I}ssue 6 {IEEE} {S}td 1003.1 2004 {E}dition}, + year = {2004}, + note = {\url{http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html}} +} + + +@InProceedings{AusafDyckhoffUrban2016, + author = {F.~Ausaf and R.~Dyckhoff and C.~Urban}, + title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions ({P}roof {P}earl)}, + year = {2016}, + booktitle = {Proc.~of the 7th International Conference on Interactive Theorem Proving (ITP)}, + volume = {9807}, + series = {LNCS}, + pages = {69--86} +} + @article{aduAFP16, - author = {Fahad Ausaf and Roy Dyckhoff and Christian Urban}, + author = {F.~Ausaf and R.~Dyckhoff and C.~Urban}, title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions}, journal = {Archive of Formal Proofs}, year = 2016, @@ -255,13 +275,6 @@ series = {LNCS} } -@book{Michaelson, - title={An introduction to functional programming through lambda calculus}, - author={Michaelson, Greg}, - year={2011}, - publisher={Courier Corporation} -} - @article{Owens2008, author = {S.~Owens and K.~Slind}, title = {{A}dapting {F}unctional {P}rograms to {H}igher {O}rder {L}ogic}, @@ -273,92 +286,8 @@ } -@book{Velleman, - title={How to prove it: a structured approach}, - author={Velleman, Daniel J}, - year={2006}, - publisher={Cambridge University Press} -} - -@book{Jones, - title={Implementing functional languages:[a tutorial]}, - author={Jones, Simon L Peyton and Lester, David R}, - volume={124}, - year={1992}, - publisher={Prentice Hall Reading} -} - -@article{Cardelli, - title={Type systems}, - author={Cardelli, Luca}, - journal={ACM Computing Surveys}, - volume={28}, - number={1}, - pages={263--264}, - year={1996} -} - -@article{Morrisett, - author = {J. Gregory Morrisett and - Karl Crary and - Neal Glew and - David Walker}, - title = {Stack-based typed assembly language}, - journal = {J. Funct. Program.}, - volume = {13}, - number = {5}, - pages = {957--959}, - year = {2003}, - url = {http://dx.doi.org/10.1017/S0956796802004446}, - doi = {10.1017/S0956796802004446}, - timestamp = {Fri, 19 Mar 2004 13:48:27 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/jfp/MorrisettCGW03}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@book{Nipkow, - title={Concrete Semantics: With Isabelle/HOL}, - author={Nipkow, Tobias and Klein, Gerwin}, - year={2014}, - publisher={Springer} -} - -@article{Dube, - author = {Danny Dub{\'{e}} and - Marc Feeley}, - title = {Efficiently building a parse tree from a regular expression}, - journal = {Acta Inf.}, - volume = {37}, - number = {2}, - pages = {121--144}, - year = {2000}, - url = {http://link.springer.de/link/service/journals/00236/bibs/0037002/00370121.htm}, - timestamp = {Tue, 25 Nov 2003 14:51:21 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/acta/DubeF00}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{Morrisett2, - author = {J. Gregory Morrisett and - David Walker and - Karl Crary and - Neal Glew}, - title = {From system {F} to typed assembly language}, - journal = {{ACM} Trans. Program. Lang. 
Syst.}, - volume = {21}, - number = {3}, - pages = {527--568}, - year = {1999}, - url = {http://doi.acm.org/10.1145/319301.319345}, - doi = {10.1145/319301.319345}, - timestamp = {Wed, 26 Nov 2003 14:26:46 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/toplas/MorrisettWCG99}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - @article{Owens2, - author = {Scott Owens and - Konrad Slind}, + author = {S.~Owens and K.~Slind}, title = {Adapting functional programs to higher order logic}, journal = {Higher-Order and Symbolic Computation}, volume = {21}, @@ -379,3 +308,15 @@ +@InProceedings{OkuiSuzuki2013, + author = {S.~Okui and T.~Suzuki}, + title = {{D}isambiguation in {R}egular {E}xpression {M}atching via + {P}osition {A}utomata with {A}ugmented {T}ransitions}, + booktitle = {Proc.~of the 15th International Conference on Implementation + and Application of Automata (CIAA)}, + year = {2010}, + volume = {6482}, + series = {LNCS}, + pages = {231--240} +} + diff -r e2828c4a1e23 -r d36be1e356c0 thys/Journal/document/root.tex --- a/thys/Journal/document/root.tex Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Journal/document/root.tex Tue Jul 18 18:39:20 2017 +0100 @@ -33,8 +33,17 @@ \newtheorem{conject}{Conjecture} \begin{document} +\renewcommand{\thefootnote}{$\star$} \footnotetext[1]{This paper is a + revised and expanded version of \cite{AusafDyckhoffUrban2016}. + Compared with that paper we give a second definition for POSIX + values and prove that it is equivalent to the original one. This + definition is based on an ordering of values and very similar to the + definition given by Sulzmann and Lu~\cite{Sulzmann2014}. We also + extend our results to additional constructors of regular + expressions.} \renewcommand{\thefootnote}{\arabic{footnote}} -\title{POSIX {L}exing with {D}erivatives of {R}egular {E}xpressions (Proof Pearl)} + +\title{POSIX {L}exing with {D}erivatives of {R}egular {E}xpressions} \author{Fahad Ausaf\inst{1} \and Roy Dyckhoff\inst{2} \and Christian Urban\inst{3}} \institute{King's College London\\ \email{fahad.ausaf@icloud.com} @@ -50,20 +59,19 @@ expressions. They can be used for a very simple regular expression matching algorithm. Sulzmann and Lu cleverly extended this algorithm in order to deal with POSIX matching, which is the underlying -disambiguation strategy for regular expressions needed in lexers. -Sulzmann and Lu have made available on-line what they call a -``rigorous proof'' of the correctness of their algorithm w.r.t.\ their -specification; regrettably, it appears to us to have unfillable gaps. -In the first part of this paper we give our inductive definition of -what a POSIX value is and show $(i)$ that such a value is unique (for -given regular expression and string being matched) and $(ii)$ that -Sulzmann and Lu's algorithm always generates such a value (provided -that the regular expression matches the string). We also prove the -correctness of an optimised version of the POSIX matching -algorithm. Our definitions and proof are much simpler than those by -Sulzmann and Lu and can be easily formalised in Isabelle/HOL. In the -second part we analyse the correctness argument by Sulzmann and Lu and -explain why the gaps in this argument cannot be filled easily.\smallskip +disambiguation strategy for regular expressions needed in lexers. 
In +the first part of this paper we give our inductive definition of what +a POSIX value is and show $(i)$ that such a value is unique (for given +regular expression and string being matched) and $(ii)$ that Sulzmann +and Lu's algorithm always generates such a value (provided that the +regular expression matches the string). We also prove the correctness +of an optimised version of the POSIX matching algorithm. In the +second part we show that $(iii)$ our inductive definition of a POSIX +value is equivalent to an alternative definition by Okui and Suzuki +which identifies a POSIX value as least element according to an +ordering of values. The advantage of the definition based on the +ordering is that it implements more directly the POSIX +longest-leftmost matching semantics.\smallskip {\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL diff -r e2828c4a1e23 -r d36be1e356c0 thys/Lexer.thy --- a/thys/Lexer.thy Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Lexer.thy Tue Jul 18 18:39:20 2017 +0100 @@ -79,17 +79,20 @@ by (auto) (metis Star.simps) lemma Star_decomp: - assumes a: "c # x \ A\" - shows "\a b. x = a @ b \ c # a \ A \ b \ A\" -using a + assumes "c # x \ A\" + shows "\s1 s2. x = s1 @ s2 \ c # s1 \ A \ s2 \ A\" +using assms by (induct x\"c # x" rule: Star.induct) (auto simp add: append_eq_Cons_conv) lemma Star_Der_Sequ: shows "Der c (A\) \ (Der c A) ;; A\" unfolding Der_def -by(auto simp add: Der_def Sequ_def Star_decomp) - +apply(rule subsetI) +apply(simp) +unfolding Sequ_def +apply(simp) +by(auto simp add: Sequ_def Star_decomp) lemma Der_star [simp]: @@ -245,8 +248,7 @@ | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" | "\ Void : ONE" | "\ Char c : CHAR c" -| "\ Stars [] : STAR r" -| "\\ v : r; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" +| "\v \ set vs. \ v : r \ \ Stars vs : STAR r" inductive_cases Prf_elims: "\ v : ZERO" @@ -254,31 +256,25 @@ "\ v : ALT r1 r2" "\ v : ONE" "\ v : CHAR c" -(* "\ vs : STAR r"*) + "\ vs : STAR r" + +lemma Star_concat: + assumes "\s \ set ss. s \ A" + shows "concat ss \ A\" +using assms by (induct ss) (auto) + lemma Prf_flat_L: assumes "\ v : r" shows "flat v \ L r" using assms -by(induct v r rule: Prf.induct) - (auto simp add: Sequ_def) - -lemma Prf_Stars: - assumes "\v \ set vs. \ v : r" - shows "\ Stars vs : STAR r" -using assms -by(induct vs) (auto intro: Prf.intros) +by (induct v r rule: Prf.induct) + (auto simp add: Sequ_def Star_concat) lemma Prf_Stars_append: assumes "\ Stars vs1 : STAR r" "\ Stars vs2 : STAR r" shows "\ Stars (vs1 @ vs2) : STAR r" using assms -apply(induct vs1 arbitrary: vs2) -apply(auto intro: Prf.intros) -apply(erule Prf.cases) -apply(simp_all) -apply(auto intro: Prf.intros) -done - +by (auto intro!: Prf.intros elim!: Prf_elims) lemma Star_string: assumes "s \ A\" @@ -306,40 +302,45 @@ assumes "\ v : r" shows "flat v \ L r" using assms -by (induct)(auto simp add: Sequ_def) +by (induct) (auto simp add: Sequ_def Star_concat) lemma L_flat_Prf2: assumes "s \ L r" shows "\v. \ v : r \ flat v = s" using assms -apply(induct r arbitrary: s) -apply(auto simp add: Sequ_def intro: Prf.intros) -using Prf.intros(1) flat.simps(5) apply blast -using Prf.intros(2) flat.simps(3) apply blast -using Prf.intros(3) flat.simps(4) apply blast -apply(subgoal_tac "\vs::val list. concat (map flat vs) = s \ (\v \ set vs. 
\ v : r)") -apply(auto)[1] -apply(rule_tac x="Stars vs" in exI) -apply(simp) -apply (simp add: Prf_Stars) -apply(drule Star_string) -apply(auto) -apply(rule Star_val) -apply(auto) -done +proof(induct r arbitrary: s) + case (STAR r s) + have IH: "\s. s \ L r \ \v. \ v : r \ flat v = s" by fact + have "s \ L (STAR r)" by fact + then obtain ss where "concat ss = s" "\s \ set ss. s \ L r" + using Star_string by auto + then obtain vs where "concat (map flat vs) = s" "\v\set vs. \ v : r" + using IH Star_val by blast + then show "\v. \ v : STAR r \ flat v = s" + using Prf.intros(6) flat_Stars by blast +next + case (SEQ r1 r2 s) + then show "\v. \ v : SEQ r1 r2 \ flat v = s" + unfolding Sequ_def L.simps by (fastforce intro: Prf.intros) +next + case (ALT r1 r2 s) + then show "\v. \ v : ALT r1 r2 \ flat v = s" + unfolding L.simps by (fastforce intro: Prf.intros) +qed (auto intro: Prf.intros) lemma L_flat_Prf: "L(r) = {flat v | v. \ v : r}" using L_flat_Prf1 L_flat_Prf2 by blast +(* lemma Star_values_exists: assumes "s \ (L r)\" shows "\vs. concat (map flat vs) = s \ \ Stars vs : STAR r" using assms apply(drule_tac Star_string) apply(auto) -by (metis L_flat_Prf2 Prf_Stars Star_val) - +by (metis L_flat_Prf2 Prf.intros(6) Star_val) +*) section {* Sulzmann and Lu functions *} @@ -385,24 +386,14 @@ using assms apply(induct r arbitrary: c v rule: rexp.induct) apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) -(* STAR *) -apply(rotate_tac 2) -apply(erule Prf.cases) -apply(simp_all)[7] -apply(auto) -apply (metis Prf.intros(6) Prf.intros(7)) -by (metis Prf.intros(7)) +done lemma Prf_injval_flat: assumes "\ v : der c r" shows "flat (injval r c v) = c # (flat v)" using assms apply(induct arbitrary: v rule: der.induct) -apply(auto elim!: Prf_elims split: if_splits) -apply(metis mkeps_flat) -apply(rotate_tac 2) -apply(erule Prf.cases) -apply(simp_all)[7] +apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits) done @@ -444,7 +435,9 @@ assumes "s \ r \ v" shows "\ v : r" using assms -by (induct s r v rule: Posix.induct)(auto intro: Prf.intros) +apply(induct s r v rule: Posix.induct) +apply(auto intro!: Prf.intros elim!: Prf_elims) +done lemma Posix_mkeps: diff -r e2828c4a1e23 -r d36be1e356c0 thys/Positions.thy --- a/thys/Positions.thy Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/Positions.thy Tue Jul 18 18:39:20 2017 +0100 @@ -3,8 +3,7 @@ imports "Lexer" begin - -section {* Position in values *} +section {* Positions in Values *} fun at :: "val \ nat list \ val" @@ -16,6 +15,8 @@ | "at (Seq v1 v2) (Suc 0#ps)= at v2 ps" | "at (Stars vs) (n#ps)= at (nth vs n) ps" + + fun Pos :: "val \ (nat list) set" where "Pos (Void) = {[]}" @@ -26,15 +27,13 @@ | "Pos (Stars []) = {[]}" | "Pos (Stars (v#vs)) = {[]} \ {0#ps | ps. ps \ Pos v} \ {Suc n#ps | n ps. n#ps \ Pos (Stars vs)}" + lemma Pos_stars: "Pos (Stars vs) = {[]} \ (\n < length vs. {n#ps | ps. ps \ Pos (vs ! 
n)})" apply(induct vs) apply(simp) -apply(simp) apply(simp add: insert_ident) apply(rule subset_antisym) -apply(auto) -apply(auto)[1] using less_Suc_eq_0_disj by auto lemma Pos_empty: @@ -44,7 +43,7 @@ fun intlen :: "'a list \ int" where "intlen [] = 0" -| "intlen (x#xs) = 1 + intlen xs" +| "intlen (x # xs) = 1 + intlen xs" lemma intlen_bigger: shows "0 \ intlen xs" @@ -62,6 +61,15 @@ apply (smt Suc_lessI intlen.simps(2) length_Suc_conv nat_neq_iff) by (smt Suc_lessE intlen.simps(2) length_Suc_conv) +lemma intlen_length_eq: + shows "intlen xs = intlen ys \ length xs = length ys" +apply(induct xs arbitrary: ys) +apply (auto simp add: intlen_bigger not_less) +apply(case_tac ys) +apply(simp_all) +apply (smt intlen_bigger) +apply (smt intlen.elims intlen_bigger length_Suc_conv) +by (metis intlen.simps(2) length_Suc_conv) definition pflat_len :: "val \ nat list => int" where @@ -87,9 +95,9 @@ apply(auto simp add: less_Suc_eq_0_disj pflat_len_simps) done -lemma outside_lemma: - assumes "p \ Pos v1 \ Pos v2" - shows "pflat_len v1 p = pflat_len v2 p" +lemma pflat_len_outside: + assumes "p \ Pos v1" + shows "pflat_len v1 p = -1 " using assms by (auto simp add: pflat_len_def) @@ -121,10 +129,8 @@ fixes xs ys :: "nat list" shows "[] \lex ys \ ys \ []" and "xs \lex [] \ False" - and "(x # xs) \lex (y # ys) \ (x < y \ (\ y < x \ xs \lex ys))" -apply(auto elim: lex_list.cases intro: lex_list.intros) -apply(auto simp add: neq_Nil_conv not_less_iff_gr_or_eq intro: lex_list.intros) -done + and "(x # xs) \lex (y # ys) \ (x < y \ (x = y \ xs \lex ys))" +by (auto simp add: neq_Nil_conv elim: lex_list.cases intro: lex_list.intros) lemma lex_trans: fixes ps1 ps2 ps3 :: "nat list" @@ -139,7 +145,7 @@ fixes p q :: "nat list" shows "p = q \ p \lex q \ q \lex p" apply(induct p arbitrary: q) -apply(auto) +apply(auto elim: lex_list.cases) apply(case_tac q) apply(auto) done @@ -147,7 +153,7 @@ -section {* Ordering of values according to Okui & Suzuki *} +section {* POSIX Ordering of Values According to Okui & Suzuki *} definition PosOrd:: "val \ nat list \ val \ bool" ("_ \val _ _" [60, 60, 59] 60) @@ -155,111 +161,95 @@ "v1 \val p v2 \ pflat_len v1 p > pflat_len v2 p \ (\q \ Pos v1 \ Pos v2. q \lex p \ pflat_len v1 q = pflat_len v2 q)" -definition PosOrd2:: "val \ nat list \ val \ bool" ("_ \val2 _ _" [60, 60, 59] 60) -where - "v1 \val2 p v2 \ p \ Pos v1 \ - pflat_len v1 p > pflat_len v2 p \ - (\q \ Pos v1 \ Pos v2. q \lex p \ pflat_len v1 q = pflat_len v2 q)" + -lemma XXX: - "v1 \val p v2 \ v1 \val2 p v2" -apply(auto simp add: PosOrd_def PosOrd2_def) -apply(auto simp add: pflat_len_def split: if_splits) -by (smt intlen_bigger) +definition pflat_len2 :: "val \ nat list => (bool * nat)" +where + "pflat_len2 v p \ (if p \ Pos v then (True, length (flat (at v p))) else (False, 0))" + +instance prod :: (ord, ord) ord + by (rule Orderings.class.Orderings.ord.of_class.intro) -(* some tests *) +lemma "(0, 0) < (3::nat, 2::nat)" -definition ValFlat_ord:: "val \ nat list \ val \ bool" ("_ \fval _ _") -where - "v1 \fval p v2 \ p \ Pos v1 \ - (p \ Pos v2 \ flat (at v2 p) \spre flat (at v1 p)) \ - (\q \ Pos v1 \ Pos v2. 
q \lex p \ (pflat_len v1 q = pflat_len v2 q))" + + -lemma - assumes "v1 \fval p v2" - shows "v1 \val p v2" -using assms -unfolding ValFlat_ord_def PosOrd_def -apply(clarify) -apply(simp add: pflat_len_def) -apply(auto)[1] -apply(smt intlen_bigger) -apply(simp add: sprefix_list_def prefix_list_def) -apply(auto)[1] -apply(drule sym) -apply(simp add: intlen_append) -apply (metis intlen.simps(1) intlen_length length_greater_0_conv list.size(3)) -apply(smt intlen_bigger) -done - -lemma - assumes "v1 \val p v2" "flat (at v2 p) \spre flat (at v1 p)" - shows "v1 \fval p v2" -using assms -unfolding ValFlat_ord_def PosOrd_def -apply(clarify) -oops +definition PosOrd2:: "val \ nat list \ val \ bool" ("_ \val2 _ _" [60, 60, 59] 60) +where + "v1 \val2 p v2 \ (fst (pflat_len2 v1 p) > fst (pflat_len2 v2 p) \ + snd (pflat_len2 v1 p) > fst (pflat_len2 v2 p)) \ + (\q \ Pos v1 \ Pos v2. q \lex p \ pflat_len v1 q = pflat_len v2 q)" definition PosOrd_ex:: "val \ val \ bool" ("_ :\val _" [60, 59] 60) where - "v1 :\val v2 \ (\p. v1 \val p v2)" + "v1 :\val v2 \ \p. v1 \val p v2" -definition PosOrd_ex1:: "val \ val \ bool" ("_ :\val _" [60, 59] 60) +definition PosOrd_ex_eq:: "val \ val \ bool" ("_ :\val _" [60, 59] 60) where "v1 :\val v2 \ v1 :\val v2 \ v1 = v2" + lemma PosOrd_shorterE: assumes "v1 :\val v2" shows "length (flat v2) \ length (flat v1)" using assms -apply(auto simp add: PosOrd_ex_def PosOrd_def) +apply(auto simp add: pflat_len_simps PosOrd_ex_def PosOrd_def) apply(case_tac p) -apply(simp add: pflat_len_simps) -apply(simp add: intlen_length) +apply(simp add: pflat_len_simps intlen_length) apply(simp) apply(drule_tac x="[]" in bspec) apply(simp add: Pos_empty) -apply(simp add: pflat_len_simps) -by (metis intlen_length le_less less_irrefl linear) - +apply(simp add: pflat_len_simps le_less intlen_length_eq) +done lemma PosOrd_shorterI: - assumes "length (flat v') < length (flat v)" - shows "v :\val v'" + assumes "length (flat v2) < length (flat v1)" + shows "v1 :\val v2" using assms unfolding PosOrd_ex_def by (metis intlen_length lex_simps(2) pflat_len_simps(9) PosOrd_def) lemma PosOrd_spreI: - assumes "(flat v') \spre (flat v)" + assumes "flat v' \spre flat v" shows "v :\val v'" using assms apply(rule_tac PosOrd_shorterI) by (metis append_eq_conv_conj le_less_linear prefix_list_def sprefix_list_def take_all) + lemma PosOrd_Left_Right: assumes "flat v1 = flat v2" shows "Left v1 :\val Right v2" unfolding PosOrd_ex_def apply(rule_tac x="[0]" in exI) -using assms +using assms apply(auto simp add: PosOrd_def pflat_len_simps) apply(smt intlen_bigger) done -lemma PosOrd_LeftI: - assumes "v :\val v'" "flat v = flat v'" - shows "(Left v) :\val (Left v')" -using assms(1) +lemma PosOrd_Left_eq: + assumes "flat v = flat v'" + shows "Left v :\val Left v' \ v :\val v'" +using assms unfolding PosOrd_ex_def apply(auto) +apply(case_tac p) +apply(simp add: PosOrd_def pflat_len_simps) +apply(case_tac a) +apply(simp add: PosOrd_def pflat_len_simps) +apply(rule_tac x="list" in exI) +apply(auto simp add: PosOrd_def pflat_len_simps)[1] +apply (smt Un_def lex_list.intros(2) mem_Collect_eq pflat_len_simps(3)) +apply (smt Collect_disj_eq lex_list.intros(2) mem_Collect_eq pflat_len_simps(3)) +apply(auto simp add: PosOrd_def pflat_len_outside)[1] apply(rule_tac x="0#p" in exI) -using assms(2) apply(auto simp add: PosOrd_def pflat_len_simps) done + lemma PosOrd_RightI: assumes "v :\val v'" "flat v = flat v'" shows "(Right v) :\val (Right v')" @@ -271,31 +261,6 @@ apply(auto simp add: PosOrd_def pflat_len_simps) done -lemma 
PosOrd_LeftE: - assumes "(Left v1) :\val (Left v2)" - shows "v1 :\val v2" -using assms -apply(simp add: PosOrd_ex_def) -apply(erule exE) -apply(case_tac p) -apply(simp add: PosOrd_def) -apply(auto simp add: pflat_len_simps) -apply(rule_tac x="[]" in exI) -apply(simp add: Pos_empty pflat_len_simps) -apply(auto simp add: pflat_len_simps PosOrd_def) -apply(case_tac a) -apply(auto simp add: pflat_len_simps)[1] -apply(rule_tac x="list" in exI) -apply(auto) -apply(drule_tac x="0#q" in bspec) -apply(simp) -apply(simp add: pflat_len_simps) -apply(drule_tac x="0#q" in bspec) -apply(simp) -apply(simp add: pflat_len_simps) -apply(simp add: pflat_len_def) -done - lemma PosOrd_RightE: assumes "(Right v1) :\val (Right v2)" shows "v1 :\val v2" @@ -517,14 +482,14 @@ by (rule lex_trichotomous) moreover { assume "p = p'" - with as have "v1 \val p v3" unfolding PosOrd_def - by (smt outside_lemma) + with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def + by fastforce then have " v1 :\val v3" unfolding PosOrd_ex_def by blast } moreover { assume "p \lex p'" - with as have "v1 \val p v3" unfolding PosOrd_def - by (metis lex_trans outside_lemma) + with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def + by (smt Un_iff lex_trans) then have " v1 :\val v3" unfolding PosOrd_ex_def by blast } moreover @@ -560,26 +525,47 @@ apply(auto simp add: PosOrd_ex_def PosOrd_def) using assms PosOrd_irrefl PosOrd_trans by blast -lemma WW2: - assumes "\(v1 :\val v2)" - shows "v1 = v2 \ v2 :\val v1" -using assms -oops - lemma PosOrd_SeqE2: - assumes "(Seq v1 v2) :\val (Seq v1' v2')" - shows "v1 :\val v1' \ (v1 = v1' \ v2 :\val v2')" + assumes "(Seq v1 v2) :\val (Seq v1' v2')" "flat (Seq v1 v2) = flat (Seq v1' v2')" + shows "v1 :\val v1' \ (intlen (flat v1) = intlen (flat v1') \ v2 :\val v2')" using assms apply(frule_tac PosOrd_SeqE) apply(erule disjE) apply(simp) -apply(auto) apply(case_tac "v1 :\val v1'") apply(simp) +apply(rule disjI2) +apply(rule conjI) +prefer 2 +apply(simp) +apply(auto) apply(auto simp add: PosOrd_ex_def) -apply(case_tac "v1 = v1'") +apply(auto simp add: PosOrd_def pflat_len_simps) +apply(case_tac p) +apply(auto simp add: PosOrd_def pflat_len_simps) +apply(case_tac a) +apply(auto simp add: PosOrd_def pflat_len_simps) +apply (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def WW1 assms(1) assms(2)) +by (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def WW1 assms(1) assms(2)) + +lemma PosOrd_SeqE4: + assumes "(Seq v1 v2) :\val (Seq v1' v2')" "flat (Seq v1 v2) = flat (Seq v1' v2')" + shows "v1 :\val v1' \ (flat v1 = flat v1' \ v2 :\val v2')" +using assms +apply(frule_tac PosOrd_SeqE) +apply(erule disjE) apply(simp) -oops +apply(case_tac "v1 :\val v1'") +apply(simp) +apply(rule disjI2) +apply(rule conjI) +prefer 2 +apply(simp) +apply(auto) +apply(case_tac "length (flat v1') < length (flat v1)") +using PosOrd_shorterI apply blast +by (metis PosOrd_SeqI1 PosOrd_shorterI WW1 antisym_conv3 append_eq_append_conv assms(2)) + section {* CPT and CPTpre *} @@ -592,84 +578,19 @@ | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" | "\ Void : ONE" | "\ Char c : CHAR c" -| "\ Stars [] : STAR r" -| "\\ v : r; flat v \ []; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" +| "\v \ set vs. \ v : r \ flat v \ [] \ \ Stars vs : STAR r" lemma Prf_CPrf: assumes "\ v : r" shows "\ v : r" using assms -by (induct) (auto intro: Prf.intros) - -lemma pflat_len_equal_iff: - assumes "\ v1 : r" "\ v2 : r" - and "\p. 
pflat_len v1 p = pflat_len v2 p" - shows "v1 = v2" -using assms -apply(induct v1 r arbitrary: v2 rule: CPrf.induct) -apply(rotate_tac 4) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis pflat_len_simps(1) pflat_len_simps(2)) -apply(rotate_tac 2) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis pflat_len_simps(3)) -apply (metis intlen.simps(1) intlen_length length_greater_0_conv list.size(3) neg_0_le_iff_le not_less not_less_iff_gr_or_eq not_one_le_zero pflat_len_simps(3) pflat_len_simps(6) pflat_len_simps(9)) -apply(rotate_tac 2) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis intlen.simps(1) intlen_length length_greater_0_conv list.size(3) neg_0_le_iff_le not_less not_less_iff_gr_or_eq not_one_le_zero pflat_len_simps(3) pflat_len_simps(6) pflat_len_simps(9)) -apply (metis pflat_len_simps(5)) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply(erule CPrf.cases) -apply(simp_all)[7] -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis append_Cons flat.simps(6) flat.simps(7) intlen_length length_greater_0_conv neq_Nil_conv not_less_iff_gr_or_eq pflat_len_simps(9)) -apply(rotate_tac 5) -apply(erule CPrf.cases) -apply(simp_all)[7] -apply (metis append_Cons flat.simps(6) flat.simps(7) intlen_length length_greater_0_conv list.distinct(1) list.exhaust not_less_iff_gr_or_eq pflat_len_simps(9)) -apply(auto) -apply (metis pflat_len_simps(8)) -apply(subgoal_tac "v = va") -prefer 2 -apply (metis pflat_len_simps(8)) -apply(simp) -apply(rotate_tac 3) -apply(drule_tac x="Stars vsa" in meta_spec) -apply(simp) -apply(drule_tac meta_mp) -apply(rule allI) -apply(case_tac p) -apply(simp add: pflat_len_simps) -apply(drule_tac x="[]" in spec) -apply(simp add: pflat_len_simps intlen_append) -apply(drule_tac x="Suc a#list" in spec) -apply(simp add: pflat_len_simps intlen_append) -apply(simp) -done - -lemma PosOrd_trichotomous_stronger: - assumes "\ v1 : r" "\ v2 : r" - shows "v1 :\val v2 \ v2 :\val v1 \ (v1 = v2)" -oops +by (induct)(auto intro: Prf.intros) lemma CPrf_stars: assumes "\ Stars vs : STAR r" shows "\v \ set vs. flat v \ [] \ \ v : r" using assms -apply(induct vs) -apply(auto) -apply(erule CPrf.cases) -apply(simp_all) -apply(erule CPrf.cases) -apply(simp_all) -apply(erule CPrf.cases) -apply(simp_all) -apply(erule CPrf.cases) +apply(erule_tac CPrf.cases) apply(simp_all) done @@ -677,11 +598,8 @@ assumes "\ Stars (vs1 @ vs2) : STAR r" shows "\ Stars vs1 : STAR r \ \ Stars vs2 : STAR r" using assms -apply(induct vs1 arbitrary: vs2) -apply(auto intro: CPrf.intros)[1] -apply(erule CPrf.cases) -apply(simp_all) -apply(auto intro: CPrf.intros) +apply(erule_tac CPrf.cases) +apply(auto intro: CPrf.intros elim: Prf.cases) done definition PT :: "rexp \ string \ val set" @@ -697,6 +615,74 @@ shows "CPT r s \ CPTpre r s" by(auto simp add: CPT_def CPTpre_def) +lemma CPT_simps: + shows "CPT ZERO s = {}" + and "CPT ONE s = (if s = [] then {Void} else {})" + and "CPT (CHAR c) s = (if s = [c] then {Char c} else {})" + and "CPT (ALT r1 r2) s = Left ` CPT r1 s \ Right ` CPT r2 s" + and "CPT (SEQ r1 r2) s = + {Seq v1 v2 | v1 v2. flat v1 @ flat v2 = s \ v1 \ CPT r1 (flat v1) \ v2 \ CPT r2 (flat v2)}" + and "CPT (STAR r) s = + Stars ` {vs. concat (map flat vs) = s \ (\v \ set vs. 
v \ CPT r (flat v) \ flat v \ [])}" +apply - +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +(* STAR case *) +apply(auto simp add: CPT_def intro: CPrf.intros)[1] +apply(erule CPrf.cases) +apply(simp_all)[6] +done + +(* +lemma CPTpre_STAR_finite: + assumes "\s. finite (CPT r s)" + shows "finite (CPT (STAR r) s)" +apply(induct s rule: length_induct) +apply(case_tac xs) +apply(simp) +apply(simp add: CPT_simps) +apply(auto) +apply(rule finite_imageI) +using assms +thm finite_Un +prefer 2 +apply(simp add: CPT_simps) +apply(rule finite_imageI) +apply(rule finite_subset) +apply(rule CPTpre_subsets) +apply(simp) +apply(rule_tac B="(\(v, vs). Stars (v#vs)) ` {(v, vs). v \ CPTpre r (a#list) \ flat v \ [] \ Stars vs \ CPTpre (STAR r) (drop (length (flat v)) (a#list))}" in finite_subset) +apply(auto)[1] +apply(rule finite_imageI) +apply(simp add: Collect_case_prod_Sigma) +apply(rule finite_SigmaI) +apply(rule assms) +apply(case_tac "flat v = []") +apply(simp) +apply(drule_tac x="drop (length (flat v)) (a # list)" in spec) +apply(simp) +apply(auto)[1] +apply(rule test) +apply(simp) +done + lemma CPTpre_subsets: "CPTpre ZERO s = {}" "CPTpre ONE s \ {Void}" @@ -717,6 +703,7 @@ apply(simp_all) apply(erule CPrf.cases) apply(simp_all) + apply(erule CPrf.cases) apply(simp_all) apply(erule CPrf.cases) @@ -845,13 +832,21 @@ apply(rule CPT_CPTpre_subset) apply(rule CPTpre_finite) done +*) lemma Posix_CPT: assumes "s \ r \ v" shows "v \ CPT r s" using assms -by (induct rule: Posix.induct) - (auto simp add: CPT_def intro: CPrf.intros) +apply(induct rule: Posix.induct) +apply(auto simp add: CPT_def intro: CPrf.intros elim: CPrf.cases) +apply(rotate_tac 5) +apply(erule CPrf.cases) +apply(simp_all) +apply(rule CPrf.intros) +apply(simp_all) +done + section {* The Posix Value is smaller than any other Value *} @@ -866,14 +861,14 @@ then have "v = Void" by (simp add: CPT_simps) then show "Void :\val v" - by (simp add: PosOrd_ex1_def) + by (simp add: PosOrd_ex_eq_def) next case (Posix_CHAR c v) have "v \ CPT (CHAR c) [c]" by fact then have "v = Char c" by (simp add: CPT_simps) then show "Char c :\val v" - by (simp add: PosOrd_ex1_def) + by (simp add: PosOrd_ex_eq_def) next case (Posix_ALT1 s r1 v r2 v2) have as1: "s \ r1 \ v" by fact @@ -895,14 +890,14 @@ have "flat v3 = flat v" using as1 Left(3) by (simp add: Posix1(2)) ultimately have "Left v :\val Left v3" - by (auto simp add: PosOrd_ex1_def PosOrd_LeftI) + by (simp add: PosOrd_ex_eq_def PosOrd_Left_eq) then show "Left v :\val v2" unfolding Left . next case (Right v3) have "flat v3 = flat v" using as1 Right(3) by (simp add: Posix1(2)) then have "Left v :\val Right v3" using Right(3) as1 - by (auto simp add: PosOrd_ex1_def PosOrd_Left_Right) + by (auto simp add: PosOrd_ex_eq_def PosOrd_Left_Right) then show "Left v :\val v2" unfolding Right . 
qed next @@ -927,7 +922,7 @@ have "flat v3 = flat v" using as1 Right(3) by (simp add: Posix1(2)) ultimately have "Right v :\val Right v3" - by (auto simp add: PosOrd_ex1_def PosOrd_RightI) + by (auto simp add: PosOrd_ex_eq_def PosOrd_RightI) then show "Right v :\val v2" unfolding Right . next case (Left v3) @@ -961,7 +956,7 @@ by (auto simp add: CPT_def) then have "v1 :\val v3a \ (v1 :\val v3a \ v2 :\val v3b)" using IH1 IH2 by blast then have "Seq v1 v2 :\val Seq v3a v3b" using eqs q2 as1 - unfolding PosOrd_ex1_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) + unfolding PosOrd_ex_eq_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) then show "Seq v1 v2 :\val v3" unfolding eqs by blast next case (Posix_STAR1 s1 r v s2 vs v3) @@ -972,13 +967,20 @@ have cond: "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" by fact have cond2: "flat v \ []" by fact have "v3 \ CPT (STAR r) (s1 @ s2)" by fact - then consider - (NonEmpty) v3a vs3 where - "v3 = Stars (v3a # vs3)" "\ v3a : r" "\ Stars vs3 : STAR r" + then consider + (NonEmpty) v3a vs3 where "v3 = Stars (v3a # vs3)" + "\ v3a : r" "\ Stars vs3 : STAR r" "flat (Stars (v3a # vs3)) = s1 @ s2" | (Empty) "v3 = Stars []" - by (force simp add: CPT_def elim: CPrf.cases) - then show "Stars (v # vs) :\val v3" + unfolding CPT_def + apply(auto) + apply(erule CPrf.cases) + apply(simp_all) + apply(auto)[1] + apply(case_tac vs) + apply(auto) + using CPrf.intros(6) by blast + then show "Stars (v # vs) :\val v3" (* HERE *) proof (cases) case (NonEmpty v3a vs3) have "flat (Stars (v3a # vs3)) = s1 @ s2" using NonEmpty(4) . @@ -993,16 +995,16 @@ using NonEmpty(2,3) by (auto simp add: CPT_def) then have "v :\val v3a \ (v :\val v3a \ Stars vs :\val Stars vs3)" using IH1 IH2 by blast then have "v :\val v3a \ (v = v3a \ Stars vs :\val Stars vs3)" - unfolding PosOrd_ex1_def by auto + unfolding PosOrd_ex_eq_def by auto then have "Stars (v # vs) :\val Stars (v3a # vs3)" using NonEmpty(4) q2 as1 - unfolding PosOrd_ex1_def + unfolding PosOrd_ex_eq_def by (metis PosOrd_StarsI PosOrd_StarsI2 flat.simps(7) val.inject(5)) then show "Stars (v # vs) :\val v3" unfolding NonEmpty by blast next case Empty have "v3 = Stars []" by fact then show "Stars (v # vs) :\val v3" - unfolding PosOrd_ex1_def using cond2 + unfolding PosOrd_ex_eq_def using cond2 by (simp add: PosOrd_shorterI) qed next @@ -1011,7 +1013,7 @@ then have "v2 = Stars []" unfolding CPT_def by (auto elim: CPrf.cases) then show "Stars [] :\val v2" - by (simp add: PosOrd_ex1_def) + by (simp add: PosOrd_ex_eq_def) qed lemma Posix_PosOrd_stronger: @@ -1030,7 +1032,7 @@ then have "flat v2 \spre flat v1" using assms(1) using Posix1(2) by blast then have "v1 :\val v2" - by (simp add: PosOrd_ex1_def PosOrd_spreI) + by (simp add: PosOrd_ex_eq_def PosOrd_spreI) } ultimately show "v1 :\val v2" by blast qed @@ -1040,9 +1042,9 @@ shows "\(\v2 \ CPTpre r s. v2 :\val v1)" using assms by (metis Posix_PosOrd_stronger less_irrefl PosOrd_def - PosOrd_ex1_def PosOrd_ex_def PosOrd_trans) + PosOrd_ex_eq_def PosOrd_ex_def PosOrd_trans) - +(* lemma PosOrd_Posix_Stars: assumes "(Stars vs) \ CPT (STAR r) (flat (Stars vs))" "\v \ set vs. flat v \ r \ v" and "\(\vs2 \ PT (STAR r) (flat (Stars vs)). 
vs2 :\val (Stars vs))" @@ -1115,12 +1117,93 @@ apply(simp) apply (metis flat.simps(7) flat_Stars PosOrd_StarsI2 PosOrd_ex_def) by (metis PosOrd_StarsI PosOrd_ex_def PosOrd_spreI append_assoc append_self_conv flat.simps(7) flat_Stars prefix_list_def sprefix_list_def) +*) + + +lemma test2: + assumes "\v \ set vs. flat v \ r \ v \ flat v \ []" + shows "(Stars vs) \ CPT (STAR r) (flat (Stars vs))" +using assms +apply(induct vs) +apply(auto simp add: CPT_def) +apply(rule CPrf.intros) +apply(simp) +apply(rule CPrf.intros) +apply(simp_all) +by (metis (no_types, lifting) CPT_def Posix_CPT mem_Collect_eq) + + +lemma PosOrd_Posix_Stars: + assumes "\v \ set vs. flat v \ r \ v \ flat v \ []" + and "\(\vs2 \ PT (STAR r) (flat (Stars vs)). vs2 :\val (Stars vs))" + shows "(flat (Stars vs)) \ (STAR r) \ Stars vs" +using assms +proof(induct vs) + case Nil + show "flat (Stars []) \ STAR r \ Stars []" + by(simp add: Posix.intros) +next + case (Cons v vs) + have IH: "\\v\set vs. flat v \ r \ v \ flat v \ []; + \ (\vs2\PT (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)\ + \ flat (Stars vs) \ STAR r \ Stars vs" by fact + have as2: "\v\set (v # vs). flat v \ r \ v \ flat v \ []" by fact + have as3: "\ (\vs2\PT (STAR r) (flat (Stars (v # vs))). vs2 :\val Stars (v # vs))" by fact + have "flat v \ r \ v" using as2 by simp + moreover + have "flat (Stars vs) \ STAR r \ Stars vs" + proof (rule IH) + show "\v\set vs. flat v \ r \ v \ flat v \ []" using as2 by simp + next + show "\ (\vs2\PT (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)" using as3 + apply(auto) + apply(subst (asm) (2) PT_def) + apply(auto) + apply(erule Prf.cases) + apply(simp_all) + apply(drule_tac x="Stars (v # vs)" in bspec) + apply(simp add: PT_def CPT_def) + using Posix1a Prf.intros(6) calculation + apply(rule_tac Prf.intros) + apply(simp add:) + apply (simp add: PosOrd_StarsI2) + done + qed + moreover + have "flat v \ []" using as2 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = flat (Stars vs) \ flat v @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" + using as3 + apply(auto) + apply(drule L_flat_Prf2) + apply(erule exE) + apply(simp only: L.simps[symmetric]) + apply(drule L_flat_Prf2) + apply(erule exE) + apply(clarify) + apply(rotate_tac 5) + apply(erule Prf.cases) + apply(simp_all) + apply(clarify) + apply(drule_tac x="Stars (va#vs)" in bspec) + apply(auto simp add: PT_def)[1] + apply(rule Prf.intros) + apply(simp) + by (simp add: PosOrd_StarsI PosOrd_shorterI) + ultimately show "flat (Stars (v # vs)) \ STAR r \ Stars (v # vs)" + by (simp add: Posix.intros) +qed + section {* The Smallest Value is indeed the Posix Value *} +text {* + The next lemma seems to require PT instead of CPT in the Star-case. +*} + lemma PosOrd_Posix: - assumes "v1 \ CPT r s" "\v2 \ PT r s. \ v2 :\val v1" + assumes "v1 \ CPT r s" "\v\<^sub>2 \ PT r s. \ v\<^sub>2 :\val v1" shows "s \ r \ v1" using assms proof(induct r arbitrary: s v1) @@ -1159,7 +1242,7 @@ unfolding CPT_def Left by (auto elim: CPrf.cases) moreover have "\v2 \ PT r1 s. \ v2 :\val v1'" using as1 - unfolding PT_def Left using Prf.intros(2) PosOrd_LeftI by force + unfolding PT_def Left using Prf.intros(2) PosOrd_Left_eq by force ultimately have "s \ r1 \ v1'" using IH1 by simp then have "s \ ALT r1 r2 \ Left v1'" by (rule Posix.intros) then show "s \ ALT r1 r2 \ v1" using Left by simp @@ -1248,10 +1331,7 @@ "v1 = Stars vs" "s = flat (Stars vs)" "\v \ set vs. 
v \ CPT r (flat v)" unfolding CPT_def by (auto elim: CPrf.cases dest!: CPrf_stars) - have "Stars vs \ CPT (STAR r) (flat (Stars vs))" - using as2 unfolding eqs . - moreover - have "\v\set vs. flat v \ r \ v" + have "\v\set vs. flat v \ r \ v \ flat v \ []" proof fix v assume a: "v \ set vs" @@ -1266,7 +1346,7 @@ assume "v2 \ PT r (flat v)" then have "Stars (pre @ [v2] @ post) \ PT (STAR r) s" using as2 unfolding e eqs - apply(auto simp add: CPT_def PT_def intro!: Prf_Stars)[1] + apply(auto simp add: CPT_def PT_def intro!: Prf.intros)[1] using CPrf_Stars_appendE CPrf_stars Prf_CPrf apply blast by (meson CPrf_Stars_appendE CPrf_stars Prf_CPrf list.set_intros(2)) then have "\ Stars (pre @ [v2] @ post) :\val Stars (pre @ [v] @ post)" @@ -1276,8 +1356,8 @@ PosOrd_StarsI PosOrd_Stars_appendI by auto qed with IH - show "flat v \ r \ v" using a as2 unfolding eqs - using eqs(3) by blast + show "flat v \ r \ v \ flat v \ []" using a as2 unfolding eqs + using eqs(3) by (smt CPT_def CPrf_stars mem_Collect_eq) qed moreover have "\ (\vs2\PT (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)" @@ -1289,12 +1369,6 @@ apply(auto elim: Prf.cases) apply(erule Prf.cases) apply(auto intro: Prf.intros) - apply(drule_tac x="[]" in meta_spec) - apply(simp) - apply(drule meta_mp) - apply(auto intro: Prf.intros) - apply(drule_tac x="(v#vsa)" in meta_spec) - apply(auto intro: Prf.intros) done then show "False" using as1 unfolding eqs apply - @@ -1302,7 +1376,8 @@ apply(auto simp add: PT_def) done qed - ultimately have "flat (Stars vs) \ STAR r \ Stars vs" + ultimately have "flat (Stars vs) \ STAR r \ Stars vs" + thm PosOrd_Posix_Stars by (rule PosOrd_Posix_Stars) then show "s \ STAR r \ v1" unfolding eqs . qed diff -r e2828c4a1e23 -r d36be1e356c0 thys/README --- a/thys/README Thu Jul 06 16:05:33 2017 +0100 +++ b/thys/README Tue Jul 18 18:39:20 2017 +0100 @@ -12,10 +12,11 @@ isabelle build -c -v -d . Journal -Othe directories are: +Other directories are: ===================== Paper + Journal Literature