# HG changeset patch # User cu # Date 1508930324 -3600 # Node ID c840a99a3e05dade0b40cddcb83dd42a66f46741 # Parent f754a10875c7f04ff82aaa74da24d1fb46cfd8fe updated diff -r f754a10875c7 -r c840a99a3e05 thys/Journal/Paper.thy --- a/thys/Journal/Paper.thy Tue Oct 10 11:31:47 2017 +0100 +++ b/thys/Journal/Paper.thy Wed Oct 25 12:18:44 2017 +0100 @@ -3,7 +3,7 @@ imports "../Lexer" "../Simplifying" - "../Positions" + "../Positions" "~~/src/HOL/Library/LaTeXsugar" begin @@ -507,7 +507,7 @@ ordering relation for values.} \begin{center} - \begin{tabular}{c@ {\hspace{12mm}}c} + \begin{tabular}{c@ {\hspace{12mm}}c}\label{prfintros} \\[-8mm] @{thm[mode=Axiom] Prf.intros(4)} & @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm] @@ -1552,145 +1552,7 @@ *} -section {* Extensions*} -text {* - - A strong point in favour of - Sulzmann and Lu's algorithm is that it can be extended in various - ways. If we are interested in tokenising a string, then we need to not just - split up the string into tokens, but also ``classify'' the tokens (for - example whether it is a keyword or an identifier). This can be done with - only minor modifications to the algorithm by introducing \emph{record - regular expressions} and \emph{record values} (for example - \cite{Sulzmann2014b}): - - \begin{center} - @{text "r :="} - @{text "..."} $\mid$ - @{text "(l : r)"} \qquad\qquad - @{text "v :="} - @{text "..."} $\mid$ - @{text "(l : v)"} - \end{center} - - \noindent where @{text l} is a label, say a string, @{text r} a regular - expression and @{text v} a value. All functions can be smoothly extended - to these regular expressions and values. For example \mbox{@{text "(l : - r)"}} is nullable iff @{term r} is, and so on. The purpose of the record - regular expression is to mark certain parts of a regular expression and - then record in the calculated value which parts of the string were matched - by this part. The label can then serve as classification for the tokens. - For this recall the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} for - keywords and identifiers from the Introduction. With the record regular - expression we can form \mbox{@{text "((key : r\<^bsub>key\<^esub>) + (id : r\<^bsub>id\<^esub>))\<^sup>\"}} - and then traverse the calculated value and only collect the underlying - strings in record values. With this we obtain finite sequences of pairs of - labels and strings, for example - - \[@{text "(l\<^sub>1 : s\<^sub>1), ..., (l\<^sub>n : s\<^sub>n)"}\] - - \noindent from which tokens with classifications (keyword-token, - identifier-token and so on) can be extracted. - - -*} - - - -section {* The Correctness Argument by Sulzmann and Lu\label{argu} *} - -text {* -% \newcommand{\greedy}{\succcurlyeq_{gr}} - \newcommand{\posix}{>} - - An extended version of \cite{Sulzmann2014} is available at the website of - its first author; this includes some ``proofs'', claimed in - \cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in - final form, we make no comment thereon, preferring to give general reasons - for our belief that the approach of \cite{Sulzmann2014} is problematic. - Their central definition is an ``ordering relation'' defined by the - rules (slightly adapted to fit our notation): - - ?? - - \noindent The idea behind the rules (A1) and (A2), for example, is that a - @{text Left}-value is bigger than a @{text Right}-value, if the underlying - string of the @{text Left}-value is longer or of equal length to the - underlying string of the @{text Right}-value. The order is reversed, - however, if the @{text Right}-value can match a longer string than a - @{text Left}-value. In this way the POSIX value is supposed to be the - biggest value for a given string and regular expression. - - Sulzmann and Lu explicitly refer to the paper \cite{Frisch2004} by Frisch - and Cardelli from where they have taken the idea for their correctness - proof. Frisch and Cardelli introduced a similar ordering for GREEDY - matching and they showed that their GREEDY matching algorithm always - produces a maximal element according to this ordering (from all possible - solutions). The only difference between their GREEDY ordering and the - ``ordering'' by Sulzmann and Lu is that GREEDY always prefers a @{text - Left}-value over a @{text Right}-value, no matter what the underlying - string is. This seems to be only a very minor difference, but it has - drastic consequences in terms of what properties both orderings enjoy. - What is interesting for our purposes is that the properties reflexivity, - totality and transitivity for this GREEDY ordering can be proved - relatively easily by induction. -*} - - -section {* Conclusion *} - -text {* - - We have implemented the POSIX value calculation algorithm introduced by - Sulzmann and Lu - \cite{Sulzmann2014}. Our implementation is nearly identical to the - original and all modifications we introduced are harmless (like our char-clause for - @{text inj}). We have proved this algorithm to be correct, but correct - according to our own specification of what POSIX values are. Our - specification (inspired from work by Vansummeren \cite{Vansummeren2006}) appears to be - much simpler than in \cite{Sulzmann2014} and our proofs are nearly always - straightforward. We have attempted to formalise the original proof - by Sulzmann and Lu \cite{Sulzmann2014}, but we believe it contains - unfillable gaps. In the online version of \cite{Sulzmann2014}, the authors - already acknowledge some small problems, but our experience suggests - that there are more serious problems. - - Having proved the correctness of the POSIX lexing algorithm in - \cite{Sulzmann2014}, which lessons have we learned? Well, this is a - perfect example for the importance of the \emph{right} definitions. We - have (on and off) explored mechanisations as soon as first versions - of \cite{Sulzmann2014} appeared, but have made little progress with - turning the relatively detailed proof sketch in \cite{Sulzmann2014} into a - formalisable proof. Having seen \cite{Vansummeren2006} and adapted the - POSIX definition given there for the algorithm by Sulzmann and Lu made all - the difference: the proofs, as said, are nearly straightforward. The - question remains whether the original proof idea of \cite{Sulzmann2014}, - potentially using our result as a stepping stone, can be made to work? - Alas, we really do not know despite considerable effort. - - - Closely related to our work is an automata-based lexer formalised by - Nipkow \cite{Nipkow98}. This lexer also splits up strings into longest - initial substrings, but Nipkow's algorithm is not completely - computational. The algorithm by Sulzmann and Lu, in contrast, can be - implemented with ease in any functional language. A bespoke lexer for the - Imp-language is formalised in Coq as part of the Software Foundations book - by Pierce et al \cite{Pierce2015}. The disadvantage of such bespoke lexers is that they - do not generalise easily to more advanced features. - Our formalisation is available from the Archive of Formal Proofs \cite{aduAFP16} - under \url{http://www.isa-afp.org/entries/Posix-Lexing.shtml}.\medskip - - \noindent - {\bf Acknowledgements:} - We are very grateful to Martin Sulzmann for his comments on our work and - moreover for patiently explaining to us the details in \cite{Sulzmann2014}. We - also received very helpful comments from James Cheney and anonymous referees. - % \small - \bibliographystyle{plain} - \bibliography{root} - -*} (*<*) diff -r f754a10875c7 -r c840a99a3e05 thys/Journal/document/root.tex --- a/thys/Journal/document/root.tex Tue Oct 10 11:31:47 2017 +0100 +++ b/thys/Journal/document/root.tex Wed Oct 25 12:18:44 2017 +0100 @@ -82,7 +82,8 @@ definition of a POSIX value is equivalent to an alternative definition by Okui and Suzuki which identifies POSIX values as least elements according to an ordering of values. We also prove the correctness of -an optimised version of the POSIX matching algorithm. \smallskip +an optimised version of the POSIX matching algorithm and extend the +results to additional constructors for regular expressions. \smallskip {\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL diff -r f754a10875c7 -r c840a99a3e05 thys/PositionsExt.thy --- a/thys/PositionsExt.thy Tue Oct 10 11:31:47 2017 +0100 +++ b/thys/PositionsExt.thy Wed Oct 25 12:18:44 2017 +0100 @@ -1142,7 +1142,9 @@ prefer 2 apply(simp add: acyclic_def) apply(induct_tac rule: trancl.induct) -apply(auto)[1] + apply(auto)[1] + prefer 3 + oops diff -r f754a10875c7 -r c840a99a3e05 thys/ROOT --- a/thys/ROOT Tue Oct 10 11:31:47 2017 +0100 +++ b/thys/ROOT Wed Oct 25 12:18:44 2017 +0100 @@ -25,6 +25,7 @@ theories "~~/src/HOL/Library/LaTeXsugar" "Paper" + "PaperExt" document_files "root.bib" "root.tex" diff -r f754a10875c7 -r c840a99a3e05 thys/SpecExt.thy --- a/thys/SpecExt.thy Tue Oct 10 11:31:47 2017 +0100 +++ b/thys/SpecExt.thy Wed Oct 25 12:18:44 2017 +0100 @@ -71,7 +71,7 @@ lemma Der_Sequ [simp]: shows "Der c (A ;; B) = (Der c A) ;; B \ (if [] \ A then Der c B else {})" unfolding Der_def Sequ_def -by (auto simp add: Cons_eq_append_conv) + by (auto simp add: Cons_eq_append_conv) section {* Kleene Star for Languages *} @@ -237,9 +237,9 @@ | "L (SEQ r1 r2) = (L r1) ;; (L r2)" | "L (ALT r1 r2) = (L r1) \ (L r2)" | "L (STAR r) = (L r)\" -| "L (UPNTIMES r n) = (\i\ {..n} . (L r) \ i)" +| "L (UPNTIMES r n) = (\i\{..n} . (L r) \ i)" | "L (NTIMES r n) = (L r) \ n" -| "L (FROMNTIMES r n) = (\i\ {n..} . (L r) \ i)" +| "L (FROMNTIMES r n) = (\i\{n..} . (L r) \ i)" | "L (NMTIMES r n m) = (\i\{n..m} . (L r) \ i)" section {* Nullable, Derivatives *} @@ -368,7 +368,6 @@ (simp_all add: Ders_def der_correctness Der_def) - section {* Values *} datatype val = @@ -1422,8 +1421,12 @@ apply(drule_tac x="vs" in meta_spec) apply(simp) apply(drule meta_mp) - apply (smt L.simps(10) Posix1(1) Posix1(2) Posix_NMTIMES1.hyps(4) UN_E append.right_neutral - append_eq_append_conv2 diff_Suc_1 val.inject(5)) + apply(drule Posix1(1)) + apply(drule Posix1(1)) + apply(drule Posix1(1)) + apply(frule Posix1(1)) + apply(simp) + using Posix_NMTIMES1.hyps(4) apply force apply (metis L.simps(10) Posix1(1) UN_E append_Nil2 append_self_conv2) by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append.right_neutral append_Nil) moreover diff -r f754a10875c7 -r c840a99a3e05 thys/journal.pdf Binary file thys/journal.pdf has changed