# HG changeset patch # User Christian Urban # Date 1457418648 0 # Node ID b208bc047eed07f4601e6fc5c509ec05d79d2869 # Parent e866678c29cb51d973e189c10dd1f81dd629bbb9 updated diff -r e866678c29cb -r b208bc047eed thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Tue Mar 08 05:03:47 2016 +0000 +++ b/thys/Paper/Paper.thy Tue Mar 08 06:30:48 2016 +0000 @@ -41,8 +41,18 @@ matcher ("lexer _ _" [78,78] 77) and Prf ("_ : _" [75,75] 75) and - PMatch ("'(_, _') \ _" [63,75,75] 75) + PMatch ("'(_, _') \ _" [63,75,75] 75) and (* and ValOrd ("_ \\<^bsub>_\<^esub> _" [78,77,77] 73) *) + + F_RIGHT ("F\<^bsub>Right\<^esub> _") and + F_LEFT ("F\<^bsub>Left\<^esub> _") and + F_ALT ("F\<^bsub>Alt\<^esub> _ _") and + F_SEQ1 ("F\<^bsub>Seq1\<^esub> _ _") and + F_SEQ2 ("F\<^bsub>Seq2\<^esub> _ _") and + F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and + simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and + simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and + matcher3 ("lexer\<^sup>+ _ _") definition "match r s \ nullable (ders s r)" @@ -172,13 +182,6 @@ r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to @{term r} are applied. -%An extended version of \cite{Sulzmann2014} is available at the website of -%its first author; this includes some ``proofs'', claimed in -%\cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in -%final form, we make no comment thereon, preferring to give general reasons -%for our belief that the approach of \cite{Sulzmann2014} is problematic -%rather than to discuss details of unpublished work. - *} section {* Preliminaries *} @@ -803,34 +806,156 @@ optimisations and simple correctness proof. While the simplification of regular expressions according to - simplification rules + rules like + + \begin{center} + \begin{tabular}{lcl} + @{term "ALT ZERO r"} & @{text "\"} & @{term r}\\ + @{term "ALT r ZERO"} & @{text "\"} & @{term r}\\ + @{term "SEQ ONE r"} & @{text "\"} & @{term r}\\ + @{term "SEQ r ONE"} & @{text "\"} & @{term r} + \end{tabular} + \end{center} + \noindent is well understood, there is an obstacle with the POSIX value + calculation algorithm by Sulzmann and Lu: if we build a derivative regular + expression and then simplify it, we will calculate a POSIX value for this + simplified regular expression, \emph{not} for the original (unsimplified) + derivative regular expression. Sulzmann and Lu overcome this obstacle by + not just calculating a simplified regular expression, but also calculating + a \emph{rectification function} that ``repairs'' the incorrect value. + + The rectification functions can be (slightly clumsily) implemented in + Isabelle/HOL as follows using some auxiliary functions: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & @{text "Right (f v)"}\\ + @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & @{text "Left (f v)"}\\ + + @{thm (lhs) F_ALT.simps(1)} & $\dn$ & @{text "Right (f\<^sub>2 v)"}\\ + @{thm (lhs) F_ALT.simps(2)} & $\dn$ & @{text "Left (f\<^sub>1 v)"}\\ + + @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 ()) (f\<^sub>2 v)"}\\ + @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v) (f\<^sub>2 ())"}\\ + @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)"}\bigskip\\ + + @{term "simp_ALT (ZERO, DUMMY) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_RIGHT f\<^sub>2)"}\\ + @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (ZERO, DUMMY)"} & $\dn$ & @{term "(r\<^sub>1, F_LEFT f\<^sub>1)"}\\ + @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(ALT r\<^sub>1 r\<^sub>2, F_ALT f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (ONE, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_SEQ1 f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (ONE, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>1, F_SEQ2 f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(SEQ r\<^sub>1 r\<^sub>2, F_SEQ f\<^sub>1 f\<^sub>2)"}\\ + \end{tabular} + \end{center} \noindent - is well understood, there is a problem with the POSIX - value calculation algorithm by Sulzmann and Lu: if we build a derivative - regular expression and then simplify it, we will calculate a POSIX value - for this simplified regular expression, \emph{not} for the original - (unsimplified) derivative regular expression. Sulzmann and Lu overcome - this problem by not just calculating a simplified regular expression, but - also calculating a \emph{rectification function} that ``repairs'' the - incorrect value. + The main simplification function is then + + \begin{center} + \begin{tabular}{lcl} + @{term "simp (ALT r\<^sub>1 r\<^sub>2)"} & $\dn$ & @{term "simp_ALT (simp r\<^sub>1) (simp r\<^sub>2)"}\\ + @{term "simp (SEQ r\<^sub>1 r\<^sub>2)"} & $\dn$ & @{term "simp_SEQ (simp r\<^sub>1) (simp r\<^sub>2)"}\\ + @{term "simp r"} & $\dn$ & @{term "(r, id)"}\\ + \end{tabular} + \end{center} + + \noindent where @{term "id"} stands for the identity function. Note that + we do not simplify under stars: this seems to slow down the algorithm, + rather than speed up. The optimised lexer is then given by the clauses: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) matcher3.simps(1)} & $\dn$ & @{thm (rhs) matcher3.simps(1)}\\ + @{thm (lhs) matcher3.simps(2)} & $\dn$ & + @{text "let (r\<^sub>s, f\<^sub>r) = simp (r "}$\backslash$@{text " c) in"}\\ + & & @{text "case"} @{term "matcher3 r\<^sub>s s"} @{text of}\\ + & & \phantom{$|$} @{term "None"} @{text "\"} @{term None}\\ + & & $|$ @{term "Some v"} @{text "\"} @{text "Some (inj r c (f\<^sub>r v))"} + \end{tabular} + \end{center} + + \noindent + In the second clause we first calculate the derivative @{text "r \\ c"} + and then simplify the result. This gives us a simplified derivative + @{text "r\<^sub>s"} and a rectification function @{text "f\<^sub>r"}. The matcher + is recursively called with the simplified derivative, but before + we inject the character @{term c} into value, we need to rectify + it (@{term "f\<^sub>r v"}). We can prove that + + \begin{lemma} + @{term "matcher3 r s = matcher r s"} + \end{lemma} + + \noindent + holds but refer the reader to our mechanisation for details. *} -section {* The Argument by Sulzmmann and Lu *} +section {* The Correctness Argument by Sulzmmann and Lu *} + +text {* + \newcommand{\greedy}{\succcurlyeq_{gr}} + \newcommand{\posix}{\succcurlyeq_{PX}} + + An extended version of \cite{Sulzmann2014} is available at the website of + its first author; this includes some ``proofs'', claimed in + \cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in + final form, we make no comment thereon, preferring to give general reasons + for our belief that the approach of \cite{Sulzmann2014} is problematic. + + Sulzmann and Lu explicitly refer to the paper \cite{Frisch2004} by Frisch + and Cardelli from where they have taken their main idea for their + correctness proof of the POSIX algorithm. Frisch and Cardelli introduced + an ordering, written $\greedy$, for values and they show that their greedy + matching algorithm always produces a maximal element according to this + ordering (from all possible solutions). The ordering $\greedy$ is defined + by the following rules: + +\begin{center} +\begin{tabular}{cc} +\infer{v_{1} \greedy v'_{1}} {(v_{1},v_{2}) \greedy (v'_{1},v'_{2})} +& +\infer{v_{2} \greedy v'_{2}} {(v_{1},v_{2}) \greedy (v_{1},v'_{2})} +\smallskip\\ +\infer{v_{1} \greedy v_{2}} {Left \; v_{1} \greedy Left \; v_{2}} +& +\infer{v_{1} \greedy v_{2}} {Right \; v_{1} \greedy Right \; v_{2}} +\\ + +\infer{\mbox{}}{Left \; v_{2}\greedy Right \; v_{1} } +\medskip\\ + +$\infer{ v_{1} \greedy v_{2}} {v_{1} :: vs_{1} \greedy v_{2} :: vs_{2}} $ & +$\infer{ vs_{1} \greedy vs_{2}} {v :: vs_{1} \greedy v :: vs_{2}}$ +\\ + +& +$\infer{\mbox{}} { v::vs \greedy []}$ +\\ + +\infer{\mbox{}}{Char \; c \greedy Char \; c} +& +\infer{\mbox{}}{Void \greedy Void}\\ + +\end{tabular} +\end{center} + +\noindent That these rules realise a greedy ordering can be +seen in the first rule in the third line where a $Left$-value +is always bigger than (or preferred over) a $Right$-value. +What is interesting for our purposes is that the properties +reflexivity, totality and transitivity for this greedy +ordering can be proved relatively easily by +induction. + +*} section {* Conclusion *} -text {* - - Nipkow lexer from 2000 - -*} - text {* - + Nipkow lexer from 2000 \noindent We have also introduced a slightly restricted version of this relation @@ -838,103 +963,6 @@ \bigskip - \noindent - - - \noindent - Our version of Sulzmann's ordering relation - - -*} - -text {* - \noindent - Some lemmas we have proved:\bigskip - - @{thm L_flat_Prf} - - @{thm[mode=IfThen] mkeps_nullable} - - @{thm[mode=IfThen] mkeps_flat} - - @{thm[mode=IfThen] Prf_injval} - - @{thm[mode=IfThen] Prf_injval_flat} - - @{thm[mode=IfThen] PMatch_mkeps} - - @{thm[mode=IfThen] PMatch1(2)} - - @{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]} - - - \noindent {\bf Proof} The proof is by induction on the definition of - @{const der}. Other inductions would go through as well. The - interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the - case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis - - \[ - \begin{array}{l} - (IH1)\quad @{text "\s v."} \text{\;if\;} @{term "s \ der c r\<^sub>1 \ v"} - \text{\;then\;} @{term "(c # s) \ r\<^sub>1 \ injval r\<^sub>1 c v"}\\ - (IH2)\quad @{text "\s v."} \text{\;if\;} @{term "s \ der c r\<^sub>2 \ v"} - \text{\;then\;} @{term "(c # s) \ r\<^sub>2 \ injval r\<^sub>2 c v"} - \end{array} - \] - - \noindent - and have - - \[ - @{term "s \ ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2) \ v"} - \] - - \noindent - There are two cases what @{term v} can be: (1) @{term "Left v'"} and (2) @{term "Right v'"}. - - \begin{itemize} - \item[(1)] We know @{term "s \ SEQ (der c r\<^sub>1) r\<^sub>2 \ v'"} holds, from which we - can infer that there are @{text "s\<^sub>1"}, @{term "s\<^sub>2"}, @{text "v\<^sub>1"}, @{term "v\<^sub>2"} - with - - \[ - @{term "s\<^sub>1 \ der c r\<^sub>1 \ v\<^sub>1"} \qquad\text{and}\qquad @{term "s\<^sub>2 \ r\<^sub>2 \ v\<^sub>2"} - \] - - and also - - \[ - @{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \ s\<^sub>1 @ s\<^sub>3 \ L (der c r\<^sub>1) \ s\<^sub>4 \ L r\<^sub>2)"} - \] - - \noindent - and have to prove - - \[ - @{term "((c # s\<^sub>1) @ s\<^sub>2) \ SEQ r\<^sub>1 r\<^sub>2 \ Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"} - \] - - \noindent - The two requirements @{term "(c # s\<^sub>1) \ r\<^sub>1 \ injval r\<^sub>1 c v\<^sub>1"} and - @{term "s\<^sub>2 \ r\<^sub>2 \ v\<^sub>2"} can be proved by the induction hypothese (IH1) and the - fact above. - - \noindent - This leaves to prove - - \[ - @{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \ (c # s\<^sub>1) @ s\<^sub>3 \ L r\<^sub>1 \ s\<^sub>4 \ L r\<^sub>2)"} - \] - - \noindent - which holds because @{term "(c # s\<^sub>1) @ s\<^sub>3 \ L r\<^sub>1 "} implies @{term "s\<^sub>1 @ s\<^sub>3 \ L (der c r\<^sub>1) "} - - \item[(2)] This case is similar. - \end{itemize} - - \noindent - The final case is that @{term " \ nullable r\<^sub>1"} holds. This case again similar - to the cases above. *} diff -r e866678c29cb -r b208bc047eed thys/ReStar.thy --- a/thys/ReStar.thy Tue Mar 08 05:03:47 2016 +0000 +++ b/thys/ReStar.thy Tue Mar 08 06:30:48 2016 +0000 @@ -804,29 +804,27 @@ "F_LEFT f v = Left (f v)" fun F_ALT where - "F_ALT f1 f2 (Right v) = Right (f2 v)" -| "F_ALT f1 f2 (Left v) = Left (f1 v)" + "F_ALT f\<^sub>1 f\<^sub>2 (Right v) = Right (f\<^sub>2 v)" +| "F_ALT f\<^sub>1 f\<^sub>2 (Left v) = Left (f\<^sub>1 v)" fun F_SEQ1 where - "F_SEQ1 f1 f2 v = Seq (f1 Void) (f2 v)" + "F_SEQ1 f\<^sub>1 f\<^sub>2 v = Seq (f\<^sub>1 Void) (f\<^sub>2 v)" fun F_SEQ2 where - "F_SEQ2 f1 f2 v = Seq (f1 v) (f2 Void)" + "F_SEQ2 f\<^sub>1 f\<^sub>2 v = Seq (f\<^sub>1 v) (f\<^sub>2 Void)" fun F_SEQ where - "F_SEQ f1 f2 (Seq v1 v2) = Seq (f1 v1) (f2 v2)" + "F_SEQ f\<^sub>1 f\<^sub>2 (Seq v\<^sub>1 v\<^sub>2) = Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)" fun simp_ALT where - "simp_ALT (ZERO, _) (r2, f2) = (r2, F_RIGHT f2)" -| "simp_ALT (r1, f1) (ZERO, _) = (r1, F_LEFT f1)" -| "simp_ALT (r1, f1) (r2, f2) = (ALT r1 r2, F_ALT f1 f2)" + "simp_ALT (ZERO, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (r\<^sub>2, F_RIGHT f\<^sub>2)" +| "simp_ALT (r\<^sub>1, f\<^sub>1) (ZERO, f\<^sub>2) = (r\<^sub>1, F_LEFT f\<^sub>1)" +| "simp_ALT (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (ALT r\<^sub>1 r\<^sub>2, F_ALT f\<^sub>1 f\<^sub>2)" fun simp_SEQ where - "simp_SEQ (ONE, f1) (r2, f2) = (r2, F_SEQ1 f1 f2)" -| "simp_SEQ (r1, f1) (ONE, f2) = (r1, F_SEQ2 f1 f2)" -| "simp_SEQ (ZERO, f1) (r2, f2) = (ZERO, f1)" -| "simp_SEQ (r1, f1) (ZERO, f2) = (ZERO, f2)" -| "simp_SEQ (r1, f1) (r2, f2) = (SEQ r1 r2, F_SEQ f1 f2)" + "simp_SEQ (ONE, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (r\<^sub>2, F_SEQ1 f\<^sub>1 f\<^sub>2)" +| "simp_SEQ (r\<^sub>1, f\<^sub>1) (ONE, f\<^sub>2) = (r\<^sub>1, F_SEQ2 f\<^sub>1 f\<^sub>2)" +| "simp_SEQ (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (SEQ r\<^sub>1 r\<^sub>2, F_SEQ f\<^sub>1 f\<^sub>2)" fun simp :: "rexp \ rexp * (val \ val)" diff -r e866678c29cb -r b208bc047eed thys/paper.pdf Binary file thys/paper.pdf has changed