# HG changeset patch # User Christian Urban # Date 1457377001 0 # Node ID 4c85af262ee792bbdbfc4b2ca358ff64f61d6464 # Parent d74bfa11802c97beca8ec3b1a6817eb1853447bc updated diff -r d74bfa11802c -r 4c85af262ee7 thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Mon Mar 07 03:23:28 2016 +0000 +++ b/thys/Paper/Paper.thy Mon Mar 07 18:56:41 2016 +0000 @@ -83,7 +83,7 @@ correct according to the specification. The answer given by Sulzmann and Lu \cite{Sulzmann2014} is to define a -relation (called an ``Order Relation'') on the set of values of @{term r}, +relation (called an ``order relation'') on the set of values of @{term r}, and to show that (once a string to be matched is chosen) there is a maximum element and that it is computed by their derivative-based algorithm. This proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a @@ -152,7 +152,7 @@ POSIX matching to tokenise strings, say @{text "iffoo"} and @{text "if"}. For @{text "iffoo"} we obtain by the longest match rule a single identifier token, not a keyword followed by an identifier. For @{text "if"} we obtain by -rule priority a keyword token, not an identifier token---even if @{text +the priority rule a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} matches also.\bigskip \noindent {\bf Contributions:} (NOT DONE YET) We have implemented in @@ -213,17 +213,17 @@ recursive function @{term L} with the clauses: \begin{center} - \begin{tabular}{rcl} - @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ - @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ - @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ - @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ + \begin{tabular}{l@ {\hspace{5mm}}rcl} + (1) & @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ + (2) & @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ + (3) & @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ + (4) & @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + (5) & @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + (6) & @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ \end{tabular} \end{center} - \noindent In the fourth clause we use the operation @{term "DUMMY ;; + \noindent In clause (4) we use the operation @{term "DUMMY ;; DUMMY"} for the concatenation of two languages (it is also list-append for strings). We use the star-notation for regular expressions and for languages (in the last clause above). The star for languages is defined @@ -310,11 +310,11 @@ \noindent gives a positive answer if and only if @{term "s \ L r"}. Consequently, this regular expression matching algorithm satisfies the - usual specification. While the matcher above calculates a provably correct - YES/NO answer for whether a regular expression matches a string, the novel - idea of Sulzmann and Lu \cite{Sulzmann2014} is to append another phase to - this algorithm in order to calculate a [lexical] value. We will explain - the details next. + usual specification for regular expression matching. While the matcher + above calculates a provably correct YES/NO answer for whether a regular + expression matches a string or not, the novel idea of Sulzmann and Lu + \cite{Sulzmann2014} is to append another phase to this algorithm in order + to calculate a [lexical] value. We will explain the details next. *} @@ -338,11 +338,11 @@ @{term "Stars vs"} \end{center} - \noindent where we use @{term vs} standing for a list of values. (This is + \noindent where we use @{term vs} to stand for a list of values. (This is similar to the approach taken by Frisch and Cardelli for GREEDY matching - \cite{Frisch2004}, and Sulzmann and Lu \cite{Sulzmann2014} for POSIX matching). - The string underlying a value can be calculated by the @{const flat} - function, written @{term "flat DUMMY"} and defined as: + \cite{Frisch2004}, and Sulzmann and Lu \cite{Sulzmann2014} for POSIX + matching). The string underlying a value can be calculated by the @{const + flat} function, written @{term "flat DUMMY"} and defined as: \begin{center} \begin{tabular}{lcl} @@ -422,14 +422,15 @@ \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; \end{tikzpicture} \end{center} -\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014} +\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, matching the string @{term "[a,b,c]"}. The first phase (the arrows from -left to right) is \Brz's matcher building succesive derivatives. If at the -last regular expression is @{term nullable}, then functions of the -second phase are called: first @{term mkeps} calculates a value witnessing +left to right) is \Brz's matcher building succesive derivatives. If the +last regular expression is @{term nullable}, then the functions of the +second phase are called (the top-down and right-to-left arrows): first +@{term mkeps} calculates a value witnessing how the empty string has been recognised by @{term "r\<^sub>4"}. After that the function @{term inj} `injects back' the characters of the string into -the values (the arrows from right to left). +the values. \label{Sulz}} \end{figure} @@ -450,9 +451,10 @@ alternative regular expression, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can match the empty string and furthermore @{term "r\<^sub>1"} can match the empty string, then we return a @{text Left}-value. The @{text - Right}-value will only be returned if @{term "r\<^sub>1"} is not nullable. + Right}-value will only be returned if @{term "r\<^sub>1"} cannot match the empty + string. - The most interesting novelty from Sulzmann and Lu \cite{Sulzmann2014} is + The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is the construction of a value for how @{term "r\<^sub>1"} can match the string @{term "[a,b,c]"} from the value how the last derivative, @{term "r\<^sub>4"} in Fig~\ref{Sulz}, can match the empty string. Sulzmann and @@ -510,9 +512,9 @@ we match in the clause (5) is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, while in (6) it is just @{term "Right v\<^sub>2"}. One more interesting point is in the right-hand side of clause (6): since in this case the - regular expression @{text "r\<^sub>1"} does not ``contribute'' for - matching the string, that is only matches the empty string, we need to - call @{const mkeps} in order to construct a value how @{term "r\<^sub>1"} + regular expression @{text "r\<^sub>1"} does not ``contribute'' to + matching the string, that means it only matches the empty string, we need to + call @{const mkeps} in order to construct a value for how @{term "r\<^sub>1"} can match this empty string. A similar argument applies for why we can expect in the left-hand side of clause (7) that the value is of the form @{term "Seq v (Stars vs)"}---the derivative of a star is @{term "SEQ r @@ -524,6 +526,18 @@ injval.simps(1)[of "c" "c"]} $\dn$ @{thm (rhs) injval.simps(1)[of "c"]}, but our deviation is harmless.} + The idea of @{term inj} to ``inject back'' a character into a value can + be made precise by the first part of the following lemma; the second + part shows that the underlying string of an @{const mkeps}-value is + the empty string. + + \begin{lemma}\mbox{}\\\label{Prf_injval_flat} + \begin{tabular}{ll} + (1) & @{thm[mode=IfThen] Prf_injval_flat}\\ + (2) & @{thm[mode=IfThen] mkeps_flat} + \end{tabular} + \end{lemma} + Having defined the @{const mkeps} and @{text inj} function we can extend \Brz's matcher so that a [lexical] value is constructed (assuming the regular expression matches the string). The clauses of the lexer are @@ -599,9 +613,7 @@ @{thm[mode=IfThen] PMatch_mkeps} \end{lemma} - \begin{lemma} - @{thm[mode=IfThen] Prf_injval_flat} - \end{lemma} + \begin{lemma} @{thm[mode=IfThen] PMatch2_roy_version} diff -r d74bfa11802c -r 4c85af262ee7 thys/ReStar.thy --- a/thys/ReStar.thy Mon Mar 07 03:23:28 2016 +0000 +++ b/thys/ReStar.thy Mon Mar 07 18:56:41 2016 +0000 @@ -582,131 +582,158 @@ apply(clarify) by (metis PMatch1(2)) - - - (* a proof that does not need proj *) lemma PMatch2_roy_version: assumes "s \ (der c r) \ v" shows "(c#s) \ r \ (injval r c v)" using assms -apply(induct r arbitrary: s v c rule: rexp.induct) -apply(auto) -(* ZERO case *) -apply(erule PMatch.cases) -apply(simp_all)[7] -(* ONE case *) -apply(erule PMatch.cases) -apply(simp_all)[7] -(* CHAR case *) -apply(case_tac "c = x") -apply(simp) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply (metis PMatch.intros(2)) -apply(simp) -apply(erule PMatch.cases) -apply(simp_all)[7] -(* ALT case *) -prefer 2 -apply(erule PMatch.cases) -apply(simp_all)[7] -apply (metis PMatch.intros(3)) -apply(clarify) -apply(rule PMatch.intros) -apply metis -apply(simp add: der_correctness Der_def) -(* SEQ case *) -apply(case_tac "nullable x1") -apply(simp) -prefer 2 -(* not-nullbale case *) -apply(simp) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply(subst append.simps(2)[symmetric]) -apply(rule PMatch.intros) -apply metis -apply metis -apply(auto)[1] -apply(simp add: der_correctness Der_def) -apply(auto)[1] -(* nullable case *) -apply(erule PMatch.cases) -apply(simp_all)[7] -(* left case *) -apply(clarify) -apply(erule PMatch.cases) -apply(simp_all)[4] -prefer 2 -apply(clarify) -prefer 2 -apply(clarify) -apply(clarify) -apply(simp (no_asm)) -apply(subst append.simps(2)[symmetric]) -apply(rule PMatch.intros) -apply metis -apply metis -apply(erule contrapos_nn) -apply(erule exE)+ -apply(auto)[1] -apply(simp add: der_correctness Der_def) -apply metis -(* right interesting case *) -apply(clarify) -apply(subst append.simps(1)[symmetric]) -apply(rule PMatch.intros) -apply (metis PMatch_mkeps) -apply metis -apply(rule notI) -apply(clarify) -apply(simp) -apply(simp add: der_correctness) -apply(simp only: Der_def Sequ_def) -apply(simp) -apply (metis Cons_eq_append_conv) -(* Stars case *) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply(rotate_tac 2) -apply(frule_tac PMatch1) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(subst append.simps(2)[symmetric]) -apply(rule PMatch.intros) -apply metis -apply(auto)[1] -apply(rule PMatch.intros) -apply(simp) -apply(simp) -apply(simp) -apply (metis L.simps(6)) -apply(subst Prf_injval_flat) -apply (metis PMatch1) -apply(simp) -apply(auto)[1] -apply(drule_tac x="s\<^sub>3" in spec) -apply(drule mp) -defer -apply metis -apply(clarify) -apply(drule_tac x="s1" in meta_spec) -apply(drule_tac x="v1" in meta_spec) -apply(drule_tac x="c" in meta_spec) -apply(simp) -apply(rotate_tac 2) -apply(drule PMatch.intros(6)) -apply(rule PMatch.intros(7)) -(* HERE *) -apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat) -apply (metis Nil_is_append_conv) -apply(simp) -apply(subst der_correctness) -apply(simp add: Der_def) -done +proof(induct r arbitrary: s v rule: rexp.induct) + case ZERO + have "s \ der c ZERO \ v" by fact + then have "s \ ZERO \ v" by simp + then have "False" by cases + then show "(c#s) \ ZERO \ (injval ZERO c v)" by simp +next + case ONE + have "s \ der c ONE \ v" by fact + then have "s \ ZERO \ v" by simp + then have "False" by cases + then show "(c#s) \ ONE \ (injval ONE c v)" by simp +next + case (CHAR d) + consider (eq) "c = d" | (ineq) "c \ d" by blast + then show "(c#s) \ (CHAR d) \ (injval (CHAR d) c v)" + proof (cases) + case eq + have "s \ der c (CHAR d) \ v" by fact + then have "s \ ONE \ v" using eq by simp + then have eqs: "s = [] \ v = Void" by cases simp + show "(c # s) \ CHAR d \ injval (CHAR d) c v" using eq eqs by (auto intro: PMatch.intros) + next + case ineq + have "s \ der c (CHAR d) \ v" by fact + then have "s \ ZERO \ v" using ineq by simp + then have "False" by cases + then show "(c # s) \ CHAR d \ injval (CHAR d) c v" by simp + qed +next + case (ALT r1 r2) + have IH1: "\s v. s \ der c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact + have IH2: "\s v. s \ der c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact + have "s \ der c (ALT r1 r2) \ v" by fact + then have "s \ ALT (der c r1) (der c r2) \ v" by simp + then consider (left) v' where "v = Left v'" "s \ der c r1 \ v'" + | (right) v' where "v = Right v'" "s \ L (der c r1)" "s \ der c r2 \ v'" + by cases auto + then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" + proof (cases) + case left + have "s \ der c r1 \ v'" by fact + then have "(c # s) \ r1 \ injval r1 c v'" using IH1 by simp + then have "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c (Left v')" by (auto intro: PMatch.intros) + then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" using left by simp + next + case right + have "s \ L (der c r1)" by fact + then have "c # s \ L r1" by (simp add: der_correctness Der_def) + moreover + have "s \ der c r2 \ v'" by fact + then have "(c # s) \ r2 \ injval r2 c v'" using IH2 by simp + ultimately have "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c (Right v')" + by (auto intro: PMatch.intros) + then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" using right by simp + qed +next + case (SEQ r1 r2) + have IH1: "\s v. s \ der c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact + have IH2: "\s v. s \ der c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact + have "s \ der c (SEQ r1 r2) \ v" by fact + then consider + (left_nullable) v1 v2 s1 s2 where + "v = Left (Seq v1 v2)" "s = s1 @ s2" + "s1 \ der c r1 \ v1" "s2 \ r2 \ v2" "nullable r1" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" + | (right_nullable) v1 s1 s2 where + "v = Right v1" "s = s1 @ s2" + "s \ der c r2 \ v1" "nullable r1" "s1 @ s2 \ L (SEQ (der c r1) r2)" + | (not_nullable) v1 v2 s1 s2 where + "v = Seq v1 v2" "s = s1 @ s2" + "s1 \ der c r1 \ v1" "s2 \ r2 \ v2" "\nullable r1" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" + apply(auto split: if_splits simp add: Sequ_def) apply(erule PMatch.cases) + apply(auto elim: PMatch.cases simp add: Sequ_def der_correctness Der_def) + by fastforce + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" + proof (cases) + case left_nullable + have "s1 \ der c r1 \ v1" by fact + then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by (simp add: der_correctness Der_def) + ultimately have "((c # s1) @ s2) \ SEQ r1 r2 \ Seq (injval r1 c v1) v2" using left_nullable by (rule_tac PMatch.intros) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using left_nullable by simp + next + case right_nullable + have "nullable r1" by fact + then have "[] \ r1 \ (mkeps r1)" by (rule PMatch_mkeps) + moreover + have "s \ der c r2 \ v1" by fact + then have "(c # s) \ r2 \ (injval r2 c v1)" using IH2 by simp + moreover + have "s1 @ s2 \ L (SEQ (der c r1) r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = c # s \ [] @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" using right_nullable + by(auto simp add: der_correctness Der_def append_eq_Cons_conv Sequ_def) + ultimately have "([] @ (c # s)) \ SEQ r1 r2 \ Seq (mkeps r1) (injval r2 c v1)" + by(rule PMatch.intros) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using right_nullable by simp + next + case not_nullable + have "s1 \ der c r1 \ v1" by fact + then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by (simp add: der_correctness Der_def) + ultimately have "((c # s1) @ s2) \ SEQ r1 r2 \ Seq (injval r1 c v1) v2" using not_nullable + by (rule_tac PMatch.intros) (simp_all) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using not_nullable by simp + qed +next + case (STAR r) + have IH: "\s v. s \ der c r \ v \ (c # s) \ r \ injval r c v" by fact + have "s \ der c (STAR r) \ v" by fact + then consider + (cons) v1 vs s1 s2 where + "v = Seq v1 (Stars vs)" "s = s1 @ s2" + "s1 \ der c r \ v1" "s2 \ (STAR r) \ (Stars vs)" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (STAR r))" + apply(erule_tac PMatch.cases) + apply(auto) + apply(rotate_tac 4) + apply(erule_tac PMatch.cases) + apply(auto) + apply (simp add: PMatch.intros(6)) + using PMatch.intros(7) by blast + then show "(c # s) \ STAR r \ injval (STAR r) c v" + proof (cases) + case cons + have "s1 \ der c r \ v1" by fact + then have "(c # s1) \ r \ injval r c v1" using IH by simp + moreover + have "s2 \ STAR r \ Stars vs" by fact + moreover + have "(c # s1) \ r \ injval r c v1" by fact + then have "flat (injval r c v1) = (c # s1)" by (rule PMatch1) + then have "flat (injval r c v1) \ []" by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (STAR r))" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" + by (simp add: der_correctness Der_def) + ultimately + have "((c # s1) @ s2) \ STAR r \ Stars (injval r c v1 # vs)" by (rule PMatch.intros) + then show "(c # s) \ STAR r \ injval (STAR r) c v" using cons by(simp) + qed +qed lemma lex_correct1: assumes "s \ L r" diff -r d74bfa11802c -r 4c85af262ee7 thys/paper.pdf Binary file thys/paper.pdf has changed