# HG changeset patch # User Christian Urban # Date 1456668072 0 # Node ID 6adda4a667b13b17c0acf7b67c44b93df09cbbb9 # Parent 489dfa0d7ec999f1c660e619c0a1b11f8d8d6f52 updated diff -r 489dfa0d7ec9 -r 6adda4a667b1 Literature/ktl16regexp-agda.pdf Binary file Literature/ktl16regexp-agda.pdf has changed diff -r 489dfa0d7ec9 -r 6adda4a667b1 thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Thu Feb 25 20:13:41 2016 +0000 +++ b/thys/Paper/Paper.thy Sun Feb 28 14:01:12 2016 +0000 @@ -7,7 +7,13 @@ notation (latex output) If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and - Cons ("_::_" [78,77] 73) and + Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and + ZERO ("\<^raw:\textrm{0}>" 78) and + ONE ("\<^raw:\textrm{1}>" 78) and + CHAR ("_" [1000] 10) and + ALT ("_ + _" [1000,1000] 78) and + SEQ ("_ \ _" [1000,1000] 78) and + STAR ("_\<^sup>\" [1000] 78) and val.Char ("Char _" [1000] 78) and val.Left ("Left _" [1000] 78) and val.Right ("Right _" [1000] 78) and @@ -23,20 +29,117 @@ section {* Introduction *} text {* + - \noindent +Sulzmann and Lu \cite{Sulzmann2014} + +there are two commonly used +disambiguation strategies to create a unique matching tree: +one is called \emph{greedy} matching \cite{Frisch2004} and the +other is \emph{POSIX} matching~\cite{Kuklewicz,Sulzmann2014}. +For the latter there are two rough rules: + +\begin{itemize} +\item The Longest Match Rule (or ``maximal munch rule''):\smallskip\\ The + longest initial substring matched by any regular + expression is taken as next token. + +\item Rule Priority:\smallskip\\ For a particular longest initial + substring, the first regular expression that can match + determines the token. +\end{itemize} + +\noindent In the context of lexing, POSIX is the more +interesting disambiguation strategy as it produces longest +matches, which is necessary for tokenising programs. For +example the string \textit{iffoo} should not match the keyword +\textit{if} and the rest, but as one string \textit{iffoo}, +which might be a variable name in a program. As another +example consider the string $xy$ and the regular expression +\mbox{$(x + y + xy)^*$}. Either the input string can be +matched in two `iterations' by the single letter-regular +expressions $x$ and $y$, or directly in one iteration by $xy$. +The first case corresponds to greedy matching, which first +matches with the left-most symbol and only matches the next +symbol in case of a mismatch. The second case is POSIX +matching, which prefers the longest match. In case more than +one (longest) matches exist, only then it prefers the +left-most match. While POSIX matching seems natural, it turns +out to be much more subtle than greedy matching in terms of +implementations and in terms of proving properties about it. +If POSIX matching is implemented using automata, then one has +to follow transitions (according to the input string) until +one finds an accepting state, record this state and look for +further transition which might lead to another accepting state +that represents a longer input initial substring to be +matched. Only if none can be found, the last accepting state +is returned. + + +Sulzmann and Lu's paper \cite{Sulzmann2014} targets POSIX +regular expression matching. They write that it is known to be +to be a non-trivial problem and nearly all POSIX matching +implementations are ``buggy'' \cite[Page 203]{Sulzmann2014}. +For this they cite a study by Kuklewicz \cite{Kuklewicz}. My +current work is about formalising the proofs in the paper by +Sulzmann and Lu. Specifically, they propose in this paper a +POSIX matching algorithm and give some details of a +correctness proof for this algorithm inside the paper and some +more details in an appendix. This correctness proof is +unformalised, meaning it is just a ``pencil-and-paper'' proof, +not done in a theorem prover. Though, the paper and presumably +the proof have been peer-reviewed. Unfortunately their proof +does not give enough details such that it can be +straightforwardly implemented in a theorem prover, say +Isabelle. In fact, the purported proof they outline does not +work in central places. We discovered this when filling in +many gaps and attempting to formalise the proof in Isabelle. + + + +{\bf Contributions:} + +*} + +section {* Preliminaries *} + +text {* \noindent Strings in Isabelle/HOL are lists of characters with + the empty string being represented by the empty list, written @{term + "[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}. By + using the type char we have a supply of finitely many characters + roughly corresponding to the ASCII character set. Regular exprtessions \begin{center} @{text "r :="} - @{const "NULL"} $\mid$ - @{const "EMPTY"} $\mid$ + @{const "ZERO"} $\mid$ + @{const "ONE"} $\mid$ @{term "CHAR c"} $\mid$ @{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$ @{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$ @{term "STAR r"} \end{center} +*} + +section {* POSIX Regular Expression Matching *} + +section {* The Argument by Sulzmmann and Lu *} + +section {* Conclusion *} + +text {* + + Nipkow lexer from 2000 + +*} + + +text {* + + + + \noindent Values @@ -216,7 +319,6 @@ \begin{center} \begin{tabular}{c} @{thm Values_def}\medskip\\ - @{thm NValues_def} \end{tabular} \end{center} @@ -238,9 +340,9 @@ @{thm[mode=IfThen] mkeps_flat} - @{thm[mode=IfThen] v3} + @{thm[mode=IfThen] Prf_injval} - @{thm[mode=IfThen] v4} + @{thm[mode=IfThen] Prf_injval_flat} @{thm[mode=IfThen] PMatch_mkeps} diff -r 489dfa0d7ec9 -r 6adda4a667b1 thys/Paper/document/root.tex --- a/thys/Paper/document/root.tex Thu Feb 25 20:13:41 2016 +0000 +++ b/thys/Paper/document/root.tex Sun Feb 28 14:01:12 2016 +0000 @@ -39,12 +39,29 @@ \maketitle \begin{abstract} -\Brz{} introduced the notion of derivatives of regular expressions -that can be used for very simple regular expression matching algorithms. -BLA BLA Sulzmann and Lu \cite{Sulzmann2014} +\Brz{} introduced the notion of derivatives for regular +expressions. They can be used for a very simple regular expression +matching algorithm. Sulzmann and Lu cleverly extended this algorithm +in order to deal with POSIX matching, which is the underlying +disambiguation strategy for regular expressions needed in +lexers. Sulzmann and Lu have made available on-line what they call a +``rigorous proof'' of the correctness of their algorithm w.r.t.~their +specification; regrettably, it appears to us to have unfillable gaps. +In the first part of this paper we give our inductive definition of +what a POSIX value is and show $(i)$ that such a value is unique (for +given regular expression and string being matched) and $(ii)$ that +Sulzmann and Lu's algorithm always generates such a value (provided +that the regular expression matches the string). We also prove the +correctness of an optimised version of the POSIX matching +algorithm. Our definitions and proof are much simpler than those by +Sulzmann and Lu and can be easily formalised in Isabelle/HOL. In the +second part we analyse the correctness argument by Sulzmann and Lu in +more detail and explain why it seems hard to turn it into a proof +rigorous enough to be accepted by a system such as Isabelle/HOL. -{\bf Keywords:} +{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, +Isabelle/HOL \end{abstract} \input{session} diff -r 489dfa0d7ec9 -r 6adda4a667b1 thys/ReStar.thy --- a/thys/ReStar.thy Thu Feb 25 20:13:41 2016 +0000 +++ b/thys/ReStar.thy Sun Feb 28 14:01:12 2016 +0000 @@ -103,8 +103,8 @@ section {* Regular Expressions *} datatype rexp = - NULL -| EMPTY + ZERO +| ONE | CHAR char | SEQ rexp rexp | ALT rexp rexp @@ -115,28 +115,65 @@ fun L :: "rexp \ string set" where - "L (NULL) = {}" -| "L (EMPTY) = {[]}" + "L (ZERO) = {}" +| "L (ONE) = {[]}" | "L (CHAR c) = {[c]}" | "L (SEQ r1 r2) = (L r1) ;; (L r2)" | "L (ALT r1 r2) = (L r1) \ (L r2)" | "L (STAR r) = (L r)\" + +section {* Nullable, Derivatives *} + fun nullable :: "rexp \ bool" where - "nullable (NULL) = False" -| "nullable (EMPTY) = True" + "nullable (ZERO) = False" +| "nullable (ONE) = True" | "nullable (CHAR c) = False" | "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" | "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" | "nullable (STAR r) = True" + +fun + der :: "char \ rexp \ rexp" +where + "der c (ZERO) = ZERO" +| "der c (ONE) = ZERO" +| "der c (CHAR c') = (if c = c' then ONE else ZERO)" +| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" +| "der c (SEQ r1 r2) = + (if nullable r1 + then ALT (SEQ (der c r1) r2) (der c r2) + else SEQ (der c r1) r2)" +| "der c (STAR r) = SEQ (der c r) (STAR r)" + +fun + ders :: "string \ rexp \ rexp" +where + "ders [] r = r" +| "ders (c # s) r = ders s (der c r)" + + lemma nullable_correctness: shows "nullable r \ [] \ (L r)" by (induct r) (auto simp add: Sequ_def) +lemma der_correctness: + shows "L (der c r) = Der c (L r)" +apply(induct r) +apply(simp_all add: nullable_correctness) +done + +lemma ders_correctness: + shows "L (ders s r) = Ders s (L r)" +apply(induct s arbitrary: r) +apply(simp_all add: der_correctness Der_def Ders_def) +done + + section {* Values *} datatype val = @@ -173,7 +210,7 @@ "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" | "\ v1 : r1 \ \ Left v1 : ALT r1 r2" | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" -| "\ Void : EMPTY" +| "\ Void : ONE" | "\ Char c : CHAR c" | "\ Stars [] : STAR r" | "\\ v : r; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" @@ -245,196 +282,16 @@ done -section {* Values Sets *} - -definition prefix :: "string \ string \ bool" ("_ \ _" [100, 100] 100) -where - "s1 \ s2 \ \s3. s1 @ s3 = s2" - -definition sprefix :: "string \ string \ bool" ("_ \ _" [100, 100] 100) -where - "s1 \ s2 \ (s1 \ s2 \ s1 \ s2)" - -lemma length_sprefix: - "s1 \ s2 \ length s1 < length s2" -unfolding sprefix_def prefix_def -by (auto) - -definition Prefixes :: "string \ string set" where - "Prefixes s \ {sp. sp \ s}" - -definition Suffixes :: "string \ string set" where - "Suffixes s \ rev ` (Prefixes (rev s))" - -definition SPrefixes :: "string \ string set" where - "SPrefixes s \ {sp. sp \ s}" - -definition SSuffixes :: "string \ string set" where - "SSuffixes s \ rev ` (SPrefixes (rev s))" - -lemma Suffixes_in: - "\s1. s1 @ s2 = s3 \ s2 \ Suffixes s3" -unfolding Suffixes_def Prefixes_def prefix_def image_def -apply(auto) -by (metis rev_rev_ident) - -lemma SSuffixes_in: - "\s1. s1 \ [] \ s1 @ s2 = s3 \ s2 \ SSuffixes s3" -unfolding SSuffixes_def Suffixes_def SPrefixes_def Prefixes_def sprefix_def prefix_def image_def -apply(auto) -by (metis append_self_conv rev.simps(1) rev_rev_ident) - -lemma Prefixes_Cons: - "Prefixes (c # s) = {[]} \ {c # sp | sp. sp \ Prefixes s}" -unfolding Prefixes_def prefix_def -apply(auto simp add: append_eq_Cons_conv) -done - -lemma finite_Prefixes: - "finite (Prefixes s)" -apply(induct s) -apply(auto simp add: Prefixes_def prefix_def)[1] -apply(simp add: Prefixes_Cons) -done - -lemma finite_Suffixes: - "finite (Suffixes s)" -unfolding Suffixes_def -apply(rule finite_imageI) -apply(rule finite_Prefixes) -done - -lemma prefix_Cons: - "((c # s1) \ (c # s2)) = (s1 \ s2)" -apply(auto simp add: prefix_def) -done - -lemma prefix_append: - "((s @ s1) \ (s @ s2)) = (s1 \ s2)" -apply(induct s) -apply(simp) -apply(simp add: prefix_Cons) -done - - -definition Values :: "rexp \ string \ val set" where - "Values r s \ {v. \ v : r \ flat v \ s}" - -definition rest :: "val \ string \ string" where - "rest v s \ drop (length (flat v)) s" - -lemma rest_Nil: - "rest v [] = []" -apply(simp add: rest_def) -done - -lemma rest_Suffixes: - "rest v s \ Suffixes s" -unfolding rest_def -by (metis Suffixes_in append_take_drop_id) - -lemma Values_recs: - "Values (NULL) s = {}" - "Values (EMPTY) s = {Void}" - "Values (CHAR c) s = (if [c] \ s then {Char c} else {})" - "Values (ALT r1 r2) s = {Left v | v. v \ Values r1 s} \ {Right v | v. v \ Values r2 s}" - "Values (SEQ r1 r2) s = {Seq v1 v2 | v1 v2. v1 \ Values r1 s \ v2 \ Values r2 (rest v1 s)}" - "Values (STAR r) s = - {Stars []} \ {Stars (v # vs) | v vs. v \ Values r s \ Stars vs \ Values (STAR r) (rest v s)}" -unfolding Values_def -apply(auto) -(*NULL*) -apply(erule Prf.cases) -apply(simp_all)[7] -(*EMPTY*) -apply(erule Prf.cases) -apply(simp_all)[7] -apply(rule Prf.intros) -apply (metis append_Nil prefix_def) -(*CHAR*) -apply(erule Prf.cases) -apply(simp_all)[7] -apply(rule Prf.intros) -apply(erule Prf.cases) -apply(simp_all)[7] -(*ALT*) -apply(erule Prf.cases) -apply(simp_all)[7] -apply (metis Prf.intros(2)) -apply (metis Prf.intros(3)) -(*SEQ*) -apply(erule Prf.cases) -apply(simp_all)[7] -apply (simp add: append_eq_conv_conj prefix_def rest_def) -apply (metis Prf.intros(1)) -apply (simp add: append_eq_conv_conj prefix_def rest_def) -(*STAR*) -apply(erule Prf.cases) -apply(simp_all)[7] -apply(rule conjI) -apply(simp add: prefix_def) -apply(auto)[1] -apply(simp add: prefix_def) -apply(auto)[1] -apply (metis append_eq_conv_conj rest_def) -apply (metis Prf.intros(6)) -apply (metis append_Nil prefix_def) -apply (metis Prf.intros(7)) -by (metis append_eq_conv_conj prefix_append prefix_def rest_def) - -lemma finite_image_set2: - "finite {x. P x} \ finite {y. Q y} \ finite {(x, y) | x y. P x \ Q y}" - by (rule finite_subset [where B = "\x \ {x. P x}. \y \ {y. Q y}. {(x, y)}"]) auto - section {* Sulzmann functions *} fun mkeps :: "rexp \ val" where - "mkeps(EMPTY) = Void" + "mkeps(ONE) = Void" | "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)" | "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))" | "mkeps(STAR r) = Stars []" -section {* Derivatives *} - -fun - der :: "char \ rexp \ rexp" -where - "der c (NULL) = NULL" -| "der c (EMPTY) = NULL" -| "der c (CHAR c') = (if c = c' then EMPTY else NULL)" -| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" -| "der c (SEQ r1 r2) = - (if nullable r1 - then ALT (SEQ (der c r1) r2) (der c r2) - else SEQ (der c r1) r2)" -| "der c (STAR r) = SEQ (der c r) (STAR r)" - -fun - ders :: "string \ rexp \ rexp" -where - "ders [] r = r" -| "ders (c # s) r = ders s (der c r)" - - -lemma der_correctness: - shows "L (der c r) = Der c (L r)" -apply(induct r) -apply(simp_all add: nullable_correctness) -done - -lemma ders_correctness: - shows "L (ders s r) = Ders s (L r)" -apply(induct s arbitrary: r) -apply(simp add: Ders_def) -apply(simp) -apply(subst der_correctness) -apply(simp add: Ders_def Der_def) -done - -section {* Injection function *} - fun injval :: "rexp \ char \ val \ val" where "injval (CHAR d) c Void = Char d" @@ -445,34 +302,27 @@ | "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" | "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" + +section {* Matcher *} + fun - lex :: "rexp \ string \ val option" + matcher :: "rexp \ string \ val option" where - "lex r [] = (if nullable r then Some(mkeps r) else None)" -| "lex r (c#s) = (case (lex (der c r) s) of + "matcher r [] = (if nullable r then Some(mkeps r) else None)" +| "matcher r (c#s) = (case (matcher (der c r) s) of None \ None | Some(v) \ Some(injval r c v))" fun - lex2 :: "rexp \ string \ val" -where - "lex2 r [] = mkeps r" -| "lex2 r (c#s) = injval r c (lex2 (der c r) s)" - -section {* Projection function *} - -fun projval :: "rexp \ char \ val \ val" + matcher2 :: "rexp \ string \ val" where - "projval (CHAR d) c _ = Void" -| "projval (ALT r1 r2) c (Left v1) = Left (projval r1 c v1)" -| "projval (ALT r1 r2) c (Right v2) = Right (projval r2 c v2)" -| "projval (SEQ r1 r2) c (Seq v1 v2) = - (if flat v1 = [] then Right(projval r2 c v2) - else if nullable r1 then Left (Seq (projval r1 c v1) v2) - else Seq (projval r1 c v1) v2)" -| "projval (STAR r) c (Stars (v # vs)) = Seq (projval r c v) (Stars vs)" + "matcher2 r [] = mkeps r" +| "matcher2 r (c#s) = injval r c (matcher2 (der c r) s)" + +section {* Mkeps, injval *} + lemma mkeps_nullable: assumes "nullable(r)" shows "\ mkeps r : r" @@ -489,33 +339,29 @@ apply(auto) done - -lemma v3: +lemma Prf_injval: assumes "\ v : der c r" shows "\ (injval r c v) : r" using assms -apply(induct arbitrary: v rule: der.induct) -apply(simp) +apply(induct r arbitrary: c v rule: rexp.induct) +apply(simp_all) +(* ZERO *) apply(erule Prf.cases) apply(simp_all)[7] +(* ONE *) +apply(erule Prf.cases) +apply(simp_all)[7] +(* CHAR *) +apply(case_tac "c = char") apply(simp) apply(erule Prf.cases) apply(simp_all)[7] -apply(case_tac "c = c'") -apply(simp) -apply(erule Prf.cases) -apply(simp_all)[7] -apply (metis Prf.intros(5)) +apply(rule Prf.intros(5)) apply(simp) apply(erule Prf.cases) apply(simp_all)[7] -apply(simp) -apply(erule Prf.cases) -apply(simp_all)[7] -apply (metis Prf.intros(2)) -apply (metis Prf.intros(3)) -apply(simp) -apply(case_tac "nullable r1") +(* SEQ *) +apply(case_tac "nullable rexp1") apply(simp) apply(erule Prf.cases) apply(simp_all)[7] @@ -523,8 +369,8 @@ apply(erule Prf.cases) apply(simp_all)[7] apply(auto)[1] -apply (metis Prf.intros(1)) -apply(auto)[1] +apply(rule Prf.intros) +apply(auto)[2] apply (metis Prf.intros(1) mkeps_nullable) apply(simp) apply(erule Prf.cases) @@ -532,7 +378,13 @@ apply(auto)[1] apply(rule Prf.intros) apply(auto)[2] -apply(simp) +(* ALT *) +apply(erule Prf.cases) +apply(simp_all)[7] +apply(clarify) +apply (metis Prf.intros(2)) +apply (metis Prf.intros(3)) +(* STAR *) apply(erule Prf.cases) apply(simp_all)[7] apply(clarify) @@ -543,9 +395,7 @@ apply (metis Prf.intros(6) Prf.intros(7)) by (metis Prf.intros(7)) - - -lemma v4: +lemma Prf_injval_flat: assumes "\ v : der c r" shows "flat (injval r c v) = c # (flat v)" using assms @@ -599,7 +449,7 @@ inductive PMatch :: "string \ rexp \ val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) where - "[] \ EMPTY \ Void" + "[] \ ONE \ Void" | "[c] \ (CHAR c) \ (Char c)" | "s \ r1 \ v \ s \ (ALT r1 r2) \ (Left v)" | "\s \ r2 \ v; s \ L(r1)\ \ s \ (ALT r1 r2) \ (Right v)" @@ -616,14 +466,8 @@ shows "\ v : r" "flat v = s" using assms apply(induct s r v rule: PMatch.induct) -apply(auto) -apply (metis Prf.intros(4)) -apply (metis Prf.intros(5)) -apply (metis Prf.intros(2)) -apply (metis Prf.intros(3)) -apply (metis Prf.intros(1)) -apply (metis Prf.intros(7)) -by (metis Prf.intros(6)) +apply(auto intro: Prf.intros) +done lemma PMatch_mkeps: assumes "nullable r" @@ -745,12 +589,6 @@ -lemma PMatch_Values: - assumes "s \ r \ v" - shows "v \ Values r s" -using assms -apply(simp add: Values_def PMatch1) -by (metis append_Nil2 prefix_def) (* a proof that does not need proj *) lemma PMatch2_roy_version: @@ -759,10 +597,10 @@ using assms apply(induct r arbitrary: s v c rule: rexp.induct) apply(auto) -(* NULL case *) +(* ZERO case *) apply(erule PMatch.cases) apply(simp_all)[7] -(* EMPTY case *) +(* ONE case *) apply(erule PMatch.cases) apply(simp_all)[7] (* CHAR case *) @@ -851,7 +689,7 @@ apply(simp) apply(simp) apply (metis L.simps(6)) -apply(subst v4) +apply(subst Prf_injval_flat) apply (metis PMatch1) apply(simp) apply(auto)[1] @@ -868,17 +706,16 @@ apply(drule PMatch.intros(6)) apply(rule PMatch.intros(7)) (* HERE *) -apply (metis PMatch1(1) list.distinct(1) v4) +apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat) apply (metis Nil_is_append_conv) apply(simp) apply(subst der_correctness) apply(simp add: Der_def) done - lemma lex_correct1: assumes "s \ L r" - shows "lex r s = None" + shows "matcher r s = None" using assms apply(induct s arbitrary: r) apply(simp) @@ -887,13 +724,13 @@ apply(drule_tac x="der a r" in meta_spec) apply(drule meta_mp) apply(auto) -apply(simp add: L_flat_Prf) -by (metis v3 v4) +apply(simp add: der_correctness Der_def) +done lemma lex_correct2: assumes "s \ L r" - shows "\v. lex r s = Some(v) \ \ v : r \ flat v = s" + shows "\v. matcher r s = Some(v) \ \ v : r \ flat v = s" using assms apply(induct s arbitrary: r) apply(simp) @@ -902,13 +739,13 @@ apply(drule meta_mp) apply(simp add: der_correctness Der_def) apply(auto) -apply (metis v3) -apply(rule v4) +apply (metis Prf_injval) +apply(rule Prf_injval_flat) by simp lemma lex_correct3: assumes "s \ L r" - shows "\v. lex r s = Some(v) \ s \ r \ v" + shows "\v. matcher r s = Some(v) \ s \ r \ v" using assms apply(induct s arbitrary: r) apply(simp) @@ -921,7 +758,7 @@ lemma lex_correct5: assumes "s \ L r" - shows "s \ r \ (lex2 r s)" + shows "s \ r \ (matcher2 r s)" using assms apply(induct s arbitrary: r) apply(simp) @@ -935,11 +772,176 @@ done lemma - "lex2 (ALT (CHAR a) (ALT (CHAR b) (SEQ (CHAR a) (CHAR b)))) [a,b] = Right (Right (Seq (Char a) (Char b)))" + "matcher2 (ALT (CHAR a) (ALT (CHAR b) (SEQ (CHAR a) (CHAR b)))) [a,b] = Right (Right (Seq (Char a) (Char b)))" apply(simp) done +section {* Attic stuff below *} + +section {* Projection function *} + +fun projval :: "rexp \ char \ val \ val" +where + "projval (CHAR d) c _ = Void" +| "projval (ALT r1 r2) c (Left v1) = Left (projval r1 c v1)" +| "projval (ALT r1 r2) c (Right v2) = Right (projval r2 c v2)" +| "projval (SEQ r1 r2) c (Seq v1 v2) = + (if flat v1 = [] then Right(projval r2 c v2) + else if nullable r1 then Left (Seq (projval r1 c v1) v2) + else Seq (projval r1 c v1) v2)" +| "projval (STAR r) c (Stars (v # vs)) = Seq (projval r c v) (Stars vs)" + + + +section {* Values Sets *} + +definition prefix :: "string \ string \ bool" ("_ \ _" [100, 100] 100) +where + "s1 \ s2 \ \s3. s1 @ s3 = s2" + +definition sprefix :: "string \ string \ bool" ("_ \ _" [100, 100] 100) +where + "s1 \ s2 \ (s1 \ s2 \ s1 \ s2)" + +lemma length_sprefix: + "s1 \ s2 \ length s1 < length s2" +unfolding sprefix_def prefix_def +by (auto) + +definition Prefixes :: "string \ string set" where + "Prefixes s \ {sp. sp \ s}" + +definition Suffixes :: "string \ string set" where + "Suffixes s \ rev ` (Prefixes (rev s))" + +definition SPrefixes :: "string \ string set" where + "SPrefixes s \ {sp. sp \ s}" + +definition SSuffixes :: "string \ string set" where + "SSuffixes s \ rev ` (SPrefixes (rev s))" + +lemma Suffixes_in: + "\s1. s1 @ s2 = s3 \ s2 \ Suffixes s3" +unfolding Suffixes_def Prefixes_def prefix_def image_def +apply(auto) +by (metis rev_rev_ident) + +lemma SSuffixes_in: + "\s1. s1 \ [] \ s1 @ s2 = s3 \ s2 \ SSuffixes s3" +unfolding SSuffixes_def Suffixes_def SPrefixes_def Prefixes_def sprefix_def prefix_def image_def +apply(auto) +by (metis append_self_conv rev.simps(1) rev_rev_ident) + +lemma Prefixes_Cons: + "Prefixes (c # s) = {[]} \ {c # sp | sp. sp \ Prefixes s}" +unfolding Prefixes_def prefix_def +apply(auto simp add: append_eq_Cons_conv) +done + +lemma finite_Prefixes: + "finite (Prefixes s)" +apply(induct s) +apply(auto simp add: Prefixes_def prefix_def)[1] +apply(simp add: Prefixes_Cons) +done + +lemma finite_Suffixes: + "finite (Suffixes s)" +unfolding Suffixes_def +apply(rule finite_imageI) +apply(rule finite_Prefixes) +done + +lemma prefix_Cons: + "((c # s1) \ (c # s2)) = (s1 \ s2)" +apply(auto simp add: prefix_def) +done + +lemma prefix_append: + "((s @ s1) \ (s @ s2)) = (s1 \ s2)" +apply(induct s) +apply(simp) +apply(simp add: prefix_Cons) +done + + +definition Values :: "rexp \ string \ val set" where + "Values r s \ {v. \ v : r \ flat v \ s}" + +definition rest :: "val \ string \ string" where + "rest v s \ drop (length (flat v)) s" + +lemma rest_Nil: + "rest v [] = []" +apply(simp add: rest_def) +done + +lemma rest_Suffixes: + "rest v s \ Suffixes s" +unfolding rest_def +by (metis Suffixes_in append_take_drop_id) + +lemma Values_recs: + "Values (ZERO) s = {}" + "Values (ONE) s = {Void}" + "Values (CHAR c) s = (if [c] \ s then {Char c} else {})" + "Values (ALT r1 r2) s = {Left v | v. v \ Values r1 s} \ {Right v | v. v \ Values r2 s}" + "Values (SEQ r1 r2) s = {Seq v1 v2 | v1 v2. v1 \ Values r1 s \ v2 \ Values r2 (rest v1 s)}" + "Values (STAR r) s = + {Stars []} \ {Stars (v # vs) | v vs. v \ Values r s \ Stars vs \ Values (STAR r) (rest v s)}" +unfolding Values_def +apply(auto) +(*ZERO*) +apply(erule Prf.cases) +apply(simp_all)[7] +(*ONE*) +apply(erule Prf.cases) +apply(simp_all)[7] +apply(rule Prf.intros) +apply (metis append_Nil prefix_def) +(*CHAR*) +apply(erule Prf.cases) +apply(simp_all)[7] +apply(rule Prf.intros) +apply(erule Prf.cases) +apply(simp_all)[7] +(*ALT*) +apply(erule Prf.cases) +apply(simp_all)[7] +apply (metis Prf.intros(2)) +apply (metis Prf.intros(3)) +(*SEQ*) +apply(erule Prf.cases) +apply(simp_all)[7] +apply (simp add: append_eq_conv_conj prefix_def rest_def) +apply (metis Prf.intros(1)) +apply (simp add: append_eq_conv_conj prefix_def rest_def) +(*STAR*) +apply(erule Prf.cases) +apply(simp_all)[7] +apply(rule conjI) +apply(simp add: prefix_def) +apply(auto)[1] +apply(simp add: prefix_def) +apply(auto)[1] +apply (metis append_eq_conv_conj rest_def) +apply (metis Prf.intros(6)) +apply (metis append_Nil prefix_def) +apply (metis Prf.intros(7)) +by (metis append_eq_conv_conj prefix_append prefix_def rest_def) + +lemma PMatch_Values: + assumes "s \ r \ v" + shows "v \ Values r s" +using assms +apply(simp add: Values_def PMatch1) +by (metis append_Nil2 prefix_def) + +lemma finite_image_set2: + "finite {x. P x} \ finite {y. Q y} \ finite {(x, y) | x y. P x \ Q y}" + by (rule finite_subset [where B = "\x \ {x. P x}. \y \ {y. Q y}. {(x, y)}"]) auto + section {* Connection with Sulzmann's Ordering of values *} @@ -951,7 +953,7 @@ | "length (flat v2) > length (flat v1) \ (Right v2) \(ALT r1 r2) (Left v1)" | "v2 \r2 v2' \ (Right v2) \(ALT r1 r2) (Right v2')" | "v1 \r1 v1' \ (Left v1) \(ALT r1 r2) (Left v1')" -| "Void \EMPTY Void" +| "Void \ONE Void" | "(Char c) \(CHAR c) (Char c)" | "flat (Stars (v # vs)) = [] \ (Stars []) \(STAR r) (Stars (v # vs))" | "flat (Stars (v # vs)) \ [] \ (Stars (v # vs)) \(STAR r) (Stars [])" @@ -969,7 +971,7 @@ "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" | "\ v1 : r1 \ \ Left v1 : ALT r1 r2" | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" -| "\ Void : EMPTY" +| "\ Void : ONE" | "\ Char c : CHAR c" | "\ Stars [] : STAR r" | "\\ v : r; \ Stars vs : STAR r; flat v \ []\ \ \ Stars (v # vs) : STAR r" @@ -1163,10 +1165,10 @@ using assms apply(induct c r arbitrary: s v rule: der.induct) apply(auto) -(* NULL case *) +(* ZERO case *) apply(erule PMatch.cases) apply(simp_all)[7] -(* EMPTY case *) +(* ONE case *) apply(erule PMatch.cases) apply(simp_all)[7] (* CHAR case *) @@ -1265,7 +1267,7 @@ apply(simp) apply(simp) apply (metis L.simps(6)) -apply(subst v4) +apply(subst Prf_injval_flat) apply (metis NPrf_imp_Prf PMatch1N) apply(simp) apply(auto)[1] @@ -1280,7 +1282,7 @@ apply(rotate_tac 2) apply(drule PMatch.intros(6)) apply(rule PMatch.intros(7)) -apply (metis PMatch1(1) list.distinct(1) v4) +apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat) apply (metis Nil_is_append_conv) apply(simp) apply(subst der_correctness) @@ -1289,7 +1291,7 @@ lemma lex_correct4: assumes "s \ L r" - shows "\v. lex r s = Some(v) \ \ v : r \ flat v = s" + shows "\v. matcher r s = Some(v) \ \ v : r \ flat v = s" using lex_correct3[OF assms] apply(auto) apply (metis PMatch1N) diff -r 489dfa0d7ec9 -r 6adda4a667b1 thys/paper.pdf Binary file thys/paper.pdf has changed