Binary file Literature/ktl16regexp-agda.pdf has changed
--- a/thys/Paper/Paper.thy Thu Feb 25 20:13:41 2016 +0000
+++ b/thys/Paper/Paper.thy Sun Feb 28 14:01:12 2016 +0000
@@ -7,7 +7,13 @@
notation (latex output)
If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and
- Cons ("_::_" [78,77] 73) and
+ Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and
+ ZERO ("\<^raw:\textrm{0}>" 78) and
+ ONE ("\<^raw:\textrm{1}>" 78) and
+ CHAR ("_" [1000] 10) and
+ ALT ("_ + _" [1000,1000] 78) and
+ SEQ ("_ \<cdot> _" [1000,1000] 78) and
+ STAR ("_\<^sup>\<star>" [1000] 78) and
val.Char ("Char _" [1000] 78) and
val.Left ("Left _" [1000] 78) and
val.Right ("Right _" [1000] 78) and
@@ -23,20 +29,117 @@
section {* Introduction *}
text {*
+
- \noindent
+Sulzmann and Lu \cite{Sulzmann2014}
+
+there are two commonly used
+disambiguation strategies to create a unique matching tree:
+one is called \emph{greedy} matching \cite{Frisch2004} and the
+other is \emph{POSIX} matching~\cite{Kuklewicz,Sulzmann2014}.
+For the latter there are two rough rules:
+
+\begin{itemize}
+\item The Longest Match Rule (or ``maximal munch rule''):\smallskip\\ The
+ longest initial substring matched by any regular
+ expression is taken as next token.
+
+\item Rule Priority:\smallskip\\ For a particular longest initial
+ substring, the first regular expression that can match
+ determines the token.
+\end{itemize}
+
+\noindent In the context of lexing, POSIX is the more
+interesting disambiguation strategy as it produces longest
+matches, which is necessary for tokenising programs. For
+example the string \textit{iffoo} should not match the keyword
+\textit{if} and the rest, but as one string \textit{iffoo},
+which might be a variable name in a program. As another
+example consider the string $xy$ and the regular expression
+\mbox{$(x + y + xy)^*$}. Either the input string can be
+matched in two `iterations' by the single letter-regular
+expressions $x$ and $y$, or directly in one iteration by $xy$.
+The first case corresponds to greedy matching, which first
+matches with the left-most symbol and only matches the next
+symbol in case of a mismatch. The second case is POSIX
+matching, which prefers the longest match. In case more than
+one (longest) matches exist, only then it prefers the
+left-most match. While POSIX matching seems natural, it turns
+out to be much more subtle than greedy matching in terms of
+implementations and in terms of proving properties about it.
+If POSIX matching is implemented using automata, then one has
+to follow transitions (according to the input string) until
+one finds an accepting state, record this state and look for
+further transition which might lead to another accepting state
+that represents a longer input initial substring to be
+matched. Only if none can be found, the last accepting state
+is returned.
+
+
+Sulzmann and Lu's paper \cite{Sulzmann2014} targets POSIX
+regular expression matching. They write that it is known to be
+to be a non-trivial problem and nearly all POSIX matching
+implementations are ``buggy'' \cite[Page 203]{Sulzmann2014}.
+For this they cite a study by Kuklewicz \cite{Kuklewicz}. My
+current work is about formalising the proofs in the paper by
+Sulzmann and Lu. Specifically, they propose in this paper a
+POSIX matching algorithm and give some details of a
+correctness proof for this algorithm inside the paper and some
+more details in an appendix. This correctness proof is
+unformalised, meaning it is just a ``pencil-and-paper'' proof,
+not done in a theorem prover. Though, the paper and presumably
+the proof have been peer-reviewed. Unfortunately their proof
+does not give enough details such that it can be
+straightforwardly implemented in a theorem prover, say
+Isabelle. In fact, the purported proof they outline does not
+work in central places. We discovered this when filling in
+many gaps and attempting to formalise the proof in Isabelle.
+
+
+
+{\bf Contributions:}
+
+*}
+
+section {* Preliminaries *}
+
+text {* \noindent Strings in Isabelle/HOL are lists of characters with
+ the empty string being represented by the empty list, written @{term
+ "[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}. By
+ using the type char we have a supply of finitely many characters
+ roughly corresponding to the ASCII character set.
Regular exprtessions
\begin{center}
@{text "r :="}
- @{const "NULL"} $\mid$
- @{const "EMPTY"} $\mid$
+ @{const "ZERO"} $\mid$
+ @{const "ONE"} $\mid$
@{term "CHAR c"} $\mid$
@{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$
@{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$
@{term "STAR r"}
\end{center}
+*}
+
+section {* POSIX Regular Expression Matching *}
+
+section {* The Argument by Sulzmmann and Lu *}
+
+section {* Conclusion *}
+
+text {*
+
+ Nipkow lexer from 2000
+
+*}
+
+
+text {*
+
+
+
+
\noindent
Values
@@ -216,7 +319,6 @@
\begin{center}
\begin{tabular}{c}
@{thm Values_def}\medskip\\
- @{thm NValues_def}
\end{tabular}
\end{center}
@@ -238,9 +340,9 @@
@{thm[mode=IfThen] mkeps_flat}
- @{thm[mode=IfThen] v3}
+ @{thm[mode=IfThen] Prf_injval}
- @{thm[mode=IfThen] v4}
+ @{thm[mode=IfThen] Prf_injval_flat}
@{thm[mode=IfThen] PMatch_mkeps}
--- a/thys/Paper/document/root.tex Thu Feb 25 20:13:41 2016 +0000
+++ b/thys/Paper/document/root.tex Sun Feb 28 14:01:12 2016 +0000
@@ -39,12 +39,29 @@
\maketitle
\begin{abstract}
-\Brz{} introduced the notion of derivatives of regular expressions
-that can be used for very simple regular expression matching algorithms.
-BLA BLA Sulzmann and Lu \cite{Sulzmann2014}
+\Brz{} introduced the notion of derivatives for regular
+expressions. They can be used for a very simple regular expression
+matching algorithm. Sulzmann and Lu cleverly extended this algorithm
+in order to deal with POSIX matching, which is the underlying
+disambiguation strategy for regular expressions needed in
+lexers. Sulzmann and Lu have made available on-line what they call a
+``rigorous proof'' of the correctness of their algorithm w.r.t.~their
+specification; regrettably, it appears to us to have unfillable gaps.
+In the first part of this paper we give our inductive definition of
+what a POSIX value is and show $(i)$ that such a value is unique (for
+given regular expression and string being matched) and $(ii)$ that
+Sulzmann and Lu's algorithm always generates such a value (provided
+that the regular expression matches the string). We also prove the
+correctness of an optimised version of the POSIX matching
+algorithm. Our definitions and proof are much simpler than those by
+Sulzmann and Lu and can be easily formalised in Isabelle/HOL. In the
+second part we analyse the correctness argument by Sulzmann and Lu in
+more detail and explain why it seems hard to turn it into a proof
+rigorous enough to be accepted by a system such as Isabelle/HOL.
-{\bf Keywords:}
+{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions,
+Isabelle/HOL
\end{abstract}
\input{session}
--- a/thys/ReStar.thy Thu Feb 25 20:13:41 2016 +0000
+++ b/thys/ReStar.thy Sun Feb 28 14:01:12 2016 +0000
@@ -103,8 +103,8 @@
section {* Regular Expressions *}
datatype rexp =
- NULL
-| EMPTY
+ ZERO
+| ONE
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
@@ -115,28 +115,65 @@
fun
L :: "rexp \<Rightarrow> string set"
where
- "L (NULL) = {}"
-| "L (EMPTY) = {[]}"
+ "L (ZERO) = {}"
+| "L (ONE) = {[]}"
| "L (CHAR c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"
+
+section {* Nullable, Derivatives *}
+
fun
nullable :: "rexp \<Rightarrow> bool"
where
- "nullable (NULL) = False"
-| "nullable (EMPTY) = True"
+ "nullable (ZERO) = False"
+| "nullable (ONE) = True"
| "nullable (CHAR c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"
+
+fun
+ der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "der c (ZERO) = ZERO"
+| "der c (ONE) = ZERO"
+| "der c (CHAR c') = (if c = c' then ONE else ZERO)"
+| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
+| "der c (SEQ r1 r2) =
+ (if nullable r1
+ then ALT (SEQ (der c r1) r2) (der c r2)
+ else SEQ (der c r1) r2)"
+| "der c (STAR r) = SEQ (der c r) (STAR r)"
+
+fun
+ ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "ders [] r = r"
+| "ders (c # s) r = ders s (der c r)"
+
+
lemma nullable_correctness:
shows "nullable r \<longleftrightarrow> [] \<in> (L r)"
by (induct r) (auto simp add: Sequ_def)
+lemma der_correctness:
+ shows "L (der c r) = Der c (L r)"
+apply(induct r)
+apply(simp_all add: nullable_correctness)
+done
+
+lemma ders_correctness:
+ shows "L (ders s r) = Ders s (L r)"
+apply(induct s arbitrary: r)
+apply(simp_all add: der_correctness Der_def Ders_def)
+done
+
+
section {* Values *}
datatype val =
@@ -173,7 +210,7 @@
"\<lbrakk>\<turnstile> v1 : r1; \<turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<turnstile> Seq v1 v2 : SEQ r1 r2"
| "\<turnstile> v1 : r1 \<Longrightarrow> \<turnstile> Left v1 : ALT r1 r2"
| "\<turnstile> v2 : r2 \<Longrightarrow> \<turnstile> Right v2 : ALT r1 r2"
-| "\<turnstile> Void : EMPTY"
+| "\<turnstile> Void : ONE"
| "\<turnstile> Char c : CHAR c"
| "\<turnstile> Stars [] : STAR r"
| "\<lbrakk>\<turnstile> v : r; \<turnstile> Stars vs : STAR r\<rbrakk> \<Longrightarrow> \<turnstile> Stars (v # vs) : STAR r"
@@ -245,196 +282,16 @@
done
-section {* Values Sets *}
-
-definition prefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubseteq> _" [100, 100] 100)
-where
- "s1 \<sqsubseteq> s2 \<equiv> \<exists>s3. s1 @ s3 = s2"
-
-definition sprefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubset> _" [100, 100] 100)
-where
- "s1 \<sqsubset> s2 \<equiv> (s1 \<sqsubseteq> s2 \<and> s1 \<noteq> s2)"
-
-lemma length_sprefix:
- "s1 \<sqsubset> s2 \<Longrightarrow> length s1 < length s2"
-unfolding sprefix_def prefix_def
-by (auto)
-
-definition Prefixes :: "string \<Rightarrow> string set" where
- "Prefixes s \<equiv> {sp. sp \<sqsubseteq> s}"
-
-definition Suffixes :: "string \<Rightarrow> string set" where
- "Suffixes s \<equiv> rev ` (Prefixes (rev s))"
-
-definition SPrefixes :: "string \<Rightarrow> string set" where
- "SPrefixes s \<equiv> {sp. sp \<sqsubset> s}"
-
-definition SSuffixes :: "string \<Rightarrow> string set" where
- "SSuffixes s \<equiv> rev ` (SPrefixes (rev s))"
-
-lemma Suffixes_in:
- "\<exists>s1. s1 @ s2 = s3 \<Longrightarrow> s2 \<in> Suffixes s3"
-unfolding Suffixes_def Prefixes_def prefix_def image_def
-apply(auto)
-by (metis rev_rev_ident)
-
-lemma SSuffixes_in:
- "\<exists>s1. s1 \<noteq> [] \<and> s1 @ s2 = s3 \<Longrightarrow> s2 \<in> SSuffixes s3"
-unfolding SSuffixes_def Suffixes_def SPrefixes_def Prefixes_def sprefix_def prefix_def image_def
-apply(auto)
-by (metis append_self_conv rev.simps(1) rev_rev_ident)
-
-lemma Prefixes_Cons:
- "Prefixes (c # s) = {[]} \<union> {c # sp | sp. sp \<in> Prefixes s}"
-unfolding Prefixes_def prefix_def
-apply(auto simp add: append_eq_Cons_conv)
-done
-
-lemma finite_Prefixes:
- "finite (Prefixes s)"
-apply(induct s)
-apply(auto simp add: Prefixes_def prefix_def)[1]
-apply(simp add: Prefixes_Cons)
-done
-
-lemma finite_Suffixes:
- "finite (Suffixes s)"
-unfolding Suffixes_def
-apply(rule finite_imageI)
-apply(rule finite_Prefixes)
-done
-
-lemma prefix_Cons:
- "((c # s1) \<sqsubseteq> (c # s2)) = (s1 \<sqsubseteq> s2)"
-apply(auto simp add: prefix_def)
-done
-
-lemma prefix_append:
- "((s @ s1) \<sqsubseteq> (s @ s2)) = (s1 \<sqsubseteq> s2)"
-apply(induct s)
-apply(simp)
-apply(simp add: prefix_Cons)
-done
-
-
-definition Values :: "rexp \<Rightarrow> string \<Rightarrow> val set" where
- "Values r s \<equiv> {v. \<turnstile> v : r \<and> flat v \<sqsubseteq> s}"
-
-definition rest :: "val \<Rightarrow> string \<Rightarrow> string" where
- "rest v s \<equiv> drop (length (flat v)) s"
-
-lemma rest_Nil:
- "rest v [] = []"
-apply(simp add: rest_def)
-done
-
-lemma rest_Suffixes:
- "rest v s \<in> Suffixes s"
-unfolding rest_def
-by (metis Suffixes_in append_take_drop_id)
-
-lemma Values_recs:
- "Values (NULL) s = {}"
- "Values (EMPTY) s = {Void}"
- "Values (CHAR c) s = (if [c] \<sqsubseteq> s then {Char c} else {})"
- "Values (ALT r1 r2) s = {Left v | v. v \<in> Values r1 s} \<union> {Right v | v. v \<in> Values r2 s}"
- "Values (SEQ r1 r2) s = {Seq v1 v2 | v1 v2. v1 \<in> Values r1 s \<and> v2 \<in> Values r2 (rest v1 s)}"
- "Values (STAR r) s =
- {Stars []} \<union> {Stars (v # vs) | v vs. v \<in> Values r s \<and> Stars vs \<in> Values (STAR r) (rest v s)}"
-unfolding Values_def
-apply(auto)
-(*NULL*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-(*EMPTY*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(rule Prf.intros)
-apply (metis append_Nil prefix_def)
-(*CHAR*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(rule Prf.intros)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-(*ALT*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (metis Prf.intros(2))
-apply (metis Prf.intros(3))
-(*SEQ*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (simp add: append_eq_conv_conj prefix_def rest_def)
-apply (metis Prf.intros(1))
-apply (simp add: append_eq_conv_conj prefix_def rest_def)
-(*STAR*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(rule conjI)
-apply(simp add: prefix_def)
-apply(auto)[1]
-apply(simp add: prefix_def)
-apply(auto)[1]
-apply (metis append_eq_conv_conj rest_def)
-apply (metis Prf.intros(6))
-apply (metis append_Nil prefix_def)
-apply (metis Prf.intros(7))
-by (metis append_eq_conv_conj prefix_append prefix_def rest_def)
-
-lemma finite_image_set2:
- "finite {x. P x} \<Longrightarrow> finite {y. Q y} \<Longrightarrow> finite {(x, y) | x y. P x \<and> Q y}"
- by (rule finite_subset [where B = "\<Union>x \<in> {x. P x}. \<Union>y \<in> {y. Q y}. {(x, y)}"]) auto
-
section {* Sulzmann functions *}
fun
mkeps :: "rexp \<Rightarrow> val"
where
- "mkeps(EMPTY) = Void"
+ "mkeps(ONE) = Void"
| "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)"
| "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))"
| "mkeps(STAR r) = Stars []"
-section {* Derivatives *}
-
-fun
- der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
-where
- "der c (NULL) = NULL"
-| "der c (EMPTY) = NULL"
-| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
-| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
-| "der c (SEQ r1 r2) =
- (if nullable r1
- then ALT (SEQ (der c r1) r2) (der c r2)
- else SEQ (der c r1) r2)"
-| "der c (STAR r) = SEQ (der c r) (STAR r)"
-
-fun
- ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
-where
- "ders [] r = r"
-| "ders (c # s) r = ders s (der c r)"
-
-
-lemma der_correctness:
- shows "L (der c r) = Der c (L r)"
-apply(induct r)
-apply(simp_all add: nullable_correctness)
-done
-
-lemma ders_correctness:
- shows "L (ders s r) = Ders s (L r)"
-apply(induct s arbitrary: r)
-apply(simp add: Ders_def)
-apply(simp)
-apply(subst der_correctness)
-apply(simp add: Ders_def Der_def)
-done
-
-section {* Injection function *}
-
fun injval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
where
"injval (CHAR d) c Void = Char d"
@@ -445,34 +302,27 @@
| "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
| "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)"
+
+section {* Matcher *}
+
fun
- lex :: "rexp \<Rightarrow> string \<Rightarrow> val option"
+ matcher :: "rexp \<Rightarrow> string \<Rightarrow> val option"
where
- "lex r [] = (if nullable r then Some(mkeps r) else None)"
-| "lex r (c#s) = (case (lex (der c r) s) of
+ "matcher r [] = (if nullable r then Some(mkeps r) else None)"
+| "matcher r (c#s) = (case (matcher (der c r) s) of
None \<Rightarrow> None
| Some(v) \<Rightarrow> Some(injval r c v))"
fun
- lex2 :: "rexp \<Rightarrow> string \<Rightarrow> val"
-where
- "lex2 r [] = mkeps r"
-| "lex2 r (c#s) = injval r c (lex2 (der c r) s)"
-
-section {* Projection function *}
-
-fun projval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
+ matcher2 :: "rexp \<Rightarrow> string \<Rightarrow> val"
where
- "projval (CHAR d) c _ = Void"
-| "projval (ALT r1 r2) c (Left v1) = Left (projval r1 c v1)"
-| "projval (ALT r1 r2) c (Right v2) = Right (projval r2 c v2)"
-| "projval (SEQ r1 r2) c (Seq v1 v2) =
- (if flat v1 = [] then Right(projval r2 c v2)
- else if nullable r1 then Left (Seq (projval r1 c v1) v2)
- else Seq (projval r1 c v1) v2)"
-| "projval (STAR r) c (Stars (v # vs)) = Seq (projval r c v) (Stars vs)"
+ "matcher2 r [] = mkeps r"
+| "matcher2 r (c#s) = injval r c (matcher2 (der c r) s)"
+
+section {* Mkeps, injval *}
+
lemma mkeps_nullable:
assumes "nullable(r)"
shows "\<turnstile> mkeps r : r"
@@ -489,33 +339,29 @@
apply(auto)
done
-
-lemma v3:
+lemma Prf_injval:
assumes "\<turnstile> v : der c r"
shows "\<turnstile> (injval r c v) : r"
using assms
-apply(induct arbitrary: v rule: der.induct)
-apply(simp)
+apply(induct r arbitrary: c v rule: rexp.induct)
+apply(simp_all)
+(* ZERO *)
apply(erule Prf.cases)
apply(simp_all)[7]
+(* ONE *)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+(* CHAR *)
+apply(case_tac "c = char")
apply(simp)
apply(erule Prf.cases)
apply(simp_all)[7]
-apply(case_tac "c = c'")
-apply(simp)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (metis Prf.intros(5))
+apply(rule Prf.intros(5))
apply(simp)
apply(erule Prf.cases)
apply(simp_all)[7]
-apply(simp)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (metis Prf.intros(2))
-apply (metis Prf.intros(3))
-apply(simp)
-apply(case_tac "nullable r1")
+(* SEQ *)
+apply(case_tac "nullable rexp1")
apply(simp)
apply(erule Prf.cases)
apply(simp_all)[7]
@@ -523,8 +369,8 @@
apply(erule Prf.cases)
apply(simp_all)[7]
apply(auto)[1]
-apply (metis Prf.intros(1))
-apply(auto)[1]
+apply(rule Prf.intros)
+apply(auto)[2]
apply (metis Prf.intros(1) mkeps_nullable)
apply(simp)
apply(erule Prf.cases)
@@ -532,7 +378,13 @@
apply(auto)[1]
apply(rule Prf.intros)
apply(auto)[2]
-apply(simp)
+(* ALT *)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(clarify)
+apply (metis Prf.intros(2))
+apply (metis Prf.intros(3))
+(* STAR *)
apply(erule Prf.cases)
apply(simp_all)[7]
apply(clarify)
@@ -543,9 +395,7 @@
apply (metis Prf.intros(6) Prf.intros(7))
by (metis Prf.intros(7))
-
-
-lemma v4:
+lemma Prf_injval_flat:
assumes "\<turnstile> v : der c r"
shows "flat (injval r c v) = c # (flat v)"
using assms
@@ -599,7 +449,7 @@
inductive
PMatch :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
where
- "[] \<in> EMPTY \<rightarrow> Void"
+ "[] \<in> ONE \<rightarrow> Void"
| "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
| "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
| "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
@@ -616,14 +466,8 @@
shows "\<turnstile> v : r" "flat v = s"
using assms
apply(induct s r v rule: PMatch.induct)
-apply(auto)
-apply (metis Prf.intros(4))
-apply (metis Prf.intros(5))
-apply (metis Prf.intros(2))
-apply (metis Prf.intros(3))
-apply (metis Prf.intros(1))
-apply (metis Prf.intros(7))
-by (metis Prf.intros(6))
+apply(auto intro: Prf.intros)
+done
lemma PMatch_mkeps:
assumes "nullable r"
@@ -745,12 +589,6 @@
-lemma PMatch_Values:
- assumes "s \<in> r \<rightarrow> v"
- shows "v \<in> Values r s"
-using assms
-apply(simp add: Values_def PMatch1)
-by (metis append_Nil2 prefix_def)
(* a proof that does not need proj *)
lemma PMatch2_roy_version:
@@ -759,10 +597,10 @@
using assms
apply(induct r arbitrary: s v c rule: rexp.induct)
apply(auto)
-(* NULL case *)
+(* ZERO case *)
apply(erule PMatch.cases)
apply(simp_all)[7]
-(* EMPTY case *)
+(* ONE case *)
apply(erule PMatch.cases)
apply(simp_all)[7]
(* CHAR case *)
@@ -851,7 +689,7 @@
apply(simp)
apply(simp)
apply (metis L.simps(6))
-apply(subst v4)
+apply(subst Prf_injval_flat)
apply (metis PMatch1)
apply(simp)
apply(auto)[1]
@@ -868,17 +706,16 @@
apply(drule PMatch.intros(6))
apply(rule PMatch.intros(7))
(* HERE *)
-apply (metis PMatch1(1) list.distinct(1) v4)
+apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat)
apply (metis Nil_is_append_conv)
apply(simp)
apply(subst der_correctness)
apply(simp add: Der_def)
done
-
lemma lex_correct1:
assumes "s \<notin> L r"
- shows "lex r s = None"
+ shows "matcher r s = None"
using assms
apply(induct s arbitrary: r)
apply(simp)
@@ -887,13 +724,13 @@
apply(drule_tac x="der a r" in meta_spec)
apply(drule meta_mp)
apply(auto)
-apply(simp add: L_flat_Prf)
-by (metis v3 v4)
+apply(simp add: der_correctness Der_def)
+done
lemma lex_correct2:
assumes "s \<in> L r"
- shows "\<exists>v. lex r s = Some(v) \<and> \<turnstile> v : r \<and> flat v = s"
+ shows "\<exists>v. matcher r s = Some(v) \<and> \<turnstile> v : r \<and> flat v = s"
using assms
apply(induct s arbitrary: r)
apply(simp)
@@ -902,13 +739,13 @@
apply(drule meta_mp)
apply(simp add: der_correctness Der_def)
apply(auto)
-apply (metis v3)
-apply(rule v4)
+apply (metis Prf_injval)
+apply(rule Prf_injval_flat)
by simp
lemma lex_correct3:
assumes "s \<in> L r"
- shows "\<exists>v. lex r s = Some(v) \<and> s \<in> r \<rightarrow> v"
+ shows "\<exists>v. matcher r s = Some(v) \<and> s \<in> r \<rightarrow> v"
using assms
apply(induct s arbitrary: r)
apply(simp)
@@ -921,7 +758,7 @@
lemma lex_correct5:
assumes "s \<in> L r"
- shows "s \<in> r \<rightarrow> (lex2 r s)"
+ shows "s \<in> r \<rightarrow> (matcher2 r s)"
using assms
apply(induct s arbitrary: r)
apply(simp)
@@ -935,11 +772,176 @@
done
lemma
- "lex2 (ALT (CHAR a) (ALT (CHAR b) (SEQ (CHAR a) (CHAR b)))) [a,b] = Right (Right (Seq (Char a) (Char b)))"
+ "matcher2 (ALT (CHAR a) (ALT (CHAR b) (SEQ (CHAR a) (CHAR b)))) [a,b] = Right (Right (Seq (Char a) (Char b)))"
apply(simp)
done
+section {* Attic stuff below *}
+
+section {* Projection function *}
+
+fun projval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
+where
+ "projval (CHAR d) c _ = Void"
+| "projval (ALT r1 r2) c (Left v1) = Left (projval r1 c v1)"
+| "projval (ALT r1 r2) c (Right v2) = Right (projval r2 c v2)"
+| "projval (SEQ r1 r2) c (Seq v1 v2) =
+ (if flat v1 = [] then Right(projval r2 c v2)
+ else if nullable r1 then Left (Seq (projval r1 c v1) v2)
+ else Seq (projval r1 c v1) v2)"
+| "projval (STAR r) c (Stars (v # vs)) = Seq (projval r c v) (Stars vs)"
+
+
+
+section {* Values Sets *}
+
+definition prefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubseteq> _" [100, 100] 100)
+where
+ "s1 \<sqsubseteq> s2 \<equiv> \<exists>s3. s1 @ s3 = s2"
+
+definition sprefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubset> _" [100, 100] 100)
+where
+ "s1 \<sqsubset> s2 \<equiv> (s1 \<sqsubseteq> s2 \<and> s1 \<noteq> s2)"
+
+lemma length_sprefix:
+ "s1 \<sqsubset> s2 \<Longrightarrow> length s1 < length s2"
+unfolding sprefix_def prefix_def
+by (auto)
+
+definition Prefixes :: "string \<Rightarrow> string set" where
+ "Prefixes s \<equiv> {sp. sp \<sqsubseteq> s}"
+
+definition Suffixes :: "string \<Rightarrow> string set" where
+ "Suffixes s \<equiv> rev ` (Prefixes (rev s))"
+
+definition SPrefixes :: "string \<Rightarrow> string set" where
+ "SPrefixes s \<equiv> {sp. sp \<sqsubset> s}"
+
+definition SSuffixes :: "string \<Rightarrow> string set" where
+ "SSuffixes s \<equiv> rev ` (SPrefixes (rev s))"
+
+lemma Suffixes_in:
+ "\<exists>s1. s1 @ s2 = s3 \<Longrightarrow> s2 \<in> Suffixes s3"
+unfolding Suffixes_def Prefixes_def prefix_def image_def
+apply(auto)
+by (metis rev_rev_ident)
+
+lemma SSuffixes_in:
+ "\<exists>s1. s1 \<noteq> [] \<and> s1 @ s2 = s3 \<Longrightarrow> s2 \<in> SSuffixes s3"
+unfolding SSuffixes_def Suffixes_def SPrefixes_def Prefixes_def sprefix_def prefix_def image_def
+apply(auto)
+by (metis append_self_conv rev.simps(1) rev_rev_ident)
+
+lemma Prefixes_Cons:
+ "Prefixes (c # s) = {[]} \<union> {c # sp | sp. sp \<in> Prefixes s}"
+unfolding Prefixes_def prefix_def
+apply(auto simp add: append_eq_Cons_conv)
+done
+
+lemma finite_Prefixes:
+ "finite (Prefixes s)"
+apply(induct s)
+apply(auto simp add: Prefixes_def prefix_def)[1]
+apply(simp add: Prefixes_Cons)
+done
+
+lemma finite_Suffixes:
+ "finite (Suffixes s)"
+unfolding Suffixes_def
+apply(rule finite_imageI)
+apply(rule finite_Prefixes)
+done
+
+lemma prefix_Cons:
+ "((c # s1) \<sqsubseteq> (c # s2)) = (s1 \<sqsubseteq> s2)"
+apply(auto simp add: prefix_def)
+done
+
+lemma prefix_append:
+ "((s @ s1) \<sqsubseteq> (s @ s2)) = (s1 \<sqsubseteq> s2)"
+apply(induct s)
+apply(simp)
+apply(simp add: prefix_Cons)
+done
+
+
+definition Values :: "rexp \<Rightarrow> string \<Rightarrow> val set" where
+ "Values r s \<equiv> {v. \<turnstile> v : r \<and> flat v \<sqsubseteq> s}"
+
+definition rest :: "val \<Rightarrow> string \<Rightarrow> string" where
+ "rest v s \<equiv> drop (length (flat v)) s"
+
+lemma rest_Nil:
+ "rest v [] = []"
+apply(simp add: rest_def)
+done
+
+lemma rest_Suffixes:
+ "rest v s \<in> Suffixes s"
+unfolding rest_def
+by (metis Suffixes_in append_take_drop_id)
+
+lemma Values_recs:
+ "Values (ZERO) s = {}"
+ "Values (ONE) s = {Void}"
+ "Values (CHAR c) s = (if [c] \<sqsubseteq> s then {Char c} else {})"
+ "Values (ALT r1 r2) s = {Left v | v. v \<in> Values r1 s} \<union> {Right v | v. v \<in> Values r2 s}"
+ "Values (SEQ r1 r2) s = {Seq v1 v2 | v1 v2. v1 \<in> Values r1 s \<and> v2 \<in> Values r2 (rest v1 s)}"
+ "Values (STAR r) s =
+ {Stars []} \<union> {Stars (v # vs) | v vs. v \<in> Values r s \<and> Stars vs \<in> Values (STAR r) (rest v s)}"
+unfolding Values_def
+apply(auto)
+(*ZERO*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+(*ONE*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(rule Prf.intros)
+apply (metis append_Nil prefix_def)
+(*CHAR*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(rule Prf.intros)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+(*ALT*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply (metis Prf.intros(2))
+apply (metis Prf.intros(3))
+(*SEQ*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply (simp add: append_eq_conv_conj prefix_def rest_def)
+apply (metis Prf.intros(1))
+apply (simp add: append_eq_conv_conj prefix_def rest_def)
+(*STAR*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(rule conjI)
+apply(simp add: prefix_def)
+apply(auto)[1]
+apply(simp add: prefix_def)
+apply(auto)[1]
+apply (metis append_eq_conv_conj rest_def)
+apply (metis Prf.intros(6))
+apply (metis append_Nil prefix_def)
+apply (metis Prf.intros(7))
+by (metis append_eq_conv_conj prefix_append prefix_def rest_def)
+
+lemma PMatch_Values:
+ assumes "s \<in> r \<rightarrow> v"
+ shows "v \<in> Values r s"
+using assms
+apply(simp add: Values_def PMatch1)
+by (metis append_Nil2 prefix_def)
+
+lemma finite_image_set2:
+ "finite {x. P x} \<Longrightarrow> finite {y. Q y} \<Longrightarrow> finite {(x, y) | x y. P x \<and> Q y}"
+ by (rule finite_subset [where B = "\<Union>x \<in> {x. P x}. \<Union>y \<in> {y. Q y}. {(x, y)}"]) auto
+
section {* Connection with Sulzmann's Ordering of values *}
@@ -951,7 +953,7 @@
| "length (flat v2) > length (flat v1) \<Longrightarrow> (Right v2) \<succ>(ALT r1 r2) (Left v1)"
| "v2 \<succ>r2 v2' \<Longrightarrow> (Right v2) \<succ>(ALT r1 r2) (Right v2')"
| "v1 \<succ>r1 v1' \<Longrightarrow> (Left v1) \<succ>(ALT r1 r2) (Left v1')"
-| "Void \<succ>EMPTY Void"
+| "Void \<succ>ONE Void"
| "(Char c) \<succ>(CHAR c) (Char c)"
| "flat (Stars (v # vs)) = [] \<Longrightarrow> (Stars []) \<succ>(STAR r) (Stars (v # vs))"
| "flat (Stars (v # vs)) \<noteq> [] \<Longrightarrow> (Stars (v # vs)) \<succ>(STAR r) (Stars [])"
@@ -969,7 +971,7 @@
"\<lbrakk>\<Turnstile> v1 : r1; \<Turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<Turnstile> Seq v1 v2 : SEQ r1 r2"
| "\<Turnstile> v1 : r1 \<Longrightarrow> \<Turnstile> Left v1 : ALT r1 r2"
| "\<Turnstile> v2 : r2 \<Longrightarrow> \<Turnstile> Right v2 : ALT r1 r2"
-| "\<Turnstile> Void : EMPTY"
+| "\<Turnstile> Void : ONE"
| "\<Turnstile> Char c : CHAR c"
| "\<Turnstile> Stars [] : STAR r"
| "\<lbrakk>\<Turnstile> v : r; \<Turnstile> Stars vs : STAR r; flat v \<noteq> []\<rbrakk> \<Longrightarrow> \<Turnstile> Stars (v # vs) : STAR r"
@@ -1163,10 +1165,10 @@
using assms
apply(induct c r arbitrary: s v rule: der.induct)
apply(auto)
-(* NULL case *)
+(* ZERO case *)
apply(erule PMatch.cases)
apply(simp_all)[7]
-(* EMPTY case *)
+(* ONE case *)
apply(erule PMatch.cases)
apply(simp_all)[7]
(* CHAR case *)
@@ -1265,7 +1267,7 @@
apply(simp)
apply(simp)
apply (metis L.simps(6))
-apply(subst v4)
+apply(subst Prf_injval_flat)
apply (metis NPrf_imp_Prf PMatch1N)
apply(simp)
apply(auto)[1]
@@ -1280,7 +1282,7 @@
apply(rotate_tac 2)
apply(drule PMatch.intros(6))
apply(rule PMatch.intros(7))
-apply (metis PMatch1(1) list.distinct(1) v4)
+apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat)
apply (metis Nil_is_append_conv)
apply(simp)
apply(subst der_correctness)
@@ -1289,7 +1291,7 @@
lemma lex_correct4:
assumes "s \<in> L r"
- shows "\<exists>v. lex r s = Some(v) \<and> \<Turnstile> v : r \<and> flat v = s"
+ shows "\<exists>v. matcher r s = Some(v) \<and> \<Turnstile> v : r \<and> flat v = s"
using lex_correct3[OF assms]
apply(auto)
apply (metis PMatch1N)
Binary file thys/paper.pdf has changed