lexing: changeset 107:6adda4a667b1

Binary file Literature/ktl16regexp-agda.pdf has changed

--- a/thys/Paper/Paper.thy	Thu Feb 25 20:13:41 2016 +0000
+++ b/thys/Paper/Paper.thy	Sun Feb 28 14:01:12 2016 +0000
@@ -7,7 +7,13 @@
 
 notation (latex output)
    If  ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and
-  Cons ("_::_" [78,77] 73) and
+  Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and
+  ZERO ("\<^raw:\textrm{0}>" 78) and 
+  ONE ("\<^raw:\textrm{1}>" 78) and 
+  CHAR ("_" [1000] 10) and
+  ALT ("_ + _" [1000,1000] 78) and
+  SEQ ("_ \<cdot> _" [1000,1000] 78) and
+  STAR ("_\<^sup>\<star>" [1000] 78) and
   val.Char ("Char _" [1000] 78) and
   val.Left ("Left _" [1000] 78) and
   val.Right ("Right _" [1000] 78) and
@@ -23,20 +29,117 @@
 section {* Introduction *}
 
 text {*
+  
 
-  \noindent
+Sulzmann and Lu \cite{Sulzmann2014}
+
+there are two commonly used
+disambiguation strategies to create a unique matching tree:
+one is called \emph{greedy} matching \cite{Frisch2004} and the
+other is \emph{POSIX} matching~\cite{Kuklewicz,Sulzmann2014}.
+For the latter there are two rough rules:  
+
+\begin{itemize}
+\item The Longest Match Rule (or ``maximal munch rule''):\smallskip\\ The
+      longest initial substring matched by any regular
+      expression is taken as next token.
+
+\item Rule Priority:\smallskip\\ For a particular longest initial
+      substring, the first regular expression that can match
+      determines the token.
+\end{itemize}
+
+\noindent In the context of lexing, POSIX is the more
+interesting disambiguation strategy as it produces longest
+matches, which is necessary for tokenising programs. For
+example the string \textit{iffoo} should not match the keyword
+\textit{if} and the rest, but as one string \textit{iffoo},
+which might be a variable name in a program. As another
+example consider the string $xy$ and the regular expression
+\mbox{$(x + y + xy)^*$}. Either the input string can be
+matched in two `iterations' by the single letter-regular
+expressions $x$ and $y$, or directly in one iteration by $xy$.
+The first case corresponds to greedy matching, which first
+matches with the left-most symbol and only matches the next
+symbol in case of a mismatch. The second case is POSIX
+matching, which prefers the longest match. In case more than
+one (longest) matches exist, only then it prefers the
+left-most match. While POSIX matching seems natural, it turns
+out to be much more subtle than greedy matching in terms of
+implementations and in terms of proving properties about it.
+If POSIX matching is implemented using automata, then one has
+to follow transitions (according to the input string) until
+one finds an accepting state, record this state and look for
+further transition which might lead to another accepting state
+that represents a longer input initial substring to be
+matched. Only if none can be found, the last accepting state
+is returned.
+
+
+Sulzmann and Lu's paper \cite{Sulzmann2014} targets POSIX
+regular expression matching. They write that it is known to be
+to be a non-trivial problem and nearly all POSIX matching
+implementations are ``buggy'' \cite[Page 203]{Sulzmann2014}.
+For this they cite a study by Kuklewicz \cite{Kuklewicz}. My
+current work is about formalising the proofs in the paper by
+Sulzmann and Lu. Specifically, they propose in this paper a
+POSIX matching algorithm and give some details of a
+correctness proof for this algorithm inside the paper and some
+more details in an appendix. This correctness proof is
+unformalised, meaning it is just a ``pencil-and-paper'' proof,
+not done in a theorem prover. Though, the paper and presumably
+the proof have been peer-reviewed. Unfortunately their proof
+does not give enough details such that it can be
+straightforwardly implemented in a theorem prover, say
+Isabelle. In fact, the purported proof they outline does not
+work in central places. We discovered this when filling in
+many gaps and attempting to formalise the proof in Isabelle. 
+
+
+
+{\bf Contributions:}
+
+*}
+
+section {* Preliminaries *}
+
+text {* \noindent Strings in Isabelle/HOL are lists of characters with
+  the empty string being represented by the empty list, written @{term
+  "[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}. By
+  using the type char we have a supply of finitely many characters
+  roughly corresponding to the ASCII character set.
   Regular exprtessions
 
   \begin{center}
   @{text "r :="}
-  @{const "NULL"} $\mid$
-  @{const "EMPTY"} $\mid$
+  @{const "ZERO"} $\mid$
+  @{const "ONE"} $\mid$
   @{term "CHAR c"} $\mid$
   @{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$
   @{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$
   @{term "STAR r"} 
   \end{center}
 
+*}
+
+section {* POSIX Regular Expression Matching *}
+
+section {* The Argument by Sulzmmann and Lu *}
+
+section {* Conclusion *}
+
+text {*
+
+  Nipkow lexer from 2000
+
+*}
+
+
+text {*
+
+
+
+
   \noindent
   Values
 
@@ -216,7 +319,6 @@
   \begin{center}
   \begin{tabular}{c}
   @{thm Values_def}\medskip\\
-  @{thm NValues_def}
   \end{tabular}
   \end{center}
 
@@ -238,9 +340,9 @@
 
   @{thm[mode=IfThen] mkeps_flat}
 
-  @{thm[mode=IfThen] v3}
+  @{thm[mode=IfThen] Prf_injval}
 
-  @{thm[mode=IfThen] v4}
+  @{thm[mode=IfThen] Prf_injval_flat}
   
   @{thm[mode=IfThen] PMatch_mkeps}

--- a/thys/Paper/document/root.tex	Thu Feb 25 20:13:41 2016 +0000
+++ b/thys/Paper/document/root.tex	Sun Feb 28 14:01:12 2016 +0000
@@ -39,12 +39,29 @@
 \maketitle
 
 \begin{abstract}
-\Brz{} introduced the notion of derivatives of regular expressions
-that can be used for very simple regular expression matching algorithms.
 
-BLA BLA  Sulzmann and Lu \cite{Sulzmann2014}
+\Brz{} introduced the notion of derivatives for regular
+expressions. They can be used for a very simple regular expression
+matching algorithm.  Sulzmann and Lu cleverly extended this algorithm
+in order to deal with POSIX matching, which is the underlying
+disambiguation strategy for regular expressions needed in
+lexers. Sulzmann and Lu have made available on-line what they call a
+``rigorous proof'' of the correctness of their algorithm w.r.t.~their
+specification; regrettably, it appears to us to have unfillable gaps.
+In the first part of this paper we give our inductive definition of
+what a POSIX value is and show $(i)$ that such a value is unique (for
+given regular expression and string being matched) and $(ii)$ that
+Sulzmann and Lu's algorithm always generates such a value (provided
+that the regular expression matches the string).  We also prove the
+correctness of an optimised version of the POSIX matching
+algorithm. Our definitions and proof are much simpler than those by
+Sulzmann and Lu and can be easily formalised in Isabelle/HOL.  In the
+second part we analyse the correctness argument by Sulzmann and Lu in
+more detail and explain why it seems hard to turn it into a proof
+rigorous enough to be accepted by a system such as Isabelle/HOL.
 
-{\bf Keywords:} 
+{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions,
+Isabelle/HOL
 \end{abstract}
 
 \input{session}

--- a/thys/ReStar.thy	Thu Feb 25 20:13:41 2016 +0000
+++ b/thys/ReStar.thy	Sun Feb 28 14:01:12 2016 +0000
@@ -103,8 +103,8 @@
 section {* Regular Expressions *}
 
 datatype rexp =
-  NULL
-| EMPTY
+  ZERO
+| ONE
 | CHAR char
 | SEQ rexp rexp
 | ALT rexp rexp
@@ -115,28 +115,65 @@
 fun
   L :: "rexp \<Rightarrow> string set"
 where
-  "L (NULL) = {}"
-| "L (EMPTY) = {[]}"
+  "L (ZERO) = {}"
+| "L (ONE) = {[]}"
 | "L (CHAR c) = {[c]}"
 | "L (SEQ r1 r2) = (L r1) ;; (L r2)"
 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
 | "L (STAR r) = (L r)\<star>"
 
+
+section {* Nullable, Derivatives *}
+
 fun
  nullable :: "rexp \<Rightarrow> bool"
 where
-  "nullable (NULL) = False"
-| "nullable (EMPTY) = True"
+  "nullable (ZERO) = False"
+| "nullable (ONE) = True"
 | "nullable (CHAR c) = False"
 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
 | "nullable (STAR r) = True"
 
+
+fun
+ der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+  "der c (ZERO) = ZERO"
+| "der c (ONE) = ZERO"
+| "der c (CHAR c') = (if c = c' then ONE else ZERO)"
+| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
+| "der c (SEQ r1 r2) = 
+     (if nullable r1
+      then ALT (SEQ (der c r1) r2) (der c r2)
+      else SEQ (der c r1) r2)"
+| "der c (STAR r) = SEQ (der c r) (STAR r)"
+
+fun 
+ ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+  "ders [] r = r"
+| "ders (c # s) r = ders s (der c r)"
+
+
 lemma nullable_correctness:
   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
 by (induct r) (auto simp add: Sequ_def) 
 
 
+lemma der_correctness:
+  shows "L (der c r) = Der c (L r)"
+apply(induct r) 
+apply(simp_all add: nullable_correctness)
+done
+
+lemma ders_correctness:
+  shows "L (ders s r) = Ders s (L r)"
+apply(induct s arbitrary: r) 
+apply(simp_all add: der_correctness Der_def Ders_def)
+done
+
+
 section {* Values *}
 
 datatype val = 
@@ -173,7 +210,7 @@
  "\<lbrakk>\<turnstile> v1 : r1; \<turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<turnstile> Seq v1 v2 : SEQ r1 r2"
 | "\<turnstile> v1 : r1 \<Longrightarrow> \<turnstile> Left v1 : ALT r1 r2"
 | "\<turnstile> v2 : r2 \<Longrightarrow> \<turnstile> Right v2 : ALT r1 r2"
-| "\<turnstile> Void : EMPTY"
+| "\<turnstile> Void : ONE"
 | "\<turnstile> Char c : CHAR c"
 | "\<turnstile> Stars [] : STAR r"
 | "\<lbrakk>\<turnstile> v : r; \<turnstile> Stars vs : STAR r\<rbrakk> \<Longrightarrow> \<turnstile> Stars (v # vs) : STAR r"
@@ -245,196 +282,16 @@
 done
 
 
-section {* Values Sets *}
-
-definition prefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubseteq> _" [100, 100] 100)
-where
-  "s1 \<sqsubseteq> s2 \<equiv> \<exists>s3. s1 @ s3 = s2"
-
-definition sprefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubset> _" [100, 100] 100)
-where
-  "s1 \<sqsubset> s2 \<equiv> (s1 \<sqsubseteq> s2 \<and> s1 \<noteq> s2)"
-
-lemma length_sprefix:
-  "s1 \<sqsubset> s2 \<Longrightarrow> length s1 < length s2"
-unfolding sprefix_def prefix_def
-by (auto)
-
-definition Prefixes :: "string \<Rightarrow> string set" where
-  "Prefixes s \<equiv> {sp. sp \<sqsubseteq> s}"
-
-definition Suffixes :: "string \<Rightarrow> string set" where
-  "Suffixes s \<equiv> rev ` (Prefixes (rev s))"
-
-definition SPrefixes :: "string \<Rightarrow> string set" where
-  "SPrefixes s \<equiv> {sp. sp \<sqsubset> s}"
-
-definition SSuffixes :: "string \<Rightarrow> string set" where
-  "SSuffixes s \<equiv> rev ` (SPrefixes (rev s))"
-
-lemma Suffixes_in: 
-  "\<exists>s1. s1 @ s2 = s3 \<Longrightarrow> s2 \<in> Suffixes s3"
-unfolding Suffixes_def Prefixes_def prefix_def image_def
-apply(auto)
-by (metis rev_rev_ident)
-
-lemma SSuffixes_in: 
-  "\<exists>s1. s1 \<noteq> [] \<and> s1 @ s2 = s3 \<Longrightarrow> s2 \<in> SSuffixes s3"
-unfolding SSuffixes_def Suffixes_def SPrefixes_def Prefixes_def sprefix_def prefix_def image_def
-apply(auto)
-by (metis append_self_conv rev.simps(1) rev_rev_ident)
-
-lemma Prefixes_Cons:
-  "Prefixes (c # s) = {[]} \<union> {c # sp | sp. sp \<in> Prefixes s}"
-unfolding Prefixes_def prefix_def
-apply(auto simp add: append_eq_Cons_conv) 
-done
-
-lemma finite_Prefixes:
-  "finite (Prefixes s)"
-apply(induct s)
-apply(auto simp add: Prefixes_def prefix_def)[1]
-apply(simp add: Prefixes_Cons)
-done
-
-lemma finite_Suffixes:
-  "finite (Suffixes s)"
-unfolding Suffixes_def
-apply(rule finite_imageI)
-apply(rule finite_Prefixes)
-done
-
-lemma prefix_Cons:
-  "((c # s1) \<sqsubseteq> (c # s2)) = (s1 \<sqsubseteq> s2)"
-apply(auto simp add: prefix_def)
-done
-
-lemma prefix_append:
-  "((s @ s1) \<sqsubseteq> (s @ s2)) = (s1 \<sqsubseteq> s2)"
-apply(induct s)
-apply(simp)
-apply(simp add: prefix_Cons)
-done
-
-
-definition Values :: "rexp \<Rightarrow> string \<Rightarrow> val set" where
-  "Values r s \<equiv> {v. \<turnstile> v : r \<and> flat v \<sqsubseteq> s}"
-
-definition rest :: "val \<Rightarrow> string \<Rightarrow> string" where
-  "rest v s \<equiv> drop (length (flat v)) s"
-
-lemma rest_Nil:
-  "rest v [] = []"
-apply(simp add: rest_def)
-done
-
-lemma rest_Suffixes:
-  "rest v s \<in> Suffixes s"
-unfolding rest_def
-by (metis Suffixes_in append_take_drop_id)
-
-lemma Values_recs:
-  "Values (NULL) s = {}"
-  "Values (EMPTY) s = {Void}"
-  "Values (CHAR c) s = (if [c] \<sqsubseteq> s then {Char c} else {})" 
-  "Values (ALT r1 r2) s = {Left v | v. v \<in> Values r1 s} \<union> {Right v | v. v \<in> Values r2 s}"
-  "Values (SEQ r1 r2) s = {Seq v1 v2 | v1 v2. v1 \<in> Values r1 s \<and> v2 \<in> Values r2 (rest v1 s)}"
-  "Values (STAR r) s = 
-      {Stars []} \<union> {Stars (v # vs) | v vs. v \<in> Values r s \<and> Stars vs \<in> Values (STAR r) (rest v s)}"
-unfolding Values_def
-apply(auto)
-(*NULL*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-(*EMPTY*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(rule Prf.intros)
-apply (metis append_Nil prefix_def)
-(*CHAR*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(rule Prf.intros)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-(*ALT*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (metis Prf.intros(2))
-apply (metis Prf.intros(3))
-(*SEQ*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (simp add: append_eq_conv_conj prefix_def rest_def)
-apply (metis Prf.intros(1))
-apply (simp add: append_eq_conv_conj prefix_def rest_def)
-(*STAR*)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(rule conjI)
-apply(simp add: prefix_def)
-apply(auto)[1]
-apply(simp add: prefix_def)
-apply(auto)[1]
-apply (metis append_eq_conv_conj rest_def)
-apply (metis Prf.intros(6))
-apply (metis append_Nil prefix_def)
-apply (metis Prf.intros(7))
-by (metis append_eq_conv_conj prefix_append prefix_def rest_def)
-
-lemma finite_image_set2:
-  "finite {x. P x} \<Longrightarrow> finite {y. Q y} \<Longrightarrow> finite {(x, y) | x y. P x \<and> Q y}"
-  by (rule finite_subset [where B = "\<Union>x \<in> {x. P x}. \<Union>y \<in> {y. Q y}. {(x, y)}"]) auto
-
 section {* Sulzmann functions *}
 
 fun 
   mkeps :: "rexp \<Rightarrow> val"
 where
-  "mkeps(EMPTY) = Void"
+  "mkeps(ONE) = Void"
 | "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)"
 | "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))"
 | "mkeps(STAR r) = Stars []"
 
-section {* Derivatives *}
-
-fun
- der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
-where
-  "der c (NULL) = NULL"
-| "der c (EMPTY) = NULL"
-| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
-| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
-| "der c (SEQ r1 r2) = 
-     (if nullable r1
-      then ALT (SEQ (der c r1) r2) (der c r2)
-      else SEQ (der c r1) r2)"
-| "der c (STAR r) = SEQ (der c r) (STAR r)"
-
-fun 
- ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
-where
-  "ders [] r = r"
-| "ders (c # s) r = ders s (der c r)"
-
-
-lemma der_correctness:
-  shows "L (der c r) = Der c (L r)"
-apply(induct r) 
-apply(simp_all add: nullable_correctness)
-done
-
-lemma ders_correctness:
-  shows "L (ders s r) = Ders s (L r)"
-apply(induct s arbitrary: r) 
-apply(simp add: Ders_def)
-apply(simp)
-apply(subst der_correctness)
-apply(simp add: Ders_def Der_def)
-done
-
-section {* Injection function *}
-
 fun injval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
 where
   "injval (CHAR d) c Void = Char d"
@@ -445,34 +302,27 @@
 | "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
 | "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" 
 
+
+section {* Matcher *}
+
 fun 
-  lex :: "rexp \<Rightarrow> string \<Rightarrow> val option"
+  matcher :: "rexp \<Rightarrow> string \<Rightarrow> val option"
 where
-  "lex r [] = (if nullable r then Some(mkeps r) else None)"
-| "lex r (c#s) = (case (lex (der c r) s) of  
+  "matcher r [] = (if nullable r then Some(mkeps r) else None)"
+| "matcher r (c#s) = (case (matcher (der c r) s) of  
                     None \<Rightarrow> None
                   | Some(v) \<Rightarrow> Some(injval r c v))"
 
 fun 
-  lex2 :: "rexp \<Rightarrow> string \<Rightarrow> val"
-where
-  "lex2 r [] = mkeps r"
-| "lex2 r (c#s) = injval r c (lex2 (der c r) s)"
-
-section {* Projection function *}
-
-fun projval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
+  matcher2 :: "rexp \<Rightarrow> string \<Rightarrow> val"
 where
-  "projval (CHAR d) c _ = Void"
-| "projval (ALT r1 r2) c (Left v1) = Left (projval r1 c v1)"
-| "projval (ALT r1 r2) c (Right v2) = Right (projval r2 c v2)"
-| "projval (SEQ r1 r2) c (Seq v1 v2) = 
-     (if flat v1 = [] then Right(projval r2 c v2) 
-      else if nullable r1 then Left (Seq (projval r1 c v1) v2)
-                          else Seq (projval r1 c v1) v2)"
-| "projval (STAR r) c (Stars (v # vs)) = Seq (projval r c v) (Stars vs)"
+  "matcher2 r [] = mkeps r"
+| "matcher2 r (c#s) = injval r c (matcher2 (der c r) s)"
 
 
+
+section {* Mkeps, injval *}
+
 lemma mkeps_nullable:
   assumes "nullable(r)" 
   shows "\<turnstile> mkeps r : r"
@@ -489,33 +339,29 @@
 apply(auto)
 done
 
-
-lemma v3:
+lemma Prf_injval:
   assumes "\<turnstile> v : der c r" 
   shows "\<turnstile> (injval r c v) : r"
 using assms
-apply(induct arbitrary: v rule: der.induct)
-apply(simp)
+apply(induct r arbitrary: c v rule: rexp.induct)
+apply(simp_all)
+(* ZERO *)
 apply(erule Prf.cases)
 apply(simp_all)[7]
+(* ONE *)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+(* CHAR *)
+apply(case_tac "c = char")
 apply(simp)
 apply(erule Prf.cases)
 apply(simp_all)[7]
-apply(case_tac "c = c'")
-apply(simp)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (metis Prf.intros(5))
+apply(rule Prf.intros(5))
 apply(simp)
 apply(erule Prf.cases)
 apply(simp_all)[7]
-apply(simp)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply (metis Prf.intros(2))
-apply (metis Prf.intros(3))
-apply(simp)
-apply(case_tac "nullable r1")
+(* SEQ *)
+apply(case_tac "nullable rexp1")
 apply(simp)
 apply(erule Prf.cases)
 apply(simp_all)[7]
@@ -523,8 +369,8 @@
 apply(erule Prf.cases)
 apply(simp_all)[7]
 apply(auto)[1]
-apply (metis Prf.intros(1))
-apply(auto)[1]
+apply(rule Prf.intros)
+apply(auto)[2]
 apply (metis Prf.intros(1) mkeps_nullable)
 apply(simp)
 apply(erule Prf.cases)
@@ -532,7 +378,13 @@
 apply(auto)[1]
 apply(rule Prf.intros)
 apply(auto)[2]
-apply(simp)
+(* ALT *)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(clarify)
+apply (metis Prf.intros(2))
+apply (metis Prf.intros(3))
+(* STAR *)
 apply(erule Prf.cases)
 apply(simp_all)[7]
 apply(clarify)
@@ -543,9 +395,7 @@
 apply (metis Prf.intros(6) Prf.intros(7))
 by (metis Prf.intros(7))
 
-
-
-lemma v4:
+lemma Prf_injval_flat:
   assumes "\<turnstile> v : der c r" 
   shows "flat (injval r c v) = c # (flat v)"
 using assms
@@ -599,7 +449,7 @@
 inductive 
   PMatch :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
 where
-  "[] \<in> EMPTY \<rightarrow> Void"
+  "[] \<in> ONE \<rightarrow> Void"
 | "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
 | "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
 | "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
@@ -616,14 +466,8 @@
   shows "\<turnstile> v : r" "flat v = s"
 using assms
 apply(induct s r v rule: PMatch.induct)
-apply(auto)
-apply (metis Prf.intros(4))
-apply (metis Prf.intros(5))
-apply (metis Prf.intros(2))
-apply (metis Prf.intros(3))
-apply (metis Prf.intros(1))
-apply (metis Prf.intros(7))
-by (metis Prf.intros(6))
+apply(auto intro: Prf.intros)
+done
 
 lemma PMatch_mkeps:
   assumes "nullable r"
@@ -745,12 +589,6 @@
 
 
 
-lemma PMatch_Values:
-  assumes "s \<in> r \<rightarrow> v"
-  shows "v \<in> Values r s"
-using assms
-apply(simp add: Values_def PMatch1)
-by (metis append_Nil2 prefix_def)
 
 (* a proof that does not need proj *)
 lemma PMatch2_roy_version:
@@ -759,10 +597,10 @@
 using assms
 apply(induct r arbitrary: s v c rule: rexp.induct)
 apply(auto)
-(* NULL case *)
+(* ZERO case *)
 apply(erule PMatch.cases)
 apply(simp_all)[7]
-(* EMPTY case *)
+(* ONE case *)
 apply(erule PMatch.cases)
 apply(simp_all)[7]
 (* CHAR case *)
@@ -851,7 +689,7 @@
 apply(simp)
 apply(simp)
 apply (metis L.simps(6))
-apply(subst v4)
+apply(subst Prf_injval_flat)
 apply (metis PMatch1)
 apply(simp)
 apply(auto)[1]
@@ -868,17 +706,16 @@
 apply(drule PMatch.intros(6))
 apply(rule PMatch.intros(7))
 (* HERE *)
-apply (metis PMatch1(1) list.distinct(1) v4)
+apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat)
 apply (metis Nil_is_append_conv)
 apply(simp)
 apply(subst der_correctness)
 apply(simp add: Der_def)
 done 
 
-
 lemma lex_correct1:
   assumes "s \<notin> L r"
-  shows "lex r s = None"
+  shows "matcher r s = None"
 using assms
 apply(induct s arbitrary: r)
 apply(simp)
@@ -887,13 +724,13 @@
 apply(drule_tac x="der a r" in meta_spec)
 apply(drule meta_mp)
 apply(auto)
-apply(simp add: L_flat_Prf)
-by (metis v3 v4)
+apply(simp add: der_correctness Der_def)
+done
 
 
 lemma lex_correct2:
   assumes "s \<in> L r"
-  shows "\<exists>v. lex r s = Some(v) \<and> \<turnstile> v : r \<and> flat v = s"
+  shows "\<exists>v. matcher r s = Some(v) \<and> \<turnstile> v : r \<and> flat v = s"
 using assms
 apply(induct s arbitrary: r)
 apply(simp)
@@ -902,13 +739,13 @@
 apply(drule meta_mp)
 apply(simp add: der_correctness Der_def)
 apply(auto)
-apply (metis v3)
-apply(rule v4)
+apply (metis Prf_injval)
+apply(rule Prf_injval_flat)
 by simp
 
 lemma lex_correct3:
   assumes "s \<in> L r"
-  shows "\<exists>v. lex r s = Some(v) \<and> s \<in> r \<rightarrow> v"
+  shows "\<exists>v. matcher r s = Some(v) \<and> s \<in> r \<rightarrow> v"
 using assms
 apply(induct s arbitrary: r)
 apply(simp)
@@ -921,7 +758,7 @@
 
 lemma lex_correct5:
   assumes "s \<in> L r"
-  shows "s \<in> r \<rightarrow> (lex2 r s)"
+  shows "s \<in> r \<rightarrow> (matcher2 r s)"
 using assms
 apply(induct s arbitrary: r)
 apply(simp)
@@ -935,11 +772,176 @@
 done
 
 lemma 
-  "lex2 (ALT (CHAR a) (ALT (CHAR b) (SEQ (CHAR a) (CHAR b)))) [a,b] = Right (Right (Seq (Char a) (Char b)))"
+  "matcher2 (ALT (CHAR a) (ALT (CHAR b) (SEQ (CHAR a) (CHAR b)))) [a,b] = Right (Right (Seq (Char a) (Char b)))"
 apply(simp)
 done
 
 
+section {* Attic stuff below *}
+
+section {* Projection function *}
+
+fun projval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
+where
+  "projval (CHAR d) c _ = Void"
+| "projval (ALT r1 r2) c (Left v1) = Left (projval r1 c v1)"
+| "projval (ALT r1 r2) c (Right v2) = Right (projval r2 c v2)"
+| "projval (SEQ r1 r2) c (Seq v1 v2) = 
+     (if flat v1 = [] then Right(projval r2 c v2) 
+      else if nullable r1 then Left (Seq (projval r1 c v1) v2)
+                          else Seq (projval r1 c v1) v2)"
+| "projval (STAR r) c (Stars (v # vs)) = Seq (projval r c v) (Stars vs)"
+
+
+
+section {* Values Sets *}
+
+definition prefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubseteq> _" [100, 100] 100)
+where
+  "s1 \<sqsubseteq> s2 \<equiv> \<exists>s3. s1 @ s3 = s2"
+
+definition sprefix :: "string \<Rightarrow> string \<Rightarrow> bool" ("_ \<sqsubset> _" [100, 100] 100)
+where
+  "s1 \<sqsubset> s2 \<equiv> (s1 \<sqsubseteq> s2 \<and> s1 \<noteq> s2)"
+
+lemma length_sprefix:
+  "s1 \<sqsubset> s2 \<Longrightarrow> length s1 < length s2"
+unfolding sprefix_def prefix_def
+by (auto)
+
+definition Prefixes :: "string \<Rightarrow> string set" where
+  "Prefixes s \<equiv> {sp. sp \<sqsubseteq> s}"
+
+definition Suffixes :: "string \<Rightarrow> string set" where
+  "Suffixes s \<equiv> rev ` (Prefixes (rev s))"
+
+definition SPrefixes :: "string \<Rightarrow> string set" where
+  "SPrefixes s \<equiv> {sp. sp \<sqsubset> s}"
+
+definition SSuffixes :: "string \<Rightarrow> string set" where
+  "SSuffixes s \<equiv> rev ` (SPrefixes (rev s))"
+
+lemma Suffixes_in: 
+  "\<exists>s1. s1 @ s2 = s3 \<Longrightarrow> s2 \<in> Suffixes s3"
+unfolding Suffixes_def Prefixes_def prefix_def image_def
+apply(auto)
+by (metis rev_rev_ident)
+
+lemma SSuffixes_in: 
+  "\<exists>s1. s1 \<noteq> [] \<and> s1 @ s2 = s3 \<Longrightarrow> s2 \<in> SSuffixes s3"
+unfolding SSuffixes_def Suffixes_def SPrefixes_def Prefixes_def sprefix_def prefix_def image_def
+apply(auto)
+by (metis append_self_conv rev.simps(1) rev_rev_ident)
+
+lemma Prefixes_Cons:
+  "Prefixes (c # s) = {[]} \<union> {c # sp | sp. sp \<in> Prefixes s}"
+unfolding Prefixes_def prefix_def
+apply(auto simp add: append_eq_Cons_conv) 
+done
+
+lemma finite_Prefixes:
+  "finite (Prefixes s)"
+apply(induct s)
+apply(auto simp add: Prefixes_def prefix_def)[1]
+apply(simp add: Prefixes_Cons)
+done
+
+lemma finite_Suffixes:
+  "finite (Suffixes s)"
+unfolding Suffixes_def
+apply(rule finite_imageI)
+apply(rule finite_Prefixes)
+done
+
+lemma prefix_Cons:
+  "((c # s1) \<sqsubseteq> (c # s2)) = (s1 \<sqsubseteq> s2)"
+apply(auto simp add: prefix_def)
+done
+
+lemma prefix_append:
+  "((s @ s1) \<sqsubseteq> (s @ s2)) = (s1 \<sqsubseteq> s2)"
+apply(induct s)
+apply(simp)
+apply(simp add: prefix_Cons)
+done
+
+
+definition Values :: "rexp \<Rightarrow> string \<Rightarrow> val set" where
+  "Values r s \<equiv> {v. \<turnstile> v : r \<and> flat v \<sqsubseteq> s}"
+
+definition rest :: "val \<Rightarrow> string \<Rightarrow> string" where
+  "rest v s \<equiv> drop (length (flat v)) s"
+
+lemma rest_Nil:
+  "rest v [] = []"
+apply(simp add: rest_def)
+done
+
+lemma rest_Suffixes:
+  "rest v s \<in> Suffixes s"
+unfolding rest_def
+by (metis Suffixes_in append_take_drop_id)
+
+lemma Values_recs:
+  "Values (ZERO) s = {}"
+  "Values (ONE) s = {Void}"
+  "Values (CHAR c) s = (if [c] \<sqsubseteq> s then {Char c} else {})" 
+  "Values (ALT r1 r2) s = {Left v | v. v \<in> Values r1 s} \<union> {Right v | v. v \<in> Values r2 s}"
+  "Values (SEQ r1 r2) s = {Seq v1 v2 | v1 v2. v1 \<in> Values r1 s \<and> v2 \<in> Values r2 (rest v1 s)}"
+  "Values (STAR r) s = 
+      {Stars []} \<union> {Stars (v # vs) | v vs. v \<in> Values r s \<and> Stars vs \<in> Values (STAR r) (rest v s)}"
+unfolding Values_def
+apply(auto)
+(*ZERO*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+(*ONE*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(rule Prf.intros)
+apply (metis append_Nil prefix_def)
+(*CHAR*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(rule Prf.intros)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+(*ALT*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply (metis Prf.intros(2))
+apply (metis Prf.intros(3))
+(*SEQ*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply (simp add: append_eq_conv_conj prefix_def rest_def)
+apply (metis Prf.intros(1))
+apply (simp add: append_eq_conv_conj prefix_def rest_def)
+(*STAR*)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(rule conjI)
+apply(simp add: prefix_def)
+apply(auto)[1]
+apply(simp add: prefix_def)
+apply(auto)[1]
+apply (metis append_eq_conv_conj rest_def)
+apply (metis Prf.intros(6))
+apply (metis append_Nil prefix_def)
+apply (metis Prf.intros(7))
+by (metis append_eq_conv_conj prefix_append prefix_def rest_def)
+
+lemma PMatch_Values:
+  assumes "s \<in> r \<rightarrow> v"
+  shows "v \<in> Values r s"
+using assms
+apply(simp add: Values_def PMatch1)
+by (metis append_Nil2 prefix_def)
+
+lemma finite_image_set2:
+  "finite {x. P x} \<Longrightarrow> finite {y. Q y} \<Longrightarrow> finite {(x, y) | x y. P x \<and> Q y}"
+  by (rule finite_subset [where B = "\<Union>x \<in> {x. P x}. \<Union>y \<in> {y. Q y}. {(x, y)}"]) auto
+
 
 section {* Connection with Sulzmann's Ordering of values *}
 
@@ -951,7 +953,7 @@
 | "length (flat v2) > length (flat v1) \<Longrightarrow> (Right v2) \<succ>(ALT r1 r2) (Left v1)"
 | "v2 \<succ>r2 v2' \<Longrightarrow> (Right v2) \<succ>(ALT r1 r2) (Right v2')"
 | "v1 \<succ>r1 v1' \<Longrightarrow> (Left v1) \<succ>(ALT r1 r2) (Left v1')"
-| "Void \<succ>EMPTY Void"
+| "Void \<succ>ONE Void"
 | "(Char c) \<succ>(CHAR c) (Char c)"
 | "flat (Stars (v # vs)) = [] \<Longrightarrow> (Stars []) \<succ>(STAR r) (Stars (v # vs))"
 | "flat (Stars (v # vs)) \<noteq> [] \<Longrightarrow> (Stars (v # vs)) \<succ>(STAR r) (Stars [])"
@@ -969,7 +971,7 @@
  "\<lbrakk>\<Turnstile> v1 : r1; \<Turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<Turnstile> Seq v1 v2 : SEQ r1 r2"
 | "\<Turnstile> v1 : r1 \<Longrightarrow> \<Turnstile> Left v1 : ALT r1 r2"
 | "\<Turnstile> v2 : r2 \<Longrightarrow> \<Turnstile> Right v2 : ALT r1 r2"
-| "\<Turnstile> Void : EMPTY"
+| "\<Turnstile> Void : ONE"
 | "\<Turnstile> Char c : CHAR c"
 | "\<Turnstile> Stars [] : STAR r"
 | "\<lbrakk>\<Turnstile> v : r; \<Turnstile> Stars vs : STAR r; flat v \<noteq> []\<rbrakk> \<Longrightarrow> \<Turnstile> Stars (v # vs) : STAR r"
@@ -1163,10 +1165,10 @@
 using assms
 apply(induct c r arbitrary: s v rule: der.induct)
 apply(auto)
-(* NULL case *)
+(* ZERO case *)
 apply(erule PMatch.cases)
 apply(simp_all)[7]
-(* EMPTY case *)
+(* ONE case *)
 apply(erule PMatch.cases)
 apply(simp_all)[7]
 (* CHAR case *)
@@ -1265,7 +1267,7 @@
 apply(simp)
 apply(simp)
 apply (metis L.simps(6))
-apply(subst v4)
+apply(subst Prf_injval_flat)
 apply (metis NPrf_imp_Prf PMatch1N)
 apply(simp)
 apply(auto)[1]
@@ -1280,7 +1282,7 @@
 apply(rotate_tac 2)
 apply(drule PMatch.intros(6))
 apply(rule PMatch.intros(7))
-apply (metis PMatch1(1) list.distinct(1) v4)
+apply (metis PMatch1(1) list.distinct(1) Prf_injval_flat)
 apply (metis Nil_is_append_conv)
 apply(simp)
 apply(subst der_correctness)
@@ -1289,7 +1291,7 @@
 
 lemma lex_correct4:
   assumes "s \<in> L r"
-  shows "\<exists>v. lex r s = Some(v) \<and> \<Turnstile> v : r \<and> flat v = s"
+  shows "\<exists>v. matcher r s = Some(v) \<and> \<Turnstile> v : r \<and> flat v = s"
 using lex_correct3[OF assms]
 apply(auto)
 apply (metis PMatch1N)

Binary file thys/paper.pdf has changed

author	Christian Urban <christian dot urban at kcl dot ac dot uk>
	Sun, 28 Feb 2016 14:01:12 +0000
changeset 107	6adda4a667b1
parent 106	489dfa0d7ec9
child 108	73f7dc60c285

Literature/ktl16regexp-agda.pdf		file \| annotate \| diff \| comparison \| revisions
thys/Paper/Paper.thy		file \| annotate \| diff \| comparison \| revisions
thys/Paper/document/root.tex		file \| annotate \| diff \| comparison \| revisions
thys/ReStar.thy		file \| annotate \| diff \| comparison \| revisions
thys/paper.pdf		file \| annotate \| diff \| comparison \| revisions