# HG changeset patch
# User Christian Urban <christian dot urban at kcl dot ac dot uk>
# Date 1457387690 0
# Node ID 7c6c907660d89e7ceef155745671fffe0b9ee911
# Parent  4c85af262ee792bbdbfc4b2ca358ff64f61d6464
updated

diff -r 4c85af262ee7 -r 7c6c907660d8 thys/Paper/Paper.thy
--- a/thys/Paper/Paper.thy	Mon Mar 07 18:56:41 2016 +0000
+++ b/thys/Paper/Paper.thy	Mon Mar 07 21:54:50 2016 +0000
@@ -386,7 +386,7 @@
   unique value that satisfies the (informal) POSIX rules from the
   Introduction. Graphically the POSIX value calculation algorithm by
   Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz}
-  where the path from the left to the right involving @{const der}/@{const
+  where the path from the left to the right involving @{term derivatives}/@{const
   nullable} is the first phase of the algorithm (calculating successive
   \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to
   left, the second phase. This picture shows the steps required when a
@@ -494,7 +494,7 @@
   might be instructive to look first at the three sequence cases (clauses
   (4)--(6)). In each case we need to construct an ``injected value'' for
   @{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term
-  "Seq DUMMY DUMMY"}. Recall the clause of the @{const der}-function
+  "Seq DUMMY DUMMY"}. Recall the clause of the @{text derivative}-function
   for sequence regular expressions:
 
   \begin{center}
@@ -529,15 +529,21 @@
   The idea of @{term inj} to ``inject back'' a character into a value can
   be made precise by the first part of the following lemma; the second
   part shows that the underlying string of an @{const mkeps}-value is
-  the empty string.
+  always the empty string.
 
-  \begin{lemma}\mbox{}\\\label{Prf_injval_flat}
+  \begin{lemma}\mbox{}\smallskip\\\label{Prf_injval_flat}
   \begin{tabular}{ll}
   (1) & @{thm[mode=IfThen] Prf_injval_flat}\\
   (2) & @{thm[mode=IfThen] mkeps_flat}
   \end{tabular}
   \end{lemma}
 
+  \begin{proof}
+  Both properties are by routine inductions: the first one, for example,
+  by an induction over the definition of @{term derivatives}; the second by
+  induction on @{term r}. There are no interesting cases.
+  \end{proof}
+
   Having defined the @{const mkeps} and @{text inj} function we can extend
   \Brz's matcher so that a [lexical] value is constructed (assuming the
   regular expression matches the string). The clauses of the lexer are
@@ -551,7 +557,7 @@
   \end{tabular}
   \end{center}
 
-  \noindent If the regular expression does not match, @{const None} is
+  \noindent If the regular expression does not match the string, @{const None} is
   returned, indicating an error is raised. If the regular expression does
   match the string, then @{const Some} value is returned. One important
   virtue of this algorithm is that it can be implemented with ease in a
@@ -562,53 +568,82 @@
   match and priority rule; as correctly argued in \cite{Sulzmann2014}, this
   needs formal specification. Sulzmann and Lu define a \emph{dominance}
   relation\footnote{Sulzmann and Lu call it an ordering relation, but
-  without giving evidence that it is transitive.} between values and argue that
-  there is a maximum value, as given by the derivative-based algorithm. In
-  contrast, we shall next introduce a simple inductive definition to specify
-  what a \emph{POSIX value} is, incorporating the POSIX-specific choices
-  into the side-conditions of our rules. Our definition is inspired by the
-  matching relation given in \cite{Vansummeren2006}. The relation we define
-  is ternary and written as \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating strings,
-  regular expressions and values.
+  without giving evidence that it is transitive.} between values and argue
+  that there is a maximum value, as given by the derivative-based algorithm.
+  In contrast, we shall introduce a simple inductive definition that
+  specifies directly what a \emph{POSIX value} is, incorporating the
+  POSIX-specific choices into the side-conditions of our rules. Our
+  definition is inspired by the matching relation given in
+  \cite{Vansummeren2006}. The relation we define is ternary and written as
+  \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating strings, regular expressions and
+  values.
 
   \begin{center}
   \begin{tabular}{c}
-  @{thm[mode=Axiom] PMatch.intros(1)} \qquad
-  @{thm[mode=Axiom] PMatch.intros(2)}\bigskip\\
-  @{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\qquad
-  @{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\bigskip\\
+  @{thm[mode=Axiom] PMatch.intros(1)}@{text "P"}@{term "ONE"} \qquad
+  @{thm[mode=Axiom] PMatch.intros(2)}@{text "P"}@{term "c"}\bigskip\\
+  @{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad
+  @{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}@{text "P+R"}\bigskip\\
   $\mprset{flushleft}
    \inferrule
    {@{thm (prem 1) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
     @{thm (prem 2) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
     @{thm (prem 3) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
-   {@{thm (concl) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\\
-  @{thm[mode=Axiom] PMatch.intros(7)}\bigskip\\
+   {@{thm (concl) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$@{text "PS"}\\
+  @{thm[mode=Axiom] PMatch.intros(7)}@{text "P[]"}\bigskip\\
   $\mprset{flushleft}
    \inferrule
    {@{thm (prem 1) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
     @{thm (prem 2) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
     @{thm (prem 3) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
     @{thm (prem 4) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
-   {@{thm (concl) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$
+   {@{thm (concl) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$@{text "P\<star>"}
   \end{tabular}
   \end{center}
 
   \noindent We claim that this relation captures the idea behind the two
-  informal POSIX rules shown in the Introduction: Consider the second line
-  where the POSIX values for an alternative regular expression is
-  specified---it is always a @{text "Left"}-value, \emph{except} when the
-  string to be matched is not in the language of @{term "r\<^sub>1"}; only then it
-  is a @{text Right}-value (see the side-condition in the rule on the
-  right). Interesting is also the rule for sequence regular expressions
-  (third line). The first two premises state that @{term "v\<^sub>1"} and
-  @{term "v\<^sub>2"} are the POSIX values for @{term "(s\<^sub>1,
-  r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} respectively.
-  
+  informal POSIX rules shown in the Introduction: Consider for example the
+  rules @{text "P+L"} and @{text "P+R"} where the POSIX value for an
+  alternative regular expression is specified---it is always a @{text
+  "Left"}-value, \emph{except} when the string to be matched is not in the
+  language of @{term "r\<^sub>1"}; only then it is a @{text Right}-value (see the
+  side-condition in @{text "P+R"}). Interesting is also the rule for
+  sequence regular expressions (@{text "PS"}). The first two premises state
+  that @{term "v\<^sub>1"} and @{term "v\<^sub>2"} are the POSIX values for
+  @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"}
+  respectively. Consider now the third premise and note that the POSIX value
+  of this rule should match the string @{term "s\<^sub>1 @ s\<^sub>2"}. According to the
+  longest match rule, we want that the @{term "s\<^sub>1"} is the longest initial
+  split of @{term "s\<^sub>1 @ s\<^sub>2"} such that @{term "s\<^sub>2"} is still recognised
+  by @{term "r\<^sub>2"}. Let us assume, contrary to the third premise, that there
+  \emph{exists} an @{term "s\<^sub>3"} and @{term "s\<^sub>4"} such that @{term "s\<^sub>2"}
+  can be split up into a non-empty @{term "s\<^sub>3"} and @{term "s\<^sub>4"}. Moreover
+  the longer @{term "s\<^sub>1 @ s\<^sub>3"} can be matched by @{text "r\<^sub>1"} and the
+  shorter @{term "s\<^sub>4"} can still be matched by @{term "r\<^sub>2"}. In this case
+  @{term "s\<^sub>1"} would not be the longest initial split of @{term "s\<^sub>1 @
+  s\<^sub>2"} and therefore @{term "Seq v\<^sub>1 v\<^sub>2"} cannot be a POSIX value
+  for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}. A similar condition is imposed
+  onto the POSIX value in the @{text "P\<star>"}-rule. Also there we want that
+   @{term "s\<^sub>1"} is the longest initial split of @{term "s\<^sub>1 @ s\<^sub>2"} and
+   furthermore the corresponding value @{term v} cannot be flatten to
+   the empty string. In effect, we require that in each ``iteration''
+   of the star, some parts of the string need to be ``nibbled'' away; only
+   in case of the empty string weBy accept @{term "Stars []"} as the 
+   POSIX value.
+
+   We can prove that given a string @{term s} and regular expression @{term
+   r}, the POSIX value @{term v} is uniquely determined by @{term "s \<in> r \<rightarrow>
+   v"}.
+
   \begin{theorem}
   @{thm[mode=IfThen] PMatch_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]}
   \end{theorem}
 
+  \begin{proof}
+  By induction on the definition of @{term "s \<in> r \<rightarrow> v\<^sub>1"} and a case
+  analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}.
+  \end{proof}
+
   \begin{lemma}
   @{thm[mode=IfThen] PMatch_mkeps}
   \end{lemma}
diff -r 4c85af262ee7 -r 7c6c907660d8 thys/ReStar.thy
--- a/thys/ReStar.thy	Mon Mar 07 18:56:41 2016 +0000
+++ b/thys/ReStar.thy	Mon Mar 07 21:54:50 2016 +0000
@@ -460,6 +460,14 @@
 apply(auto intro: Prf.intros)
 done
 
+lemma PMatch1a:
+  assumes "s \<in> r \<rightarrow> v"
+  shows "s \<in> L r"
+using assms
+apply(induct s r v rule: PMatch.induct)
+apply(auto simp add: Sequ_def)
+done
+
 lemma PMatch_mkeps:
   assumes "nullable r"
   shows "[] \<in> r \<rightarrow> mkeps r"
@@ -482,105 +490,60 @@
 apply(metis PMatch.intros(7))
 done
 
-find_theorems Stars
-thm compat_val_list.induct compat_val.induct
-
-
 lemma PMatch_determ:
-  shows "\<lbrakk>s \<in> r \<rightarrow> v1; s \<in> r \<rightarrow> v2\<rbrakk> \<Longrightarrow> v1 = v2"
-  and   "\<lbrakk>s \<in> (STAR r) \<rightarrow> Stars vs1; s \<in> (STAR r) \<rightarrow> Stars vs2\<rbrakk> \<Longrightarrow> vs1 = vs2"
-apply(induct v1 and vs1 arbitrary: s r v2 and s r vs2 rule: compat_val.induct compat_val_list.induct)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(erule PMatch.cases)
-apply(simp_all)[7]
+  assumes "s \<in> r \<rightarrow> v1" "s \<in> r \<rightarrow> v2"
+  shows "v1 = v2"
+using assms
+apply(induct s r v1 arbitrary: v2 rule: PMatch.induct)
 apply(erule PMatch.cases)
 apply(simp_all)[7]
 apply(erule PMatch.cases)
 apply(simp_all)[7]
-apply(clarify)
-apply(subgoal_tac "s1 = s1a \<and> s2 = s2a")
-apply metis
-apply(rule conjI)
-apply(simp add: append_eq_append_conv2)
-apply(auto)[1]
-apply (metis PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis PMatch1(1) PMatch1(2) Prf_flat_L)
-apply(simp add: append_eq_append_conv2)
-apply(auto)[1]
-apply (metis PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis PMatch1(1) PMatch1(2) Prf_flat_L)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply (metis PMatch1(1) PMatch1(2) Prf_flat_L)
+apply(rotate_tac 2)
 apply(erule PMatch.cases)
-apply(simp_all)[7]
+apply(simp_all (no_asm_use))[7]
+apply(clarify)
+apply(force)
 apply(clarify)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply (metis PMatch1(1) PMatch1(2) Prf_flat_L)
-(* star case *)
-defer
+apply(drule PMatch1a)
+apply(simp)
+apply(rotate_tac 3)
 apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply (metis PMatch1(2))
-apply(rotate_tac  3)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
+apply(simp_all (no_asm_use))[7]
+apply(drule PMatch1a)+
+apply(simp)
+apply(simp)
+apply(rotate_tac 5)
 apply(erule PMatch.cases)
-apply(simp_all)[7]
+apply(simp_all (no_asm_use))[7]
 apply(clarify)
-apply(subgoal_tac "s1 = s1a \<and> s2 = s2a")
-apply metis
+apply(subgoal_tac "s1 = s1a")
+apply(blast)
 apply(simp add: append_eq_append_conv2)
-apply(auto)[1]
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
+apply(clarify)
+apply (metis PMatch1a append_self_conv)
+apply(rotate_tac 6)
 apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply (metis PMatch1(2))
-apply(erule PMatch.cases)
-apply(simp_all)[7]
-apply(clarify)
-apply(erule PMatch.cases)
-apply(simp_all)[7]
+apply(simp_all (no_asm_use))[7]
 apply(clarify)
-apply(subgoal_tac "s1 = s1a \<and> s2 = s2a")
-apply(drule_tac x="s1 @ s2" in meta_spec)
-apply(drule_tac x="rb" in meta_spec)
-apply(drule_tac x="(va#vsa)" in meta_spec)
+apply(subgoal_tac "s1 = s1a")
 apply(simp)
-apply(drule meta_mp)
-apply (metis L.simps(6) PMatch.intros(6))
-apply (metis L.simps(6) PMatch.intros(6))
-apply(simp add: append_eq_append_conv2)
-apply(auto)[1]
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L)
-apply (metis PMatch1(2))
+apply(blast)
+apply(simp  (no_asm_use) add: append_eq_append_conv2)
+apply(clarify)
+apply (metis L.simps(6) PMatch1a append_self_conv)
+apply(clarify)
+apply(rotate_tac 2)
 apply(erule PMatch.cases)
-apply(simp_all)[7]
+apply(simp_all (no_asm_use))[7]
+using PMatch1(2) apply auto[1]
+using PMatch1(2) apply blast
+apply(erule PMatch.cases)
+apply(simp_all (no_asm_use))[7]
 apply(clarify)
-by (metis PMatch1(2))
+apply (simp add: PMatch1(2))
+apply(simp)
+done
 
 (* a proof that does not need proj *)
 lemma PMatch2_roy_version:
diff -r 4c85af262ee7 -r 7c6c907660d8 thys/paper.pdf
Binary file thys/paper.pdf has changed