updated
authorChristian Urban <urbanc@in.tum.de>
Fri, 25 Aug 2017 23:52:49 +0200
changeset 270 462d893ecb3d
parent 269 12772d537b71
child 271 f46ebc84408d
updated
thys/Journal/Paper.thy
thys/Journal/document/root.tex
thys/Positions.thy
--- a/thys/Journal/Paper.thy	Fri Aug 25 15:05:20 2017 +0200
+++ b/thys/Journal/Paper.thy	Fri Aug 25 23:52:49 2017 +0200
@@ -1152,56 +1152,130 @@
   @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
   \end{theorem}
 
-  \begin{proof} By induction on our POSIX rules. It is clear that
-  @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same underlying
-  string.
+  \begin{proof} By induction on our POSIX rules. By
+  Thm~\ref{posixdeterm} and the definition of @{const LV}, it is clear
+  that @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same
+  underlying string @{term s}.  The three base cases are
+  straightforward: for example for @{term "v\<^sub>1 = Void"}, we have
+  that @{term "v\<^sub>2 \<in> LV ONE []"} must also be of the form
+  \mbox{@{term "v\<^sub>2 = Void"}}. Therefore we have @{term
+  "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}.  The inductive cases for
+  @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ r\<^sub>1
+  r\<^sub>2"} are as follows:
 
-  The two base cases are straightforward: for example for @{term
-  "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \<in> LV ONE
-  []"} must also be of the form \mbox{@{term "v\<^sub>2 =
-  Void"}}. Therefore we have @{term "v\<^sub>1 :\<sqsubseteq>val
-  v\<^sub>2"}.  The inductive cases are as follows:
+
+  \begin{itemize} 
 
-  \begin{itemize} \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1
-  r\<^sub>2) \<rightarrow> (Left w\<^sub>1)"}: In this case @{term
-  "v\<^sub>1 = Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is
-  either of the form @{term "Left w\<^sub>2"} or @{term "Right
-  w\<^sub>2"}. In the latter case we can immediately conclude with
-  @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} since a @{text
-  Left}-value with the same underlying string @{text s} is always
-  smaller or equal than a @{text Right}-value.  In the former case we
-  have @{term "w\<^sub>2 \<in> LV r\<^sub>1 s"} and can use the
-  induction hypothesis to infer @{term "w\<^sub>1 :\<sqsubseteq>val
-  w\<^sub>2"}. Because @{term "w\<^sub>1"} and @{term "w\<^sub>2"}
-  have the same underlying string @{text s}, we can conclude with
-  @{term "Left w\<^sub>1 :\<sqsubseteq>val Left w\<^sub>2"}.\smallskip
+  \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
+  \<rightarrow> (Left w\<^sub>1)"}: In this case @{term "v\<^sub>1 =
+  Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either of the
+  form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the
+  latter case we can immediately conclude with @{term "v\<^sub>1
+  :\<sqsubseteq>val v\<^sub>2"} since a @{text Left}-value with the
+  same underlying string @{text s} is always smaller or equal than a
+  @{text Right}-value.  In the former case we have @{term "w\<^sub>2
+  \<in> LV r\<^sub>1 s"} and can use the induction hypothesis to infer
+  @{term "w\<^sub>1 :\<sqsubseteq>val w\<^sub>2"}. Because @{term
+  "w\<^sub>1"} and @{term "w\<^sub>2"} have the same underlying string
+  @{text s}, we can conclude with @{term "Left w\<^sub>1
+  :\<sqsubseteq>val Left w\<^sub>2"} by Prop ???.\smallskip
 
   \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
   \<rightarrow> (Right w\<^sub>1)"}: This case similar as the previous
   case, except that we know that @{term "s \<notin> L
-  r\<^sub>1"}. This is needed when @{term "v\<^sub>2 = Left
-  w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat w\<^sub>2"}
-  @{text "= s"}} and @{term "\<Turnstile> w\<^sub>2 : r\<^sub>1"}, we
-  can derive a contradiction using Prop.~\ref{inhabs}. So also in this
-  case \mbox{@{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}}.\smallskip
+  r\<^sub>1"}. This is needed when @{term "v\<^sub>2"} is of the form
+  @{term "Left w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat
+  w\<^sub>2"} @{text "= s"}} and @{term "\<Turnstile> w\<^sub>2 :
+  r\<^sub>1"}, we can derive a contradiction using
+  Prop.~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1
+  :\<sqsubseteq>val v\<^sub>2"}}.\smallskip
 
-  \item[$\bullet$]  Case @{term "(s\<^sub>1 @ s\<^sub>2) \<in> (SEQ r\<^sub>1 r\<^sub>2)
-  \<rightarrow> (Seq w\<^sub>1 w\<^sub>2)"}: Assume @{term "v\<^sub>2 =
-  Seq (u\<^sub>1) (u\<^sub>2)"} with @{term "\<Turnstile> u\<^sub>1 : r\<^sub>1"}
-  and \mbox{@{term "\<Turnstile> u\<^sub>2 : r\<^sub>2"}}. We have
-  
+  \item[$\bullet$] Case @{term "(s\<^sub>1 @ s\<^sub>2) \<in> (SEQ
+  r\<^sub>1 r\<^sub>2) \<rightarrow> (Seq w\<^sub>1 w\<^sub>2)"}: We
+  can assume @{term "v\<^sub>2 = Seq (u\<^sub>1) (u\<^sub>2)"} with
+  @{term "\<Turnstile> u\<^sub>1 : r\<^sub>1"} and \mbox{@{term
+  "\<Turnstile> u\<^sub>2 : r\<^sub>2"}}. We have @{term "s\<^sub>1 @
+  s\<^sub>2 = (flat u\<^sub>1) @ (flat u\<^sub>2)"}.  By the
+  side-condition on out @{text PS}-rule we know that either @{term
+  "s\<^sub>1 = flat u\<^sub>1"} or @{term "flat u\<^sub>1"} is a
+  strict prefix ??? of @{term "s\<^sub>1"}. In the latter case we can
+  infer @{term "w\<^sub>1 :\<sqsubset>val u\<^sub>1"} by ???  and from
+  this @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} by ???.  In the
+  former case we know @{term "u\<^sub>1 \<in> LV r\<^sub>1 s\<^sub>1"}
+  and @{term "u\<^sub>2 \<in> LV r\<^sub>2 s\<^sub>2"}. With this we
+  can use the induction hypotheses to infer @{term "w\<^sub>1
+  :\<sqsubseteq>val u\<^sub>1"} and @{term "w\<^sub>2
+  :\<sqsubseteq>val u\<^sub>2"}. By ??? we can again infer @{term
+  "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} and are done.
+
   \end{itemize}
+
+  \noindent The case for @{text "P\<star>"} is similar to the @{text PS}-case.\qed
   \end{proof}
 
-  Given a lexical value @{text "v\<^sub>1"}, say, in @{term "LV r s"} for which there does 
-  not exists any smaller value in @{term "LV r s"}, then this value is our POSIX value:
+  \noindent This theorem shows that our posix value @{text
+  "v\<^sub>1"} with @{term "s \<in> r \<rightarrow> v\<^sub>1"} is a
+  minimal element for the values in @{text "LV r s"}. By ??? we also
+  know that any value in @{text "LV s' r"}, with @{term "s'"} being a
+  prefix, cannot be smaller than @{text "v\<^sub>1"}. The next theorem
+  shows the opposite---namely any minimal element must be a @{text
+  POSIX}-value.  Given a lexical value @{text "v\<^sub>1"}, say, in
+  @{term "LV r s"}, for which there does not exists any smaller value
+  in @{term "LV r s"}, then this value is our @{text POSIX}-value:
 
   \begin{theorem}
   @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
   \end{theorem}
 
-  \begin{proof}
-  By induction on @{text r}.
+  \begin{proof} By induction on @{text r}. The tree base cases are
+  again straightforward.  For example if @{term "v\<^sub>1 \<in> LV
+  ONE s"} then @{term "v\<^sub>1 = Void"} and @{term "s = []"}. We
+  know that @{term "[] \<in> ONE \<rightarrow> Void"} holds.  In the
+  cases for @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ
+  r\<^sub>1 r\<^sub>2"} we reason as follows:
+
+  \begin{itemize}
+
+  \item[$\bullet$] Case @{term "v\<^sub>1 \<in> LV (ALT r\<^sub>1
+  r\<^sub>2) s"} with @{term "v\<^sub>1 = Left w\<^sub>1"} and @{term
+  "w\<^sub>1 \<in> LV r\<^sub>1 s"}: In order to use the induction
+  hypothesis we need to infer 
+
+  \begin{center}
+  @{term "\<forall>v'
+  \<in> LV (ALT r\<^sub>1 r\<^sub>2) s. \<not> (v' :\<sqsubset>val
+  Left w\<^sub>1)"}
+  implies
+  @{term "\<forall>v' \<in> LV r\<^sub>1
+  s. \<not> (v' :\<sqsubset>val w\<^sub>1)"}
+  \end{center}
+
+  \noindent This can be done because of ?? we can infer that for every
+  @{text v'} in @{term "LV r\<^sub>1 s"} and @{term "v'
+  :\<sqsubset>val w\<^sub>1"} also @{term "Left v' :\<sqsubset>val
+  Left w\<^sub>1"} holds. This gives a contradiction. Consequently, we
+  can use the induction hypothesis to obtain @{term "s \<in> r\<^sub>1
+  \<rightarrow> w\<^sub>1"} and then conclude this case with @{term "s
+  \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> v\<^sub>1"}.\smallskip
+
+  \item[$\bullet$] Case @{term "v\<^sub>1 \<in> LV (ALT r\<^sub>1
+  r\<^sub>2) s"} with @{term "v\<^sub>1 = Right w\<^sub>1"} and @{term
+  "w\<^sub>1 \<in> LV r\<^sub>2 s"}: Like above we can use the 
+  induction hypothesis in order to infer @{term "s \<in> r\<^sub>2
+  \<rightarrow> w\<^sub>1"}. In order to conclude we still need to
+  obtain @{term "s \<notin> L r\<^sub>1"}. Let us suppose the opposite.
+  Then there is a @{term v'} such that @{term "v' \<in> LV r\<^sub>1 s"}
+  by Prop ??? and hence @{term "Left v' \<in> LV (ALT r\<^sub>1 r\<^sub>2) s"}.
+  But then we can use the second assumption of the theorem to infer that 
+  @{term "\<not>(Left v' :\<sqsubset>val Right w\<^sub>1)"}, which cannot be the case.
+  Therefore @{term "s \<notin> L r\<^sub>1"} must hold and we can also conclude in this
+  case.
+
+ 
+
+
+  \end{itemize}
+
   \end{proof}
 *}
 
--- a/thys/Journal/document/root.tex	Fri Aug 25 15:05:20 2017 +0200
+++ b/thys/Journal/document/root.tex	Fri Aug 25 23:52:49 2017 +0200
@@ -72,18 +72,17 @@
 in order to deal with POSIX matching, which is the underlying
 disambiguation strategy for regular expressions needed in lexers.
 Their algorithm generates POSIX values which encode the information of
-\emph{how} a regular expression matched a string---that is which part
+\emph{how} a regular expression matched a string---that is, which part
 of the string is matched by which part of the regular expression.  In
-the first part of this paper we give our inductive definition of what
-a POSIX value is and show $(i)$ that such a value is unique (for given
-regular expression and string being matched) and $(ii)$ that Sulzmann
-and Lu's algorithm always generates such a value (provided that the
-regular expression matches the string). We also prove the correctness
-of an optimised version of the POSIX matching algorithm.  In the
-second part we show that $(iii)$ our inductive definition of a POSIX
-value is equivalent to an alternative definition by Okui and Suzuki
-which identifies POSIX values as least elements according to an
-ordering of values. \smallskip
+this paper we give our inductive definition of what a POSIX value is
+and show $(i)$ that such a value is unique (for given regular
+expression and string being matched) and $(ii)$ that Sulzmann and Lu's
+algorithm always generates such a value (provided that the regular
+expression matches the string). We show that $(iii)$ our inductive
+definition of a POSIX value is equivalent to an alternative definition
+by Okui and Suzuki which identifies POSIX values as least elements
+according to an ordering of values.  We also prove the correctness of
+an optimised version of the POSIX matching algorithm.  \smallskip
 
 {\bf Keywords:} POSIX matching, Derivatives of Regular Expressions,
 Isabelle/HOL
--- a/thys/Positions.thy	Fri Aug 25 15:05:20 2017 +0200
+++ b/thys/Positions.thy	Fri Aug 25 23:52:49 2017 +0200
@@ -671,6 +671,7 @@
     by (auto simp add: LV_def)
   then have "v1 :\<sqsubset>val v3a \<or> (v1 :\<sqsubseteq>val v3a \<and> v2 :\<sqsubseteq>val v3b)" using IH1 IH2 by blast         
   then have "Seq v1 v2 :\<sqsubseteq>val Seq v3a v3b" using eqs q2 as1
+    thm PosOrd_SeqI1 PosOrd_SeqI2
     unfolding  PosOrd_ex_eq_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) 
   then show "Seq v1 v2 :\<sqsubseteq>val v3" unfolding eqs by blast
 next 
@@ -690,11 +691,10 @@
   unfolding LV_def  
   apply(auto)
   apply(erule Prf.cases)
-  apply(simp_all)
-  apply(auto)[1]
+  apply(auto)
   apply(case_tac vs)
-  apply(auto)
-  using Prf.intros(6) by blast
+  apply(auto intro: Prf.intros)
+  done
   then show "Stars (v # vs) :\<sqsubseteq>val v3" 
     proof (cases)
       case (NonEmpty v3a vs3)
@@ -821,8 +821,10 @@
 next 
   case (ONE s v1)
   have "v1 \<in> LV ONE s" by fact
-  then show "s \<in> ONE \<rightarrow> v1" unfolding LV_def
-    by(auto elim!: Prf.cases intro: Posix.intros)
+  then have "v1 = Void" "s = []" unfolding LV_def
+    by(auto elim: Prf.cases)
+  then show "s \<in> ONE \<rightarrow> v1" 
+    by(auto intro: Posix.intros)
 next 
   case (CHAR c s v1)
   have "v1 \<in> LV (CHAR c) s" by fact
@@ -832,7 +834,7 @@
   case (ALT r1 r2 s v1)
   have IH1: "\<And>s v1. \<lbrakk>v1 \<in> LV r1 s; \<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r1 \<rightarrow> v1" by fact
   have IH2: "\<And>s v1. \<lbrakk>v1 \<in> LV r2 s; \<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r2 \<rightarrow> v1" by fact
-  have as1: "\<forall>v2\<in>LV (ALT r1 r2) s. \<not> v2 :\<sqsubset>val v1" by fact
+  have as1: "\<forall>v2 \<in> LV (ALT r1 r2) s. \<not> v2 :\<sqsubset>val v1" by fact
   have as2: "v1 \<in> LV (ALT r1 r2) s" by fact
   then consider 
      (Left) v1' where
@@ -845,18 +847,17 @@
   then show "s \<in> ALT r1 r2 \<rightarrow> v1"
    proof (cases)
      case (Left v1')
-     have "v1' \<in> LV r1 s" using as2
-       unfolding LV_def Left by (auto elim: Prf.cases)
+     have "v1' \<in> LV r1 s" using Left(3) .
      moreover
      have "\<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1'" using as1
-       unfolding LV_def Left using Prf.intros(2) PosOrd_Left_eq by force  
+       unfolding Left(1,2) unfolding LV_def 
+       using Prf.intros(2) PosOrd_Left_eq by force  
      ultimately have "s \<in> r1 \<rightarrow> v1'" using IH1 by simp
      then have "s \<in> ALT r1 r2 \<rightarrow> Left v1'" by (rule Posix.intros)
      then show "s \<in> ALT r1 r2 \<rightarrow> v1" using Left by simp
    next
      case (Right v1')
-     have "v1' \<in> LV r2 s" using as2
-       unfolding LV_def Right by (auto elim: Prf.cases)
+     have "v1' \<in> LV r2 s" using Right(3) .
      moreover
      have "\<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1'" using as1
        unfolding LV_def Right using Prf.intros(3) PosOrd_RightI by force