lexing: changeset 272:f16019b11179

--- a/thys/Journal/Paper.thy	Fri Aug 25 23:54:10 2017 +0200
+++ b/thys/Journal/Paper.thy	Sun Aug 27 00:03:31 2017 +0300
@@ -52,6 +52,7 @@
   val.Stars ("Stars _" [79] 78) and
 
   L ("L'(_')" [10] 78) and
+  LV ("LV _ _" [80,73] 78) and
   der_syn ("_\\_" [79, 1000] 76) and  
   ders_syn ("_\\_" [79, 1000] 76) and
   flat ("|_|" [75] 74) and
@@ -100,7 +101,7 @@
 (*
 comments not implemented
 
-p9. The condtion "not exists s3 s4..." appears often enough (in particular in
+p9. The condition "not exists s3 s4..." appears often enough (in particular in
 the proof of Lemma 3) to warrant a definition.
 
 *)
@@ -186,46 +187,56 @@
 matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> +
 r\<^bsub>id\<^esub>)\<^sup>\<star>"} matches @{text "iffoo"},
 respectively @{text "if"}, in exactly one `iteration' of the star. The
-Empty String Rule is for cases where @{text
-"(a\<^sup>\<star>)\<^sup>\<star>"}, for example, matches against the
+Empty String Rule is for cases where, for example, @{text
+"(a\<^sup>\<star>)\<^sup>\<star>"} matches against the
 string @{text "bc"}. Then the longest initial matched substring is the
 empty string, which is matched by both the whole regular expression
-and the parenthesised sub-expression.
+and the parenthesised subexpression.
 
 
 One limitation of Brzozowski's matcher is that it only generates a
 YES/NO answer for whether a string is being matched by a regular
 expression.  Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher
 to allow generation not just of a YES/NO answer but of an actual
-matching, called a [lexical] {\em value}. \marginpar{explain values;
-who introduced them} They give a simple algorithm to calculate a value
-that appears to be the value associated with POSIX matching.  The
-challenge then is to specify that value, in an algorithm-independent
-fashion, and to show that Sulzmann and Lu's derivative-based algorithm
-does indeed calculate a value that is correct according to the
-specification.
+matching, called a [lexical] {\em value}. Assuming a regular
+expression matches a string, values encode the information of
+\emph{how} the string is matched by the regular expression---that is,
+which part of the string is matched by which part of the regular
+expression. For this consider again the the string @{text "xy"} and
+the regular expression \mbox{@{text "(x + (y +
+xy))\<^sup>\<star>"}}. The POSIX value, which corresponds to using the
+star in only one repetition,
+
+
+\marginpar{explain values; who introduced them} 
+
 
-The answer given by Sulzmann and Lu \cite{Sulzmann2014} is to define a
-relation (called an ``order relation'') on the set of values of @{term
-r}, and to show that (once a string to be matched is chosen) there is
-a maximum element and that it is computed by their derivative-based
-algorithm. This proof idea is inspired by work of Frisch and Cardelli
-\cite{Frisch2004} on a GREEDY regular expression matching
-algorithm. However, we were not able to establish transitivity and
-totality for the ``order relation'' by Sulzmann and Lu. 
-There are some inherent problems with their approach (of
-which some of the proofs are not published in \cite{Sulzmann2014});
-perhaps more importantly, we give in this paper a simple inductive (and
-algorithm-independent) definition of what we call being a {\em POSIX
-value} for a regular expression @{term r} and a string @{term s}; we
-show that the algorithm computes such a value and that such a value is
-unique. Our proofs are both done by hand and checked in Isabelle/HOL.  The
-experience of doing our proofs has been that this mechanical checking
-was absolutely essential: this subject area has hidden snares. This
-was also noted by Kuklewicz \cite{Kuklewicz} who found that nearly all
-POSIX matching implementations are ``buggy'' \cite[Page
-203]{Sulzmann2014} and by Grathwohl et al \cite[Page 36]{CrashCourse2014}
-who wrote:
+Sulzmann and Lu give a simple algorithm to calculate a value that
+appears to be the value associated with POSIX matching.  The challenge
+then is to specify that value, in an algorithm-independent fashion,
+and to show that Sulzmann and Lu's derivative-based algorithm does
+indeed calculate a value that is correct according to the
+specification.  The answer given by Sulzmann and Lu
+\cite{Sulzmann2014} is to define a relation (called an ``order
+relation'') on the set of values of @{term r}, and to show that (once
+a string to be matched is chosen) there is a maximum element and that
+it is computed by their derivative-based algorithm. This proof idea is
+inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY
+regular expression matching algorithm. However, we were not able to
+establish transitivity and totality for the ``order relation'' by
+Sulzmann and Lu.  There are some inherent problems with their approach
+(of which some of the proofs are not published in
+\cite{Sulzmann2014}); perhaps more importantly, we give in this paper
+a simple inductive (and algorithm-independent) definition of what we
+call being a {\em POSIX value} for a regular expression @{term r} and
+a string @{term s}; we show that the algorithm by Sulzmann and Lu
+computes such a value and that such a value is unique. Our proofs are
+both done by hand and checked in Isabelle/HOL.  The experience of
+doing our proofs has been that this mechanical checking was absolutely
+essential: this subject area has hidden snares. This was also noted by
+Kuklewicz \cite{Kuklewicz} who found that nearly all POSIX matching
+implementations are ``buggy'' \cite[Page 203]{Sulzmann2014} and by
+Grathwohl et al \cite[Page 36]{CrashCourse2014} who wrote:
 
 \begin{quote}
 \it{}``The POSIX strategy is more complicated than the greedy because of 
@@ -263,7 +274,7 @@
 regular expressions than the initial one; various optimisations are
 possible. We prove the correctness when simplifications of @{term "ALT ZERO
 r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to
-@{term r} are applied.
+@{term r} are applied. We extend our results to ???
 
 *}
 
@@ -449,7 +460,7 @@
   \noindent Sulzmann and Lu also define inductively an inhabitation relation
   that associates values to regular expressions. We define this relation as 
   follows:\footnote{Note that the rule for @{term Stars} differs from our 
-  erlier paper \cite{AusafDyckhoffUrban2016}. There we used the original
+  earlier paper \cite{AusafDyckhoffUrban2016}. There we used the original
   definition by Sulzmann and Lu which does not require that the values @{term "v \<in> set vs"}
   flatten to a non-empty string. The reason for introducing the 
   more restricted version of lexical values is convenience later 
@@ -748,9 +759,9 @@
 
   \begin{theorem}\mbox{}\smallskip\\\label{posixdeterm}
   \begin{tabular}{ll}
-  @{text "(1)"} & If @{thm (prem 1) Posix1(1)} then @{thm (concl)
+  (1) & If @{thm (prem 1) Posix1(1)} then @{thm (concl)
   Posix1(1)} and @{thm (concl) Posix1(2)}.\\
-  @{text "(2)"} & @{thm[mode=IfThen] Posix_determ(1)[of _ _ "v" "v'"]}
+  (2) & @{thm[mode=IfThen] Posix_determ(1)[of _ _ "v" "v'"]}
   \end{tabular}
   \end{theorem}
 
@@ -772,7 +783,7 @@
   are the POSIX values for @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"}
   respectively. Consider now the third premise and note that the POSIX value
   of this rule should match the string \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}}. According to the
-  longest match rule, we want that the @{term "s\<^sub>1"} is the longest initial
+  Longest Match Rule, we want that the @{term "s\<^sub>1"} is the longest initial
   split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} such that @{term "s\<^sub>2"} is still recognised
   by @{term "r\<^sub>2"}. Let us assume, contrary to the third premise, that there
   \emph{exist} an @{term "s\<^sub>3"} and @{term "s\<^sub>4"} such that @{term "s\<^sub>2"}
@@ -782,8 +793,8 @@
   matched by @{term "r\<^sub>2"}. In this case @{term "s\<^sub>1"} would \emph{not} be the
   longest initial split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} and therefore @{term "Seq v\<^sub>1
   v\<^sub>2"} cannot be a POSIX value for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}. 
-  The main point is that our side-condition ensures the longest 
-  match rule is satisfied.
+  The main point is that our side-condition ensures the Longest 
+  Match Rule is satisfied.
 
   A similar condition is imposed on the POSIX value in the @{text
   "P\<star>"}-rule. Also there we want that @{term "s\<^sub>1"} is the longest initial
@@ -792,10 +803,10 @@
   that in each ``iteration'' of the star, some non-empty substring needs to
   be ``chipped'' away; only in case of the empty string we accept @{term
   "Stars []"} as the POSIX value. Indeed we can show that our POSIX value
-  is a lexical value which excludes those @{text Stars} containing values 
+  is a lexical value which excludes those @{text Stars} containing subvalues 
   that flatten to the empty string.
 
-  \begin{lemma}
+  \begin{lemma}\label{LVposix}
   @{thm [mode=IfThen] Posix_LV}
   \end{lemma}
 
@@ -835,8 +846,8 @@
   \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v'"} by induction hypothesis and hence @{term "(c #
   s) \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> injval (ALT r\<^sub>1 r\<^sub>2) c (Left v')"} as needed. Similarly
   in subcase @{text "(b)"} where, however, in addition we have to use
-  Prop.~\ref{derprop}(2) in order to infer @{term "c # s \<notin> L r\<^sub>1"} from @{term
-  "s \<notin> L (der c r\<^sub>1)"}.
+  Proposition~\ref{derprop}(2) in order to infer @{term "c # s \<notin> L r\<^sub>1"} from @{term
+  "s \<notin> L (der c r\<^sub>1)"}.\smallskip
 
   \item[$\bullet$] Case @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases:
   
@@ -853,7 +864,7 @@
   %
   \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
 
-  \noindent From the latter we can infer by Prop.~\ref{derprop}(2):
+  \noindent From the latter we can infer by Proposition~\ref{derprop}(2):
   %
   \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
 
@@ -869,7 +880,7 @@
   %
   \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = c # s \<and> s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\]
 
-  \noindent By Lem.~\ref{lemmkeps} we know @{term "[] \<in> r\<^sub>1 \<rightarrow> (mkeps r\<^sub>1)"}
+  \noindent By Lemma~\ref{lemmkeps} we know @{term "[] \<in> r\<^sub>1 \<rightarrow> (mkeps r\<^sub>1)"}
   holds. Putting this all together, we can conclude with @{term "(c #
   s) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (mkeps r\<^sub>1) (injval r\<^sub>2 c v\<^sub>1)"}, as required.
 
@@ -882,7 +893,7 @@
   \end{proof}
 
   \noindent
-  With Lem.~\ref{Posix2} in place, it is completely routine to establish
+  With Lemma~\ref{Posix2} in place, it is completely routine to establish
   that the Sulzmann and Lu lexer satisfies our specification (returning
   the null value @{term "None"} iff the string is not in the language of the regular expression,
   and returning a unique POSIX value iff the string \emph{is} in the language):
@@ -895,10 +906,10 @@
   \end{theorem}
 
   \begin{proof}
-  By induction on @{term s} using Lem.~\ref{lemmkeps} and \ref{Posix2}.\qed  
+  By induction on @{term s} using Lemma~\ref{lemmkeps} and \ref{Posix2}.\qed  
   \end{proof}
 
-  \noindent In (2) we further know by Thm.~\ref{posixdeterm} that the
+  \noindent In (2) we further know by Theorem~\ref{posixdeterm} that the
   value returned by the lexer must be unique.   A simple corollary 
   of our two theorems is:
 
@@ -909,14 +920,12 @@
   \end{tabular}
   \end{corollary}
 
-  \noindent
-  This concludes our
-  correctness proof. Note that we have not changed the algorithm of
-  Sulzmann and Lu,\footnote{All deviations we introduced are
-  harmless.} but introduced our own specification for what a correct
-  result---a POSIX value---should be. A strong point in favour of
-  Sulzmann and Lu's algorithm is that it can be extended in various
-  ways.
+  \noindent This concludes our correctness proof. Note that we have
+  not changed the algorithm of Sulzmann and Lu,\footnote{All
+  deviations we introduced are harmless.} but introduced our own
+  specification for what a correct result---a POSIX value---should be.
+  In the next section we show that our specification coincides with
+  another one given by Okui and Suzuki using a different technique.
 
 *}
 
@@ -940,7 +949,7 @@
   correctness of their automata-based algorithm for POSIX matching.
   Their ordering resembles some aspects of the one given by Sulzmann
   and Lu, but is quite different. To begin with, Okui and Suzuki
-  identify POSIX values as least, rather than maximal, elements in
+  identify POSIX values as minimal, rather than maximal, elements in
   their ordering. A more substantial difference is that the ordering
   by Okui and Suzuki uses \emph{positions} in order to identify and
   compare subvalues. Positions are lists of natural numbers. This
@@ -994,7 +1003,7 @@
   \end{center}
 
   \noindent 
-  In the last clause @{text len} stands for the length of a list. Clearly
+  whereby @{text len} stands for the length of a list. Clearly
   for every position inside a value there exists a subvalue at that position.
  
 
@@ -1027,25 +1036,28 @@
 
   \noindent where we take the length of the flattened value at
   position @{text p}, provided the position is inside @{text v}; if
-  not, then the norm is @{text "-1"}. The default is crucial
-  for the POSIX requirement of preferring a @{text Left}-value
-  over a @{text Right}-value (if they can match the same string---see
-  the Priority Rule from the Introduction). For this consider
+  not, then the norm is @{text "-1"}. The default for outside
+  positions is crucial for the POSIX requirement of preferring a
+  @{text Left}-value over a @{text Right}-value (if they can match the
+  same string---see the Priority Rule from the Introduction). For this
+  consider
 
   \begin{center}
   @{term "v == Left (Char x)"} \qquad and \qquad @{term "w == Right (Char x)"}
   \end{center}
 
   \noindent Both values match @{text x}, but at position @{text "[0]"}
-  the norm of @{term v} is @{text 1} (the subvalue matches @{text x}), but the
-  norm of @{text w} is @{text "-1"} (the position is outside @{text w}
-  according to how we defined the `inside' positions of @{text Left}-
-  and @{text Right}-values).  Of course at position @{text "[1]"}, the
-  norms @{term "pflat_len v [1]"} and @{term "pflat_len w [1]"} are
-  reversed, but the point is that subvalues will be analysed according to
-  lexicographically orderd positions.  This order, written @{term
-  "DUMMY \<sqsubset>lex DUMMY"}, can be conveniently formalised by
-  three inference rules
+  the norm of @{term v} is @{text 1} (the subvalue matches @{text x}),
+  but the norm of @{text w} is @{text "-1"} (the position is outside
+  @{text w} according to how we defined the `inside' positions of
+  @{text Left}- and @{text Right}-values).  Of course at position
+  @{text "[1]"}, the norms @{term "pflat_len v [1]"} and @{term
+  "pflat_len w [1]"} are reversed, but the point is that subvalues
+  will be analysed according to lexicographically ordered
+  positions. According to this ordering, the position @{text "[0]"}
+  takes precedence.  The lexicographic ordering of positions, written
+  @{term "DUMMY \<sqsubset>lex DUMMY"}, can be conveniently formalised
+  by three inference rules
 
   \begin{center}
   \begin{tabular}{ccc}
@@ -1056,7 +1068,7 @@
   \end{tabular}
   \end{center}
 
-  With the norm and lexicographic order of positions in place,
+  With the norm and lexicographic order in place,
   we can state the key definition of Okui and Suzuki
   \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller} than
   @{term "v\<^sub>2"} if and only if  $(i)$ the norm at position @{text p} is
@@ -1080,7 +1092,7 @@
   \emph{first distinct position} of @{text "v\<^sub>1"} and @{text
   "v\<^sub>2"}, where both values match strings of different length
   \cite{OkuiSuzuki2010}.  Since at @{text p} the values @{text
-  "v\<^sub>1"} and @{text "v\<^sub>2"} macth different strings, the
+  "v\<^sub>1"} and @{text "v\<^sub>2"} match different strings, the
   ordering is irreflexive. Derived from the definition above
   are the following two orderings:
   
@@ -1091,7 +1103,7 @@
   \end{tabular}
   \end{center}
 
- While we encountred a number of obstacles for establishing properties like
+ While we encountered a number of obstacles for establishing properties like
  transitivity for the ordering of Sulzmann and Lu (and which we failed
  to overcome), it is relatively straightforward to establish this
  property for the ordering by Okui and Suzuki.
@@ -1117,16 +1129,16 @@
  first assumption that @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>2 p'"}.
  But this means that @{term "p'"} must be in  @{term "Pos v\<^sub>2"} too.
  Hence we can use the second assumption and infer  @{term "pflat_len v\<^sub>2 p' = pflat_len v\<^sub>3 p'"}, which concludes
- this case with @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}. 
+ this case with @{term "v\<^sub>1 :\<sqsubset>val v\<^sub>3"}. 
  The reasoning in the other cases is similar.\qed
  \end{proof}
 
- \noindent We can show that @{term "DUMMY :\<sqsubseteq>val DUMMY"} is
- a partial order.  Okui and Suzuki also show that it is a linear order
- for lexical values \cite{OkuiSuzuki2010} of a given regular
- expression and given string, but we have not done this. It is not
- essential for our results. What we are going to show below is that
- for a given @{text r} and @{text s}, the ordering has a unique
+ \noindent It is straightforward to show that @{text "\<prec>"} and
+ $\preccurlyeq$ are partial orders.  Okui and Suzuki also show that it
+ is a linear order for lexical values \cite{OkuiSuzuki2010} of a given
+ regular expression and given string, but we have not done this. It is
+ not essential for our results. What we are going to show below is
+ that for a given @{text r} and @{text s}, the ordering has a unique
  minimal element on the set @{term "LV r s"}, which is the POSIX value
  we defined in the previous section.
 
@@ -1142,37 +1154,37 @@
  
 
   Next we establish how Okui and Suzuki's ordering relates to our
-  definition of POSIX values.  Given a POSIX value @{text "v\<^sub>1"}
+  definition of POSIX values.  Given a @{text POSIX} value @{text "v\<^sub>1"}
   for @{text r} and @{text s}, then any other lexical value @{text
   "v\<^sub>2"} in @{term "LV r s"} is greater or equal than @{text
-  "v\<^sub>1"}:
+  "v\<^sub>1"}, namely:
 
 
-  \begin{theorem}
+  \begin{theorem}\label{orderone}
   @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
   \end{theorem}
 
   \begin{proof} By induction on our POSIX rules. By
-  Thm~\ref{posixdeterm} and the definition of @{const LV}, it is clear
+  Theorem~\ref{posixdeterm} and the definition of @{const LV}, it is clear
   that @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same
   underlying string @{term s}.  The three base cases are
   straightforward: for example for @{term "v\<^sub>1 = Void"}, we have
   that @{term "v\<^sub>2 \<in> LV ONE []"} must also be of the form
   \mbox{@{term "v\<^sub>2 = Void"}}. Therefore we have @{term
   "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}.  The inductive cases for
-  @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ r\<^sub>1
-  r\<^sub>2"} are as follows:
+  @{text r} being of the form @{term "ALT r\<^sub>1 r\<^sub>2"} and
+  @{term "SEQ r\<^sub>1 r\<^sub>2"} are as follows:
 
 
   \begin{itemize} 
 
-  \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
+  \item[$\bullet$] Case @{text "P+L"} with @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
   \<rightarrow> (Left w\<^sub>1)"}: In this case @{term "v\<^sub>1 =
   Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either of the
   form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the
   latter case we can immediately conclude with @{term "v\<^sub>1
   :\<sqsubseteq>val v\<^sub>2"} since a @{text Left}-value with the
-  same underlying string @{text s} is always smaller or equal than a
+  same underlying string @{text s} is always smaller than a
   @{text Right}-value.  In the former case we have @{term "w\<^sub>2
   \<in> LV r\<^sub>1 s"} and can use the induction hypothesis to infer
   @{term "w\<^sub>1 :\<sqsubseteq>val w\<^sub>2"}. Because @{term
@@ -1180,24 +1192,25 @@
   @{text s}, we can conclude with @{term "Left w\<^sub>1
   :\<sqsubseteq>val Left w\<^sub>2"} by Prop ???.\smallskip
 
-  \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
-  \<rightarrow> (Right w\<^sub>1)"}: This case similar as the previous
-  case, except that we know that @{term "s \<notin> L
+  \item[$\bullet$] Case @{text "P+R"} with @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2)
+  \<rightarrow> (Right w\<^sub>1)"}: This case similar to the previous
+  case, except that we additionally know @{term "s \<notin> L
   r\<^sub>1"}. This is needed when @{term "v\<^sub>2"} is of the form
-  @{term "Left w\<^sub>2"}: since \mbox{@{term "flat v\<^sub>2 = flat
+  @{term "Left w\<^sub>2"}. Since \mbox{@{term "flat v\<^sub>2 = flat
   w\<^sub>2"} @{text "= s"}} and @{term "\<Turnstile> w\<^sub>2 :
-  r\<^sub>1"}, we can derive a contradiction using
-  Prop.~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1
+  r\<^sub>1"}, we can derive a contradiction for \mbox{@{term "s \<notin> L
+  r\<^sub>1"}} using
+  Proposition~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1
   :\<sqsubseteq>val v\<^sub>2"}}.\smallskip
 
-  \item[$\bullet$] Case @{term "(s\<^sub>1 @ s\<^sub>2) \<in> (SEQ
+  \item[$\bullet$] Case @{text "PS"} with @{term "(s\<^sub>1 @ s\<^sub>2) \<in> (SEQ
   r\<^sub>1 r\<^sub>2) \<rightarrow> (Seq w\<^sub>1 w\<^sub>2)"}: We
   can assume @{term "v\<^sub>2 = Seq (u\<^sub>1) (u\<^sub>2)"} with
   @{term "\<Turnstile> u\<^sub>1 : r\<^sub>1"} and \mbox{@{term
   "\<Turnstile> u\<^sub>2 : r\<^sub>2"}}. We have @{term "s\<^sub>1 @
   s\<^sub>2 = (flat u\<^sub>1) @ (flat u\<^sub>2)"}.  By the
-  side-condition on out @{text PS}-rule we know that either @{term
-  "s\<^sub>1 = flat u\<^sub>1"} or @{term "flat u\<^sub>1"} is a
+  side-condition of the @{text PS}-rule we know that either @{term
+  "s\<^sub>1 = flat u\<^sub>1"} or that @{term "flat u\<^sub>1"} is a
   strict prefix ??? of @{term "s\<^sub>1"}. In the latter case we can
   infer @{term "w\<^sub>1 :\<sqsubset>val u\<^sub>1"} by ???  and from
   this @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} by ???.  In the
@@ -1206,123 +1219,56 @@
   can use the induction hypotheses to infer @{term "w\<^sub>1
   :\<sqsubseteq>val u\<^sub>1"} and @{term "w\<^sub>2
   :\<sqsubseteq>val u\<^sub>2"}. By ??? we can again infer @{term
-  "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} and are done.
+  "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}.
 
   \end{itemize}
 
-  \noindent The case for @{text "P\<star>"} is similar to the @{text PS}-case.\qed
+  \noindent The case for @{text "P\<star>"} is similar to the @{text PS}-case and omitted.\qed
   \end{proof}
 
-  \noindent This theorem shows that our posix value @{text
-  "v\<^sub>1"} with @{term "s \<in> r \<rightarrow> v\<^sub>1"} is a
-  minimal element for the values in @{text "LV r s"}. By ??? we also
+  \noindent This theorem shows that our @{text POSIX} value for a
+  regular expression @{text r} and string @{term s} is in fact a
+  minimal element of the values in @{text "LV r s"}. By ??? we also
   know that any value in @{text "LV s' r"}, with @{term "s'"} being a
   prefix, cannot be smaller than @{text "v\<^sub>1"}. The next theorem
-  shows the opposite---namely any minimal element must be a @{text
-  POSIX}-value.  Given a lexical value @{text "v\<^sub>1"}, say, in
-  @{term "LV r s"}, for which there does not exists any smaller value
-  in @{term "LV r s"}, then this value is our @{text POSIX}-value:
+  shows the opposite---namely any minimal element in @{term "LV r s"}
+  must be a @{text POSIX} value. For this it helps that we proved in
+  the previous section that whenever a string @{term "s \<in> L r"} then 
+  a corresponding @{text POSIX} value exists and is a lexical value, 
+  see Theorem ??? and Lemma ???. 
 
   \begin{theorem}
-  @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
+  @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} 
   \end{theorem}
 
-  \begin{proof} By induction on @{text r}. The tree base cases are
-  again straightforward.  For example if @{term "v\<^sub>1 \<in> LV
-  ONE s"} then @{term "v\<^sub>1 = Void"} and @{term "s = []"}. We
-  know that @{term "[] \<in> ONE \<rightarrow> Void"} holds.  In the
-  cases for @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ
-  r\<^sub>1 r\<^sub>2"} we reason as follows:
-
-  \begin{itemize}
-
-  \item[$\bullet$] Case @{term "v\<^sub>1 \<in> LV (ALT r\<^sub>1
-  r\<^sub>2) s"} with @{term "v\<^sub>1 = Left w\<^sub>1"} and @{term
-  "w\<^sub>1 \<in> LV r\<^sub>1 s"}: In order to use the induction
-  hypothesis we need to infer 
-
-  \begin{center}
-  @{term "\<forall>v'
-  \<in> LV (ALT r\<^sub>1 r\<^sub>2) s. \<not> (v' :\<sqsubset>val
-  Left w\<^sub>1)"}
-  implies
-  @{term "\<forall>v' \<in> LV r\<^sub>1
-  s. \<not> (v' :\<sqsubset>val w\<^sub>1)"}
-  \end{center}
+  \begin{proof} 
+  If @{thm (prem 1) PosOrd_Posix[where ?v1.0="v\<^sub>1"]} then 
+  @{term "s \<in> L r"} by Proposition~\ref{inhabs}. Hence by Theorem~\ref{lexercorrect}(2) 
+  there exists a
+  @{text POSIX} value @{term "v\<^sub>P"} with @{term "s \<in> r \<rightarrow> v\<^sub>P"}
+  and by Lemma~\ref{LVposix} we also have @{term "v\<^sub>P \<in> LV r s"}.
+  By Theorem~\ref{orderone} we therefore have 
+  @{term "v\<^sub>P :\<sqsubseteq>val v\<^sub>1"}. If @{term "v\<^sub>P = v\<^sub>1"} then
+  we are done. Otherwise we have @{term "v\<^sub>P :\<sqsubset>val v\<^sub>1"} which 
+  however contradicts the second assumption and we are done too.\qed
+  \end{proof}
 
-  \noindent This can be done because of ?? we can infer that for every
-  @{text v'} in @{term "LV r\<^sub>1 s"} and @{term "v'
-  :\<sqsubset>val w\<^sub>1"} also @{term "Left v' :\<sqsubset>val
-  Left w\<^sub>1"} holds. This gives a contradiction. Consequently, we
-  can use the induction hypothesis to obtain @{term "s \<in> r\<^sub>1
-  \<rightarrow> w\<^sub>1"} and then conclude this case with @{term "s
-  \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> v\<^sub>1"}.\smallskip
+  \begin{corollary}
+  @{thm [mode=IfThen] Least_existence1}
+  \end{corollary}
 
-  \item[$\bullet$] Case @{term "v\<^sub>1 \<in> LV (ALT r\<^sub>1
-  r\<^sub>2) s"} with @{term "v\<^sub>1 = Right w\<^sub>1"} and @{term
-  "w\<^sub>1 \<in> LV r\<^sub>2 s"}: Like above we can use the 
-  induction hypothesis in order to infer @{term "s \<in> r\<^sub>2
-  \<rightarrow> w\<^sub>1"}. In order to conclude we still need to
-  obtain @{term "s \<notin> L r\<^sub>1"}. Let us suppose the opposite.
-  Then there is a @{term v'} such that @{term "v' \<in> LV r\<^sub>1 s"}
-  by Prop ??? and hence @{term "Left v' \<in> LV (ALT r\<^sub>1 r\<^sub>2) s"}.
-  But then we can use the second assumption of the theorem to infer that 
-  @{term "\<not>(Left v' :\<sqsubset>val Right w\<^sub>1)"}, which cannot be the case.
-  Therefore @{term "s \<notin> L r\<^sub>1"} must hold and we can also conclude in this
-  case.
-
- 
+  \noindent To sum up, we have shown that minimal elements according
+  to the ordering by Okui and Suzuki are exactly the @{text POSIX}
+  values we defined inductively in Section~\ref{posixsec} 
 
 
-  \end{itemize}
-
-  \end{proof}
-
-  To sum up, we have shown that minimal elements according to the ordering
-  by Okui and Suzuki are exactly the @{text POSIX}-values we defined inductively
-  in Section ???
+   IS THE minimal element unique? We have not shown totality.
 *}
 
-
-section {* Extensions and Optimisations*}
+section {* Optimisations *}
 
 text {*
 
-  If we are interested in tokenising a string, then we need to not just
-  split up the string into tokens, but also ``classify'' the tokens (for
-  example whether it is a keyword or an identifier). This can be done with
-  only minor modifications to the algorithm by introducing \emph{record
-  regular expressions} and \emph{record values} (for example
-  \cite{Sulzmann2014b}):
-
-  \begin{center}  
-  @{text "r :="}
-  @{text "..."} $\mid$
-  @{text "(l : r)"} \qquad\qquad
-  @{text "v :="}
-  @{text "..."} $\mid$
-  @{text "(l : v)"}
-  \end{center}
-  
-  \noindent where @{text l} is a label, say a string, @{text r} a regular
-  expression and @{text v} a value. All functions can be smoothly extended
-  to these regular expressions and values. For example \mbox{@{text "(l :
-  r)"}} is nullable iff @{term r} is, and so on. The purpose of the record
-  regular expression is to mark certain parts of a regular expression and
-  then record in the calculated value which parts of the string were matched
-  by this part. The label can then serve as classification for the tokens.
-  For this recall the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} for
-  keywords and identifiers from the Introduction. With the record regular
-  expression we can form \mbox{@{text "((key : r\<^bsub>key\<^esub>) + (id : r\<^bsub>id\<^esub>))\<^sup>\<star>"}}
-  and then traverse the calculated value and only collect the underlying
-  strings in record values. With this we obtain finite sequences of pairs of
-  labels and strings, for example
-
-  \[@{text "(l\<^sub>1 : s\<^sub>1), ..., (l\<^sub>n : s\<^sub>n)"}\]
-  
-  \noindent from which tokens with classifications (keyword-token,
-  identifier-token and so on) can be extracted.
-
   Derivatives as calculated by \Brz's method are usually more complex
   regular expressions than the initial one; the result is that the
   derivative-based matching and lexing algorithms are often abysmally slow.
@@ -1460,27 +1406,74 @@
   "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification
   function, that is @{term "snd (simp (der c r))"}.  We distinguish the cases
   whether (*) @{term "s \<in> L (der c r)"} or not. In the first case we
-  have by Thm.~\ref{lexercorrect}(2) a value @{term "v"} so that @{term
+  have by Theorem~\ref{lexercorrect}(2) a value @{term "v"} so that @{term
   "lexer (der c r) s = Some v"} and @{term "s \<in> der c r \<rightarrow> v"} hold.
-  By Lem.~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s
-  \<in> L r\<^sub>s"} holds.  Hence we know by Thm.~\ref{lexercorrect}(2) that
+  By Lemma~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s
+  \<in> L r\<^sub>s"} holds.  Hence we know by Theorem~\ref{lexercorrect}(2) that
   there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and
   @{term "s \<in> r\<^sub>s \<rightarrow> v'"}. From the latter we know by
-  Lem.~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds.
-  By the uniqueness of the POSIX relation (Thm.~\ref{posixdeterm}) we
+  Lemma~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds.
+  By the uniqueness of the POSIX relation (Theorem~\ref{posixdeterm}) we
   can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the 
   rectification function applied to @{term "v'"}
   produces the original @{term "v"}.  Now the case follows by the
   definitions of @{const lexer} and @{const slexer}.
 
   In the second case where @{term "s \<notin> L (der c r)"} we have that
-  @{term "lexer (der c r) s = None"} by Thm.~\ref{lexercorrect}(1).  We
-  also know by Lem.~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence
-  @{term "lexer r\<^sub>s s = None"} by Thm.~\ref{lexercorrect}(1) and
+  @{term "lexer (der c r) s = None"} by Theorem~\ref{lexercorrect}(1).  We
+  also know by Lemma~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence
+  @{term "lexer r\<^sub>s s = None"} by Theorem~\ref{lexercorrect}(1) and
   by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can
   conclude in this case too.\qed   
 
   \end{proof} 
+
+*}
+
+section {* Extensions*}
+
+text {*
+
+  A strong point in favour of
+  Sulzmann and Lu's algorithm is that it can be extended in various
+  ways.
+
+  If we are interested in tokenising a string, then we need to not just
+  split up the string into tokens, but also ``classify'' the tokens (for
+  example whether it is a keyword or an identifier). This can be done with
+  only minor modifications to the algorithm by introducing \emph{record
+  regular expressions} and \emph{record values} (for example
+  \cite{Sulzmann2014b}):
+
+  \begin{center}  
+  @{text "r :="}
+  @{text "..."} $\mid$
+  @{text "(l : r)"} \qquad\qquad
+  @{text "v :="}
+  @{text "..."} $\mid$
+  @{text "(l : v)"}
+  \end{center}
+  
+  \noindent where @{text l} is a label, say a string, @{text r} a regular
+  expression and @{text v} a value. All functions can be smoothly extended
+  to these regular expressions and values. For example \mbox{@{text "(l :
+  r)"}} is nullable iff @{term r} is, and so on. The purpose of the record
+  regular expression is to mark certain parts of a regular expression and
+  then record in the calculated value which parts of the string were matched
+  by this part. The label can then serve as classification for the tokens.
+  For this recall the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} for
+  keywords and identifiers from the Introduction. With the record regular
+  expression we can form \mbox{@{text "((key : r\<^bsub>key\<^esub>) + (id : r\<^bsub>id\<^esub>))\<^sup>\<star>"}}
+  and then traverse the calculated value and only collect the underlying
+  strings in record values. With this we obtain finite sequences of pairs of
+  labels and strings, for example
+
+  \[@{text "(l\<^sub>1 : s\<^sub>1), ..., (l\<^sub>n : s\<^sub>n)"}\]
+  
+  \noindent from which tokens with classifications (keyword-token,
+  identifier-token and so on) can be extracted.
+
+
 *}

--- a/thys/Positions.thy	Fri Aug 25 23:54:10 2017 +0200
+++ b/thys/Positions.thy	Sun Aug 27 00:03:31 2017 +0300
@@ -739,249 +739,24 @@
 by (metis Posix_PosOrd less_irrefl PosOrd_def 
     PosOrd_ex_eq_def PosOrd_ex_def PosOrd_trans)
 
-
-
-lemma PosOrd_Posix_Stars:
-  assumes "\<forall>v \<in> set vs. flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []"
-  and "\<not>(\<exists>vs2 \<in> LV (STAR r) (flat (Stars vs)). vs2 :\<sqsubset>val (Stars vs))"
-  shows "(flat (Stars vs)) \<in> (STAR r) \<rightarrow> Stars vs" 
-using assms
-proof(induct vs)
-  case Nil
-  show "flat (Stars []) \<in> STAR r \<rightarrow> Stars []"
-    by(simp add: Posix.intros)
-next
-  case (Cons v vs)
-  have IH: "\<lbrakk>\<forall>v\<in>set vs. flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []; 
-             \<not> (\<exists>vs2\<in>LV (STAR r) (flat (Stars vs)). vs2 :\<sqsubset>val Stars vs)\<rbrakk>
-             \<Longrightarrow> flat (Stars vs) \<in> STAR r \<rightarrow> Stars vs" by fact
-  have as2: "\<forall>v\<in>set (v # vs). flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []" by fact
-  have as3: "\<not> (\<exists>vs2\<in>LV (STAR r) (flat (Stars (v # vs))). vs2 :\<sqsubset>val Stars (v # vs))" by fact
-  have "flat v \<in> r \<rightarrow> v" "flat v \<noteq> []" using as2 by auto
-  moreover
-  have  "flat (Stars vs) \<in> STAR r \<rightarrow> Stars vs" 
-    proof (rule IH)
-      show "\<forall>v\<in>set vs. flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []" using as2 by simp
-    next 
-      show "\<not> (\<exists>vs2\<in>LV (STAR r) (flat (Stars vs)). vs2 :\<sqsubset>val Stars vs)" using as3
-        apply(auto)
-        apply(subst (asm) (2) LV_def)
-        apply(auto)
-        apply(erule Prf.cases)
-        apply(simp_all)
-        apply(drule_tac x="Stars (v # vs)" in bspec)
-        apply(simp add: LV_def)
-        using Posix_LV Prf.intros(6) calculation
-        apply(rule_tac Prf.intros)
-        apply(simp add:)
-        prefer 2
-        apply (simp add: PosOrd_StarsI2)
-        apply(drule Posix_LV) 
-        apply(simp add: LV_def)
-        done
-    qed
-  moreover
-  have "flat v \<noteq> []" using as2 by simp
-  moreover
-  have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = flat (Stars vs) \<and> flat v @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))" 
-   using as3
-   apply(auto)
-   apply(drule L_flat_Prf2)
-   apply(erule exE)
-   apply(simp only: L.simps[symmetric])
-   apply(drule L_flat_Prf2)
-   apply(erule exE)
-   apply(clarify)
-   apply(rotate_tac 5)
-   apply(erule Prf.cases)
-   apply(simp_all)
-   apply(clarify)
-   apply(drule_tac x="Stars (va#vs)" in bspec)
-   apply(auto simp add: LV_def)[1]   
-   apply(rule Prf.intros)
-   apply(simp)
-   by (simp add: PosOrd_StarsI PosOrd_shorterI)
-  ultimately show "flat (Stars (v # vs)) \<in> STAR r \<rightarrow> Stars (v # vs)"
-  by (simp add: Posix.intros)
-qed
-
-
-
-section {* The Smallest Value is indeed the Posix Value *}
-
 lemma PosOrd_Posix:
   assumes "v1 \<in> LV r s" "\<forall>v\<^sub>2 \<in> LV r s. \<not> v\<^sub>2 :\<sqsubset>val v1"
   shows "s \<in> r \<rightarrow> v1" 
-using assms
-proof(induct r arbitrary: s v1)
-  case (ZERO s v1)
-  have "v1 \<in> LV ZERO s" by fact
-  then show "s \<in> ZERO \<rightarrow> v1" unfolding LV_def
-    by (auto elim: Prf.cases)
-next 
-  case (ONE s v1)
-  have "v1 \<in> LV ONE s" by fact
-  then have "v1 = Void" "s = []" unfolding LV_def
-    by(auto elim: Prf.cases)
-  then show "s \<in> ONE \<rightarrow> v1" 
-    by(auto intro: Posix.intros)
-next 
-  case (CHAR c s v1)
-  have "v1 \<in> LV (CHAR c) s" by fact
-  then show "s \<in> CHAR c \<rightarrow> v1" unfolding LV_def
-    by (auto elim!: Prf.cases intro: Posix.intros)
-next
-  case (ALT r1 r2 s v1)
-  have IH1: "\<And>s v1. \<lbrakk>v1 \<in> LV r1 s; \<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r1 \<rightarrow> v1" by fact
-  have IH2: "\<And>s v1. \<lbrakk>v1 \<in> LV r2 s; \<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r2 \<rightarrow> v1" by fact
-  have as1: "\<forall>v2 \<in> LV (ALT r1 r2) s. \<not> v2 :\<sqsubset>val v1" by fact
-  have as2: "v1 \<in> LV (ALT r1 r2) s" by fact
-  then consider 
-     (Left) v1' where
-        "v1 = Left v1'" "s = flat v1'"
-        "v1' \<in> LV r1 s"
-  |  (Right) v1' where
-        "v1 = Right v1'" "s = flat v1'"
-        "v1' \<in> LV r2 s"
-  unfolding LV_def by (auto elim: Prf.cases)
-  then show "s \<in> ALT r1 r2 \<rightarrow> v1"
-   proof (cases)
-     case (Left v1')
-     have "v1' \<in> LV r1 s" using Left(3) .
-     moreover
-     have "\<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1'" using as1
-       unfolding Left(1,2) unfolding LV_def 
-       using Prf.intros(2) PosOrd_Left_eq by force  
-     ultimately have "s \<in> r1 \<rightarrow> v1'" using IH1 by simp
-     then have "s \<in> ALT r1 r2 \<rightarrow> Left v1'" by (rule Posix.intros)
-     then show "s \<in> ALT r1 r2 \<rightarrow> v1" using Left by simp
-   next
-     case (Right v1')
-     have "v1' \<in> LV r2 s" using Right(3) .
-     moreover
-     have "\<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1'" using as1
-       unfolding LV_def Right using Prf.intros(3) PosOrd_RightI by force   
-     ultimately have "s \<in> r2 \<rightarrow> v1'" using IH2 by simp
-     moreover 
-       { assume "s \<in> L r1"
-         then obtain v' where "v' \<in>  LV r1 s"
-            unfolding LV_def using L_flat_Prf2 by blast
-         then have "Left v' \<in>  LV (ALT r1 r2) s" 
-            unfolding LV_def by (auto intro: Prf.intros)
-         with as1 have "\<not> (Left v' :\<sqsubset>val Right v1') \<and> (flat v' = s)" 
-            unfolding LV_def Right 
-            by (auto)
-         then have False using PosOrd_Left_Right Right by blast  
-       }
-     then have "s \<notin> L r1" by rule 
-     ultimately have "s \<in> ALT r1 r2 \<rightarrow> Right v1'" by (rule Posix.intros)
-     then show "s \<in> ALT r1 r2 \<rightarrow> v1" using Right by simp
-  qed
-next 
-  case (SEQ r1 r2 s v1)
-  have IH1: "\<And>s v1. \<lbrakk>v1 \<in> LV r1 s; \<forall>v2 \<in> LV r1 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r1 \<rightarrow> v1" by fact
-  have IH2: "\<And>s v1. \<lbrakk>v1 \<in> LV r2 s; \<forall>v2 \<in> LV r2 s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r2 \<rightarrow> v1" by fact
-  have as1: "\<forall>v2\<in>LV (SEQ r1 r2) s. \<not> v2 :\<sqsubset>val v1" by fact
-  have as2: "v1 \<in> LV (SEQ r1 r2) s" by fact
-  then obtain 
-    v1a v1b where eqs:
-        "v1 = Seq v1a v1b" "s = flat v1a @ flat v1b"
-        "v1a \<in> LV r1 (flat v1a)" "v1b \<in> LV r2 (flat v1b)" 
-  unfolding LV_def by(auto elim: Prf.cases)
-  have "\<forall>v2 \<in> LV r1 (flat v1a). \<not> v2 :\<sqsubset>val v1a"
-    proof
-      fix v2
-      assume "v2 \<in> LV r1 (flat v1a)"
-      with eqs(2,4) have "Seq v2 v1b \<in> LV (SEQ r1 r2) s"
-         by (simp add: LV_def Prf.intros(1))
-      with as1 have "\<not> Seq v2 v1b :\<sqsubset>val Seq v1a v1b \<and> flat (Seq v2 v1b) = flat (Seq v1a v1b)" 
-         using eqs by (simp add: LV_def) 
-      then show "\<not> v2 :\<sqsubset>val v1a"
-         using PosOrd_SeqI1 by blast
-    qed     
-  then have "flat v1a \<in> r1 \<rightarrow> v1a" using IH1 eqs by simp
+proof -
+  have "s \<in> L r" using assms(1) unfolding LV_def
+    using L_flat_Prf1 by blast 
+  then obtain vposix where vp: "s \<in> r \<rightarrow> vposix"
+    using lexer_correct_Some by blast 
+  with assms(1) have "vposix :\<sqsubseteq>val v1" by (simp add: Posix_PosOrd) 
+  then have "vposix = v1 \<or> vposix :\<sqsubset>val v1" unfolding PosOrd_ex_eq2 by auto
   moreover
-  have "\<forall>v2 \<in> LV r2 (flat v1b). \<not> v2 :\<sqsubset>val v1b"
-    proof 
-      fix v2
-      assume "v2 \<in> LV r2 (flat v1b)"
-      with eqs(2,3,4) have "Seq v1a v2 \<in> LV (SEQ r1 r2) s"
-         by (simp add: LV_def Prf.intros)
-      with as1 have "\<not> Seq v1a v2 :\<sqsubset>val Seq v1a v1b \<and> flat v2 = flat v1b" 
-         using eqs by (simp add: LV_def) 
-      then show "\<not> v2 :\<sqsubset>val v1b"
-         using PosOrd_SeqI2 by auto
-    qed     
-  then have "flat v1b \<in> r2 \<rightarrow> v1b" using IH2 eqs by simp    
-  moreover
-  have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = flat v1b \<and> flat v1a @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" 
-  proof
-     assume "\<exists>s3 s4. s3 \<noteq> [] \<and> s3 @ s4 = flat v1b \<and> flat v1a @ s3 \<in> L r1 \<and> s4 \<in> L r2"
-     then obtain s3 s4 where q1: "s3 \<noteq> [] \<and> s3 @ s4 = flat v1b \<and> flat v1a @ s3 \<in> L r1 \<and> s4 \<in> L r2" by blast
-     then obtain vA vB where q2: "flat vA = flat v1a @ s3" "\<Turnstile> vA : r1" "flat vB = s4" "\<Turnstile> vB : r2"
-        using L_flat_Prf2 by blast
-     then have "Seq vA vB \<in> LV (SEQ r1 r2) s" unfolding eqs using q1
-       by (auto simp add: LV_def intro!: Prf.intros)
-     with as1 have "\<not> Seq vA vB :\<sqsubset>val Seq v1a v1b" unfolding eqs by auto
-     then have "\<not> vA :\<sqsubset>val v1a \<and> length (flat vA) > length (flat v1a)" using q1 q2 PosOrd_SeqI1 by auto 
-     then show "False"
-       using PosOrd_shorterI by blast  
-  qed
-  ultimately
-  show "s \<in> SEQ r1 r2 \<rightarrow> v1" unfolding eqs
-    by (rule Posix.intros)
-next
-   case (STAR r s v1)
-   have IH: "\<And>s v1. \<lbrakk>v1 \<in> LV r s; \<forall>v2\<in>LV r s. \<not> v2 :\<sqsubset>val v1\<rbrakk> \<Longrightarrow> s \<in> r \<rightarrow> v1" by fact
-   have as1: "\<forall>v2\<in>LV (STAR r) s. \<not> v2 :\<sqsubset>val v1" by fact
-   have as2: "v1 \<in> LV (STAR r) s" by fact
-   then obtain 
-    vs where eqs:
-        "v1 = Stars vs" "s = flat (Stars vs)"
-        "\<forall>v \<in> set vs. v \<in> LV r (flat v)"
-        unfolding LV_def by (auto elim: Prf.cases)
-   have "\<forall>v\<in>set vs. flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []" 
-     proof 
-        fix v
-        assume a: "v \<in> set vs"
-        then obtain pre post where e: "vs = pre @ [v] @ post"
-          by (metis append_Cons append_Nil in_set_conv_decomp_first)
-        then have q: "\<forall>v2\<in>LV (STAR r) s. \<not> v2 :\<sqsubset>val Stars (pre @ [v] @ post)" 
-          using as1 unfolding eqs by simp
-        have "\<forall>v2\<in>LV r (flat v). \<not> v2 :\<sqsubset>val v" unfolding eqs 
-          proof (rule ballI, rule notI) 
-             fix v2
-             assume w: "v2 :\<sqsubset>val v"
-             assume "v2 \<in> LV r (flat v)"
-             then have "Stars (pre @ [v2] @ post) \<in> LV (STAR r) s" 
-                 using as2 unfolding e eqs
-                 apply(auto simp add: LV_def intro!: Prf.intros elim: Prf_elims dest: Prf_Stars_appendE)
-                 apply(auto dest!: Prf_Stars_appendE elim: Prf.cases)
-                 done
-             then have  "\<not> Stars (pre @ [v2] @ post) :\<sqsubset>val Stars (pre @ [v] @ post)"
-                using q by simp     
-             with w show "False"
-                using LV_def \<open>v2 \<in> LV r (flat v)\<close> append_Cons flat.simps(7) mem_Collect_eq 
-                PosOrd_StarsI PosOrd_Stars_appendI by auto
-          qed     
-        with IH
-        show "flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []" using a as2 unfolding eqs LV_def
-        by (auto elim: Prf.cases)
-     qed
-   moreover
-   have "\<not> (\<exists>vs2\<in>LV (STAR r) (flat (Stars vs)). vs2 :\<sqsubset>val Stars vs)" 
-     proof 
-       assume "\<exists>vs2 \<in> LV (STAR r) (flat (Stars vs)). vs2 :\<sqsubset>val Stars vs"
-       then obtain vs2 where "\<Turnstile> Stars vs2 : STAR r" "flat (Stars vs2) = flat (Stars vs)"
-                             "Stars vs2 :\<sqsubset>val Stars vs" 
-         unfolding LV_def by (force elim: Prf_elims intro: Prf.intros)
-       then show "False" using as1 unfolding eqs
-         by (auto simp add: LV_def)
-     qed
-   ultimately have "flat (Stars vs) \<in> STAR r \<rightarrow> Stars vs"
-     thm PosOrd_Posix_Stars
-     by (rule PosOrd_Posix_Stars)
-   then show "s \<in> STAR r \<rightarrow> v1" unfolding eqs .
+    { assume "vposix :\<sqsubset>val v1"
+      moreover
+      have "vposix \<in> LV r s" using vp 
+         using Posix_LV by blast 
+      ultimately have "False" using assms(2) by blast
+    }
+  ultimately show "s \<in> r \<rightarrow> v1" using vp by blast
 qed
 
 lemma Least_existence:

--- a/thys/Spec.thy	Fri Aug 25 23:54:10 2017 +0200
+++ b/thys/Spec.thy	Sun Aug 27 00:03:31 2017 +0300
@@ -543,34 +543,15 @@
 
 
 text {*
-  Our POSIX value is a canonical value.
+  Our POSIX value is a lexical value.
 *}
 
 lemma Posix_LV:
   assumes "s \<in> r \<rightarrow> v"
   shows "v \<in> LV r s"
-using assms
+using assms unfolding LV_def
 apply(induct rule: Posix.induct)
-apply(auto simp add: LV_def intro: Prf.intros elim: Prf.cases)
-apply(rotate_tac 5)
-apply(erule Prf.cases)
-apply(simp_all)
-apply(rule Prf.intros)
-apply(simp_all)
+apply(auto simp add: intro!: Prf.intros elim!: Prf_elims)
 done
 
-(*
-lemma test2: 
-  assumes "\<forall>v \<in> set vs. flat v \<in> r \<rightarrow> v \<and> flat v \<noteq> []"
-  shows "(Stars vs) \<in> LV (STAR r) (flat (Stars vs))" 
-using assms
-apply(induct vs)
-apply(auto simp add: LV_def)
-apply(rule Prf.intros)
-apply(simp)
-apply(rule Prf.intros)
-apply(simp_all)
-by (metis (no_types, lifting) LV_def Posix_LV mem_Collect_eq)
-*)
-
 end
\ No newline at end of file

author	Christian Urban <urbanc@in.tum.de>
	Sun, 27 Aug 2017 00:03:31 +0300
changeset 272	f16019b11179
parent 271	f46ebc84408d
child 273	e85099ac4c6c

thys/Journal/Paper.thy		file \| annotate \| diff \| comparison \| revisions
thys/Positions.thy		file \| annotate \| diff \| comparison \| revisions
thys/Spec.thy		file \| annotate \| diff \| comparison \| revisions