thys/Journal/Paper.thy
changeset 268 6746f5e1f1f8
parent 267 32b222d77fa0
child 269 12772d537b71
equal deleted inserted replaced
267:32b222d77fa0 268:6746f5e1f1f8
    61   length ("len _" [73] 73) and
    61   length ("len _" [73] 73) and
    62   intlen ("len _" [73] 73) and
    62   intlen ("len _" [73] 73) and
    63   set ("_" [73] 73) and
    63   set ("_" [73] 73) and
    64  
    64  
    65   Prf ("_ : _" [75,75] 75) and
    65   Prf ("_ : _" [75,75] 75) and
    66   CPrf ("_ \<^raw:\mbox{\textbf{\textlengthmark}}> _" [75,75] 75) and
       
    67   Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
    66   Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
    68  
    67  
    69   lexer ("lexer _ _" [78,78] 77) and 
    68   lexer ("lexer _ _" [78,78] 77) and 
    70   F_RIGHT ("F\<^bsub>Right\<^esub> _") and
    69   F_RIGHT ("F\<^bsub>Right\<^esub> _") and
    71   F_LEFT ("F\<^bsub>Left\<^esub> _") and  
    70   F_LEFT ("F\<^bsub>Left\<^esub> _") and  
    81   lex_list ("_ \<prec>\<^bsub>lex\<^esub> _") and
    80   lex_list ("_ \<prec>\<^bsub>lex\<^esub> _") and
    82   PosOrd ("_ \<prec>\<^bsub>_\<^esub> _" [77,77,77] 77) and
    81   PosOrd ("_ \<prec>\<^bsub>_\<^esub> _" [77,77,77] 77) and
    83   PosOrd_ex ("_ \<prec> _" [77,77] 77) and
    82   PosOrd_ex ("_ \<prec> _" [77,77] 77) and
    84   PosOrd_ex_eq ("_ \<^raw:\mbox{$\preccurlyeq$}> _" [77,77] 77) and
    83   PosOrd_ex_eq ("_ \<^raw:\mbox{$\preccurlyeq$}> _" [77,77] 77) and
    85   pflat_len ("\<parallel>_\<parallel>\<^bsub>_\<^esub>") and
    84   pflat_len ("\<parallel>_\<parallel>\<^bsub>_\<^esub>") and
    86   nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) 
    85   nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) and
    87 
    86 
    88   (*
    87   DUMMY ("\<^raw:\underline{\hspace{2mm}}>")
    89   ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and
    88 
    90   ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77)
       
    91   *)
       
    92 
    89 
    93 definition 
    90 definition 
    94   "match r s \<equiv> nullable (ders s r)"
    91   "match r s \<equiv> nullable (ders s r)"
    95 
    92 
    96 
    93 
    97 lemma CV_STAR_ONE_empty: 
    94 lemma LV_STAR_ONE_empty: 
    98   shows "CV (STAR ONE) [] = {Stars []}"
    95   shows "LV (STAR ONE) [] = {Stars []}"
    99 by(auto simp add: CV_def elim: CPrf.cases intro: CPrf.intros)
    96 by(auto simp add: LV_def elim: Prf.cases intro: Prf.intros)
   100 
    97 
   101 
    98 
   102 
    99 
   103 (*
   100 (*
   104 comments not implemented
   101 comments not implemented
   137 
   134 
   138 If a regular expression matches a string, then in general there is more than
   135 If a regular expression matches a string, then in general there is more than
   139 one way of how the string is matched. There are two commonly used
   136 one way of how the string is matched. There are two commonly used
   140 disambiguation strategies to generate a unique answer: one is called GREEDY
   137 disambiguation strategies to generate a unique answer: one is called GREEDY
   141 matching \cite{Frisch2004} and the other is POSIX
   138 matching \cite{Frisch2004} and the other is POSIX
   142 matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2013,Sulzmann2014,Vansummeren2006}. For example consider
   139 matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. For example consider
   143 the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT
   140 the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT
   144 (ALT x y) xy)"}}. Either the string can be matched in two `iterations' by
   141 (ALT x y) xy)"}}. Either the string can be matched in two `iterations' by
   145 the single letter-regular expressions @{term x} and @{term y}, or directly
   142 the single letter-regular expressions @{term x} and @{term y}, or directly
   146 in one iteration by @{term xy}. The first case corresponds to GREEDY
   143 in one iteration by @{term xy}. The first case corresponds to GREEDY
   147 matching, which first matches with the left-most symbol and only matches the
   144 matching, which first matches with the left-most symbol and only matches the
   148 next symbol in case of a mismatch (this is greedy in the sense of preferring
   145 next symbol in case of a mismatch (this is greedy in the sense of preferring
   149 instant gratification to delayed repletion). The second case is POSIX
   146 instant gratification to delayed repletion). The second case is POSIX
   150 matching, which prefers the longest match.
   147 matching, which prefers the longest match.
   151 
   148 
   152 In the context of lexing, where an input string needs to be split up into a
   149 In the context of lexing, where an input string needs to be split up
   153 sequence of tokens, POSIX is the more natural disambiguation strategy for
   150 into a sequence of tokens, POSIX is the more natural disambiguation
   154 what programmers consider basic syntactic building blocks in their programs.
   151 strategy for what programmers consider basic syntactic building blocks
   155 These building blocks are often specified by some regular expressions, say
   152 in their programs.  These building blocks are often specified by some
   156 @{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and
   153 regular expressions, say @{text "r\<^bsub>key\<^esub>"} and @{text
   157 identifiers, respectively. There are a few underlying (informal) rules behind
   154 "r\<^bsub>id\<^esub>"} for recognising keywords and identifiers,
   158 tokenising a string in a POSIX \cite{POSIX} fashion according to a collection of regular
   155 respectively. There are a few underlying (informal) rules behind
   159 expressions:
   156 tokenising a string in a POSIX \cite{POSIX} fashion according to a
       
   157 collection of regular expressions:
   160 
   158 
   161 \begin{itemize} 
   159 \begin{itemize} 
   162 \item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}):
   160 \item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}):
   163 The longest initial substring matched by any regular expression is taken as
   161 The longest initial substring matched by any regular expression is taken as
   164 next token.\smallskip
   162 next token.\smallskip
   169 
   167 
   170 \item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall 
   168 \item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall 
   171 not match an empty string unless this is the only match for the repetition.\smallskip
   169 not match an empty string unless this is the only match for the repetition.\smallskip
   172 
   170 
   173 \item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to 
   171 \item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to 
   174 be longer than no match at all.\marginpar{Explain its purpose}
   172 be longer than no match at all.
   175 \end{itemize}
   173 \end{itemize}
   176 
   174 
   177 \noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords
   175 \noindent Consider for example a regular expression @{text
   178 such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"}
   176 "r\<^bsub>key\<^esub>"} for recognising keywords such as @{text "if"},
       
   177 @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"}
   179 recognising identifiers (say, a single character followed by
   178 recognising identifiers (say, a single character followed by
   180 characters or numbers).  Then we can form the regular expression
   179 characters or numbers).  Then we can form the regular expression
   181 @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use POSIX matching to tokenise strings,
   180 @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"}
   182 say @{text "iffoo"} and @{text "if"}.  For @{text "iffoo"} we obtain
   181 and use POSIX matching to tokenise strings, say @{text "iffoo"} and
   183 by the Longest Match Rule a single identifier token, not a keyword
   182 @{text "if"}.  For @{text "iffoo"} we obtain by the Longest Match Rule
   184 followed by an identifier. For @{text "if"} we obtain by the Priority
   183 a single identifier token, not a keyword followed by an
   185 Rule a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"}
   184 identifier. For @{text "if"} we obtain by the Priority Rule a keyword
   186 matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} matches @{text "iffoo"}, respectively @{text "if"}, in exactly one
   185 token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"}
   187 `iteration' of the star.
   186 matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> +
       
   187 r\<^bsub>id\<^esub>)\<^sup>\<star>"} matches @{text "iffoo"},
       
   188 respectively @{text "if"}, in exactly one `iteration' of the star. The
       
   189 Empty String Rule is for cases where @{text
       
   190 "(a\<^sup>\<star>)\<^sup>\<star>"}, for example, matches against the
       
   191 string @{text "bc"}. Then the longest initial matched substring is the
       
   192 empty string, which is matched by both the whole regular expression
       
   193 and the parenthesised sub-expression.
   188 
   194 
   189 
   195 
   190 One limitation of Brzozowski's matcher is that it only generates a
   196 One limitation of Brzozowski's matcher is that it only generates a
   191 YES/NO answer for whether a string is being matched by a regular
   197 YES/NO answer for whether a string is being matched by a regular
   192 expression.  Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher
   198 expression.  Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher
   203 r}, and to show that (once a string to be matched is chosen) there is
   209 r}, and to show that (once a string to be matched is chosen) there is
   204 a maximum element and that it is computed by their derivative-based
   210 a maximum element and that it is computed by their derivative-based
   205 algorithm. This proof idea is inspired by work of Frisch and Cardelli
   211 algorithm. This proof idea is inspired by work of Frisch and Cardelli
   206 \cite{Frisch2004} on a GREEDY regular expression matching
   212 \cite{Frisch2004} on a GREEDY regular expression matching
   207 algorithm. However, we were not able to establish transitivity and
   213 algorithm. However, we were not able to establish transitivity and
   208 totality for the ``order relation'' by Sulzmann and Lu. \marginpar{We probably drop this section} 
   214 totality for the ``order relation'' by Sulzmann and Lu. 
   209 ??In Section
   215 There are some inherent problems with their approach (of
   210 \ref{argu} we identify some inherent problems with their approach (of
       
   211 which some of the proofs are not published in \cite{Sulzmann2014});
   216 which some of the proofs are not published in \cite{Sulzmann2014});
   212 perhaps more importantly, we give a simple inductive (and
   217 perhaps more importantly, we give in this paper a simple inductive (and
   213 algorithm-independent) definition of what we call being a {\em POSIX
   218 algorithm-independent) definition of what we call being a {\em POSIX
   214 value} for a regular expression @{term r} and a string @{term s}; we
   219 value} for a regular expression @{term r} and a string @{term s}; we
   215 show that the algorithm computes such a value and that such a value is
   220 show that the algorithm computes such a value and that such a value is
   216 unique. Our proofs are both done by hand and checked in Isabelle/HOL.  The
   221 unique. Our proofs are both done by hand and checked in Isabelle/HOL.  The
   217 experience of doing our proofs has been that this mechanical checking
   222 experience of doing our proofs has been that this mechanical checking
   250 extended version already includes remarks in the appendix that their
   255 extended version already includes remarks in the appendix that their
   251 informal proof contains gaps, and possible fixes are not fully worked out.}
   256 informal proof contains gaps, and possible fixes are not fully worked out.}
   252 Our specification of a POSIX value consists of a simple inductive definition
   257 Our specification of a POSIX value consists of a simple inductive definition
   253 that given a string and a regular expression uniquely determines this value.
   258 that given a string and a regular expression uniquely determines this value.
   254 We also show that our definition is equivalent to an ordering 
   259 We also show that our definition is equivalent to an ordering 
   255 of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2013}.
   260 of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2010}.
   256 Derivatives as calculated by Brzozowski's method are usually more complex
   261 Derivatives as calculated by Brzozowski's method are usually more complex
   257 regular expressions than the initial one; various optimisations are
   262 regular expressions than the initial one; various optimisations are
   258 possible. We prove the correctness when simplifications of @{term "ALT ZERO
   263 possible. We prove the correctness when simplifications of @{term "ALT ZERO
   259 r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to
   264 r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to
   260 @{term r} are applied.
   265 @{term r} are applied.
   400 
   405 
   401 section {* POSIX Regular Expression Matching\label{posixsec} *}
   406 section {* POSIX Regular Expression Matching\label{posixsec} *}
   402 
   407 
   403 text {* 
   408 text {* 
   404 
   409 
   405   The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to use
   410   There have been many previous works that use values for encoding 
   406   values for encoding \emph{how} a regular expression matches a string
   411   \emph{how} a regular expression matches a string.
   407   and then define a function on values that mirrors (but inverts) the
   412   The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to 
       
   413   define a function on values that mirrors (but inverts) the
   408   construction of the derivative on regular expressions. \emph{Values}
   414   construction of the derivative on regular expressions. \emph{Values}
   409   are defined as the inductive datatype
   415   are defined as the inductive datatype
   410 
   416 
   411   \begin{center}
   417   \begin{center}
   412   @{text "v :="}
   418   @{text "v :="}
   438   @{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\
   444   @{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\
   439   \end{tabular}
   445   \end{tabular}
   440   \end{center}
   446   \end{center}
   441 
   447 
   442   \noindent Sulzmann and Lu also define inductively an inhabitation relation
   448   \noindent Sulzmann and Lu also define inductively an inhabitation relation
   443   that associates values to regular expressions
   449   that associates values to regular expressions. We define this relation as 
   444 
   450   follows:\footnote{Note that the rule for @{term Stars} differs from our 
   445   \begin{center}
   451   erlier paper \cite{AusafDyckhoffUrban2016}. There we used the original
   446   \begin{tabular}{c}
   452   definition by Sulzmann and Lu which does not require that the values @{term "v \<in> set vs"}
       
   453   flatten to a non-empty string. The reason for introducing the 
       
   454   more restricted version of lexical values is convenience later 
       
   455   on when reasoning about 
       
   456   an ordering relation for values.} 
       
   457 
       
   458   \begin{center}
       
   459   \begin{tabular}{c@ {\hspace{12mm}}c}
   447   \\[-8mm]
   460   \\[-8mm]
   448   @{thm[mode=Axiom] Prf.intros(4)} \qquad
   461   @{thm[mode=Axiom] Prf.intros(4)} & 
   449   @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm]
   462   @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm]
   450   @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad 
   463   @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} &
   451   @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm]
   464   @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm]
   452   @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} \qquad
   465   @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}  &
   453   @{thm[mode=Rule] Prf.intros(6)[of "vs"]}
   466   @{thm[mode=Rule] Prf.intros(6)[of "vs"]}
   454   \end{tabular}
   467   \end{tabular}
   455   \end{center}
   468   \end{center}
   456 
   469 
   457   \noindent 
   470   \noindent where in the clause for @{const "Stars"} we use the
   458   where in the clause for @{const "Stars"} we use the notation @{term "v \<in> set vs"}
   471   notation @{term "v \<in> set vs"} for indicating that @{text v} is a
   459   for indicating that @{text v} is a member in the list @{text vs}.
   472   member in the list @{text vs}.  We require in this rule that every
   460   Note that no values are associated with the regular expression
   473   value in @{term vs} flattens to a non-empty string. The idea is that
   461   @{term ZERO}, and that the only value associated with the regular
   474   @{term "Stars"}-values satisfy the informal Star Rule (see Introduction)
   462   expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text
   475   where the $^\star$ does not match the empty string unless this is
   463   "Void"}. It is routine to establish how values ``inhabiting'' a regular
   476   the only match for the repetition.  Note also that no values are
   464   expression correspond to the language of a regular expression, namely
   477   associated with the regular expression @{term ZERO}, and that the
       
   478   only value associated with the regular expression @{term ONE} is
       
   479   @{term Void}.  It is routine to establish how values ``inhabiting''
       
   480   a regular expression correspond to the language of a regular
       
   481   expression, namely
   465 
   482 
   466   \begin{proposition}
   483   \begin{proposition}
   467   @{thm L_flat_Prf}
   484   @{thm L_flat_Prf}
   468   \end{proposition}
   485   \end{proposition}
   469 
   486 
   470   \noindent
   487   \noindent
   471   Given a regular expression @{text r} and a string @{text s}, we can define the 
   488   Given a regular expression @{text r} and a string @{text s}, we define the 
   472   set of all \emph{Lexical Values} inhabited by @{text r} with the underlying string 
   489   set of all \emph{Lexical Values} inhabited by @{text r} with the underlying string 
   473   being @{text s} by
   490   being @{text s}:\footnote{Okui and Suzuki refer to our lexical values 
       
   491   as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic
       
   492   values} by Cardelli and Frisch \cite{Frisch2004} is similar, but not identical
       
   493   to our lexical values.}
   474   
   494   
   475   \begin{center}
   495   \begin{center}
   476   @{thm LV_def}
   496   @{thm LV_def}
   477   \end{center}
   497   \end{center}
   478 
   498 
   479   \noindent However, later on it will sometimes be necessary to
   499   \noindent The main property of @{term "LV r s"} is that it is alway finite.
   480   restrict the set of lexical values to a subset called
   500 
   481   \emph{Canonical Values}. The idea of canonical values is that they
   501   \begin{proposition}
   482   satisfy the Star Rule (see Introduction) where the $^\star$ does not
   502   @{thm LV_finite}
   483   match the empty string unless this is the only match for the
   503   \end{proposition}
   484   repetition.  One way to define canonical values formally is to use a
   504 
   485   stronger inhabitation relation, written @{term "\<Turnstile> DUMMY : DUMMY"}, which has the same rules as @{term
   505   \noindent This finiteness property does not hold in general if we
   486   "\<turnstile> DUMMY : DUMMY"} shown above, except that the rule for 
   506   remove the side-condition about @{term "flat v \<noteq> []"} in the
   487   @{term Stars} has
   507   @{term Stars}-rule above. For example using Sulzmann and Lu's
   488   the additional side-condition of flattened values not being the
   508   less restrictive definition, @{term "LV (STAR ONE) []"} would contain
   489   empty string, namely
   509   infinitely many values, but according to our more restricted
   490 
   510   definition @{thm LV_STAR_ONE_empty}.
   491   \begin{center}
   511 
   492   @{thm [mode=Rule] CPrf.intros(6)}
   512   If a regular expression @{text r} matches a string @{text s}, then
   493   \end{center}
   513   generally the set @{term "LV r s"} is not just a singleton set.  In
   494 
   514   case of POSIX matching the problem is to calculate the unique lexical value
   495   \noindent
   515   that satisfies the (informal) POSIX rules from the Introduction.
   496   With this we can define
   516   Graphically the POSIX value calculation algorithm by Sulzmann and Lu
   497   
   517   can be illustrated by the picture in Figure~\ref{Sulz} where the
   498   \begin{center}
   518   path from the left to the right involving @{term
   499   @{thm CV_def}
   519   derivatives}/@{const nullable} is the first phase of the algorithm
   500   \end{center}
   520   (calculating successive \Brz's derivatives) and @{const
   501 
   521   mkeps}/@{text inj}, the path from right to left, the second
   502   \noindent
   522   phase. This picture shows the steps required when a regular
   503   Clearly we have @{thm LV_CV_subset}.
   523   expression, say @{text "r\<^sub>1"}, matches the string @{term
   504   The main point of canonical values is that for every regular expression @{text r} and every
   524   "[a,b,c]"}. We first build the three derivatives (according to
   505   string @{text s}, the set @{term "CV r s"} is finite.
   525   @{term a}, @{term b} and @{term c}). We then use @{const nullable}
   506 
   526   to find out whether the resulting derivative regular expression
   507   \begin{lemma}
   527   @{term "r\<^sub>4"} can match the empty string. If yes, we call the
   508   @{thm CV_finite}
   528   function @{const mkeps} that produces a value @{term "v\<^sub>4"}
   509   \end{lemma}
   529   for how @{term "r\<^sub>4"} can match the empty string (taking into
   510 
   530   account the POSIX constraints in case there are several ways). This
   511   \noindent This finiteness property does not generally hold for lexical values where
   531   function is defined by the clauses:
   512   for example @{term "LV (STAR ONE) []"} contains infinitely many
       
   513   values, but @{thm CV_STAR_ONE_empty}. However, if a regular
       
   514   expression @{text r} matches a string @{text s}, then in general the
       
   515   set @{term "CV r s"} is not just a
       
   516   singleton set.  In case of POSIX matching the problem is to
       
   517   calculate the unique value that satisfies the (informal) POSIX rules
       
   518   from the Introduction. It will turn out that this POSIX value is in fact a
       
   519   canonical value.
       
   520 
       
   521   Graphically the POSIX value calculation algorithm by
       
   522   Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz}
       
   523   where the path from the left to the right involving @{term derivatives}/@{const
       
   524   nullable} is the first phase of the algorithm (calculating successive
       
   525   \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to
       
   526   left, the second phase. This picture shows the steps required when a
       
   527   regular expression, say @{text "r\<^sub>1"}, matches the string @{term
       
   528   "[a,b,c]"}. We first build the three derivatives (according to @{term a},
       
   529   @{term b} and @{term c}). We then use @{const nullable} to find out
       
   530   whether the resulting derivative regular expression @{term "r\<^sub>4"}
       
   531   can match the empty string. If yes, we call the function @{const mkeps}
       
   532   that produces a value @{term "v\<^sub>4"} for how @{term "r\<^sub>4"} can
       
   533   match the empty string (taking into account the POSIX constraints in case
       
   534   there are several ways). This function is defined by the clauses:
       
   535 
   532 
   536 \begin{figure}[t]
   533 \begin{figure}[t]
   537 \begin{center}
   534 \begin{center}
   538 \begin{tikzpicture}[scale=2,node distance=1.3cm,
   535 \begin{tikzpicture}[scale=2,node distance=1.3cm,
   539                     every node/.style={minimum size=6mm}]
   536                     every node/.style={minimum size=6mm}]
   792   split of @{term "s\<^sub>1 @ s\<^sub>2"} and furthermore the corresponding value
   789   split of @{term "s\<^sub>1 @ s\<^sub>2"} and furthermore the corresponding value
   793   @{term v} cannot be flattened to the empty string. In effect, we require
   790   @{term v} cannot be flattened to the empty string. In effect, we require
   794   that in each ``iteration'' of the star, some non-empty substring needs to
   791   that in each ``iteration'' of the star, some non-empty substring needs to
   795   be ``chipped'' away; only in case of the empty string we accept @{term
   792   be ``chipped'' away; only in case of the empty string we accept @{term
   796   "Stars []"} as the POSIX value. Indeed we can show that our POSIX value
   793   "Stars []"} as the POSIX value. Indeed we can show that our POSIX value
   797   is a canonical value which excludes those @{text Stars} containing values 
   794   is a lexical value which excludes those @{text Stars} containing values 
   798   that flatten to the empty string.
   795   that flatten to the empty string.
   799 
   796 
   800   \begin{lemma}
   797   \begin{lemma}
   801   @{thm [mode=IfThen] Posix_CV}
   798   @{thm [mode=IfThen] Posix_LV}
   802   \end{lemma}
   799   \end{lemma}
   803 
   800 
   804   \begin{proof}
   801   \begin{proof}
   805   By routine induction on @{thm (prem 1) Posix_CV}.\qed 
   802   By routine induction on @{thm (prem 1) Posix_LV}.\qed 
   806   \end{proof}
   803   \end{proof}
   807 
   804 
   808   \noindent
   805   \noindent
   809   Next is the lemma that shows the function @{term "mkeps"} calculates
   806   Next is the lemma that shows the function @{term "mkeps"} calculates
   810   the POSIX value for the empty string and a nullable regular expression.
   807   the POSIX value for the empty string and a nullable regular expression.
   919   result---a POSIX value---should be. A strong point in favour of
   916   result---a POSIX value---should be. A strong point in favour of
   920   Sulzmann and Lu's algorithm is that it can be extended in various
   917   Sulzmann and Lu's algorithm is that it can be extended in various
   921   ways.
   918   ways.
   922 
   919 
   923 *}
   920 *}
       
   921 
       
   922 section {* Ordering of Values according to Okui and Suzuki*}
       
   923 
       
   924 text {*
       
   925   
       
   926   While in the previous section we have defined POSIX values directly
       
   927   in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}),
       
   928   Sulzmann and Lu took a different approach in \cite{Sulzmann2014}:
       
   929   they introduced an ordering for values and identified POSIX values
       
   930   as the maximal elements.  An extended version of \cite{Sulzmann2014}
       
   931   is available at the website of its first author; this includes more
       
   932   details of their proofs, but which are evidently not in final form
       
   933   yet. Unfortunately, we were not able to verify claims that their
       
   934   ordering has properties such as being transitive or having maximal
       
   935   elements.
       
   936  
       
   937   Okui and Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} described
       
   938    another ordering of values, which they use to establish the correctness of
       
   939    their automata-based algorithm for POSIX matching.  Their ordering
       
   940    resembles some aspects of the one given by Sulzmann and Lu, but
       
   941    is quite different. To begin with, Okui and Suzuki identify POSIX
       
   942    values as least elements in their ordering. A more substantial 
       
   943     difference is that the ordering by Okui
       
   944    and Suzuki uses \emph{positions} in order to identify and
       
   945    compare subvalues, whereby positions are lists of natural
       
   946    numbers. This allows them to quite naturally formalise the Longest
       
   947    Match and Priority rules of the informal POSIX standard.  Consider
       
   948    for example the value @{term v} of the form @{term "Stars [Seq
       
   949    (Char x) (Char y), Char z]"}, say.  At position @{text "[0,1]"} of
       
   950    this value is the subvalue @{text "Char y"} and at position @{text
       
   951    "[1]"} the subvalue @{term "Char z"}.  At the `root' position, or
       
   952    empty list @{term "[]"}, is the whole value @{term v}. The
       
   953    positions @{text "[0,1,0]"} and @{text "[2]"}, for example, are
       
   954    outside of @{text v}. If it exists, the subvalue of @{term v} at a
       
   955    position @{text p}, written @{term "at v p"}, can be recursively
       
   956    defined by
       
   957   
       
   958   \begin{center}
       
   959   \begin{tabular}{r@ {\hspace{0mm}}lcl}
       
   960   @{term v} &  @{text "\<downharpoonleft>\<^bsub>[]\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(1)}\\
       
   961   @{term "Left v"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(2)}\\
       
   962   @{term "Right v"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"} & @{text "\<equiv>"} & 
       
   963   @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\
       
   964   @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"} & 
       
   965   @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\
       
   966   @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"}
       
   967   & @{text "\<equiv>"} & 
       
   968   @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\
       
   969   @{term "Stars vs"} & @{text "\<downharpoonleft>\<^bsub>n::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(6)}\\
       
   970   \end{tabular} 
       
   971   \end{center}
       
   972 
       
   973   \noindent We use Isabelle's notation @{term "vs ! n"} for the
       
   974   @{text n}th element in a list.  The set of positions inside a value @{text v},
       
   975   written @{term "Pos v"}, is given by the clauses
       
   976 
       
   977   \begin{center}
       
   978   \begin{tabular}{lcl}
       
   979   @{thm (lhs) Pos.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(1)}\\
       
   980   @{thm (lhs) Pos.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(2)}\\
       
   981   @{thm (lhs) Pos.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(3)}\\
       
   982   @{thm (lhs) Pos.simps(4)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(4)}\\
       
   983   @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
   984   & @{text "\<equiv>"} 
       
   985   & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\
       
   986   @{thm (lhs) Pos_stars} & @{text "\<equiv>"} & @{thm (rhs) Pos_stars}\\
       
   987   \end{tabular}
       
   988   \end{center}
       
   989 
       
   990   \noindent 
       
   991   In the last clause @{text len} stands for the length of a list. Clearly
       
   992   for every position inside a value there exists a subvalue at that position.
       
   993  
       
   994 
       
   995   To help understanding the ordering of Okui and Suzuki, consider again 
       
   996   the earlier value
       
   997   @{text v} and compare it with the following @{text w}:
       
   998 
       
   999   \begin{center}
       
  1000   \begin{tabular}{l}
       
  1001   @{term "v == Stars [Seq (Char x) (Char y), Char z]"}\\
       
  1002   @{term "w == Stars [Char x, Char y, Char z]"}  
       
  1003   \end{tabular}
       
  1004   \end{center}
       
  1005 
       
  1006   \noindent Both values match the string @{text "xyz"}, that means if
       
  1007   we flatten the values at their respective root position, we obtain
       
  1008   @{text "xyz"}. However, at position @{text "[0]"}, @{text v} matches
       
  1009   @{text xy} whereas @{text w} matches only the shorter @{text x}. So
       
  1010   according to the Longest Match Rule, we should prefer @{text v},
       
  1011   rather than @{text w} as POSIX value for string @{text xyz} (and
       
  1012   corresponding regular expression). In order to
       
  1013   formalise this idea, Okui and Suzuki introduce a measure for
       
  1014   subvalues at position @{text p}, called the \emph{norm} of @{text v}
       
  1015   at position @{text p}. We can define this measure in Isabelle as an
       
  1016   integer as follows
       
  1017   
       
  1018   \begin{center}
       
  1019   @{thm pflat_len_def}
       
  1020   \end{center}
       
  1021 
       
  1022   \noindent where we take the length of the flattened value at
       
  1023   position @{text p}, provided the position is inside @{text v}; if
       
  1024   not, then the norm is @{text "-1"}. The default is crucial
       
  1025   for the POSIX requirement of preferring a @{text Left}-value
       
  1026   over a @{text Right}-value (if they can match the same string---see
       
  1027   the Priority Rule from the Introduction). For this consider
       
  1028 
       
  1029   \begin{center}
       
  1030   @{term "v == Left (Char x)"} \qquad and \qquad @{term "w == Right (Char x)"}
       
  1031   \end{center}
       
  1032 
       
  1033   \noindent Both values match @{text x}, but at position @{text "[0]"}
       
  1034   the norm of @{term v} is @{text 1} (the subvalue matches @{text x}), but the
       
  1035   norm of @{text w} is @{text "-1"} (the position is outside @{text w}
       
  1036   according to how we defined the `inside' positions of @{text Left}-
       
  1037   and @{text Right}-values).  Of course at position @{text "[1]"}, the
       
  1038   norms @{term "pflat_len v [1]"} and @{term "pflat_len w [1]"} are
       
  1039   reversed, but the point is that subvalues will be analysed according to
       
  1040   lexicographically orderd positions.  This order, written @{term
       
  1041   "DUMMY \<sqsubset>lex DUMMY"}, can be conveniently formalised by
       
  1042   three inference rules
       
  1043 
       
  1044   \begin{center}
       
  1045   \begin{tabular}{ccc}
       
  1046   @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} &
       
  1047   @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and
       
  1048                                             ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} &
       
  1049   @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}
       
  1050   \end{tabular}
       
  1051   \end{center}
       
  1052 
       
  1053   With the norm and lexicographic order of positions in place,
       
  1054   we can state the key definition of Okui and Suzuki
       
  1055   \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller} than
       
  1056   @{term "v\<^sub>2"} if and only if  $(i)$ the norm at position @{text p} is
       
  1057   greater in @{term "v\<^sub>1"} (that is the string @{term "flat (at v\<^sub>1 p)"} is longer 
       
  1058   than @{term "flat (at v\<^sub>2 p)"}) and $(ii)$ all subvalues at 
       
  1059   positions that are inside @{term "v\<^sub>1"} or @{term "v\<^sub>2"} and that are
       
  1060   lexicographically smaller than @{text p}, we have the same norm, namely
       
  1061 
       
  1062  \begin{center}
       
  1063  \begin{tabular}{c}
       
  1064  @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} 
       
  1065  @{text "\<equiv>"} 
       
  1066  $\begin{cases}
       
  1067  (i) & @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"}   \quad\text{and}\smallskip \\
       
  1068  (ii) & @{term "(\<forall>q \<in> Pos v\<^sub>1 \<union> Pos v\<^sub>2. q \<sqsubset>lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"}
       
  1069  \end{cases}$
       
  1070  \end{tabular}
       
  1071  \end{center}
       
  1072 
       
  1073  \noindent The position @{text p} in this definition acts as the
       
  1074   \emph{first distinct position} of @{text "v\<^sub>1"} and @{text
       
  1075   "v\<^sub>2"}, where both values match strings of different length
       
  1076   \cite{OkuiSuzuki2010}.  Since at @{text p} the values @{text
       
  1077   "v\<^sub>1"} and @{text "v\<^sub>2"} macth different strings, the
       
  1078   ordering is irreflexive. Derived from the definition above
       
  1079   are the following two orderings:
       
  1080   
       
  1081   \begin{center}
       
  1082   \begin{tabular}{l}
       
  1083   @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\
       
  1084   @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1085   \end{tabular}
       
  1086   \end{center}
       
  1087 
       
  1088  While we encountred a number of obstacles for establishing properties like
       
  1089  transitivity for the ordering of Sulzmann and Lu (and which we failed
       
  1090  to overcome), it is relatively straightforward to establish this
       
  1091  property for the ordering by Okui and Suzuki.
       
  1092 
       
  1093  \begin{lemma}[Transitivity]\label{transitivity}
       
  1094  @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} 
       
  1095  \end{lemma}
       
  1096 
       
  1097  \begin{proof} From the assumption we obtain two positions @{text p}
       
  1098  and @{text q}, where the values @{text "v\<^sub>1"} and @{text
       
  1099  "v\<^sub>2"} (respectively @{text "v\<^sub>2"} and @{text
       
  1100  "v\<^sub>3"}) are `distinct'.  Since @{text
       
  1101  "\<prec>\<^bsub>lex\<^esub>"} is trichotomous, we need to consider
       
  1102  three cases, namely @{term "p = q"}, @{term "p \<sqsubset>lex q"} and
       
  1103  @{term "q \<sqsubset>lex p"}. Let us look at the first case.
       
  1104  Clearly @{term "pflat_len v\<^sub>2 p < pflat_len v\<^sub>1 p"}
       
  1105  and @{term "pflat_len v\<^sub>3 p < pflat_len v\<^sub>2 p"}
       
  1106  imply @{term "pflat_len v\<^sub>3 p < pflat_len v\<^sub>1 p"}.
       
  1107  It remains to show for a @{term "p' \<in> Pos v\<^sub>1 \<union> Pos v\<^sub>3"}
       
  1108  with @{term "p' \<sqsubset>lex p"} that  
       
  1109  @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>3 p'"} holds.
       
  1110  Suppose @{term "p' \<in> Pos v\<^sub>1"}, then we can infer from the 
       
  1111  first assumption that @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>2 p'"}.
       
  1112  But this means that @{term "p'"} must be in  @{term "Pos v\<^sub>2"} too.
       
  1113  Hence we can use the second assumption and infer  @{term "pflat_len v\<^sub>2 p' = pflat_len v\<^sub>3 p'"}, which concludes
       
  1114  this case with @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}. 
       
  1115  The reasoning in the other cases is similar.\qed
       
  1116  \end{proof}
       
  1117 
       
  1118  \noindent We can show that @{term "DUMMY :\<sqsubseteq>val DUMMY"} is
       
  1119  a partial order.  Okui and Suzuki also show that it is a linear order
       
  1120  for lexical values \cite{OkuiSuzuki2010}, but we have not done
       
  1121  this. What we are going to show below is that for a given @{text r}
       
  1122  and @{text s}, the ordering has a unique minimal element on the set
       
  1123  @{term "LV r s"} , which is the POSIX value we defined in the
       
  1124  previous section.
       
  1125 
       
  1126 
       
  1127  Lemma 1
       
  1128 
       
  1129  @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1130 
       
  1131  but in the other direction only
       
  1132 
       
  1133  @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} 
       
  1134 
       
  1135  
       
  1136 
       
  1137   Next we establish how Okui and Suzuki's ordering relates to our
       
  1138   definition of POSIX values.  Given a POSIX value @{text "v\<^sub>1"}
       
  1139   for @{text r} and @{text s}, then any other lexical value @{text
       
  1140   "v\<^sub>2"} in @{term "LV r s"} is greater or equal than @{text
       
  1141   "v\<^sub>1"}:
       
  1142 
       
  1143 
       
  1144   \begin{theorem}
       
  1145   @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1146   \end{theorem}
       
  1147 
       
  1148   \begin{proof}
       
  1149   By induction on our POSIX rules. The two base cases are straightforward: for example 
       
  1150   for @{term "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \<in> LV ONE []"} must also 
       
  1151   be of the form \mbox{@{term "v\<^sub>2 = Void"}}. Therfore we have @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}.
       
  1152   The inductive cases are as follows:
       
  1153 
       
  1154   \begin{itemize}
       
  1155   \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2) \<rightarrow> (Left w\<^sub>1)"}:
       
  1156   In this case @{term "v\<^sub>1 = Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either 
       
  1157   of the form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the latter case we 
       
  1158   can immediately conclude with @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} since a @{text Left}-value 
       
  1159   with the same underlying string @{text s} is always smaller or equal than a @{text Right}-value.
       
  1160   In the former case we have @{term "w\<^sub>2 \<in> LV r\<^sub>1 s"} and can use the induction
       
  1161   hypothesis to infer @{term "w\<^sub>1 :\<sqsubseteq>val w\<^sub>2"}. Because @{term "w\<^sub>1"}
       
  1162   and @{term "w\<^sub>2"} have the same underlying string @{text s}, we can conclude with 
       
  1163   @{term "Left w\<^sub>1 :\<sqsubseteq>val Left w\<^sub>2"}. 
       
  1164 
       
  1165   \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2) \<rightarrow> (Right w\<^sub>1)"}:
       
  1166   Similarly for the case where
       
  1167   @{term "v\<^sub>1 = Right w\<^sub>1"}.
       
  1168 
       
  1169   \item[$\bullet$]
       
  1170   \end{itemize}
       
  1171   \end{proof}
       
  1172 
       
  1173   Given a lexical value @{text "v\<^sub>1"}, say, in @{term "LV r s"} for which there does 
       
  1174   not exists any smaller value in @{term "LV r s"}, then this value is our POSIX value:
       
  1175 
       
  1176   \begin{theorem}
       
  1177   @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
       
  1178   \end{theorem}
       
  1179 
       
  1180   \begin{proof}
       
  1181   By induction on @{text r}.
       
  1182   \end{proof}
       
  1183 *}
       
  1184 
   924 
  1185 
   925 section {* Extensions and Optimisations*}
  1186 section {* Extensions and Optimisations*}
   926 
  1187 
   927 text {*
  1188 text {*
   928 
  1189 
  1119   conclude in this case too.\qed   
  1380   conclude in this case too.\qed   
  1120 
  1381 
  1121   \end{proof} 
  1382   \end{proof} 
  1122 *}
  1383 *}
  1123 
  1384 
  1124 section {* Ordering of Values according to Okui and Suzuki*}
       
  1125 
       
  1126 text {*
       
  1127 
       
  1128  Positions are lists of natural numbers.
       
  1129 
       
  1130  The subvalue at position @{text p}, written @{term "at v p"}, is defined 
       
  1131   
       
  1132 
       
  1133  \begin{center}
       
  1134  \begin{tabular}{r@ {\hspace{0mm}}lcl}
       
  1135  @{term v} &  @{text "\<downharpoonleft>\<^bsub>[]\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(1)}\\
       
  1136  @{term "Left v"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(2)}\\
       
  1137  @{term "Right v"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"} & @{text "\<equiv>"} & 
       
  1138  @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\
       
  1139  @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"} & 
       
  1140  @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\
       
  1141  @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"}
       
  1142  & @{text "\<equiv>"} & 
       
  1143  @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\
       
  1144  @{term "Stars vs"} & @{text "\<downharpoonleft>\<^bsub>n::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(6)}\\
       
  1145  \end{tabular} 
       
  1146  \end{center}
       
  1147 
       
  1148  \begin{center}
       
  1149  \begin{tabular}{lcl}
       
  1150  @{thm (lhs) Pos.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(1)}\\
       
  1151  @{thm (lhs) Pos.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(2)}\\
       
  1152  @{thm (lhs) Pos.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(3)}\\
       
  1153  @{thm (lhs) Pos.simps(4)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(4)}\\
       
  1154  @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1155   & @{text "\<equiv>"} 
       
  1156   & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\
       
  1157  @{thm (lhs) Pos_stars} & @{text "\<equiv>"} & @{thm (rhs) Pos_stars}\\
       
  1158  \end{tabular}
       
  1159  \end{center}
       
  1160 
       
  1161  @{thm pflat_len_def}
       
  1162 
       
  1163 
       
  1164  \begin{center}
       
  1165  \begin{tabular}{ccc}
       
  1166  @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} &
       
  1167  @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} &
       
  1168  @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and
       
  1169                                             ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}
       
  1170  \end{tabular}
       
  1171  \end{center}
       
  1172 
       
  1173 
       
  1174  Main definition by Okui and Suzuki.
       
  1175 
       
  1176  \begin{center}
       
  1177  \begin{tabular}{ccl}
       
  1178  @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} &
       
  1179  @{text "\<equiv>"} &
       
  1180  @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"}   and\\
       
  1181  & & @{term "(\<forall>q \<in> Pos v\<^sub>1 \<union> Pos v\<^sub>2. q \<sqsubset>lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"}
       
  1182  \end{tabular}
       
  1183  \end{center}
       
  1184 
       
  1185  @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1186 
       
  1187  @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1188 
       
  1189  Lemma 1
       
  1190 
       
  1191  @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1192 
       
  1193  but in the other direction only
       
  1194 
       
  1195  @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} 
       
  1196 
       
  1197  Lemma Transitivity:
       
  1198 
       
  1199  @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} 
       
  1200 
       
  1201 
       
  1202 *}
       
  1203 
       
  1204 text {*
       
  1205 
       
  1206  Theorem 1:
       
  1207 
       
  1208  @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
       
  1209 
       
  1210  Theorem 2:
       
  1211 
       
  1212  @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
       
  1213 *}
       
  1214 
  1385 
  1215 
  1386 
  1216 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}
  1387 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}
  1217 
  1388 
  1218 text {*
  1389 text {*