--- a/thys/Journal/Paper.thy Fri Aug 11 20:29:01 2017 +0100
+++ b/thys/Journal/Paper.thy Fri Aug 18 14:51:29 2017 +0100
@@ -63,7 +63,6 @@
set ("_" [73] 73) and
Prf ("_ : _" [75,75] 75) and
- CPrf ("_ \<^raw:\mbox{\textbf{\textlengthmark}}> _" [75,75] 75) and
Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
lexer ("lexer _ _" [78,78] 77) and
@@ -83,20 +82,18 @@
PosOrd_ex ("_ \<prec> _" [77,77] 77) and
PosOrd_ex_eq ("_ \<^raw:\mbox{$\preccurlyeq$}> _" [77,77] 77) and
pflat_len ("\<parallel>_\<parallel>\<^bsub>_\<^esub>") and
- nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77)
+ nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) and
- (*
- ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and
- ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77)
- *)
+ DUMMY ("\<^raw:\underline{\hspace{2mm}}>")
+
definition
"match r s \<equiv> nullable (ders s r)"
-lemma CV_STAR_ONE_empty:
- shows "CV (STAR ONE) [] = {Stars []}"
-by(auto simp add: CV_def elim: CPrf.cases intro: CPrf.intros)
+lemma LV_STAR_ONE_empty:
+ shows "LV (STAR ONE) [] = {Stars []}"
+by(auto simp add: LV_def elim: Prf.cases intro: Prf.intros)
@@ -139,7 +136,7 @@
one way of how the string is matched. There are two commonly used
disambiguation strategies to generate a unique answer: one is called GREEDY
matching \cite{Frisch2004} and the other is POSIX
-matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2013,Sulzmann2014,Vansummeren2006}. For example consider
+matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. For example consider
the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT
(ALT x y) xy)"}}. Either the string can be matched in two `iterations' by
the single letter-regular expressions @{term x} and @{term y}, or directly
@@ -149,14 +146,15 @@
instant gratification to delayed repletion). The second case is POSIX
matching, which prefers the longest match.
-In the context of lexing, where an input string needs to be split up into a
-sequence of tokens, POSIX is the more natural disambiguation strategy for
-what programmers consider basic syntactic building blocks in their programs.
-These building blocks are often specified by some regular expressions, say
-@{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and
-identifiers, respectively. There are a few underlying (informal) rules behind
-tokenising a string in a POSIX \cite{POSIX} fashion according to a collection of regular
-expressions:
+In the context of lexing, where an input string needs to be split up
+into a sequence of tokens, POSIX is the more natural disambiguation
+strategy for what programmers consider basic syntactic building blocks
+in their programs. These building blocks are often specified by some
+regular expressions, say @{text "r\<^bsub>key\<^esub>"} and @{text
+"r\<^bsub>id\<^esub>"} for recognising keywords and identifiers,
+respectively. There are a few underlying (informal) rules behind
+tokenising a string in a POSIX \cite{POSIX} fashion according to a
+collection of regular expressions:
\begin{itemize}
\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}):
@@ -171,20 +169,28 @@
not match an empty string unless this is the only match for the repetition.\smallskip
\item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to
-be longer than no match at all.\marginpar{Explain its purpose}
+be longer than no match at all.
\end{itemize}
-\noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords
-such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"}
+\noindent Consider for example a regular expression @{text
+"r\<^bsub>key\<^esub>"} for recognising keywords such as @{text "if"},
+@{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"}
recognising identifiers (say, a single character followed by
characters or numbers). Then we can form the regular expression
-@{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use POSIX matching to tokenise strings,
-say @{text "iffoo"} and @{text "if"}. For @{text "iffoo"} we obtain
-by the Longest Match Rule a single identifier token, not a keyword
-followed by an identifier. For @{text "if"} we obtain by the Priority
-Rule a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"}
-matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} matches @{text "iffoo"}, respectively @{text "if"}, in exactly one
-`iteration' of the star.
+@{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"}
+and use POSIX matching to tokenise strings, say @{text "iffoo"} and
+@{text "if"}. For @{text "iffoo"} we obtain by the Longest Match Rule
+a single identifier token, not a keyword followed by an
+identifier. For @{text "if"} we obtain by the Priority Rule a keyword
+token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"}
+matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> +
+r\<^bsub>id\<^esub>)\<^sup>\<star>"} matches @{text "iffoo"},
+respectively @{text "if"}, in exactly one `iteration' of the star. The
+Empty String Rule is for cases where @{text
+"(a\<^sup>\<star>)\<^sup>\<star>"}, for example, matches against the
+string @{text "bc"}. Then the longest initial matched substring is the
+empty string, which is matched by both the whole regular expression
+and the parenthesised sub-expression.
One limitation of Brzozowski's matcher is that it only generates a
@@ -205,11 +211,10 @@
algorithm. This proof idea is inspired by work of Frisch and Cardelli
\cite{Frisch2004} on a GREEDY regular expression matching
algorithm. However, we were not able to establish transitivity and
-totality for the ``order relation'' by Sulzmann and Lu. \marginpar{We probably drop this section}
-??In Section
-\ref{argu} we identify some inherent problems with their approach (of
+totality for the ``order relation'' by Sulzmann and Lu.
+There are some inherent problems with their approach (of
which some of the proofs are not published in \cite{Sulzmann2014});
-perhaps more importantly, we give a simple inductive (and
+perhaps more importantly, we give in this paper a simple inductive (and
algorithm-independent) definition of what we call being a {\em POSIX
value} for a regular expression @{term r} and a string @{term s}; we
show that the algorithm computes such a value and that such a value is
@@ -252,7 +257,7 @@
Our specification of a POSIX value consists of a simple inductive definition
that given a string and a regular expression uniquely determines this value.
We also show that our definition is equivalent to an ordering
-of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2013}.
+of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2010}.
Derivatives as calculated by Brzozowski's method are usually more complex
regular expressions than the initial one; various optimisations are
possible. We prove the correctness when simplifications of @{term "ALT ZERO
@@ -402,9 +407,10 @@
text {*
- The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to use
- values for encoding \emph{how} a regular expression matches a string
- and then define a function on values that mirrors (but inverts) the
+ There have been many previous works that use values for encoding
+ \emph{how} a regular expression matches a string.
+ The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to
+ define a function on values that mirrors (but inverts) the
construction of the derivative on regular expressions. \emph{Values}
are defined as the inductive datatype
@@ -440,98 +446,89 @@
\end{center}
\noindent Sulzmann and Lu also define inductively an inhabitation relation
- that associates values to regular expressions
+ that associates values to regular expressions. We define this relation as
+ follows:\footnote{Note that the rule for @{term Stars} differs from our
+ erlier paper \cite{AusafDyckhoffUrban2016}. There we used the original
+ definition by Sulzmann and Lu which does not require that the values @{term "v \<in> set vs"}
+ flatten to a non-empty string. The reason for introducing the
+ more restricted version of lexical values is convenience later
+ on when reasoning about
+ an ordering relation for values.}
\begin{center}
- \begin{tabular}{c}
+ \begin{tabular}{c@ {\hspace{12mm}}c}
\\[-8mm]
- @{thm[mode=Axiom] Prf.intros(4)} \qquad
+ @{thm[mode=Axiom] Prf.intros(4)} &
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm]
- @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad
+ @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} &
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm]
- @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} \qquad
+ @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} &
@{thm[mode=Rule] Prf.intros(6)[of "vs"]}
\end{tabular}
\end{center}
- \noindent
- where in the clause for @{const "Stars"} we use the notation @{term "v \<in> set vs"}
- for indicating that @{text v} is a member in the list @{text vs}.
- Note that no values are associated with the regular expression
- @{term ZERO}, and that the only value associated with the regular
- expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text
- "Void"}. It is routine to establish how values ``inhabiting'' a regular
- expression correspond to the language of a regular expression, namely
+ \noindent where in the clause for @{const "Stars"} we use the
+ notation @{term "v \<in> set vs"} for indicating that @{text v} is a
+ member in the list @{text vs}. We require in this rule that every
+ value in @{term vs} flattens to a non-empty string. The idea is that
+ @{term "Stars"}-values satisfy the informal Star Rule (see Introduction)
+ where the $^\star$ does not match the empty string unless this is
+ the only match for the repetition. Note also that no values are
+ associated with the regular expression @{term ZERO}, and that the
+ only value associated with the regular expression @{term ONE} is
+ @{term Void}. It is routine to establish how values ``inhabiting''
+ a regular expression correspond to the language of a regular
+ expression, namely
\begin{proposition}
@{thm L_flat_Prf}
\end{proposition}
\noindent
- Given a regular expression @{text r} and a string @{text s}, we can define the
+ Given a regular expression @{text r} and a string @{text s}, we define the
set of all \emph{Lexical Values} inhabited by @{text r} with the underlying string
- being @{text s} by
+ being @{text s}:\footnote{Okui and Suzuki refer to our lexical values
+ as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic
+ values} by Cardelli and Frisch \cite{Frisch2004} is similar, but not identical
+ to our lexical values.}
\begin{center}
@{thm LV_def}
\end{center}
- \noindent However, later on it will sometimes be necessary to
- restrict the set of lexical values to a subset called
- \emph{Canonical Values}. The idea of canonical values is that they
- satisfy the Star Rule (see Introduction) where the $^\star$ does not
- match the empty string unless this is the only match for the
- repetition. One way to define canonical values formally is to use a
- stronger inhabitation relation, written @{term "\<Turnstile> DUMMY : DUMMY"}, which has the same rules as @{term
- "\<turnstile> DUMMY : DUMMY"} shown above, except that the rule for
- @{term Stars} has
- the additional side-condition of flattened values not being the
- empty string, namely
+ \noindent The main property of @{term "LV r s"} is that it is alway finite.
+
+ \begin{proposition}
+ @{thm LV_finite}
+ \end{proposition}
- \begin{center}
- @{thm [mode=Rule] CPrf.intros(6)}
- \end{center}
-
- \noindent
- With this we can define
-
- \begin{center}
- @{thm CV_def}
- \end{center}
-
- \noindent
- Clearly we have @{thm LV_CV_subset}.
- The main point of canonical values is that for every regular expression @{text r} and every
- string @{text s}, the set @{term "CV r s"} is finite.
+ \noindent This finiteness property does not hold in general if we
+ remove the side-condition about @{term "flat v \<noteq> []"} in the
+ @{term Stars}-rule above. For example using Sulzmann and Lu's
+ less restrictive definition, @{term "LV (STAR ONE) []"} would contain
+ infinitely many values, but according to our more restricted
+ definition @{thm LV_STAR_ONE_empty}.
- \begin{lemma}
- @{thm CV_finite}
- \end{lemma}
-
- \noindent This finiteness property does not generally hold for lexical values where
- for example @{term "LV (STAR ONE) []"} contains infinitely many
- values, but @{thm CV_STAR_ONE_empty}. However, if a regular
- expression @{text r} matches a string @{text s}, then in general the
- set @{term "CV r s"} is not just a
- singleton set. In case of POSIX matching the problem is to
- calculate the unique value that satisfies the (informal) POSIX rules
- from the Introduction. It will turn out that this POSIX value is in fact a
- canonical value.
-
- Graphically the POSIX value calculation algorithm by
- Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz}
- where the path from the left to the right involving @{term derivatives}/@{const
- nullable} is the first phase of the algorithm (calculating successive
- \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to
- left, the second phase. This picture shows the steps required when a
- regular expression, say @{text "r\<^sub>1"}, matches the string @{term
- "[a,b,c]"}. We first build the three derivatives (according to @{term a},
- @{term b} and @{term c}). We then use @{const nullable} to find out
- whether the resulting derivative regular expression @{term "r\<^sub>4"}
- can match the empty string. If yes, we call the function @{const mkeps}
- that produces a value @{term "v\<^sub>4"} for how @{term "r\<^sub>4"} can
- match the empty string (taking into account the POSIX constraints in case
- there are several ways). This function is defined by the clauses:
+ If a regular expression @{text r} matches a string @{text s}, then
+ generally the set @{term "LV r s"} is not just a singleton set. In
+ case of POSIX matching the problem is to calculate the unique lexical value
+ that satisfies the (informal) POSIX rules from the Introduction.
+ Graphically the POSIX value calculation algorithm by Sulzmann and Lu
+ can be illustrated by the picture in Figure~\ref{Sulz} where the
+ path from the left to the right involving @{term
+ derivatives}/@{const nullable} is the first phase of the algorithm
+ (calculating successive \Brz's derivatives) and @{const
+ mkeps}/@{text inj}, the path from right to left, the second
+ phase. This picture shows the steps required when a regular
+ expression, say @{text "r\<^sub>1"}, matches the string @{term
+ "[a,b,c]"}. We first build the three derivatives (according to
+ @{term a}, @{term b} and @{term c}). We then use @{const nullable}
+ to find out whether the resulting derivative regular expression
+ @{term "r\<^sub>4"} can match the empty string. If yes, we call the
+ function @{const mkeps} that produces a value @{term "v\<^sub>4"}
+ for how @{term "r\<^sub>4"} can match the empty string (taking into
+ account the POSIX constraints in case there are several ways). This
+ function is defined by the clauses:
\begin{figure}[t]
\begin{center}
@@ -794,15 +791,15 @@
that in each ``iteration'' of the star, some non-empty substring needs to
be ``chipped'' away; only in case of the empty string we accept @{term
"Stars []"} as the POSIX value. Indeed we can show that our POSIX value
- is a canonical value which excludes those @{text Stars} containing values
+ is a lexical value which excludes those @{text Stars} containing values
that flatten to the empty string.
\begin{lemma}
- @{thm [mode=IfThen] Posix_CV}
+ @{thm [mode=IfThen] Posix_LV}
\end{lemma}
\begin{proof}
- By routine induction on @{thm (prem 1) Posix_CV}.\qed
+ By routine induction on @{thm (prem 1) Posix_LV}.\qed
\end{proof}
\noindent
@@ -922,6 +919,270 @@
*}
+section {* Ordering of Values according to Okui and Suzuki*}
+
+text {*
+
+ While in the previous section we have defined POSIX values directly
+ in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}),
+ Sulzmann and Lu took a different approach in \cite{Sulzmann2014}:
+ they introduced an ordering for values and identified POSIX values
+ as the maximal elements. An extended version of \cite{Sulzmann2014}
+ is available at the website of its first author; this includes more
+ details of their proofs, but which are evidently not in final form
+ yet. Unfortunately, we were not able to verify claims that their
+ ordering has properties such as being transitive or having maximal
+ elements.
+
+ Okui and Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} described
+ another ordering of values, which they use to establish the correctness of
+ their automata-based algorithm for POSIX matching. Their ordering
+ resembles some aspects of the one given by Sulzmann and Lu, but
+ is quite different. To begin with, Okui and Suzuki identify POSIX
+ values as least elements in their ordering. A more substantial
+ difference is that the ordering by Okui
+ and Suzuki uses \emph{positions} in order to identify and
+ compare subvalues, whereby positions are lists of natural
+ numbers. This allows them to quite naturally formalise the Longest
+ Match and Priority rules of the informal POSIX standard. Consider
+ for example the value @{term v} of the form @{term "Stars [Seq
+ (Char x) (Char y), Char z]"}, say. At position @{text "[0,1]"} of
+ this value is the subvalue @{text "Char y"} and at position @{text
+ "[1]"} the subvalue @{term "Char z"}. At the `root' position, or
+ empty list @{term "[]"}, is the whole value @{term v}. The
+ positions @{text "[0,1,0]"} and @{text "[2]"}, for example, are
+ outside of @{text v}. If it exists, the subvalue of @{term v} at a
+ position @{text p}, written @{term "at v p"}, can be recursively
+ defined by
+
+ \begin{center}
+ \begin{tabular}{r@ {\hspace{0mm}}lcl}
+ @{term v} & @{text "\<downharpoonleft>\<^bsub>[]\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(1)}\\
+ @{term "Left v"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(2)}\\
+ @{term "Right v"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"} & @{text "\<equiv>"} &
+ @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\
+ @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"} &
+ @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\
+ @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"}
+ & @{text "\<equiv>"} &
+ @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\
+ @{term "Stars vs"} & @{text "\<downharpoonleft>\<^bsub>n::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(6)}\\
+ \end{tabular}
+ \end{center}
+
+ \noindent We use Isabelle's notation @{term "vs ! n"} for the
+ @{text n}th element in a list. The set of positions inside a value @{text v},
+ written @{term "Pos v"}, is given by the clauses
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) Pos.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(1)}\\
+ @{thm (lhs) Pos.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(2)}\\
+ @{thm (lhs) Pos.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(3)}\\
+ @{thm (lhs) Pos.simps(4)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(4)}\\
+ @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
+ & @{text "\<equiv>"}
+ & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\
+ @{thm (lhs) Pos_stars} & @{text "\<equiv>"} & @{thm (rhs) Pos_stars}\\
+ \end{tabular}
+ \end{center}
+
+ \noindent
+ In the last clause @{text len} stands for the length of a list. Clearly
+ for every position inside a value there exists a subvalue at that position.
+
+
+ To help understanding the ordering of Okui and Suzuki, consider again
+ the earlier value
+ @{text v} and compare it with the following @{text w}:
+
+ \begin{center}
+ \begin{tabular}{l}
+ @{term "v == Stars [Seq (Char x) (Char y), Char z]"}\\
+ @{term "w == Stars [Char x, Char y, Char z]"}
+ \end{tabular}
+ \end{center}
+
+ \noindent Both values match the string @{text "xyz"}, that means if
+ we flatten the values at their respective root position, we obtain
+ @{text "xyz"}. However, at position @{text "[0]"}, @{text v} matches
+ @{text xy} whereas @{text w} matches only the shorter @{text x}. So
+ according to the Longest Match Rule, we should prefer @{text v},
+ rather than @{text w} as POSIX value for string @{text xyz} (and
+ corresponding regular expression). In order to
+ formalise this idea, Okui and Suzuki introduce a measure for
+ subvalues at position @{text p}, called the \emph{norm} of @{text v}
+ at position @{text p}. We can define this measure in Isabelle as an
+ integer as follows
+
+ \begin{center}
+ @{thm pflat_len_def}
+ \end{center}
+
+ \noindent where we take the length of the flattened value at
+ position @{text p}, provided the position is inside @{text v}; if
+ not, then the norm is @{text "-1"}. The default is crucial
+ for the POSIX requirement of preferring a @{text Left}-value
+ over a @{text Right}-value (if they can match the same string---see
+ the Priority Rule from the Introduction). For this consider
+
+ \begin{center}
+ @{term "v == Left (Char x)"} \qquad and \qquad @{term "w == Right (Char x)"}
+ \end{center}
+
+ \noindent Both values match @{text x}, but at position @{text "[0]"}
+ the norm of @{term v} is @{text 1} (the subvalue matches @{text x}), but the
+ norm of @{text w} is @{text "-1"} (the position is outside @{text w}
+ according to how we defined the `inside' positions of @{text Left}-
+ and @{text Right}-values). Of course at position @{text "[1]"}, the
+ norms @{term "pflat_len v [1]"} and @{term "pflat_len w [1]"} are
+ reversed, but the point is that subvalues will be analysed according to
+ lexicographically orderd positions. This order, written @{term
+ "DUMMY \<sqsubset>lex DUMMY"}, can be conveniently formalised by
+ three inference rules
+
+ \begin{center}
+ \begin{tabular}{ccc}
+ @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} &
+ @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and
+ ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} &
+ @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}
+ \end{tabular}
+ \end{center}
+
+ With the norm and lexicographic order of positions in place,
+ we can state the key definition of Okui and Suzuki
+ \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller} than
+ @{term "v\<^sub>2"} if and only if $(i)$ the norm at position @{text p} is
+ greater in @{term "v\<^sub>1"} (that is the string @{term "flat (at v\<^sub>1 p)"} is longer
+ than @{term "flat (at v\<^sub>2 p)"}) and $(ii)$ all subvalues at
+ positions that are inside @{term "v\<^sub>1"} or @{term "v\<^sub>2"} and that are
+ lexicographically smaller than @{text p}, we have the same norm, namely
+
+ \begin{center}
+ \begin{tabular}{c}
+ @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
+ @{text "\<equiv>"}
+ $\begin{cases}
+ (i) & @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} \quad\text{and}\smallskip \\
+ (ii) & @{term "(\<forall>q \<in> Pos v\<^sub>1 \<union> Pos v\<^sub>2. q \<sqsubset>lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"}
+ \end{cases}$
+ \end{tabular}
+ \end{center}
+
+ \noindent The position @{text p} in this definition acts as the
+ \emph{first distinct position} of @{text "v\<^sub>1"} and @{text
+ "v\<^sub>2"}, where both values match strings of different length
+ \cite{OkuiSuzuki2010}. Since at @{text p} the values @{text
+ "v\<^sub>1"} and @{text "v\<^sub>2"} macth different strings, the
+ ordering is irreflexive. Derived from the definition above
+ are the following two orderings:
+
+ \begin{center}
+ \begin{tabular}{l}
+ @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\
+ @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
+ \end{tabular}
+ \end{center}
+
+ While we encountred a number of obstacles for establishing properties like
+ transitivity for the ordering of Sulzmann and Lu (and which we failed
+ to overcome), it is relatively straightforward to establish this
+ property for the ordering by Okui and Suzuki.
+
+ \begin{lemma}[Transitivity]\label{transitivity}
+ @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]}
+ \end{lemma}
+
+ \begin{proof} From the assumption we obtain two positions @{text p}
+ and @{text q}, where the values @{text "v\<^sub>1"} and @{text
+ "v\<^sub>2"} (respectively @{text "v\<^sub>2"} and @{text
+ "v\<^sub>3"}) are `distinct'. Since @{text
+ "\<prec>\<^bsub>lex\<^esub>"} is trichotomous, we need to consider
+ three cases, namely @{term "p = q"}, @{term "p \<sqsubset>lex q"} and
+ @{term "q \<sqsubset>lex p"}. Let us look at the first case.
+ Clearly @{term "pflat_len v\<^sub>2 p < pflat_len v\<^sub>1 p"}
+ and @{term "pflat_len v\<^sub>3 p < pflat_len v\<^sub>2 p"}
+ imply @{term "pflat_len v\<^sub>3 p < pflat_len v\<^sub>1 p"}.
+ It remains to show for a @{term "p' \<in> Pos v\<^sub>1 \<union> Pos v\<^sub>3"}
+ with @{term "p' \<sqsubset>lex p"} that
+ @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>3 p'"} holds.
+ Suppose @{term "p' \<in> Pos v\<^sub>1"}, then we can infer from the
+ first assumption that @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>2 p'"}.
+ But this means that @{term "p'"} must be in @{term "Pos v\<^sub>2"} too.
+ Hence we can use the second assumption and infer @{term "pflat_len v\<^sub>2 p' = pflat_len v\<^sub>3 p'"}, which concludes
+ this case with @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}.
+ The reasoning in the other cases is similar.\qed
+ \end{proof}
+
+ \noindent We can show that @{term "DUMMY :\<sqsubseteq>val DUMMY"} is
+ a partial order. Okui and Suzuki also show that it is a linear order
+ for lexical values \cite{OkuiSuzuki2010}, but we have not done
+ this. What we are going to show below is that for a given @{text r}
+ and @{text s}, the ordering has a unique minimal element on the set
+ @{term "LV r s"} , which is the POSIX value we defined in the
+ previous section.
+
+
+ Lemma 1
+
+ @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
+
+ but in the other direction only
+
+ @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
+
+
+
+ Next we establish how Okui and Suzuki's ordering relates to our
+ definition of POSIX values. Given a POSIX value @{text "v\<^sub>1"}
+ for @{text r} and @{text s}, then any other lexical value @{text
+ "v\<^sub>2"} in @{term "LV r s"} is greater or equal than @{text
+ "v\<^sub>1"}:
+
+
+ \begin{theorem}
+ @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
+ \end{theorem}
+
+ \begin{proof}
+ By induction on our POSIX rules. The two base cases are straightforward: for example
+ for @{term "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \<in> LV ONE []"} must also
+ be of the form \mbox{@{term "v\<^sub>2 = Void"}}. Therfore we have @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"}.
+ The inductive cases are as follows:
+
+ \begin{itemize}
+ \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2) \<rightarrow> (Left w\<^sub>1)"}:
+ In this case @{term "v\<^sub>1 = Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either
+ of the form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the latter case we
+ can immediately conclude with @{term "v\<^sub>1 :\<sqsubseteq>val v\<^sub>2"} since a @{text Left}-value
+ with the same underlying string @{text s} is always smaller or equal than a @{text Right}-value.
+ In the former case we have @{term "w\<^sub>2 \<in> LV r\<^sub>1 s"} and can use the induction
+ hypothesis to infer @{term "w\<^sub>1 :\<sqsubseteq>val w\<^sub>2"}. Because @{term "w\<^sub>1"}
+ and @{term "w\<^sub>2"} have the same underlying string @{text s}, we can conclude with
+ @{term "Left w\<^sub>1 :\<sqsubseteq>val Left w\<^sub>2"}.
+
+ \item[$\bullet$] Case @{term "s \<in> (ALT r\<^sub>1 r\<^sub>2) \<rightarrow> (Right w\<^sub>1)"}:
+ Similarly for the case where
+ @{term "v\<^sub>1 = Right w\<^sub>1"}.
+
+ \item[$\bullet$]
+ \end{itemize}
+ \end{proof}
+
+ Given a lexical value @{text "v\<^sub>1"}, say, in @{term "LV r s"} for which there does
+ not exists any smaller value in @{term "LV r s"}, then this value is our POSIX value:
+
+ \begin{theorem}
+ @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
+ \end{theorem}
+
+ \begin{proof}
+ By induction on @{text r}.
+ \end{proof}
+*}
+
+
section {* Extensions and Optimisations*}
text {*
@@ -1121,96 +1382,6 @@
\end{proof}
*}
-section {* Ordering of Values according to Okui and Suzuki*}
-
-text {*
-
- Positions are lists of natural numbers.
-
- The subvalue at position @{text p}, written @{term "at v p"}, is defined
-
-
- \begin{center}
- \begin{tabular}{r@ {\hspace{0mm}}lcl}
- @{term v} & @{text "\<downharpoonleft>\<^bsub>[]\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(1)}\\
- @{term "Left v"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(2)}\\
- @{term "Right v"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"} & @{text "\<equiv>"} &
- @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\
- @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>0::ps\<^esub>"} & @{text "\<equiv>"} &
- @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\
- @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\<downharpoonleft>\<^bsub>1::ps\<^esub>"}
- & @{text "\<equiv>"} &
- @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\
- @{term "Stars vs"} & @{text "\<downharpoonleft>\<^bsub>n::ps\<^esub>"} & @{text "\<equiv>"}& @{thm (rhs) at.simps(6)}\\
- \end{tabular}
- \end{center}
-
- \begin{center}
- \begin{tabular}{lcl}
- @{thm (lhs) Pos.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(1)}\\
- @{thm (lhs) Pos.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(2)}\\
- @{thm (lhs) Pos.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(3)}\\
- @{thm (lhs) Pos.simps(4)} & @{text "\<equiv>"} & @{thm (rhs) Pos.simps(4)}\\
- @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
- & @{text "\<equiv>"}
- & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\
- @{thm (lhs) Pos_stars} & @{text "\<equiv>"} & @{thm (rhs) Pos_stars}\\
- \end{tabular}
- \end{center}
-
- @{thm pflat_len_def}
-
-
- \begin{center}
- \begin{tabular}{ccc}
- @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} &
- @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} &
- @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and
- ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}
- \end{tabular}
- \end{center}
-
-
- Main definition by Okui and Suzuki.
-
- \begin{center}
- \begin{tabular}{ccl}
- @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} &
- @{text "\<equiv>"} &
- @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} and\\
- & & @{term "(\<forall>q \<in> Pos v\<^sub>1 \<union> Pos v\<^sub>2. q \<sqsubset>lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"}
- \end{tabular}
- \end{center}
-
- @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
-
- @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
-
- Lemma 1
-
- @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
-
- but in the other direction only
-
- @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
-
- Lemma Transitivity:
-
- @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]}
-
-
-*}
-
-text {*
-
- Theorem 1:
-
- @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}
-
- Theorem 2:
-
- @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]}
-*}
section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}