diff -r aa0a2a3f90a0 -r f5866f1d6a59 thys2/Journal/Paper.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/thys2/Journal/Paper.tex Sat Jan 08 15:26:33 2022 +0000 @@ -0,0 +1,3375 @@ +% +\begin{isabellebody}% +\setisabellecontext{Paper}% +% +\isadelimtheory +% +\endisadelimtheory +% +\isatagtheory +% +\endisatagtheory +{\isafoldtheory}% +% +\isadelimtheory +% +\endisadelimtheory +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Core of the proof% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +This paper builds on previous work by Ausaf and Urban using +regular expression'd bit-coded derivatives to do lexing that +is both fast and satisfies the POSIX specification. +In their work, a bit-coded algorithm introduced by Sulzmann and Lu +was formally verified in Isabelle, by a very clever use of +flex function and retrieve to carefully mimic the way a value is +built up by the injection funciton. + +In the previous work, Ausaf and Urban established the below equality: +\begin{lemma} +\isa{{\normalsize{}If\,}\ v\ {\isacharcolon}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}s\ {\normalsize \,then\,}\ Some\ {\isacharparenleft}{\kern0pt}flex\ r\ id\ s\ v{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ decode\ {\isacharparenleft}{\kern0pt}retrieve\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}\ r{\isachardot}{\kern0pt}} +\end{lemma} + +This lemma establishes a link with the lexer without bit-codes. + +With it we get the correctness of bit-coded algorithm. +\begin{lemma} +\isa{lexer\mbox{$_b$}\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s} +\end{lemma} + +However what is not certain is whether we can add simplification +to the bit-coded algorithm, without breaking the correct lexing output. + + +The reason that we do need to add a simplification phase +after each derivative step of $\textit{blexer}$ is +because it produces intermediate +regular expressions that can grow exponentially. +For example, the regular expression $(a+aa)^*$ after taking +derivative against just 10 $a$s will have size 8192. + +%TODO: add figure for this? + + +Therefore, we insert a simplification phase +after each derivation step, as defined below: +\begin{lemma} +\isa{blexer{\isacharunderscore}{\kern0pt}simp\ r\ s\ {\isasymequiv}\ \textrm{if}\ nullable\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}\ \textrm{then}\ decode\ {\isacharparenleft}{\kern0pt}mkeps\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ r\ \textrm{else}\ None} +\end{lemma} + +The simplification function is given as follows: + +\begin{center} + \begin{tabular}{lcl} + \isa{bsimp\ {\isacharparenleft}{\kern0pt}ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}ASEQ\ bs\ {\isacharparenleft}{\kern0pt}bsimp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}bsimp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp\ {\isacharparenleft}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ rs{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharparenleft}{\kern0pt}distinctBy\ {\isacharparenleft}{\kern0pt}flts\ {\isacharparenleft}{\kern0pt}map\ bsimp\ rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ erase\ {\isasymemptyset}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp\ AZERO} & $\dn$ & \isa{AZERO}\\ + +\end{tabular} +\end{center} + +And the two helper functions are: +\begin{center} + \begin{tabular}{lcl} + \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs\isactrlsub {\isadigit{1}}\ {\isacharbrackleft}{\kern0pt}r{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}ASEQ\ bs\isactrlsub {\isadigit{1}}\ {\isacharparenleft}{\kern0pt}bsimp\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}bsimp\ r{\isadigit{2}}{\isachardot}{\kern0pt}{\isadigit{0}}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharbrackleft}{\kern0pt}r{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharparenleft}{\kern0pt}distinctBy\ {\isacharparenleft}{\kern0pt}flts\ {\isacharparenleft}{\kern0pt}map\ bsimp\ rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ erase\ {\isasymemptyset}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharparenleft}{\kern0pt}v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vb\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vc{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{AZERO}\\ + +\end{tabular} +\end{center} + + +This might sound trivial in the case of producing a YES/NO answer, +but once we require a lexing output to be produced (which is required +in applications like compiler front-end, malicious attack domain extraction, +etc.), it is not straightforward if we still extract what is needed according +to the POSIX standard. + + + + + +By simplification, we mean specifically the following rules: + +\begin{center} + \begin{tabular}{lcl} + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{ASEQ\ bs\ AZERO\ r\isactrlsub {\isadigit{2}}\ {\isasymleadsto}\ AZERO}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ AZERO\ {\isasymleadsto}\ AZERO}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{ASEQ\ bs\ {\isacharparenleft}{\kern0pt}AONE\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymleadsto}\ fuse\ {\isacharparenleft}{\kern0pt}bs\ {\isacharat}{\kern0pt}\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{r\isactrlsub {\isadigit{1}}\ {\isasymleadsto}\ r\isactrlsub {\isadigit{2}}}}{\mbox{ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{3}}\ {\isasymleadsto}\ ASEQ\ bs\ r\isactrlsub {\isadigit{2}}\ r\isactrlsub {\isadigit{3}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{r\isactrlsub {\isadigit{3}}\ {\isasymleadsto}\ r\isactrlsub {\isadigit{4}}}}{\mbox{ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{3}}\ {\isasymleadsto}\ ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{4}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{r\ {\isasymleadsto}\ r{\isacharprime}{\kern0pt}}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}r{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}r{\isacharprime}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}AZERO{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}AALTs\ bs\isactrlsub {\isadigit{1}}\ rs\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ map\ {\isacharparenleft}{\kern0pt}fuse\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ rs\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ {\isacharparenleft}{\kern0pt}bs\ {\isacharat}{\kern0pt}\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ rs\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}map\ {\isacharparenleft}{\kern0pt}fuse\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ rs{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymleadsto}\ AZERO}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharbrackleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isasymleadsto}\ fuse\ bs\ r\isactrlsub {\isadigit{1}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{a\isactrlsub {\isadigit{1}}\mbox{$^\downarrow$}\ {\isacharequal}{\kern0pt}\ a\isactrlsub {\isadigit{2}}\mbox{$^\downarrow$}}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}a\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}a\isactrlsub {\isadigit{2}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub c{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}a\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b\ {\isacharat}{\kern0pt}\ rs\isactrlsub c{\isacharparenright}{\kern0pt}}}}\\ + + \end{tabular} +\end{center} + + +And these can be made compact by the following simplification function: + +where the function $\mathit{bsimp_AALTs}$ + +The core idea of the proof is that two regular expressions, +if "isomorphic" up to a finite number of rewrite steps, will +remain "isomorphic" when we take the same sequence of +derivatives on both of them. +This can be expressed by the following rewrite relation lemma: +\begin{lemma} +\isa{{\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}\ {\isasymleadsto}{\isacharasterisk}{\kern0pt}\ bders{\isacharunderscore}{\kern0pt}simp\ r\ s} +\end{lemma} + +This isomorphic relation implies a property that leads to the +correctness result: +if two (nullable) regular expressions are "rewritable" in many steps +from one another, +then a call to function $\textit{bmkeps}$ gives the same +bit-sequence : +\begin{lemma} +\isa{{\normalsize{}If\,}\ \mbox{r{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isasymleadsto}{\isacharasterisk}{\kern0pt}\ r{\isadigit{2}}{\isachardot}{\kern0pt}{\isadigit{0}}}\ {\normalsize \,and\,}\ \mbox{nullable\mbox{$_b$}\ r{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}}\ {\normalsize \,then\,}\ mkeps\mbox{$_b$}\ r{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharequal}{\kern0pt}\ mkeps\mbox{$_b$}\ r{\isadigit{2}}{\isachardot}{\kern0pt}{\isadigit{0}}{\isachardot}{\kern0pt}} +\end{lemma} + +Given the same bit-sequence, the decode function +will give out the same value, which is the output +of both lexers: +\begin{lemma} +\isa{lexer\mbox{$_b$}\ r\ s\ {\isasymequiv}\ \textrm{if}\ nullable\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}\ \textrm{then}\ decode\ {\isacharparenleft}{\kern0pt}mkeps\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ r\ \textrm{else}\ None} +\end{lemma} + +\begin{lemma} +\isa{blexer{\isacharunderscore}{\kern0pt}simp\ r\ s\ {\isasymequiv}\ \textrm{if}\ nullable\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}\ \textrm{then}\ decode\ {\isacharparenleft}{\kern0pt}mkeps\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ r\ \textrm{else}\ None} +\end{lemma} + +And that yields the correctness result: +\begin{lemma} +\isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ blexer{\isacharunderscore}{\kern0pt}simp\ r\ s} +\end{lemma} + +The nice thing about the above% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Additional Simp Rules?% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +One question someone would ask is: +can we add more "atomic" simplification/rewriting rules, +so the simplification is even more aggressive, making +the intermediate results smaller, and therefore more space-efficient? +For example, one might want to do open up alternatives who is a child +of a sequence: + +\begin{center} + \begin{tabular}{lcl} + \isa{ASEQ\ bs\ {\isacharparenleft}{\kern0pt}AALTs\ bs{\isadigit{1}}\ rs{\isacharparenright}{\kern0pt}\ r\ {\isasymleadsto}{\isacharquery}{\kern0pt}\ AALTs\ {\isacharparenleft}{\kern0pt}bs\ {\isacharat}{\kern0pt}\ bs{\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}map\ {\isacharparenleft}{\kern0pt}{\isasymlambda}r{\isacharprime}{\kern0pt}{\isachardot}{\kern0pt}\ ASEQ\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharprime}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ rs{\isacharparenright}{\kern0pt}}\\ + \end{tabular} +\end{center} + +This rule allows us to simplify \mbox{\isa{{\isacharparenleft}{\kern0pt}a\ {\isacharplus}{\kern0pt}\ b{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ c\ {\isacharplus}{\kern0pt}\ a\ {\isasymcdot}\ c}} + into \mbox{\isa{a\ {\isasymcdot}\ c\ {\isacharplus}{\kern0pt}\ b\ {\isasymcdot}\ c}}, +which cannot be done under the rrewrite rule because only alternatives which are +children of another alternative can be spilled out.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Introduction% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em +derivative} \isa{r{\isacharbackslash}{\kern0pt}c} of a regular expression \isa{r} w.r.t.\ +a character~\isa{c}, and showed that it gave a simple solution to the +problem of matching a string \isa{s} with a regular expression \isa{r}: if the derivative of \isa{r} w.r.t.\ (in succession) all the +characters of the string matches the empty string, then \isa{r} +matches \isa{s} (and {\em vice versa}). The derivative has the +property (which may almost be regarded as its specification) that, for +every string \isa{s} and regular expression \isa{r} and character +\isa{c}, one has \isa{cs\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} if and only if \mbox{\isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}}. +The beauty of Brzozowski's derivatives is that +they are neatly expressible in any functional language, and easily +definable and reasoned about in theorem provers---the definitions just +consist of inductive datatypes and simple recursive functions. A +mechanised correctness proof of Brzozowski's matcher in for example HOL4 +has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in +Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}. +And another one in Coq is given by Coquand and Siles \cite{Coquand2012}. + +If a regular expression matches a string, then in general there is more +than one way of how the string is matched. There are two commonly used +disambiguation strategies to generate a unique answer: one is called +GREEDY matching \cite{Frisch2004} and the other is POSIX +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. +For example consider the string \isa{xy} and the regular expression +\mbox{\isa{{\isacharparenleft}{\kern0pt}x\ {\isacharplus}{\kern0pt}\ y\ {\isacharplus}{\kern0pt}\ xy{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}}}. Either the string can be +matched in two `iterations' by the single letter-regular expressions +\isa{x} and \isa{y}, or directly in one iteration by \isa{xy}. The +first case corresponds to GREEDY matching, which first matches with the +left-most symbol and only matches the next symbol in case of a mismatch +(this is greedy in the sense of preferring instant gratification to +delayed repletion). The second case is POSIX matching, which prefers the +longest match. + +In the context of lexing, where an input string needs to be split up +into a sequence of tokens, POSIX is the more natural disambiguation +strategy for what programmers consider basic syntactic building blocks +in their programs. These building blocks are often specified by some +regular expressions, say \isa{r\isactrlbsub key\isactrlesub } and \isa{r\isactrlbsub id\isactrlesub } for recognising keywords and identifiers, +respectively. There are a few underlying (informal) rules behind +tokenising a string in a POSIX \cite{POSIX} fashion: + +\begin{itemize} +\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}): +The longest initial substring matched by any regular expression is taken as +next token.\smallskip + +\item[$\bullet$] \emph{Priority Rule:} +For a particular longest initial substring, the first (leftmost) regular expression +that can match determines the token.\smallskip + +\item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall +not match an empty string unless this is the only match for the repetition.\smallskip + +\item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to +be longer than no match at all. +\end{itemize} + +\noindent Consider for example a regular expression \isa{r\isactrlbsub key\isactrlesub } for recognising keywords such as \isa{if}, +\isa{then} and so on; and \isa{r\isactrlbsub id\isactrlesub } +recognising identifiers (say, a single character followed by +characters or numbers). Then we can form the regular expression +\isa{{\isacharparenleft}{\kern0pt}r\isactrlbsub key\isactrlesub \ {\isacharplus}{\kern0pt}\ r\isactrlbsub id\isactrlesub {\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}} +and use POSIX matching to tokenise strings, say \isa{iffoo} and +\isa{if}. For \isa{iffoo} we obtain by the Longest Match Rule +a single identifier token, not a keyword followed by an +identifier. For \isa{if} we obtain by the Priority Rule a keyword +token, not an identifier token---even if \isa{r\isactrlbsub id\isactrlesub } +matches also. By the Star Rule we know \isa{{\isacharparenleft}{\kern0pt}r\isactrlbsub key\isactrlesub \ {\isacharplus}{\kern0pt}\ r\isactrlbsub id\isactrlesub {\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}} matches \isa{iffoo}, +respectively \isa{if}, in exactly one `iteration' of the star. The +Empty String Rule is for cases where, for example, the regular expression +\isa{{\isacharparenleft}{\kern0pt}a\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}} matches against the +string \isa{bc}. Then the longest initial matched substring is the +empty string, which is matched by both the whole regular expression +and the parenthesised subexpression. + + +One limitation of Brzozowski's matcher is that it only generates a +YES/NO answer for whether a string is being matched by a regular +expression. Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher +to allow generation not just of a YES/NO answer but of an actual +matching, called a [lexical] {\em value}. Assuming a regular +expression matches a string, values encode the information of +\emph{how} the string is matched by the regular expression---that is, +which part of the string is matched by which part of the regular +expression. For this consider again the string \isa{xy} and +the regular expression \mbox{\isa{{\isacharparenleft}{\kern0pt}x\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}y\ {\isacharplus}{\kern0pt}\ xy{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}}} +(this time fully parenthesised). We can view this regular expression +as tree and if the string \isa{xy} is matched by two Star +`iterations', then the \isa{x} is matched by the left-most +alternative in this tree and the \isa{y} by the right-left alternative. This +suggests to record this matching as + +\begin{center} +\isa{Stars\ {\isacharbrackleft}{\kern0pt}Left\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}{\isacharcomma}{\kern0pt}\ Right\ {\isacharparenleft}{\kern0pt}Left\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharbrackright}{\kern0pt}} +\end{center} + +\noindent where \isa{Stars}, \isa{Left}, \isa{Right} and \isa{Char} are constructors for values. \isa{Stars} records how many +iterations were used; \isa{Left}, respectively \isa{Right}, which +alternative is used. This `tree view' leads naturally to the idea that +regular expressions act as types and values as inhabiting those types +(see, for example, \cite{HosoyaVouillonPierce2005}). The value for +matching \isa{xy} in a single `iteration', i.e.~the POSIX value, +would look as follows + +\begin{center} +\isa{Stars\ {\isacharbrackleft}{\kern0pt}Seq\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharbrackright}{\kern0pt}} +\end{center} + +\noindent where \isa{Stars} has only a single-element list for the +single iteration and \isa{Seq} indicates that \isa{xy} is matched +by a sequence regular expression. + +%, which we will in what follows +%write more formally as \isa{x\ {\isasymcdot}\ y}. + + +Sulzmann and Lu give a simple algorithm to calculate a value that +appears to be the value associated with POSIX matching. The challenge +then is to specify that value, in an algorithm-independent fashion, +and to show that Sulzmann and Lu's derivative-based algorithm does +indeed calculate a value that is correct according to the +specification. The answer given by Sulzmann and Lu +\cite{Sulzmann2014} is to define a relation (called an ``order +relation'') on the set of values of \isa{r}, and to show that (once +a string to be matched is chosen) there is a maximum element and that +it is computed by their derivative-based algorithm. This proof idea is +inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY +regular expression matching algorithm. However, we were not able to +establish transitivity and totality for the ``order relation'' by +Sulzmann and Lu. There are some inherent problems with their approach +(of which some of the proofs are not published in +\cite{Sulzmann2014}); perhaps more importantly, we give in this paper +a simple inductive (and algorithm-independent) definition of what we +call being a {\em POSIX value} for a regular expression \isa{r} and +a string \isa{s}; we show that the algorithm by Sulzmann and Lu +computes such a value and that such a value is unique. Our proofs are +both done by hand and checked in Isabelle/HOL. The experience of +doing our proofs has been that this mechanical checking was absolutely +essential: this subject area has hidden snares. This was also noted by +Kuklewicz \cite{Kuklewicz} who found that nearly all POSIX matching +implementations are ``buggy'' \cite[Page 203]{Sulzmann2014} and by +Grathwohl et al \cite[Page 36]{CrashCourse2014} who wrote: + +\begin{quote} +\it{}``The POSIX strategy is more complicated than the greedy because of +the dependence on information about the length of matched strings in the +various subexpressions.'' +\end{quote} + + + +\noindent {\bf Contributions:} We have implemented in Isabelle/HOL the +derivative-based regular expression matching algorithm of +Sulzmann and Lu \cite{Sulzmann2014}. We have proved the correctness of this +algorithm according to our specification of what a POSIX value is (inspired +by work of Vansummeren \cite{Vansummeren2006}). Sulzmann +and Lu sketch in \cite{Sulzmann2014} an informal correctness proof: but to +us it contains unfillable gaps.\footnote{An extended version of +\cite{Sulzmann2014} is available at the website of its first author; this +extended version already includes remarks in the appendix that their +informal proof contains gaps, and possible fixes are not fully worked out.} +Our specification of a POSIX value consists of a simple inductive definition +that given a string and a regular expression uniquely determines this value. +We also show that our definition is equivalent to an ordering +of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2010}. + +%Derivatives as calculated by Brzozowski's method are usually more complex +%regular expressions than the initial one; various optimisations are +%possible. We prove the correctness when simplifications of \isa{\isactrlbold {\isadigit{0}}\ {\isacharplus}{\kern0pt}\ r}, +%\isa{r\ {\isacharplus}{\kern0pt}\ \isactrlbold {\isadigit{0}}}, \isa{\isactrlbold {\isadigit{1}}\ {\isasymcdot}\ r} and \isa{r\ {\isasymcdot}\ \isactrlbold {\isadigit{1}}} to +%\isa{r} are applied. + +We extend our results to ??? Bitcoded version??% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Preliminaries% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +\noindent Strings in Isabelle/HOL are lists of characters with +the empty string being represented by the empty list, written \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}, and list-cons being written as \isa{\underline{\hspace{2mm}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}\underline{\hspace{2mm}}}. Often +we use the usual bracket notation for lists also for strings; for +example a string consisting of just a single character \isa{c} is +written \isa{{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}}. We use the usual definitions for +\emph{prefixes} and \emph{strict prefixes} of strings. By using the +type \isa{char} for characters we have a supply of finitely many +characters roughly corresponding to the ASCII character set. Regular +expressions are defined as usual as the elements of the following +inductive datatype: + + \begin{center} + \isa{r\ {\isacharcolon}{\kern0pt}{\isacharequal}{\kern0pt}} + \isa{\isactrlbold {\isadigit{0}}} $\mid$ + \isa{\isactrlbold {\isadigit{1}}} $\mid$ + \isa{c} $\mid$ + \isa{r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}} $\mid$ + \isa{r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} $\mid$ + \isa{r\isactrlsup {\isasymstar}} + \end{center} + + \noindent where \isa{\isactrlbold {\isadigit{0}}} stands for the regular expression that does + not match any string, \isa{\isactrlbold {\isadigit{1}}} for the regular expression that matches + only the empty string and \isa{c} for matching a character literal. The + language of a regular expression is also defined as usual by the + recursive function \isa{L} with the six clauses: + + \begin{center} + \begin{tabular}{l@ {\hspace{4mm}}rcl} + \textit{(1)} & \isa{L{\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isasymemptyset}}\\ + \textit{(2)} & \isa{L{\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \textit{(3)} & \isa{L{\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \textit{(4)} & \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(5)} & \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymunion}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(6)} & \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isasymstar}}\\ + \end{tabular} + \end{center} + + \noindent In clause \textit{(4)} we use the operation \isa{\underline{\hspace{2mm}}\ {\isacharat}{\kern0pt}\ \underline{\hspace{2mm}}} for the concatenation of two languages (it is also list-append for + strings). We use the star-notation for regular expressions and for + languages (in the last clause above). The star for languages is defined + inductively by two clauses: \isa{{\isacharparenleft}{\kern0pt}i{\isacharparenright}{\kern0pt}} the empty string being in + the star of a language and \isa{{\isacharparenleft}{\kern0pt}ii{\isacharparenright}{\kern0pt}} if \isa{s\isactrlsub {\isadigit{1}}} is in a + language and \isa{s\isactrlsub {\isadigit{2}}} in the star of this language, then also \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}} is in the star of this language. It will also be convenient + to use the following notion of a \emph{semantic derivative} (or \emph{left + quotient}) of a language defined as + % + \begin{center} + \isa{Der\ c\ A\ {\isasymequiv}\ {\isacharbraceleft}{\kern0pt}s\ \mbox{\boldmath$\mid$}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\ {\isasymin}\ A{\isacharbraceright}{\kern0pt}}\;. + \end{center} + + \noindent + For semantic derivatives we have the following equations (for example + mechanically proved in \cite{Krauss2011}): + % + \begin{equation}\label{SemDer} + \begin{array}{lcl} + \isa{Der\ c\ {\isasymemptyset}} & \dn & \isa{{\isasymemptyset}}\\ + \isa{Der\ c\ {\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}} & \dn & \isa{{\isasymemptyset}}\\ + \isa{Der\ c\ {\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}d{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}} & \dn & \isa{\textrm{if}\ c\ {\isacharequal}{\kern0pt}\ d\ \textrm{then}\ {\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \textrm{else}\ {\isasymemptyset}}\\ + \isa{Der\ c\ {\isacharparenleft}{\kern0pt}A\ {\isasymunion}\ B{\isacharparenright}{\kern0pt}} & \dn & \isa{Der\ c\ A\ {\isasymunion}\ Der\ c\ B}\\ + \isa{Der\ c\ {\isacharparenleft}{\kern0pt}A\ {\isacharat}{\kern0pt}\ B{\isacharparenright}{\kern0pt}} & \dn & \isa{{\isacharparenleft}{\kern0pt}Der\ c\ A\ {\isacharat}{\kern0pt}\ B{\isacharparenright}{\kern0pt}\ {\isasymunion}\ {\isacharparenleft}{\kern0pt}\textrm{if}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymin}\ A\ \textrm{then}\ Der\ c\ B\ \textrm{else}\ {\isasymemptyset}{\isacharparenright}{\kern0pt}}\\ + \isa{Der\ c\ {\isacharparenleft}{\kern0pt}A{\isasymstar}{\isacharparenright}{\kern0pt}} & \dn & \isa{Der\ c\ A\ {\isacharat}{\kern0pt}\ A{\isasymstar}} + \end{array} + \end{equation} + + + \noindent \emph{\Brz's derivatives} of regular expressions + \cite{Brzozowski1964} can be easily defined by two recursive functions: + the first is from regular expressions to booleans (implementing a test + when a regular expression can match the empty string), and the second + takes a regular expression and a character to a (derivative) regular + expression: + + \begin{center} + \begin{tabular}{lcl} + \isa{nullable\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{False}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{True}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{False}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{nullable\ r\isactrlsub {\isadigit{1}}\ {\isasymor}\ nullable\ r\isactrlsub {\isadigit{2}}}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{nullable\ r\isactrlsub {\isadigit{1}}\ {\isasymand}\ nullable\ r\isactrlsub {\isadigit{2}}}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{True}\medskip\\ + +% \end{tabular} +% \end{center} + +% \begin{center} +% \begin{tabular}{lcl} + + \isa{\isactrlbold {\isadigit{0}}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\isactrlbold {\isadigit{0}}}\\ + \isa{\isactrlbold {\isadigit{1}}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\isactrlbold {\isadigit{0}}}\\ + \isa{d{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\textrm{if}\ c\ {\isacharequal}{\kern0pt}\ d\ \textrm{then}\ \isactrlbold {\isadigit{1}}\ \textrm{else}\ \isactrlbold {\isadigit{0}}}\\ + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}\\ + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\textrm{if}\ nullable\ r\isactrlsub {\isadigit{1}}\ \textrm{then}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ \textrm{else}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}\\ + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsup {\isasymstar}} + \end{tabular} + \end{center} + + \noindent + We may extend this definition to give derivatives w.r.t.~strings: + + \begin{center} + \begin{tabular}{lcl} + \isa{r{\isacharbackslash}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{r}\\ + \isa{r{\isacharbackslash}{\kern0pt}{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}s}\\ + \end{tabular} + \end{center} + + \noindent Given the equations in \eqref{SemDer}, it is a relatively easy + exercise in mechanical reasoning to establish that + + \begin{proposition}\label{derprop}\mbox{}\\ + \begin{tabular}{ll} + \textit{(1)} & \isa{nullable\ r} if and only if + \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}, and \\ + \textit{(2)} & \isa{L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ Der\ c\ {\isacharparenleft}{\kern0pt}L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}. + \end{tabular} + \end{proposition} + + \noindent With this in place it is also very routine to prove that the + regular expression matcher defined as + % + \begin{center} + \isa{match\ r\ s\ {\isasymequiv}\ nullable\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}s{\isacharparenright}{\kern0pt}} + \end{center} + + \noindent gives a positive answer if and only if \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}. + Consequently, this regular expression matching algorithm satisfies the + usual specification for regular expression matching. While the matcher + above calculates a provably correct YES/NO answer for whether a regular + expression matches a string or not, the novel idea of Sulzmann and Lu + \cite{Sulzmann2014} is to append another phase to this algorithm in order + to calculate a [lexical] value. We will explain the details next.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{POSIX Regular Expression Matching\label{posixsec}% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +There have been many previous works that use values for encoding + \emph{how} a regular expression matches a string. + The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to + define a function on values that mirrors (but inverts) the + construction of the derivative on regular expressions. \emph{Values} + are defined as the inductive datatype + + \begin{center} + \isa{v\ {\isacharcolon}{\kern0pt}{\isacharequal}{\kern0pt}} + \isa{Empty} $\mid$ + \isa{Char\ c} $\mid$ + \isa{Left\ v} $\mid$ + \isa{Right\ v} $\mid$ + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} $\mid$ + \isa{Stars\ vs} + \end{center} + + \noindent where we use \isa{vs} to stand for a list of + values. (This is similar to the approach taken by Frisch and + Cardelli for GREEDY matching \cite{Frisch2004}, and Sulzmann and Lu + for POSIX matching \cite{Sulzmann2014}). The string underlying a + value can be calculated by the \isa{flat} function, written + \isa{{\isacharbar}{\kern0pt}\underline{\hspace{2mm}}{\isacharbar}{\kern0pt}} and defined as: + + \begin{center} + \begin{tabular}[t]{lcl} + \isa{{\isacharbar}{\kern0pt}Empty{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Char\ c{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Left\ v{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Right\ v{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}} + \end{tabular}\hspace{14mm} + \begin{tabular}[t]{lcl} + \isa{{\isacharbar}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Stars\ {\isacharparenleft}{\kern0pt}v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}Stars\ vs{\isacharbar}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent We will sometimes refer to the underlying string of a + value as \emph{flattened value}. We will also overload our notation and + use \isa{{\isacharbar}{\kern0pt}vs{\isacharbar}{\kern0pt}} for flattening a list of values and concatenating + the resulting strings. + + Sulzmann and Lu define + inductively an \emph{inhabitation relation} that associates values to + regular expressions. We define this relation as + follows:\footnote{Note that the rule for \isa{Stars} differs from + our earlier paper \cite{AusafDyckhoffUrban2016}. There we used the + original definition by Sulzmann and Lu which does not require that + the values \isa{v\ {\isasymin}\ vs} flatten to a non-empty + string. The reason for introducing the more restricted version of + lexical values is convenience later on when reasoning about an + ordering relation for values.} + + \begin{center} + \begin{tabular}{c@ {\hspace{12mm}}c}\label{prfintros} + \\[-8mm] + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{Empty\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{1}}}}} & + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{Char\ c\ {\isacharcolon}{\kern0pt}\ c}}}\\[4mm] + \isa{\mbox{}\inferrule{\mbox{v\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}{\mbox{Left\ v\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}}} & + \isa{\mbox{}\inferrule{\mbox{v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}{\mbox{Right\ v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}}\\[4mm] + \isa{\mbox{}\inferrule{\mbox{v\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}\\\ \mbox{v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}}{\mbox{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}}} & + \isa{\mbox{}\inferrule{\mbox{{\isasymforall}v{\isasymin}vs{\isachardot}{\kern0pt}\ v\ {\isacharcolon}{\kern0pt}\ r\ {\isasymand}\ {\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}}{\mbox{Stars\ vs\ {\isacharcolon}{\kern0pt}\ r\isactrlsup {\isasymstar}}}} + \end{tabular} + \end{center} + + \noindent where in the clause for \isa{Stars} we use the + notation \isa{v\ {\isasymin}\ vs} for indicating that \isa{v} is a + member in the list \isa{vs}. We require in this rule that every + value in \isa{vs} flattens to a non-empty string. The idea is that + \isa{Stars}-values satisfy the informal Star Rule (see Introduction) + where the $^\star$ does not match the empty string unless this is + the only match for the repetition. Note also that no values are + associated with the regular expression \isa{\isactrlbold {\isadigit{0}}}, and that the + only value associated with the regular expression \isa{\isactrlbold {\isadigit{1}}} is + \isa{Empty}. It is routine to establish how values ``inhabiting'' + a regular expression correspond to the language of a regular + expression, namely + + \begin{proposition}\label{inhabs} + \isa{L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbraceleft}{\kern0pt}{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ \mbox{\boldmath$\mid$}\ v\ {\isacharcolon}{\kern0pt}\ r{\isacharbraceright}{\kern0pt}} + \end{proposition} + + \noindent + Given a regular expression \isa{r} and a string \isa{s}, we define the + set of all \emph{Lexical Values} inhabited by \isa{r} with the underlying string + being \isa{s}:\footnote{Okui and Suzuki refer to our lexical values + as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic + values} by Cardelli and Frisch \cite{Frisch2004} is related, but not identical + to our lexical values.} + + \begin{center} + \isa{LV\ r\ s\ {\isasymequiv}\ {\isacharbraceleft}{\kern0pt}v\ \mbox{\boldmath$\mid$}\ v\ {\isacharcolon}{\kern0pt}\ r\ {\isasymand}\ {\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ s{\isacharbraceright}{\kern0pt}} + \end{center} + + \noindent The main property of \isa{LV\ r\ s} is that it is alway finite. + + \begin{proposition} + \isa{finite\ {\isacharparenleft}{\kern0pt}LV\ r\ s{\isacharparenright}{\kern0pt}} + \end{proposition} + + \noindent This finiteness property does not hold in general if we + remove the side-condition about \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} in the + \isa{Stars}-rule above. For example using Sulzmann and Lu's + less restrictive definition, \isa{LV\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} would contain + infinitely many values, but according to our more restricted + definition only a single value, namely \isa{LV\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbraceleft}{\kern0pt}Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}. + + If a regular expression \isa{r} matches a string \isa{s}, then + generally the set \isa{LV\ r\ s} is not just a singleton set. In + case of POSIX matching the problem is to calculate the unique lexical value + that satisfies the (informal) POSIX rules from the Introduction. + Graphically the POSIX value calculation algorithm by Sulzmann and Lu + can be illustrated by the picture in Figure~\ref{Sulz} where the + path from the left to the right involving \isa{derivatives}/\isa{nullable} is the first phase of the algorithm + (calculating successive \Brz's derivatives) and \isa{mkeps}/\isa{inj}, the path from right to left, the second + phase. This picture shows the steps required when a regular + expression, say \isa{r\isactrlsub {\isadigit{1}}}, matches the string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}}. We first build the three derivatives (according to + \isa{a}, \isa{b} and \isa{c}). We then use \isa{nullable} + to find out whether the resulting derivative regular expression + \isa{r\isactrlsub {\isadigit{4}}} can match the empty string. If yes, we call the + function \isa{mkeps} that produces a value \isa{v\isactrlsub {\isadigit{4}}} + for how \isa{r\isactrlsub {\isadigit{4}}} can match the empty string (taking into + account the POSIX constraints in case there are several ways). This + function is defined by the clauses: + +\begin{figure}[t] +\begin{center} +\begin{tikzpicture}[scale=2,node distance=1.3cm, + every node/.style={minimum size=6mm}] +\node (r1) {\isa{r\isactrlsub {\isadigit{1}}}}; +\node (r2) [right=of r1]{\isa{r\isactrlsub {\isadigit{2}}}}; +\draw[->,line width=1mm](r1)--(r2) node[above,midway] {\isa{\underline{\hspace{2mm}}{\isacharbackslash}{\kern0pt}a}}; +\node (r3) [right=of r2]{\isa{r\isactrlsub {\isadigit{3}}}}; +\draw[->,line width=1mm](r2)--(r3) node[above,midway] {\isa{\underline{\hspace{2mm}}{\isacharbackslash}{\kern0pt}b}}; +\node (r4) [right=of r3]{\isa{r\isactrlsub {\isadigit{4}}}}; +\draw[->,line width=1mm](r3)--(r4) node[above,midway] {\isa{\underline{\hspace{2mm}}{\isacharbackslash}{\kern0pt}c}}; +\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\isa{nullable}}}; +\node (v4) [below=of r4]{\isa{v\isactrlsub {\isadigit{4}}}}; +\draw[->,line width=1mm](r4) -- (v4); +\node (v3) [left=of v4] {\isa{v\isactrlsub {\isadigit{3}}}}; +\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\isa{inj\ r\isactrlsub {\isadigit{3}}\ c}}; +\node (v2) [left=of v3]{\isa{v\isactrlsub {\isadigit{2}}}}; +\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\isa{inj\ r\isactrlsub {\isadigit{2}}\ b}}; +\node (v1) [left=of v2] {\isa{v\isactrlsub {\isadigit{1}}}}; +\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\isa{inj\ r\isactrlsub {\isadigit{1}}\ a}}; +\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\isa{mkeps}}}; +\end{tikzpicture} +\end{center} +\mbox{}\\[-13mm] + +\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, +matching the string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}}. The first phase (the arrows from +left to right) is \Brz's matcher building successive derivatives. If the +last regular expression is \isa{nullable}, then the functions of the +second phase are called (the top-down and right-to-left arrows): first +\isa{mkeps} calculates a value \isa{v\isactrlsub {\isadigit{4}}} witnessing +how the empty string has been recognised by \isa{r\isactrlsub {\isadigit{4}}}. After +that the function \isa{inj} ``injects back'' the characters of the string into +the values. +\label{Sulz}} +\end{figure} + + \begin{center} + \begin{tabular}{lcl} + \isa{mkeps\ \isactrlbold {\isadigit{1}}} & $\dn$ & \isa{Empty}\\ + \isa{mkeps\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{mkeps\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{\textrm{if}\ nullable\ r\isactrlsub {\isadigit{1}}\ \textrm{then}\ Left\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ \textrm{else}\ Right\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{mkeps\ {\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent Note that this function needs only to be partially defined, + namely only for regular expressions that are nullable. In case \isa{nullable} fails, the string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}} cannot be matched by \isa{r\isactrlsub {\isadigit{1}}} and the null value \isa{None} is returned. Note also how this function + makes some subtle choices leading to a POSIX value: for example if an + alternative regular expression, say \isa{r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}, can + match the empty string and furthermore \isa{r\isactrlsub {\isadigit{1}}} can match the + empty string, then we return a \isa{Left}-value. The \isa{Right}-value will only be returned if \isa{r\isactrlsub {\isadigit{1}}} cannot match the empty + string. + + The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is + the construction of a value for how \isa{r\isactrlsub {\isadigit{1}}} can match the + string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}} from the value how the last derivative, \isa{r\isactrlsub {\isadigit{4}}} in Fig.~\ref{Sulz}, can match the empty string. Sulzmann and + Lu achieve this by stepwise ``injecting back'' the characters into the + values thus inverting the operation of building derivatives, but on the level + of values. The corresponding function, called \isa{inj}, takes three + arguments, a regular expression, a character and a value. For example in + the first (or right-most) \isa{inj}-step in Fig.~\ref{Sulz} the regular + expression \isa{r\isactrlsub {\isadigit{3}}}, the character \isa{c} from the last + derivative step and \isa{v\isactrlsub {\isadigit{4}}}, which is the value corresponding + to the derivative regular expression \isa{r\isactrlsub {\isadigit{4}}}. The result is + the new value \isa{v\isactrlsub {\isadigit{3}}}. The final result of the algorithm is + the value \isa{v\isactrlsub {\isadigit{1}}}. The \isa{inj} function is defined by recursion on regular + expressions and by analysing the shape of values (corresponding to + the derivative regular expressions). + % + \begin{center} + \begin{tabular}{l@ {\hspace{5mm}}lcl} + \textit{(1)} & \isa{inj\ d\ c\ {\isacharparenleft}{\kern0pt}Empty{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Char\ d}\\ + \textit{(2)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Left\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{Left\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}\\ + \textit{(3)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Right\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{Right\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(4)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Seq\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(5)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Left\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Seq\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(6)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Right\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Seq\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(7)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Seq\ v\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Stars\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent To better understand what is going on in this definition it + might be instructive to look first at the three sequence cases (clauses + \textit{(4)} -- \textit{(6)}). In each case we need to construct an ``injected value'' for + \isa{r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}. This must be a value of the form \isa{Seq\ \underline{\hspace{2mm}}\ \underline{\hspace{2mm}}}\,. Recall the clause of the \isa{derivative}-function + for sequence regular expressions: + + \begin{center} + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} $\dn$ \isa{\textrm{if}\ nullable\ r\isactrlsub {\isadigit{1}}\ \textrm{then}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ \textrm{else}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} + \end{center} + + \noindent Consider first the \isa{else}-branch where the derivative is \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}. The corresponding value must therefore + be of the form \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}}, which matches the left-hand + side in clause~\textit{(4)} of \isa{inj}. In the \isa{if}-branch the derivative is an + alternative, namely \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}. This means we either have to consider a \isa{Left}- or + \isa{Right}-value. In case of the \isa{Left}-value we know further it + must be a value for a sequence regular expression. Therefore the pattern + we match in the clause \textit{(5)} is \isa{Left\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}, + while in \textit{(6)} it is just \isa{Right\ v\isactrlsub {\isadigit{2}}}. One more interesting + point is in the right-hand side of clause \textit{(6)}: since in this case the + regular expression \isa{r\isactrlsub {\isadigit{1}}} does not ``contribute'' to + matching the string, that means it only matches the empty string, we need to + call \isa{mkeps} in order to construct a value for how \isa{r\isactrlsub {\isadigit{1}}} + can match this empty string. A similar argument applies for why we can + expect in the left-hand side of clause \textit{(7)} that the value is of the form + \isa{Seq\ v\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}}---the derivative of a star is \isa{{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsup {\isasymstar}}. Finally, the reason for why we can ignore the second argument + in clause \textit{(1)} of \isa{inj} is that it will only ever be called in cases + where \isa{c\ {\isacharequal}{\kern0pt}\ d}, but the usual linearity restrictions in patterns do + not allow us to build this constraint explicitly into our function + definition.\footnote{Sulzmann and Lu state this clause as \isa{inj\ c\ c\ {\isacharparenleft}{\kern0pt}Empty{\isacharparenright}{\kern0pt}} $\dn$ \isa{Char\ c}, + but our deviation is harmless.} + + The idea of the \isa{inj}-function to ``inject'' a character, say + \isa{c}, into a value can be made precise by the first part of the + following lemma, which shows that the underlying string of an injected + value has a prepended character \isa{c}; the second part shows that + the underlying string of an \isa{mkeps}-value is always the empty + string (given the regular expression is nullable since otherwise + \isa{mkeps} might not be defined). + + \begin{lemma}\mbox{}\smallskip\\\label{Prf_injval_flat} + \begin{tabular}{ll} + (1) & \isa{{\normalsize{}If\,}\ v\ {\isacharcolon}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c\ {\normalsize \,then\,}\ {\isacharbar}{\kern0pt}inj\ r\ c\ v{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}{\isachardot}{\kern0pt}}\\ + (2) & \isa{{\normalsize{}If\,}\ nullable\ r\ {\normalsize \,then\,}\ {\isacharbar}{\kern0pt}mkeps\ r{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isachardot}{\kern0pt}} + \end{tabular} + \end{lemma} + + \begin{proof} + Both properties are by routine inductions: the first one can, for example, + be proved by induction over the definition of \isa{derivatives}; the second by + an induction on \isa{r}. There are no interesting cases.\qed + \end{proof} + + Having defined the \isa{mkeps} and \isa{inj} function we can extend + \Brz's matcher so that a value is constructed (assuming the + regular expression matches the string). The clauses of the Sulzmann and Lu lexer are + + \begin{center} + \begin{tabular}{lcl} + \isa{lexer\ r\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{\textrm{if}\ nullable\ r\ \textrm{then}\ Some\ {\isacharparenleft}{\kern0pt}mkeps\ r{\isacharparenright}{\kern0pt}\ \textrm{else}\ None}\\ + \isa{lexer\ r\ {\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{case} \isa{lexer\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ s} \isa{of}\\ + & & \phantom{$|$} \isa{None} \isa{{\isasymRightarrow}} \isa{None}\\ + & & $|$ \isa{Some\ v} \isa{{\isasymRightarrow}} \isa{Some\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ v{\isacharparenright}{\kern0pt}} + \end{tabular} + \end{center} + + \noindent If the regular expression does not match the string, \isa{None} is + returned. If the regular expression \emph{does} + match the string, then \isa{Some} value is returned. One important + virtue of this algorithm is that it can be implemented with ease in any + functional programming language and also in Isabelle/HOL. In the remaining + part of this section we prove that this algorithm is correct. + + The well-known idea of POSIX matching is informally defined by some + rules such as the Longest Match and Priority Rules (see + Introduction); as correctly argued in \cite{Sulzmann2014}, this + needs formal specification. Sulzmann and Lu define an ``ordering + relation'' between values and argue that there is a maximum value, + as given by the derivative-based algorithm. In contrast, we shall + introduce a simple inductive definition that specifies directly what + a \emph{POSIX value} is, incorporating the POSIX-specific choices + into the side-conditions of our rules. Our definition is inspired by + the matching relation given by Vansummeren~\cite{Vansummeren2006}. + The relation we define is ternary and + written as \mbox{\isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}}, relating + strings, regular expressions and values; the inductive rules are given in + Figure~\ref{POSIXrules}. + We can prove that given a string \isa{s} and regular expression \isa{r}, the POSIX value \isa{v} is uniquely determined by \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}. + + % + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ \isactrlbold {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Empty}}}\isa{P}\isa{\isactrlbold {\isadigit{1}}} \qquad + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Char\ c}}}\isa{P}\isa{c}\medskip\\ + \isa{\mbox{}\inferrule{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}}{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Left\ v}}}\isa{P{\isacharplus}{\kern0pt}L}\qquad + \isa{\mbox{}\inferrule{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\\ \mbox{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}}{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Right\ v}}}\isa{P{\isacharplus}{\kern0pt}R}\medskip\\ + $\mprset{flushleft} + \inferrule + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} \qquad + \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{2}}} \\\\ + \isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}} + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}}}$\isa{PS}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}}}\isa{P{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\medskip\\ + $\mprset{flushleft} + \inferrule + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} \qquad + \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Stars\ vs} \qquad + \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} \\\\ + \isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}}} + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Stars\ {\isacharparenleft}{\kern0pt}v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}}}$\isa{P{\isasymstar}} + \end{tabular} + \end{center} + \caption{Our inductive definition of POSIX values.}\label{POSIXrules} + \end{figure} + + + + \begin{theorem}\mbox{}\smallskip\\\label{posixdeterm} + \begin{tabular}{ll} + (1) & If \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} then \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} and \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ s}.\\ + (2) & \isa{{\normalsize{}If\,}\ \mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\ {\normalsize \,and\,}\ \mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}\ {\normalsize \,then\,}\ v\ {\isacharequal}{\kern0pt}\ v{\isacharprime}{\kern0pt}{\isachardot}{\kern0pt}} + \end{tabular} + \end{theorem} + + \begin{proof} Both by induction on the definition of \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}. + The second parts follows by a case analysis of \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}} and + the first part.\qed + \end{proof} + + \noindent + We claim that our \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} relation captures the idea behind the four + informal POSIX rules shown in the Introduction: Consider for example the + rules \isa{P{\isacharplus}{\kern0pt}L} and \isa{P{\isacharplus}{\kern0pt}R} where the POSIX value for a string + and an alternative regular expression, that is \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}, + is specified---it is always a \isa{Left}-value, \emph{except} when the + string to be matched is not in the language of \isa{r\isactrlsub {\isadigit{1}}}; only then it + is a \isa{Right}-value (see the side-condition in \isa{P{\isacharplus}{\kern0pt}R}). + Interesting is also the rule for sequence regular expressions (\isa{PS}). The first two premises state that \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} + are the POSIX values for \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} and \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} + respectively. Consider now the third premise and note that the POSIX value + of this rule should match the string \mbox{\isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}}}. According to the + Longest Match Rule, we want that the \isa{s\isactrlsub {\isadigit{1}}} is the longest initial + split of \mbox{\isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}}} such that \isa{s\isactrlsub {\isadigit{2}}} is still recognised + by \isa{r\isactrlsub {\isadigit{2}}}. Let us assume, contrary to the third premise, that there + \emph{exist} an \isa{s\isactrlsub {\isadigit{3}}} and \isa{s\isactrlsub {\isadigit{4}}} such that \isa{s\isactrlsub {\isadigit{2}}} + can be split up into a non-empty string \isa{s\isactrlsub {\isadigit{3}}} and a possibly empty + string \isa{s\isactrlsub {\isadigit{4}}}. Moreover the longer string \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}} can be + matched by \isa{r\isactrlsub {\isadigit{1}}} and the shorter \isa{s\isactrlsub {\isadigit{4}}} can still be + matched by \isa{r\isactrlsub {\isadigit{2}}}. In this case \isa{s\isactrlsub {\isadigit{1}}} would \emph{not} be the + longest initial split of \mbox{\isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}}} and therefore \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} cannot be a POSIX value for \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}. + The main point is that our side-condition ensures the Longest + Match Rule is satisfied. + + A similar condition is imposed on the POSIX value in the \isa{P{\isasymstar}}-rule. Also there we want that \isa{s\isactrlsub {\isadigit{1}}} is the longest initial + split of \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}} and furthermore the corresponding value + \isa{v} cannot be flattened to the empty string. In effect, we require + that in each ``iteration'' of the star, some non-empty substring needs to + be ``chipped'' away; only in case of the empty string we accept \isa{Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} as the POSIX value. Indeed we can show that our POSIX values + are lexical values which exclude those \isa{Stars} that contain subvalues + that flatten to the empty string. + + \begin{lemma}\label{LVposix} + \isa{{\normalsize{}If\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\ {\normalsize \,then\,}\ v\ {\isasymin}\ LV\ r\ s{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By routine induction on \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}.\qed + \end{proof} + + \noindent + Next is the lemma that shows the function \isa{mkeps} calculates + the POSIX value for the empty string and a nullable regular expression. + + \begin{lemma}\label{lemmkeps} + \isa{{\normalsize{}If\,}\ nullable\ r\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ mkeps\ r{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By routine induction on \isa{r}.\qed + \end{proof} + + \noindent + The central lemma for our POSIX relation is that the \isa{inj}-function + preserves POSIX values. + + \begin{lemma}\label{Posix2} + \isa{{\normalsize{}If\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\ c\ v{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By induction on \isa{r}. We explain two cases. + + \begin{itemize} + \item[$\bullet$] Case \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}. There are + two subcases, namely \isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}} \mbox{\isa{v\ {\isacharequal}{\kern0pt}\ Left\ v{\isacharprime}{\kern0pt}}} and \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}; and \isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}} \isa{v\ {\isacharequal}{\kern0pt}\ Right\ v{\isacharprime}{\kern0pt}}, \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}} and \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}. In \isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}} we + know \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}, from which we can infer \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{1}}\ c\ v{\isacharprime}{\kern0pt}} by induction hypothesis and hence \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Left\ v{\isacharprime}{\kern0pt}{\isacharparenright}{\kern0pt}} as needed. Similarly + in subcase \isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}} where, however, in addition we have to use + Proposition~\ref{derprop}(2) in order to infer \isa{c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} from \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}.\smallskip + + \item[$\bullet$] Case \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}. There are three subcases: + + \begin{quote} + \begin{description} + \item[\isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}}] \isa{v\ {\isacharequal}{\kern0pt}\ Left\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} and \isa{nullable\ r\isactrlsub {\isadigit{1}}} + \item[\isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}}] \isa{v\ {\isacharequal}{\kern0pt}\ Right\ v\isactrlsub {\isadigit{1}}} and \isa{nullable\ r\isactrlsub {\isadigit{1}}} + \item[\isa{{\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}}] \isa{v\ {\isacharequal}{\kern0pt}\ Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} and \isa{{\isasymnot}\ nullable\ r\isactrlsub {\isadigit{1}}} + \end{description} + \end{quote} + + \noindent For \isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}} we know \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} and + \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{2}}} as well as + % + \[\isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\] + + \noindent From the latter we can infer by Proposition~\ref{derprop}(2): + % + \[\isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\] + + \noindent We can use the induction hypothesis for \isa{r\isactrlsub {\isadigit{1}}} to obtain + \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}}. Putting this all together allows us to infer + \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ v\isactrlsub {\isadigit{2}}}. The case \isa{{\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}} + is similar. + + For \isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}} we know \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} and + \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}. From the former + we have \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{1}}} by induction hypothesis + for \isa{r\isactrlsub {\isadigit{2}}}. From the latter we can infer + % + \[\isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\] + + \noindent By Lemma~\ref{lemmkeps} we know \isa{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ mkeps\ r\isactrlsub {\isadigit{1}}} + holds. Putting this all together, we can conclude with \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}, as required. + + Finally suppose \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\isactrlsup {\isasymstar}}. This case is very similar to the + sequence case, except that we need to also ensure that \isa{{\isacharbar}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}. This follows from \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}} (which in turn follows from \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} and the induction hypothesis).\qed + \end{itemize} + \end{proof} + + \noindent + With Lemma~\ref{Posix2} in place, it is completely routine to establish + that the Sulzmann and Lu lexer satisfies our specification (returning + the null value \isa{None} iff the string is not in the language of the regular expression, + and returning a unique POSIX value iff the string \emph{is} in the language): + + \begin{theorem}\mbox{}\smallskip\\\label{lexercorrect} + \begin{tabular}{ll} + (1) & \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} if and only if \isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ None}\\ + (2) & \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} if and only if \isa{{\isasymexists}v{\isachardot}{\kern0pt}\ lexer\ r\ s\ {\isacharequal}{\kern0pt}\ Some\ v\ {\isasymand}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\ + \end{tabular} + \end{theorem} + + \begin{proof} + By induction on \isa{s} using Lemma~\ref{lemmkeps} and \ref{Posix2}.\qed + \end{proof} + + \noindent In \textit{(2)} we further know by Theorem~\ref{posixdeterm} that the + value returned by the lexer must be unique. A simple corollary + of our two theorems is: + + \begin{corollary}\mbox{}\smallskip\\\label{lexercorrectcor} + \begin{tabular}{ll} + (1) & \isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ None} if and only if \isa{{\isasymnexists}v{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\ + (2) & \isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ Some\ v} if and only if \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\ + \end{tabular} + \end{corollary} + + \noindent This concludes our correctness proof. Note that we have + not changed the algorithm of Sulzmann and Lu,\footnote{All + deviations we introduced are harmless.} but introduced our own + specification for what a correct result---a POSIX value---should be. + In the next section we show that our specification coincides with + another one given by Okui and Suzuki using a different technique.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Ordering of Values according to Okui and Suzuki% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +While in the previous section we have defined POSIX values directly + in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}), + Sulzmann and Lu took a different approach in \cite{Sulzmann2014}: + they introduced an ordering for values and identified POSIX values + as the maximal elements. An extended version of \cite{Sulzmann2014} + is available at the website of its first author; this includes more + details of their proofs, but which are evidently not in final form + yet. Unfortunately, we were not able to verify claims that their + ordering has properties such as being transitive or having maximal + elements. + + Okui and Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} described + another ordering of values, which they use to establish the + correctness of their automata-based algorithm for POSIX matching. + Their ordering resembles some aspects of the one given by Sulzmann + and Lu, but overall is quite different. To begin with, Okui and + Suzuki identify POSIX values as minimal, rather than maximal, + elements in their ordering. A more substantial difference is that + the ordering by Okui and Suzuki uses \emph{positions} in order to + identify and compare subvalues. Positions are lists of natural + numbers. This allows them to quite naturally formalise the Longest + Match and Priority rules of the informal POSIX standard. Consider + for example the value \isa{v} + + \begin{center} + \isa{v\ {\isasymequiv}\ Stars\ {\isacharbrackleft}{\kern0pt}Seq\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharcomma}{\kern0pt}\ Char\ z{\isacharbrackright}{\kern0pt}} + \end{center} + + \noindent + At position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharcomma}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}} of this value is the + subvalue \isa{Char\ y} and at position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}} the + subvalue \isa{Char\ z}. At the `root' position, or empty list + \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}, is the whole value \isa{v}. Positions such as \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharcomma}{\kern0pt}{\isadigit{1}}{\isacharcomma}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}} or \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{2}}{\isacharbrackright}{\kern0pt}} are outside of \isa{v}. If it exists, the subvalue of \isa{v} at a position \isa{p}, written \isa{v\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub }, can be recursively defined by + + \begin{center} + \begin{tabular}{r@ {\hspace{0mm}}lcl} + \isa{v} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\isactrlesub } & \isa{{\isasymequiv}}& \isa{v}\\ + \isa{Left\ v} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{0}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}}& \isa{v\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub }\\ + \isa{Right\ v} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{1}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}} & + \isa{v\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub }\\ + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{0}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}} & + \isa{v\isactrlsub {\isadigit{1}}\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub } \\ + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{1}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } + & \isa{{\isasymequiv}} & + \isa{v\isactrlsub {\isadigit{2}}\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub } \\ + \isa{Stars\ vs} & \isa{{\isasymdownharpoonleft}\isactrlbsub n{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}}& \isa{vs\ensuremath{_{[\mathit{n}]}}\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub }\\ + \end{tabular} + \end{center} + + \noindent In the last clause we use Isabelle's notation \isa{vs\ensuremath{_{[\mathit{n}]}}} for the + \isa{n}th element in a list. The set of positions inside a value \isa{v}, + written \isa{Pos\ v}, is given by + + \begin{center} + \begin{tabular}{lcl} + \isa{Pos\ {\isacharparenleft}{\kern0pt}Empty{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Char\ c{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Left\ v{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{0}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Right\ v{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} + & \isa{{\isasymequiv}} + & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{0}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{2}}{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharparenleft}{\kern0pt}{\isasymUnion}n\ {\isacharless}{\kern0pt}\ len\ vs\ {\isacharbraceleft}{\kern0pt}n\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ vs\ensuremath{_{[\mathit{n}]}}{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent + whereby \isa{len} in the last clause stands for the length of a list. Clearly + for every position inside a value there exists a subvalue at that position. + + + To help understanding the ordering of Okui and Suzuki, consider again + the earlier value + \isa{v} and compare it with the following \isa{w}: + + \begin{center} + \begin{tabular}{l} + \isa{v\ {\isasymequiv}\ Stars\ {\isacharbrackleft}{\kern0pt}Seq\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharcomma}{\kern0pt}\ Char\ z{\isacharbrackright}{\kern0pt}}\\ + \isa{w\ {\isasymequiv}\ Stars\ {\isacharbrackleft}{\kern0pt}Char\ x{\isacharcomma}{\kern0pt}\ Char\ y{\isacharcomma}{\kern0pt}\ Char\ z{\isacharbrackright}{\kern0pt}} + \end{tabular} + \end{center} + + \noindent Both values match the string \isa{xyz}, that means if + we flatten these values at their respective root position, we obtain + \isa{xyz}. However, at position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}}, \isa{v} matches + \isa{xy} whereas \isa{w} matches only the shorter \isa{x}. So + according to the Longest Match Rule, we should prefer \isa{v}, + rather than \isa{w} as POSIX value for string \isa{xyz} (and + corresponding regular expression). In order to + formalise this idea, Okui and Suzuki introduce a measure for + subvalues at position \isa{p}, called the \emph{norm} of \isa{v} + at position \isa{p}. We can define this measure in Isabelle as an + integer as follows + + \begin{center} + \isa{{\isasymparallel}v{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isasymequiv}\ \textrm{if}\ p\ {\isasymin}\ Pos\ v\ \textrm{then}\ len\ {\isacharbar}{\kern0pt}v\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub {\isacharbar}{\kern0pt}\ \textrm{else}\ {\isacharminus}{\kern0pt}\ {\isadigit{1}}} + \end{center} + + \noindent where we take the length of the flattened value at + position \isa{p}, provided the position is inside \isa{v}; if + not, then the norm is \isa{{\isacharminus}{\kern0pt}{\isadigit{1}}}. The default for outside + positions is crucial for the POSIX requirement of preferring a + \isa{Left}-value over a \isa{Right}-value (if they can match the + same string---see the Priority Rule from the Introduction). For this + consider + + \begin{center} + \isa{v\ {\isasymequiv}\ Left\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}} \qquad and \qquad \isa{w\ {\isasymequiv}\ Right\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}} + \end{center} + + \noindent Both values match \isa{x}. At position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}} + the norm of \isa{v} is \isa{{\isadigit{1}}} (the subvalue matches \isa{x}), + but the norm of \isa{w} is \isa{{\isacharminus}{\kern0pt}{\isadigit{1}}} (the position is outside + \isa{w} according to how we defined the `inside' positions of + \isa{Left}- and \isa{Right}-values). Of course at position + \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}}, the norms \isa{{\isasymparallel}v{\isasymparallel}\isactrlbsub {\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}\isactrlesub } and \isa{{\isasymparallel}w{\isasymparallel}\isactrlbsub {\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}\isactrlesub } are reversed, but the point is that subvalues + will be analysed according to lexicographically ordered + positions. According to this ordering, the position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}} + takes precedence over \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}} and thus also \isa{v} will be + preferred over \isa{w}. The lexicographic ordering of positions, written + \isa{\underline{\hspace{2mm}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ \underline{\hspace{2mm}}}, can be conveniently formalised + by three inference rules + + \begin{center} + \begin{tabular}{ccc} + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps}}}\hspace{1cm} & + \isa{\mbox{}\inferrule{\mbox{p\isactrlsub {\isadigit{1}}\ {\isacharless}{\kern0pt}\ p\isactrlsub {\isadigit{2}}}}{\mbox{p\isactrlsub {\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{2}}}}}\hspace{1cm} & + \isa{\mbox{}\inferrule{\mbox{ps\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ ps\isactrlsub {\isadigit{2}}}}{\mbox{p\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{2}}}}} + \end{tabular} + \end{center} + + With the norm and lexicographic order in place, + we can state the key definition of Okui and Suzuki + \cite{OkuiSuzuki2010}: a value \isa{v\isactrlsub {\isadigit{1}}} is \emph{smaller at position \isa{p}} than + \isa{v\isactrlsub {\isadigit{2}}}, written \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub p\isactrlesub \ v\isactrlsub {\isadigit{2}}}, + if and only if $(i)$ the norm at position \isa{p} is + greater in \isa{v\isactrlsub {\isadigit{1}}} (that is the string \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub {\isacharbar}{\kern0pt}} is longer + than \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub {\isacharbar}{\kern0pt}}) and $(ii)$ all subvalues at + positions that are inside \isa{v\isactrlsub {\isadigit{1}}} or \isa{v\isactrlsub {\isadigit{2}}} and that are + lexicographically smaller than \isa{p}, we have the same norm, namely + + \begin{center} + \begin{tabular}{c} + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub p\isactrlesub \ v\isactrlsub {\isadigit{2}}} + \isa{{\isasymequiv}} + $\begin{cases} + (i) & \isa{{\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p\isactrlesub } \quad\text{and}\smallskip \\ + (ii) & \isa{{\isasymforall}q{\isasymin}Pos\ v\isactrlsub {\isadigit{1}}\ {\isasymunion}\ Pos\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}\ q\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\ {\isasymlongrightarrow}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub q\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub q\isactrlesub } + \end{cases}$ + \end{tabular} + \end{center} + + \noindent The position \isa{p} in this definition acts as the + \emph{first distinct position} of \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}}, where both values match strings of different length + \cite{OkuiSuzuki2010}. Since at \isa{p} the values \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} match different strings, the + ordering is irreflexive. Derived from the definition above + are the following two orderings: + + \begin{center} + \begin{tabular}{l} + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}\ {\isasymequiv}\ {\isasymexists}p{\isachardot}{\kern0pt}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub p\isactrlesub \ v\isactrlsub {\isadigit{2}}}\\ + \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}\ {\isasymequiv}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}\ {\isasymor}\ v\isactrlsub {\isadigit{1}}\ {\isacharequal}{\kern0pt}\ v\isactrlsub {\isadigit{2}}} + \end{tabular} + \end{center} + + While we encountered a number of obstacles for establishing properties like + transitivity for the ordering of Sulzmann and Lu (and which we failed + to overcome), it is relatively straightforward to establish this + property for the orderings + \isa{\underline{\hspace{2mm}}\ {\isasymprec}\ \underline{\hspace{2mm}}} and \isa{\underline{\hspace{2mm}}\ \mbox{$\preccurlyeq$}\ \underline{\hspace{2mm}}} + by Okui and Suzuki. + + \begin{lemma}[Transitivity]\label{transitivity} + \isa{{\normalsize{}If\,}\ \mbox{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}}\ {\normalsize \,and\,}\ \mbox{v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ v\isactrlsub {\isadigit{3}}}\ {\normalsize \,then\,}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{3}}{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} From the assumption we obtain two positions \isa{p} + and \isa{q}, where the values \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} (respectively \isa{v\isactrlsub {\isadigit{2}}} and \isa{v\isactrlsub {\isadigit{3}}}) are `distinct'. Since \isa{{\isasymprec}\isactrlbsub lex\isactrlesub } is trichotomous, we need to consider + three cases, namely \isa{p\ {\isacharequal}{\kern0pt}\ q}, \isa{p\ {\isasymprec}\isactrlbsub lex\isactrlesub \ q} and + \isa{q\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p}. Let us look at the first case. Clearly + \isa{{\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p\isactrlesub } and \isa{{\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p\isactrlesub } imply \isa{{\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p\isactrlesub }. It remains to show + that for a \isa{p{\isacharprime}{\kern0pt}\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}\ {\isasymunion}\ Pos\ v\isactrlsub {\isadigit{3}}} + with \isa{p{\isacharprime}{\kern0pt}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p} that \isa{{\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub } holds. Suppose \isa{p{\isacharprime}{\kern0pt}\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}}, then we can infer from the first assumption that \isa{{\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub }. But this means + that \isa{p{\isacharprime}{\kern0pt}} must be in \isa{Pos\ v\isactrlsub {\isadigit{2}}} too (the norm + cannot be \isa{{\isacharminus}{\kern0pt}{\isadigit{1}}} given \isa{p{\isacharprime}{\kern0pt}\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}}). + Hence we can use the second assumption and + infer \isa{{\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub }, + which concludes this case with \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{3}}}. The reasoning in the other cases is similar.\qed + \end{proof} + + \noindent + The proof for $\preccurlyeq$ is similar and omitted. + It is also straightforward to show that \isa{{\isasymprec}} and + $\preccurlyeq$ are partial orders. Okui and Suzuki furthermore show that they + are linear orderings for lexical values \cite{OkuiSuzuki2010} of a given + regular expression and given string, but we have not formalised this in Isabelle. It is + not essential for our results. What we are going to show below is + that for a given \isa{r} and \isa{s}, the orderings have a unique + minimal element on the set \isa{LV\ r\ s}, which is the POSIX value + we defined in the previous section. We start with two properties that + show how the length of a flattened value relates to the \isa{{\isasymprec}}-ordering. + + \begin{proposition}\mbox{}\smallskip\\\label{ordlen} + \begin{tabular}{@ {}ll} + (1) & + \isa{{\normalsize{}If\,}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}\ {\normalsize \,then\,}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isasymle}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}{\isachardot}{\kern0pt}}\\ + (2) & + \isa{{\normalsize{}If\,}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharless}{\kern0pt}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\normalsize \,then\,}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}} + \end{tabular} + \end{proposition} + + \noindent Both properties follow from the definition of the ordering. Note that + \textit{(2)} entails that a value, say \isa{v\isactrlsub {\isadigit{2}}}, whose underlying + string is a strict prefix of another flattened value, say \isa{v\isactrlsub {\isadigit{1}}}, then + \isa{v\isactrlsub {\isadigit{1}}} must be smaller than \isa{v\isactrlsub {\isadigit{2}}}. For our proofs it + will be useful to have the following properties---in each case the underlying strings + of the compared values are the same: + + \begin{proposition}\mbox{}\smallskip\\\label{ordintros} + \begin{tabular}{ll} + \textit{(1)} & + \isa{{\normalsize{}If\,}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\normalsize \,then\,}\ Left\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Right\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}}\\ + \textit{(2)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Left\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Left\ v\isactrlsub {\isadigit{2}}} \;iff\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(3)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Right\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Right\ v\isactrlsub {\isadigit{2}}} \;iff\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(4)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Seq\ v\ v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ Seq\ v\ w\isactrlsub {\isadigit{2}}} \;iff\; + \isa{v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ w\isactrlsub {\isadigit{2}}}\\ + \textit{(5)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;and\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ w\isactrlsub {\isadigit{1}}} \;then\; + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ Seq\ w\isactrlsub {\isadigit{1}}\ w\isactrlsub {\isadigit{2}}}\\ + \textit{(6)} & If + \isa{{\isacharbar}{\kern0pt}vs\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}vs\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Stars\ {\isacharparenleft}{\kern0pt}vs\ {\isacharat}{\kern0pt}\ vs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymprec}\ Stars\ {\isacharparenleft}{\kern0pt}vs\ {\isacharat}{\kern0pt}\ vs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} \;iff\; + \isa{Stars\ vs\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Stars\ vs\isactrlsub {\isadigit{2}}}\\ + + \textit{(7)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;and\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}} \;then\; + \isa{Stars\ {\isacharparenleft}{\kern0pt}v\isactrlsub {\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymprec}\ Stars\ {\isacharparenleft}{\kern0pt}v\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{proposition} + + \noindent One might prefer that statements \textit{(4)} and \textit{(5)} + (respectively \textit{(6)} and \textit{(7)}) + are combined into a single \textit{iff}-statement (like the ones for \isa{Left} and \isa{Right}). Unfortunately this cannot be done easily: such + a single statement would require an additional assumption about the + two values \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} and \isa{Seq\ w\isactrlsub {\isadigit{1}}\ w\isactrlsub {\isadigit{2}}} + being inhabited by the same regular expression. The + complexity of the proofs involved seems to not justify such a + `cleaner' single statement. The statements given are just the properties that + allow us to establish our theorems without any difficulty. The proofs + for Proposition~\ref{ordintros} are routine. + + + Next we establish how Okui and Suzuki's orderings relate to our + definition of POSIX values. Given a \isa{POSIX} value \isa{v\isactrlsub {\isadigit{1}}} + for \isa{r} and \isa{s}, then any other lexical value \isa{v\isactrlsub {\isadigit{2}}} in \isa{LV\ r\ s} is greater or equal than \isa{v\isactrlsub {\isadigit{1}}}, namely: + + + \begin{theorem}\label{orderone} + \isa{{\normalsize{}If\,}\ \mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}}\ {\normalsize \,and\,}\ \mbox{v\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ r\ s}\ {\normalsize \,then\,}\ v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}} + \end{theorem} + + \begin{proof} By induction on our POSIX rules. By + Theorem~\ref{posixdeterm} and the definition of \isa{LV}, it is clear + that \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} have the same + underlying string \isa{s}. The three base cases are + straightforward: for example for \isa{v\isactrlsub {\isadigit{1}}\ {\isacharequal}{\kern0pt}\ Empty}, we have + that \isa{v\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ \isactrlbold {\isadigit{1}}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} must also be of the form + \mbox{\isa{v\isactrlsub {\isadigit{2}}\ {\isacharequal}{\kern0pt}\ Empty}}. Therefore we have \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}. The inductive cases for + \isa{r} being of the form \isa{r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}} and + \isa{r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} are as follows: + + + \begin{itemize} + + \item[$\bullet$] Case \isa{P{\isacharplus}{\kern0pt}L} with \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Left\ w\isactrlsub {\isadigit{1}}}: In this case the value + \isa{v\isactrlsub {\isadigit{2}}} is either of the + form \isa{Left\ w\isactrlsub {\isadigit{2}}} or \isa{Right\ w\isactrlsub {\isadigit{2}}}. In the + latter case we can immediately conclude with \mbox{\isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}} since a \isa{Left}-value with the + same underlying string \isa{s} is always smaller than a + \isa{Right}-value by Proposition~\ref{ordintros}\textit{(1)}. + In the former case we have \isa{w\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ r\isactrlsub {\isadigit{1}}\ s} and can use the induction hypothesis to infer + \isa{w\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ w\isactrlsub {\isadigit{2}}}. Because \isa{w\isactrlsub {\isadigit{1}}} and \isa{w\isactrlsub {\isadigit{2}}} have the same underlying string + \isa{s}, we can conclude with \isa{Left\ w\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ Left\ w\isactrlsub {\isadigit{2}}} using + Proposition~\ref{ordintros}\textit{(2)}.\smallskip + + \item[$\bullet$] Case \isa{P{\isacharplus}{\kern0pt}R} with \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Right\ w\isactrlsub {\isadigit{1}}}: This case similar to the previous + case, except that we additionally know \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}. This is needed when \isa{v\isactrlsub {\isadigit{2}}} is of the form + \mbox{\isa{Left\ w\isactrlsub {\isadigit{2}}}}. Since \mbox{\isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \isa{{\isacharequal}{\kern0pt}\ s}} and \isa{w\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}, we can derive a contradiction for \mbox{\isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}} using + Proposition~\ref{inhabs}. So also in this case \mbox{\isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}}.\smallskip + + \item[$\bullet$] Case \isa{PS} with \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ w\isactrlsub {\isadigit{1}}\ w\isactrlsub {\isadigit{2}}}: We can assume \isa{v\isactrlsub {\isadigit{2}}\ {\isacharequal}{\kern0pt}\ Seq\ u\isactrlsub {\isadigit{1}}\ u\isactrlsub {\isadigit{2}}} with \isa{u\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}} and \mbox{\isa{u\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}}. We have \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}}. By the side-condition of the + \isa{PS}-rule we know that either \isa{s\isactrlsub {\isadigit{1}}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}} or that \isa{{\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}} is a strict prefix of + \isa{s\isactrlsub {\isadigit{1}}}. In the latter case we can infer \isa{w\isactrlsub {\isadigit{1}}\ {\isasymprec}\ u\isactrlsub {\isadigit{1}}} by + Proposition~\ref{ordlen}\textit{(2)} and from this \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}} by Proposition~\ref{ordintros}\textit{(5)} + (as noted above \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} must have the + same underlying string). + In the former case we know + \isa{u\isactrlsub {\isadigit{1}}\ {\isasymin}\ LV\ r\isactrlsub {\isadigit{1}}\ s\isactrlsub {\isadigit{1}}} and \isa{u\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ r\isactrlsub {\isadigit{2}}\ s\isactrlsub {\isadigit{2}}}. With this we can use the + induction hypotheses to infer \isa{w\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ u\isactrlsub {\isadigit{1}}} and \isa{w\isactrlsub {\isadigit{2}}\ \mbox{$\preccurlyeq$}\ u\isactrlsub {\isadigit{2}}}. By + Proposition~\ref{ordintros}\textit{(4,5)} we can again infer + \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}. + + \end{itemize} + + \noindent The case for \isa{P{\isasymstar}} is similar to the \isa{PS}-case and omitted.\qed + \end{proof} + + \noindent This theorem shows that our \isa{POSIX} value for a + regular expression \isa{r} and string \isa{s} is in fact a + minimal element of the values in \isa{LV\ r\ s}. By + Proposition~\ref{ordlen}\textit{(2)} we also know that any value in + \isa{LV\ r\ s{\isacharprime}{\kern0pt}}, with \isa{s{\isacharprime}{\kern0pt}} being a strict prefix, cannot be + smaller than \isa{v\isactrlsub {\isadigit{1}}}. The next theorem shows the + opposite---namely any minimal element in \isa{LV\ r\ s} must be a + \isa{POSIX} value. This can be established by induction on \isa{r}, but the proof can be drastically simplified by using the fact + from the previous section about the existence of a \isa{POSIX} value + whenever a string \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}. + + + \begin{theorem} + \isa{{\normalsize{}If\,}\ \mbox{v\isactrlsub {\isadigit{1}}\ {\isasymin}\ LV\ r\ s}\ {\normalsize \,and\,}\ \mbox{{\isasymforall}v\isactrlsub {\isadigit{2}}{\isasymin}LV\ r\ s{\isachardot}{\kern0pt}\ v\isactrlsub {\isadigit{2}}\ \mbox{$\not\prec$}\ v\isactrlsub {\isadigit{1}}}\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}{\isachardot}{\kern0pt}} + \end{theorem} + + \begin{proof} + If \isa{v\isactrlsub {\isadigit{1}}\ {\isasymin}\ LV\ r\ s} then + \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} by Proposition~\ref{inhabs}. Hence by Theorem~\ref{lexercorrect}(2) + there exists a + \isa{POSIX} value \isa{v\isactrlsub P} with \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub P} + and by Lemma~\ref{LVposix} we also have \mbox{\isa{v\isactrlsub P\ {\isasymin}\ LV\ r\ s}}. + By Theorem~\ref{orderone} we therefore have + \isa{v\isactrlsub P\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{1}}}. If \isa{v\isactrlsub P\ {\isacharequal}{\kern0pt}\ v\isactrlsub {\isadigit{1}}} then + we are done. Otherwise we have \isa{v\isactrlsub P\ {\isasymprec}\ v\isactrlsub {\isadigit{1}}}, which + however contradicts the second assumption about \isa{v\isactrlsub {\isadigit{1}}} being the smallest + element in \isa{LV\ r\ s}. So we are done in this case too.\qed + \end{proof} + + \noindent + From this we can also show + that if \isa{LV\ r\ s} is non-empty (or equivalently \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}) then + it has a unique minimal element: + + \begin{corollary} + \isa{{\normalsize{}If\,}\ LV\ r\ s\ {\isasymnoteq}\ {\isasymemptyset}\ {\normalsize \,then\,}\ {\isasymexists}{\isacharbang}{\kern0pt}vmin{\isachardot}{\kern0pt}\ vmin\ {\isasymin}\ LV\ r\ s\ {\isasymand}\ {\isacharparenleft}{\kern0pt}{\isasymforall}v{\isasymin}LV\ r\ s{\isachardot}{\kern0pt}\ vmin\ \mbox{$\preccurlyeq$}\ v{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}} + \end{corollary} + + + + \noindent To sum up, we have shown that the (unique) minimal elements + of the ordering by Okui and Suzuki are exactly the \isa{POSIX} + values we defined inductively in Section~\ref{posixsec}. This provides + an independent confirmation that our ternary relation formalises the + informal POSIX rules.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Bitcoded Lexing% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +Incremental calculation of the value. To simplify the proof we first define the function +\isa{flex} which calculates the ``iterated'' injection function. With this we can +rewrite the lexer as + +\begin{center} +\isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ {\isacharparenleft}{\kern0pt}\textrm{if}\ nullable\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}s{\isacharparenright}{\kern0pt}\ \textrm{then}\ Some\ {\isacharparenleft}{\kern0pt}flex\ r\ id\ s\ {\isacharparenleft}{\kern0pt}mkeps\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ \textrm{else}\ None{\isacharparenright}{\kern0pt}} +\end{center}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Optimisations% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +Derivatives as calculated by \Brz's method are usually more complex + regular expressions than the initial one; the result is that the + derivative-based matching and lexing algorithms are often abysmally slow. + However, various optimisations are possible, such as the simplifications + of \isa{\isactrlbold {\isadigit{0}}\ {\isacharplus}{\kern0pt}\ r}, \isa{r\ {\isacharplus}{\kern0pt}\ \isactrlbold {\isadigit{0}}}, \isa{\isactrlbold {\isadigit{1}}\ {\isasymcdot}\ r} and + \isa{r\ {\isasymcdot}\ \isactrlbold {\isadigit{1}}} to \isa{r}. These simplifications can speed up the + algorithms considerably, as noted in \cite{Sulzmann2014}. One of the + advantages of having a simple specification and correctness proof is that + the latter can be refined to prove the correctness of such simplification + steps. While the simplification of regular expressions according to + rules like + + \begin{equation}\label{Simpl} + \begin{array}{lcllcllcllcl} + \isa{\isactrlbold {\isadigit{0}}\ {\isacharplus}{\kern0pt}\ r} & \isa{{\isasymRightarrow}} & \isa{r} \hspace{8mm}%\\ + \isa{r\ {\isacharplus}{\kern0pt}\ \isactrlbold {\isadigit{0}}} & \isa{{\isasymRightarrow}} & \isa{r} \hspace{8mm}%\\ + \isa{\isactrlbold {\isadigit{1}}\ {\isasymcdot}\ r} & \isa{{\isasymRightarrow}} & \isa{r} \hspace{8mm}%\\ + \isa{r\ {\isasymcdot}\ \isactrlbold {\isadigit{1}}} & \isa{{\isasymRightarrow}} & \isa{r} + \end{array} + \end{equation} + + \noindent is well understood, there is an obstacle with the POSIX value + calculation algorithm by Sulzmann and Lu: if we build a derivative regular + expression and then simplify it, we will calculate a POSIX value for this + simplified derivative regular expression, \emph{not} for the original (unsimplified) + derivative regular expression. Sulzmann and Lu \cite{Sulzmann2014} overcome this obstacle by + not just calculating a simplified regular expression, but also calculating + a \emph{rectification function} that ``repairs'' the incorrect value. + + The rectification functions can be (slightly clumsily) implemented in + Isabelle/HOL as follows using some auxiliary functions: + + \begin{center} + \begin{tabular}{lcl} + \isa{F\isactrlbsub Right\isactrlesub \ f\ v} & $\dn$ & \isa{Right\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Left\isactrlesub \ f\ v} & $\dn$ & \isa{Left\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}}\\ + + \isa{F\isactrlbsub Alt\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}Right\ v{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Right\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Alt\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}Left\ v{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Left\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}}\\ + + \isa{F\isactrlbsub Seq{\isadigit{1}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ v} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Seq{\isadigit{2}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ v} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Seq\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\medskip\\ + %\end{tabular} + % + %\begin{tabular}{lcl} + \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharcomma}{\kern0pt}\ \underline{\hspace{2mm}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Right\isactrlesub \ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharcomma}{\kern0pt}\ \underline{\hspace{2mm}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Left\isactrlesub \ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Alt\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Seq{\isadigit{1}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Seq{\isadigit{2}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Seq\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent + The functions \isa{simp\isactrlbsub Alt\isactrlesub } and \isa{simp\isactrlbsub Seq\isactrlesub } encode the simplification rules + in \eqref{Simpl} and compose the rectification functions (simplifications can occur + deep inside the regular expression). The main simplification function is then + + \begin{center} + \begin{tabular}{lcl} + \isa{simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\ r} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r{\isacharcomma}{\kern0pt}\ id{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent where \isa{id} stands for the identity function. The + function \isa{simp} returns a simplified regular expression and a corresponding + rectification function. Note that we do not simplify under stars: this + seems to slow down the algorithm, rather than speed it up. The optimised + lexer is then given by the clauses: + + \begin{center} + \begin{tabular}{lcl} + \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{\textrm{if}\ nullable\ r\ \textrm{then}\ Some\ {\isacharparenleft}{\kern0pt}mkeps\ r{\isacharparenright}{\kern0pt}\ \textrm{else}\ None}\\ + \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ {\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{let\ {\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharcomma}{\kern0pt}\ f\isactrlsub r{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ simp\ {\isacharparenleft}{\kern0pt}r}$\backslash$\isa{c{\isacharparenright}{\kern0pt}\ in}\\ + & & \isa{case} \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\isactrlsub s\ s} \isa{of}\\ + & & \phantom{$|$} \isa{None} \isa{{\isasymRightarrow}} \isa{None}\\ + & & $|$ \isa{Some\ v} \isa{{\isasymRightarrow}} \isa{Some\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ {\isacharparenleft}{\kern0pt}f\isactrlsub r\ v{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}} + \end{tabular} + \end{center} + + \noindent + In the second clause we first calculate the derivative \isa{r{\isacharbackslash}{\kern0pt}c} + and then simpli + +text \isa{\ \ Incremental\ calculation\ of\ the\ value{\isachardot}{\kern0pt}\ To\ simplify\ the\ proof\ we\ first\ define\ the\ function\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ flex{\isacharbraceright}{\kern0pt}\ which\ calculates\ the\ {\isacharbackquote}{\kern0pt}{\isacharbackquote}{\kern0pt}iterated{\isacharprime}{\kern0pt}{\isacharprime}{\kern0pt}\ injection\ function{\isachardot}{\kern0pt}\ With\ this\ we\ can\ rewrite\ the\ lexer\ as\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ lexer{\isacharunderscore}{\kern0pt}flex{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{7}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{7}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ areg{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}{\isacharequal}{\kern0pt}{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}AZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}AONE\ bs{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ACHAR\ bs\ c{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}AALT\ bs\ r{\isadigit{1}}\ r{\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ASTAR\ bs\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ Some\ simple\ facts\ about\ erase\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ erase{\isacharunderscore}{\kern0pt}bder{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ erase{\isacharunderscore}{\kern0pt}intern{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}medskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ \ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}medskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharbrackleft}{\kern0pt}mode{\isacharequal}{\kern0pt}IfThen{\isacharbrackright}{\kern0pt}\ bder{\isacharunderscore}{\kern0pt}retrieve{\isacharbraceright}{\kern0pt}\ \ By\ induction\ on\ {\isasymopen}r{\isasymclose}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}{\isacharbrackleft}{\kern0pt}Main\ Lemma{\isacharbrackright}{\kern0pt}{\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharbrackleft}{\kern0pt}mode{\isacharequal}{\kern0pt}IfThen{\isacharbrackright}{\kern0pt}\ MAIN{\isacharunderscore}{\kern0pt}decode{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ Definition\ of\ the\ bitcoded\ lexer\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ blexer{\isacharunderscore}{\kern0pt}def{\isacharbraceright}{\kern0pt}\ \ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ blexer{\isacharunderscore}{\kern0pt}correctness{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ \ } + +section \isa{Optimisations} + +text \isa{\ \ Derivatives\ as\ calculated\ by\ {\isacharbackslash}{\kern0pt}Brz{\isacharprime}{\kern0pt}s\ method\ are\ usually\ more\ complex\ regular\ expressions\ than\ the\ initial\ one{\isacharsemicolon}{\kern0pt}\ the\ result\ is\ that\ the\ derivative{\isacharminus}{\kern0pt}based\ matching\ and\ lexing\ algorithms\ are\ often\ abysmally\ slow{\isachardot}{\kern0pt}\ However{\isacharcomma}{\kern0pt}\ various\ optimisations\ are\ possible{\isacharcomma}{\kern0pt}\ such\ as\ the\ simplifications\ of\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ ZERO\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ r\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ ONE\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ r\ ONE{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ to\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ These\ simplifications\ can\ speed\ up\ the\ algorithms\ considerably{\isacharcomma}{\kern0pt}\ as\ noted\ in\ {\isacharbackslash}{\kern0pt}cite{\isacharbraceleft}{\kern0pt}Sulzmann{\isadigit{2}}{\isadigit{0}}{\isadigit{1}}{\isadigit{4}}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ One\ of\ the\ advantages\ of\ having\ a\ simple\ specification\ and\ correctness\ proof\ is\ that\ the\ latter\ can\ be\ refined\ to\ prove\ the\ correctness\ of\ such\ simplification\ steps{\isachardot}{\kern0pt}\ While\ the\ simplification\ of\ regular\ expressions\ according\ to\ rules\ like\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}equation{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}label{\isacharbraceleft}{\kern0pt}Simpl{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}array{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcllcllcllcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ ZERO\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}hspace{\isacharbraceleft}{\kern0pt}{\isadigit{8}}mm{\isacharbraceright}{\kern0pt}{\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ r\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}hspace{\isacharbraceleft}{\kern0pt}{\isadigit{8}}mm{\isacharbraceright}{\kern0pt}{\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ ONE\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}hspace{\isacharbraceleft}{\kern0pt}{\isadigit{8}}mm{\isacharbraceright}{\kern0pt}{\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ r\ ONE{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}array{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}equation{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ is\ well\ understood{\isacharcomma}{\kern0pt}\ there\ is\ an\ obstacle\ with\ the\ POSIX\ value\ calculation\ algorithm\ by\ Sulzmann\ and\ Lu{\isacharcolon}{\kern0pt}\ if\ we\ build\ a\ derivative\ regular\ expression\ and\ then\ simplify\ it{\isacharcomma}{\kern0pt}\ we\ will\ calculate\ a\ POSIX\ value\ for\ this\ simplified\ derivative\ regular\ expression{\isacharcomma}{\kern0pt}\ {\isacharbackslash}{\kern0pt}emph{\isacharbraceleft}{\kern0pt}not{\isacharbraceright}{\kern0pt}\ for\ the\ original\ {\isacharparenleft}{\kern0pt}unsimplified{\isacharparenright}{\kern0pt}\ derivative\ regular\ expression{\isachardot}{\kern0pt}\ Sulzmann\ and\ Lu\ {\isacharbackslash}{\kern0pt}cite{\isacharbraceleft}{\kern0pt}Sulzmann{\isadigit{2}}{\isadigit{0}}{\isadigit{1}}{\isadigit{4}}{\isacharbraceright}{\kern0pt}\ overcome\ this\ obstacle\ by\ not\ just\ calculating\ a\ simplified\ regular\ expression{\isacharcomma}{\kern0pt}\ but\ also\ calculating\ a\ {\isacharbackslash}{\kern0pt}emph{\isacharbraceleft}{\kern0pt}rectification\ function{\isacharbraceright}{\kern0pt}\ that\ {\isacharbackquote}{\kern0pt}{\isacharbackquote}{\kern0pt}repairs{\isacharprime}{\kern0pt}{\isacharprime}{\kern0pt}\ the\ incorrect\ value{\isachardot}{\kern0pt}\ \ The\ rectification\ functions\ can\ be\ {\isacharparenleft}{\kern0pt}slightly\ clumsily{\isacharparenright}{\kern0pt}\ implemented\ \ in\ Isabelle{\isacharslash}{\kern0pt}HOL\ as\ follows\ using\ some\ auxiliary\ functions{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}RIGHT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Right\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}LEFT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Left\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}ALT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Right\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}ALT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Left\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{1}}{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{2}}{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}medskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharpercent}{\kern0pt}\ {\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}ZERO{\isacharcomma}{\kern0pt}\ DUMMY{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}RIGHT\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}ZERO{\isacharcomma}{\kern0pt}\ DUMMY{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}LEFT\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}ALT\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}ONE{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{1}}\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}ONE{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{2}}\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}SEQ\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ The\ functions\ {\isasymopen}simp\isactrlbsub Alt\isactrlesub {\isasymclose}\ and\ {\isasymopen}simp\isactrlbsub Seq\isactrlesub {\isasymclose}\ encode\ the\ simplification\ rules\ in\ {\isacharbackslash}{\kern0pt}eqref{\isacharbraceleft}{\kern0pt}Simpl{\isacharbraceright}{\kern0pt}\ and\ compose\ the\ rectification\ functions\ {\isacharparenleft}{\kern0pt}simplifications\ can\ occur\ deep\ inside\ the\ regular\ expression{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ The\ main\ simplification\ function\ is\ then\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}SEQ\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r{\isacharcomma}{\kern0pt}\ id{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ where\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}id{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ stands\ for\ the\ identity\ function{\isachardot}{\kern0pt}\ The\ function\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ simp{\isacharbraceright}{\kern0pt}\ returns\ a\ simplified\ regular\ expression\ and\ a\ corresponding\ rectification\ function{\isachardot}{\kern0pt}\ Note\ that\ we\ do\ not\ simplify\ under\ stars{\isacharcolon}{\kern0pt}\ this\ seems\ to\ slow\ down\ the\ algorithm{\isacharcomma}{\kern0pt}\ rather\ than\ speed\ it\ up{\isachardot}{\kern0pt}\ The\ optimised\ lexer\ is\ then\ given\ by\ the\ clauses{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ slexer{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ slexer{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ slexer{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}let\ {\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharcomma}{\kern0pt}\ f\isactrlsub r{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ simp\ {\isacharparenleft}{\kern0pt}r\ {\isasymclose}{\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}backslash{\isachardollar}{\kern0pt}{\isasymopen}\ c{\isacharparenright}{\kern0pt}\ in{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}case{\isasymclose}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer\ r\isactrlsub s\ s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymopen}of{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharbackslash}{\kern0pt}phantom{\isacharbraceleft}{\kern0pt}{\isachardollar}{\kern0pt}{\isacharbar}{\kern0pt}{\isachardollar}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ None{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbar}{\kern0pt}{\isachardollar}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}Some\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isasymopen}Some\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ {\isacharparenleft}{\kern0pt}f\isactrlsub r\ v{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isasymclose}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ In\ the\ second\ clause\ we\ first\ calculate\ the\ derivative\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}der\ c\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ then\ simplify\ the\ result{\isachardot}{\kern0pt}\ This\ gives\ us\ a\ simplified\ derivative\ {\isasymopen}r\isactrlsub s{\isasymclose}\ and\ a\ rectification\ function\ {\isasymopen}f\isactrlsub r{\isasymclose}{\isachardot}{\kern0pt}\ The\ lexer\ is\ then\ recursively\ called\ with\ the\ simplified\ derivative{\isacharcomma}{\kern0pt}\ but\ before\ we\ inject\ the\ character\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ c{\isacharbraceright}{\kern0pt}\ into\ the\ value\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ v{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ we\ need\ to\ rectify\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ v{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}that\ is\ construct\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}f\isactrlsub r\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ Before\ we\ can\ establish\ the\ correctness\ of\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ we\ need\ to\ show\ that\ simplification\ preserves\ the\ language\ and\ simplification\ preserves\ our\ POSIX\ relation\ once\ the\ value\ is\ rectified\ {\isacharparenleft}{\kern0pt}recall\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ {\isachardoublequote}{\kern0pt}simp{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ generates\ a\ {\isacharparenleft}{\kern0pt}regular\ expression{\isacharcomma}{\kern0pt}\ rectification\ function{\isacharparenright}{\kern0pt}\ pair{\isacharparenright}{\kern0pt}{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}smallskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}label{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}ll{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ L{\isacharunderscore}{\kern0pt}fst{\isacharunderscore}{\kern0pt}simp{\isacharbrackleft}{\kern0pt}symmetric{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm{\isacharbrackleft}{\kern0pt}mode{\isacharequal}{\kern0pt}IfThen{\isacharbrackright}{\kern0pt}\ Posix{\isacharunderscore}{\kern0pt}simp{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ Both\ are\ by\ induction\ on\ {\isasymopen}r{\isasymclose}{\isachardot}{\kern0pt}\ There\ is\ no\ interesting\ case\ for\ the\ first\ statement{\isachardot}{\kern0pt}\ For\ the\ second\ statement{\isacharcomma}{\kern0pt}\ of\ interest\ are\ the\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\ {\isacharequal}{\kern0pt}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\ {\isacharequal}{\kern0pt}\ SEQ\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ cases{\isachardot}{\kern0pt}\ In\ each\ case\ we\ have\ to\ analyse\ four\ subcases\ whether\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ equals\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ ZERO{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}respectively\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ ONE{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ For\ example\ for\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\ {\isacharequal}{\kern0pt}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ consider\ the\ subcase\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymnoteq}\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ By\ assumption\ we\ know\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ From\ this\ we\ can\ infer\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ by\ IH\ also\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ r\isactrlsub {\isadigit{2}}\ {\isasymrightarrow}\ {\isacharparenleft}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ Given\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ we\ know\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}L\ {\isacharparenleft}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ By\ the\ first\ statement\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}L\ r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ is\ the\ empty\ set{\isacharcomma}{\kern0pt}\ meaning\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymnotin}\ L\ r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ Taking\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ and\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ together\ gives\ by\ the\ {\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isasymopen}P{\isacharplus}{\kern0pt}R{\isasymclose}{\isacharbraceright}{\kern0pt}{\isacharminus}{\kern0pt}rule\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}\ {\isasymrightarrow}\ Right\ {\isacharparenleft}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ In\ turn\ this\ gives\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ as\ we\ need\ to\ show{\isachardot}{\kern0pt}\ The\ other\ cases\ are\ similar{\isachardot}{\kern0pt}{\isacharbackslash}{\kern0pt}qed\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ We\ can\ now\ prove\ relatively\ straightforwardly\ that\ the\ optimised\ lexer\ produces\ the\ expected\ result{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ slexer{\isacharunderscore}{\kern0pt}correctness{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ By\ induction\ on\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ s{\isacharbraceright}{\kern0pt}\ generalising\ over\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ The\ case\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ is\ trivial{\isachardot}{\kern0pt}\ For\ the\ cons{\isacharminus}{\kern0pt}case\ suppose\ the\ string\ is\ of\ the\ form\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}c\ {\isacharhash}{\kern0pt}\ s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ By\ induction\ hypothesis\ we\ know\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ holds\ for\ all\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}in\ particular\ for\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ being\ the\ derivative\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}der\ c\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ Let\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\isactrlsub s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ be\ the\ simplified\ derivative\ regular\ expression{\isacharcomma}{\kern0pt}\ that\ is\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}f\isactrlsub r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ be\ the\ rectification\ function{\isacharcomma}{\kern0pt}\ that\ is\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ \ We\ distinguish\ the\ cases\ whether\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ L\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ or\ not{\isachardot}{\kern0pt}\ In\ the\ first\ case\ we\ have\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ a\ value\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ so\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ Some\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ der\ c\ r\ {\isasymrightarrow}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ hold{\isachardot}{\kern0pt}\ By\ Lemma{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ we\ can\ also\ infer\ from{\isachartilde}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ L\ r\isactrlsub s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ holds{\isachardot}{\kern0pt}\ \ Hence\ we\ know\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ that\ there\ exists\ a\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ with\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ Some\ v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ r\isactrlsub s\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ From\ the\ latter\ we\ know\ by\ Lemma{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ der\ c\ r\ {\isasymrightarrow}\ {\isacharparenleft}{\kern0pt}f\isactrlsub r\ v{\isacharprime}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ holds{\isachardot}{\kern0pt}\ By\ the\ uniqueness\ of\ the\ POSIX\ relation\ {\isacharparenleft}{\kern0pt}Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}posixdeterm{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}\ we\ can\ infer\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ v{\isacharbraceright}{\kern0pt}\ is\ equal\ to\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}f\isactrlsub r\ v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharminus}{\kern0pt}{\isacharminus}{\kern0pt}{\isacharminus}{\kern0pt}that\ is\ the\ rectification\ function\ applied\ to\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ produces\ the\ original\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ \ Now\ the\ case\ follows\ by\ the\ definitions\ of\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ lexer{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ slexer{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ \ In\ the\ second\ case\ where\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymnotin}\ L\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ we\ have\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ \ We\ also\ know\ by\ Lemma{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymnotin}\ L\ r\isactrlsub s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ Hence\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ and\ by\ IH\ then\ also\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ With\ this\ we\ can\ conclude\ in\ this\ case\ too{\isachardot}{\kern0pt}{\isacharbackslash}{\kern0pt}qed\ \ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ \ } +fy the result. This gives us a simplified derivative + \isa{r\isactrlsub s} and a rectification function \isa{f\isactrlsub r}. The lexer + is then recursively called with the simplified derivative, but before + we inject the character \isa{c} into the value \isa{v}, we need to rectify + \isa{v} (that is construct \isa{f\isactrlsub r\ v}). Before we can establish the correctness + of \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}}, we need to show that simplification preserves the language + and simplification preserves our POSIX relation once the value is rectified + (recall \isa{simp} generates a (regular expression, rectification function) pair): + + \begin{lemma}\mbox{}\smallskip\\\label{slexeraux} + \begin{tabular}{ll} + (1) & \isa{L{\isacharparenleft}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}\\ + (2) & \isa{{\normalsize{}If\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ fst\ {\isacharparenleft}{\kern0pt}simp\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ r{\isacharparenright}{\kern0pt}\ v{\isachardot}{\kern0pt}} + \end{tabular} + \end{lemma} + + \begin{proof} Both are by induction on \isa{r}. There is no + interesting case for the first statement. For the second statement, + of interest are the \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}} and \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} cases. In each case we have to analyse four subcases whether + \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} and \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} equals \isa{\isactrlbold {\isadigit{0}}} (respectively \isa{\isactrlbold {\isadigit{1}}}). For example for \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}, consider the subcase \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ \isactrlbold {\isadigit{0}}} and + \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymnoteq}\ \isactrlbold {\isadigit{0}}}. By assumption we know \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}. From this we can infer \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} + and by IH also (*) \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v}. Given \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ \isactrlbold {\isadigit{0}}} + we know \isa{L{\isacharparenleft}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isasymemptyset}}. By the first statement + \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} is the empty set, meaning (**) \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}. + Taking (*) and (**) together gives by the \mbox{\isa{P{\isacharplus}{\kern0pt}R}}-rule + \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Right\ {\isacharparenleft}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}}. In turn this + gives \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v} as we need to show. + The other cases are similar.\qed + \end{proof} + + \noindent We can now prove relatively straightforwardly that the + optimised lexer produces the expected result: + + \begin{theorem} + \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s} + \end{theorem} + + \begin{proof} By induction on \isa{s} generalising over \isa{r}. The case \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} is trivial. For the cons-case suppose the + string is of the form \isa{c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s}. By induction hypothesis we + know \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s} holds for all \isa{r} (in + particular for \isa{r} being the derivative \isa{r{\isacharbackslash}{\kern0pt}c}). Let \isa{r\isactrlsub s} be the simplified derivative regular expression, that is \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}, and \isa{f\isactrlsub r} be the rectification + function, that is \isa{snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}. We distinguish the cases + whether (*) \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}} or not. In the first case we + have by Theorem~\ref{lexercorrect}(2) a value \isa{v} so that \isa{lexer\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ Some\ v} and \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} hold. + By Lemma~\ref{slexeraux}(1) we can also infer from~(*) that \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharparenright}{\kern0pt}} holds. Hence we know by Theorem~\ref{lexercorrect}(2) that + there exists a \isa{v{\isacharprime}{\kern0pt}} with \isa{lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ Some\ v{\isacharprime}{\kern0pt}} and + \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub s{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}. From the latter we know by + Lemma~\ref{slexeraux}(2) that \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ f\isactrlsub r\ v{\isacharprime}{\kern0pt}} holds. + By the uniqueness of the POSIX relation (Theorem~\ref{posixdeterm}) we + can infer that \isa{v} is equal to \isa{f\isactrlsub r\ v{\isacharprime}{\kern0pt}}---that is the + rectification function applied to \isa{v{\isacharprime}{\kern0pt}} + produces the original \isa{v}. Now the case follows by the + definitions of \isa{lexer} and \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}}. + + In the second case where \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}} we have that + \isa{lexer\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ None} by Theorem~\ref{lexercorrect}(1). We + also know by Lemma~\ref{slexeraux}(1) that \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharparenright}{\kern0pt}}. Hence + \isa{lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None} by Theorem~\ref{lexercorrect}(1) and + by IH then also \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None}. With this we can + conclude in this case too.\qed + + \end{proof}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{HERE% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +\begin{lemma} + \isa{{\normalsize{}If\,}\ v\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c\ {\normalsize \,then\,}\ retrieve\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}\ v\ {\isacharequal}{\kern0pt}\ retrieve\ r\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By induction on the definition of \isa{r\mbox{$^\downarrow$}}. The cases for rule 1) and 2) are + straightforward as \isa{\isactrlbold {\isadigit{0}}{\isacharbackslash}{\kern0pt}c} and \isa{\isactrlbold {\isadigit{1}}{\isacharbackslash}{\kern0pt}c} are both equal to + \isa{\isactrlbold {\isadigit{0}}}. This means \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{0}}} cannot hold. Similarly in case of rule 3) + where \isa{r} is of the form \isa{ACHAR\ d} with \isa{c\ {\isacharequal}{\kern0pt}\ d}. Then by assumption + we know \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{1}}}, which implies \isa{v\ {\isacharequal}{\kern0pt}\ Empty}. The equation follows by + simplification of left- and right-hand side. In case \isa{c\ {\isasymnoteq}\ d} we have again + \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{0}}}, which cannot hold. + + For rule 4a) we have again \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{0}}}. The property holds by IH for rule 4b). + The induction hypothesis is + \[ + \isa{retrieve\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}\ v\ {\isacharequal}{\kern0pt}\ retrieve\ r\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isacharparenright}{\kern0pt}} + \] + which is what left- and right-hand side simplify to. The slightly more interesting case + is for 4c). By assumption we have + \isa{v\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}AALTs\ bs\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}. This means we + have either (*) \isa{v{\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} with \isa{v\ {\isacharequal}{\kern0pt}\ Left\ v{\isadigit{1}}} or + (**) \isa{v{\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}AALTs\ bs\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} with \isa{v\ {\isacharequal}{\kern0pt}\ Right\ v{\isadigit{2}}}. + The former case is straightforward by simplification. The second case is \ldots TBD. + + Rule 5) TBD. + + Finally for rule 6) the reasoning is as follows: By assumption we have + \isa{v\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}}. This means we also have + \isa{v\ {\isacharequal}{\kern0pt}\ Seq\ v{\isadigit{1}}\ v{\isadigit{2}}}, \isa{v{\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} and \isa{v{\isadigit{2}}\ {\isacharequal}{\kern0pt}\ Stars\ vs}. + We want to prove + \begin{align} + & \isa{retrieve\ {\isacharparenleft}{\kern0pt}ASEQ\ bs\ {\isacharparenleft}{\kern0pt}fuse\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}ASTAR\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v}\\ + &= \isa{retrieve\ {\isacharparenleft}{\kern0pt}ASTAR\ bs\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ c\ v{\isacharparenright}{\kern0pt}} + \end{align} + The right-hand side \isa{inj}-expression is equal to + \isa{Stars\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}}, which means the \isa{retrieve}-expression + simplifies to + \[ + \isa{bs\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ retrieve\ r\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}\ retrieve\ {\isacharparenleft}{\kern0pt}ASTAR\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}} + \] + The left-hand side (3) above simplifies to + \[ + \isa{bs\ {\isacharat}{\kern0pt}\ retrieve\ {\isacharparenleft}{\kern0pt}fuse\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v{\isadigit{1}}\ {\isacharat}{\kern0pt}\ retrieve\ {\isacharparenleft}{\kern0pt}ASTAR\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}} + \] + We can move out the \isa{fuse\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}} and then use the IH to show that left-hand side + and right-hand side are equal. This completes the proof. + \end{proof} + + + + \bibliographystyle{plain} + \bibliography{root}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimtheory +% +\endisadelimtheory +% +\isatagtheory +% +\endisatagtheory +{\isafoldtheory}% +% +\isadelimtheory +\isanewline +% +\endisadelimtheory +\isanewline +\isanewline +% +\end{isabellebody}% +\endinput +%:%file=~/Dropbox/Workspace/journalpaper/lexing/thys2/Journal/Paper.thy%:% +%:%50=134%:% +%:%62=136%:% +%:%63=137%:% +%:%64=138%:% +%:%65=139%:% +%:%66=140%:% +%:%67=141%:% +%:%68=142%:% +%:%69=143%:% +%:%70=144%:% +%:%71=145%:% +%:%72=146%:% +%:%73=147%:% +%:%74=148%:% +%:%75=149%:% +%:%76=150%:% +%:%77=151%:% +%:%78=152%:% +%:%79=153%:% +%:%80=154%:% +%:%81=155%:% +%:%82=156%:% +%:%83=157%:% +%:%84=158%:% +%:%85=159%:% +%:%86=160%:% +%:%87=161%:% +%:%88=162%:% +%:%89=163%:% +%:%90=164%:% +%:%91=165%:% +%:%92=166%:% +%:%93=167%:% +%:%94=168%:% +%:%95=169%:% +%:%96=170%:% +%:%97=171%:% +%:%98=172%:% +%:%99=173%:% +%:%100=174%:% +%:%101=175%:% +%:%102=176%:% +%:%103=177%:% +%:%104=178%:% +%:%105=179%:% +%:%106=180%:% +%:%107=181%:% +%:%108=182%:% +%:%109=183%:% +%:%110=184%:% +%:%111=185%:% +%:%112=186%:% +%:%113=187%:% +%:%114=188%:% +%:%115=189%:% +%:%116=190%:% +%:%117=191%:% +%:%118=192%:% +%:%119=193%:% +%:%120=194%:% +%:%121=195%:% +%:%122=196%:% +%:%123=197%:% +%:%124=198%:% +%:%125=199%:% +%:%126=200%:% +%:%127=201%:% +%:%128=202%:% +%:%129=203%:% +%:%130=204%:% +%:%131=205%:% +%:%132=206%:% +%:%133=207%:% +%:%134=208%:% +%:%135=209%:% +%:%136=210%:% +%:%137=211%:% +%:%138=212%:% +%:%139=213%:% +%:%140=214%:% +%:%141=215%:% +%:%142=216%:% +%:%143=217%:% +%:%144=218%:% +%:%145=219%:% +%:%146=220%:% +%:%147=221%:% +%:%148=222%:% +%:%149=223%:% +%:%150=224%:% +%:%151=225%:% +%:%152=226%:% +%:%153=227%:% +%:%154=228%:% +%:%155=229%:% +%:%156=230%:% +%:%157=231%:% +%:%158=232%:% +%:%159=233%:% +%:%160=234%:% +%:%161=235%:% +%:%162=236%:% +%:%163=237%:% +%:%164=238%:% +%:%165=239%:% +%:%166=240%:% +%:%167=241%:% +%:%168=242%:% +%:%169=243%:% +%:%170=244%:% +%:%171=245%:% +%:%172=246%:% +%:%173=247%:% +%:%174=248%:% +%:%175=249%:% +%:%176=250%:% +%:%177=251%:% +%:%178=252%:% +%:%179=253%:% +%:%180=254%:% +%:%181=255%:% +%:%182=256%:% +%:%183=257%:% +%:%184=258%:% +%:%185=259%:% +%:%186=260%:% +%:%187=261%:% +%:%188=262%:% +%:%189=263%:% +%:%190=264%:% +%:%191=265%:% +%:%192=266%:% +%:%193=267%:% +%:%194=268%:% +%:%203=272%:% +%:%215=276%:% +%:%216=277%:% +%:%217=278%:% +%:%218=279%:% +%:%219=280%:% +%:%220=281%:% +%:%221=282%:% +%:%222=283%:% +%:%223=284%:% +%:%224=285%:% +%:%225=286%:% +%:%226=287%:% +%:%227=288%:% +%:%228=289%:% +%:%229=290%:% +%:%230=291%:% +%:%231=292%:% +%:%240=299%:% +%:%252=305%:% +%:%253=306%:% +%:%254=307%:% +%:%255=308%:% +%:%255=309%:% +%:%256=310%:% +%:%257=311%:% +%:%258=312%:% +%:%259=313%:% +%:%260=314%:% +%:%261=315%:% +%:%262=316%:% +%:%263=317%:% +%:%264=318%:% +%:%265=319%:% +%:%266=320%:% +%:%267=321%:% +%:%268=322%:% +%:%269=323%:% +%:%270=324%:% +%:%271=325%:% +%:%272=326%:% +%:%273=327%:% +%:%274=328%:% +%:%275=329%:% +%:%276=330%:% +%:%277=331%:% +%:%278=332%:% +%:%279=333%:% +%:%280=334%:% +%:%281=335%:% +%:%282=336%:% +%:%283=337%:% +%:%284=338%:% +%:%285=339%:% +%:%286=340%:% +%:%287=341%:% +%:%288=342%:% +%:%289=343%:% +%:%290=344%:% +%:%291=345%:% +%:%292=346%:% +%:%293=347%:% +%:%294=348%:% +%:%295=349%:% +%:%296=350%:% +%:%297=351%:% +%:%298=352%:% +%:%299=353%:% +%:%300=354%:% +%:%301=355%:% +%:%302=356%:% +%:%303=357%:% +%:%304=358%:% +%:%305=359%:% +%:%306=360%:% +%:%307=361%:% +%:%308=362%:% +%:%309=363%:% +%:%310=364%:% +%:%311=365%:% +%:%312=366%:% +%:%313=367%:% +%:%314=368%:% +%:%315=369%:% +%:%316=370%:% +%:%317=371%:% +%:%318=372%:% +%:%319=373%:% +%:%319=374%:% +%:%320=375%:% +%:%321=376%:% +%:%322=377%:% +%:%323=378%:% +%:%324=379%:% +%:%325=380%:% +%:%326=381%:% +%:%327=382%:% +%:%328=383%:% +%:%329=384%:% +%:%330=385%:% +%:%331=386%:% +%:%332=387%:% +%:%333=388%:% +%:%334=389%:% +%:%335=390%:% +%:%336=391%:% +%:%337=392%:% +%:%338=393%:% +%:%339=394%:% +%:%340=395%:% +%:%341=396%:% +%:%342=397%:% +%:%343=398%:% +%:%344=399%:% +%:%345=400%:% +%:%346=401%:% +%:%347=402%:% +%:%348=403%:% +%:%349=404%:% +%:%350=405%:% +%:%351=406%:% +%:%352=407%:% +%:%353=408%:% +%:%354=409%:% +%:%355=410%:% +%:%356=411%:% +%:%357=412%:% +%:%358=413%:% +%:%359=414%:% +%:%360=415%:% +%:%361=416%:% +%:%362=417%:% +%:%363=418%:% +%:%364=419%:% +%:%365=420%:% +%:%366=421%:% +%:%367=422%:% +%:%368=423%:% +%:%369=424%:% +%:%370=425%:% +%:%371=426%:% +%:%372=427%:% +%:%373=428%:% +%:%374=429%:% +%:%375=430%:% +%:%376=431%:% +%:%377=432%:% +%:%378=433%:% +%:%379=434%:% +%:%380=435%:% +%:%381=436%:% +%:%382=437%:% +%:%383=438%:% +%:%384=439%:% +%:%385=440%:% +%:%386=441%:% +%:%387=442%:% +%:%388=443%:% +%:%389=444%:% +%:%390=445%:% +%:%391=446%:% +%:%392=447%:% +%:%393=448%:% +%:%394=449%:% +%:%395=450%:% +%:%396=451%:% +%:%397=452%:% +%:%398=453%:% +%:%399=454%:% +%:%400=455%:% +%:%401=456%:% +%:%402=457%:% +%:%403=458%:% +%:%404=459%:% +%:%405=460%:% +%:%406=461%:% +%:%407=462%:% +%:%408=463%:% +%:%409=464%:% +%:%410=465%:% +%:%411=466%:% +%:%412=467%:% +%:%413=468%:% +%:%414=469%:% +%:%415=470%:% +%:%416=471%:% +%:%417=472%:% +%:%418=473%:% +%:%419=474%:% +%:%420=475%:% +%:%421=476%:% +%:%422=477%:% +%:%423=478%:% +%:%424=479%:% +%:%433=486%:% +%:%445=488%:% +%:%446=489%:% +%:%446=490%:% +%:%447=491%:% +%:%448=492%:% +%:%449=493%:% +%:%450=494%:% +%:%451=495%:% +%:%452=496%:% +%:%453=497%:% +%:%454=498%:% +%:%455=499%:% +%:%456=500%:% +%:%457=501%:% +%:%458=502%:% +%:%459=503%:% +%:%460=504%:% +%:%461=505%:% +%:%462=506%:% +%:%463=507%:% +%:%464=508%:% +%:%465=509%:% +%:%466=510%:% +%:%467=511%:% +%:%468=512%:% +%:%469=513%:% +%:%470=514%:% +%:%471=515%:% +%:%472=516%:% +%:%473=517%:% +%:%474=518%:% +%:%475=519%:% +%:%476=520%:% +%:%477=521%:% +%:%478=522%:% +%:%479=523%:% +%:%480=524%:% +%:%481=525%:% +%:%482=526%:% +%:%483=527%:% +%:%484=528%:% +%:%485=529%:% +%:%485=530%:% +%:%486=531%:% +%:%487=532%:% +%:%488=533%:% +%:%489=534%:% +%:%490=535%:% +%:%490=536%:% +%:%491=537%:% +%:%492=538%:% +%:%493=539%:% +%:%494=540%:% +%:%495=541%:% +%:%496=542%:% +%:%497=543%:% +%:%498=544%:% +%:%499=545%:% +%:%500=546%:% +%:%501=547%:% +%:%502=548%:% +%:%503=549%:% +%:%504=550%:% +%:%505=551%:% +%:%506=552%:% +%:%507=553%:% +%:%508=554%:% +%:%509=555%:% +%:%510=556%:% +%:%511=557%:% +%:%512=558%:% +%:%513=559%:% +%:%514=560%:% +%:%515=561%:% +%:%516=562%:% +%:%517=563%:% +%:%518=564%:% +%:%519=565%:% +%:%520=566%:% +%:%521=567%:% +%:%522=568%:% +%:%523=569%:% +%:%524=570%:% +%:%525=571%:% +%:%526=572%:% +%:%527=573%:% +%:%528=574%:% +%:%529=575%:% +%:%530=576%:% +%:%531=577%:% +%:%532=578%:% +%:%533=579%:% +%:%534=580%:% +%:%535=581%:% +%:%536=582%:% +%:%537=583%:% +%:%538=584%:% +%:%539=585%:% +%:%540=586%:% +%:%541=587%:% +%:%542=588%:% +%:%543=589%:% +%:%544=590%:% +%:%545=591%:% +%:%546=592%:% +%:%547=593%:% +%:%548=594%:% +%:%549=595%:% +%:%550=596%:% +%:%551=597%:% +%:%552=598%:% +%:%553=599%:% +%:%554=600%:% +%:%555=601%:% +%:%556=602%:% +%:%557=603%:% +%:%558=604%:% +%:%559=605%:% +%:%560=606%:% +%:%561=607%:% +%:%562=608%:% +%:%563=609%:% +%:%564=610%:% +%:%565=611%:% +%:%566=612%:% +%:%567=613%:% +%:%568=614%:% +%:%569=615%:% +%:%570=616%:% +%:%571=617%:% +%:%572=618%:% +%:%573=619%:% +%:%574=620%:% +%:%575=621%:% +%:%576=622%:% +%:%577=623%:% +%:%578=624%:% +%:%579=625%:% +%:%588=629%:% +%:%600=633%:% +%:%601=634%:% +%:%602=635%:% +%:%603=636%:% +%:%604=637%:% +%:%605=638%:% +%:%606=639%:% +%:%607=640%:% +%:%608=641%:% +%:%609=642%:% +%:%610=643%:% +%:%611=644%:% +%:%612=645%:% +%:%613=646%:% +%:%614=647%:% +%:%615=648%:% +%:%616=649%:% +%:%617=650%:% +%:%618=651%:% +%:%619=652%:% +%:%620=653%:% +%:%621=654%:% +%:%622=655%:% +%:%623=656%:% +%:%624=657%:% +%:%625=658%:% +%:%626=659%:% +%:%627=660%:% +%:%628=661%:% +%:%629=662%:% +%:%630=663%:% +%:%631=664%:% +%:%632=665%:% +%:%633=666%:% +%:%634=667%:% +%:%635=668%:% +%:%636=669%:% +%:%637=670%:% +%:%638=671%:% +%:%639=672%:% +%:%640=673%:% +%:%641=674%:% +%:%642=675%:% +%:%643=676%:% +%:%644=677%:% +%:%645=678%:% +%:%646=679%:% +%:%647=680%:% +%:%648=681%:% +%:%649=682%:% +%:%650=683%:% +%:%651=684%:% +%:%652=685%:% +%:%653=686%:% +%:%654=687%:% +%:%655=688%:% +%:%656=689%:% +%:%657=690%:% +%:%658=691%:% +%:%659=692%:% +%:%660=693%:% +%:%661=694%:% +%:%662=695%:% +%:%663=696%:% +%:%664=697%:% +%:%665=698%:% +%:%666=699%:% +%:%667=700%:% +%:%668=701%:% +%:%669=702%:% +%:%670=703%:% +%:%671=704%:% +%:%672=705%:% +%:%673=706%:% +%:%674=707%:% +%:%675=708%:% +%:%676=709%:% +%:%677=710%:% +%:%678=711%:% +%:%679=712%:% +%:%680=713%:% +%:%681=714%:% +%:%682=715%:% +%:%683=716%:% +%:%684=717%:% +%:%685=718%:% +%:%686=719%:% +%:%687=720%:% +%:%688=721%:% +%:%689=722%:% +%:%690=723%:% +%:%691=724%:% +%:%692=725%:% +%:%693=726%:% +%:%694=727%:% +%:%695=728%:% +%:%696=729%:% +%:%697=730%:% +%:%698=731%:% +%:%699=732%:% +%:%700=733%:% +%:%701=734%:% +%:%702=735%:% +%:%703=736%:% +%:%704=737%:% +%:%705=738%:% +%:%706=739%:% +%:%707=740%:% +%:%708=741%:% +%:%709=742%:% +%:%710=743%:% +%:%711=744%:% +%:%712=745%:% +%:%713=746%:% +%:%714=747%:% +%:%714=748%:% +%:%715=749%:% +%:%715=750%:% +%:%716=751%:% +%:%717=752%:% +%:%717=753%:% +%:%718=754%:% +%:%719=755%:% +%:%720=756%:% +%:%721=757%:% +%:%722=758%:% +%:%723=759%:% +%:%724=760%:% +%:%725=761%:% +%:%726=762%:% +%:%727=763%:% +%:%728=764%:% +%:%729=765%:% +%:%730=766%:% +%:%731=767%:% +%:%732=768%:% +%:%733=769%:% +%:%734=770%:% +%:%735=771%:% +%:%736=772%:% +%:%737=773%:% +%:%738=774%:% +%:%739=775%:% +%:%740=776%:% +%:%741=777%:% +%:%742=778%:% +%:%743=779%:% +%:%744=780%:% +%:%745=781%:% +%:%746=782%:% +%:%747=783%:% +%:%748=784%:% +%:%749=785%:% +%:%750=786%:% +%:%751=787%:% +%:%752=788%:% +%:%753=789%:% +%:%754=790%:% +%:%755=791%:% +%:%756=792%:% +%:%757=793%:% +%:%758=794%:% +%:%759=795%:% +%:%760=796%:% +%:%761=797%:% +%:%762=798%:% +%:%763=799%:% +%:%764=800%:% +%:%765=801%:% +%:%766=802%:% +%:%767=803%:% +%:%768=804%:% +%:%769=805%:% +%:%770=806%:% +%:%771=807%:% +%:%772=808%:% +%:%773=809%:% +%:%773=810%:% +%:%773=811%:% +%:%774=812%:% +%:%775=813%:% +%:%776=814%:% +%:%777=815%:% +%:%778=816%:% +%:%779=817%:% +%:%780=818%:% +%:%781=819%:% +%:%782=820%:% +%:%782=821%:% +%:%783=822%:% +%:%784=823%:% +%:%785=824%:% +%:%786=825%:% +%:%787=826%:% +%:%788=827%:% +%:%789=828%:% +%:%790=829%:% +%:%791=830%:% +%:%792=831%:% +%:%793=832%:% +%:%794=833%:% +%:%795=834%:% +%:%796=835%:% +%:%797=836%:% +%:%798=837%:% +%:%799=838%:% +%:%800=839%:% +%:%801=840%:% +%:%802=841%:% +%:%803=842%:% +%:%804=843%:% +%:%805=844%:% +%:%806=845%:% +%:%807=846%:% +%:%808=847%:% +%:%809=848%:% +%:%810=849%:% +%:%811=850%:% +%:%812=851%:% +%:%813=852%:% +%:%814=853%:% +%:%815=854%:% +%:%816=855%:% +%:%817=856%:% +%:%817=857%:% +%:%818=858%:% +%:%819=859%:% +%:%820=860%:% +%:%821=861%:% +%:%822=862%:% +%:%823=863%:% +%:%824=864%:% +%:%824=865%:% +%:%825=866%:% +%:%826=867%:% +%:%827=868%:% +%:%827=869%:% +%:%828=870%:% +%:%829=871%:% +%:%830=872%:% +%:%831=873%:% +%:%832=874%:% +%:%833=875%:% +%:%834=876%:% +%:%835=877%:% +%:%836=878%:% +%:%837=879%:% +%:%838=880%:% +%:%838=881%:% +%:%839=882%:% +%:%840=883%:% +%:%841=884%:% +%:%842=885%:% +%:%842=886%:% +%:%843=887%:% +%:%844=888%:% +%:%845=889%:% +%:%846=890%:% +%:%847=891%:% +%:%848=892%:% +%:%849=893%:% +%:%850=894%:% +%:%851=895%:% +%:%852=896%:% +%:%853=897%:% +%:%854=898%:% +%:%855=899%:% +%:%856=900%:% +%:%857=901%:% +%:%858=902%:% +%:%859=903%:% +%:%860=904%:% +%:%861=905%:% +%:%862=906%:% +%:%863=907%:% +%:%864=908%:% +%:%865=909%:% +%:%866=910%:% +%:%867=911%:% +%:%868=912%:% +%:%869=913%:% +%:%870=914%:% +%:%871=915%:% +%:%872=916%:% +%:%873=917%:% +%:%874=918%:% +%:%875=919%:% +%:%876=920%:% +%:%877=921%:% +%:%878=922%:% +%:%879=923%:% +%:%880=924%:% +%:%881=925%:% +%:%882=926%:% +%:%883=927%:% +%:%884=928%:% +%:%885=929%:% +%:%886=930%:% +%:%887=931%:% +%:%888=932%:% +%:%889=933%:% +%:%890=934%:% +%:%891=935%:% +%:%892=936%:% +%:%893=937%:% +%:%894=938%:% +%:%895=939%:% +%:%896=940%:% +%:%897=941%:% +%:%898=942%:% +%:%899=943%:% +%:%900=944%:% +%:%900=945%:% +%:%901=946%:% +%:%902=947%:% +%:%903=948%:% +%:%904=949%:% +%:%905=950%:% +%:%906=951%:% +%:%907=952%:% +%:%908=953%:% +%:%909=954%:% +%:%910=955%:% +%:%911=956%:% +%:%912=957%:% +%:%913=958%:% +%:%914=959%:% +%:%915=960%:% +%:%916=961%:% +%:%917=962%:% +%:%918=963%:% +%:%919=964%:% +%:%920=965%:% +%:%921=966%:% +%:%922=967%:% +%:%923=968%:% +%:%924=969%:% +%:%925=970%:% +%:%926=971%:% +%:%927=972%:% +%:%928=973%:% +%:%929=974%:% +%:%930=975%:% +%:%931=976%:% +%:%932=977%:% +%:%933=978%:% +%:%933=979%:% +%:%934=980%:% +%:%935=981%:% +%:%936=982%:% +%:%937=983%:% +%:%938=984%:% +%:%939=985%:% +%:%940=986%:% +%:%941=987%:% +%:%942=988%:% +%:%943=989%:% +%:%944=990%:% +%:%945=991%:% +%:%946=992%:% +%:%947=993%:% +%:%948=994%:% +%:%949=995%:% +%:%950=996%:% +%:%951=997%:% +%:%952=998%:% +%:%953=999%:% +%:%954=1000%:% +%:%955=1001%:% +%:%956=1002%:% +%:%957=1003%:% +%:%958=1004%:% +%:%959=1005%:% +%:%960=1006%:% +%:%961=1007%:% +%:%962=1008%:% +%:%963=1009%:% +%:%963=1010%:% +%:%964=1011%:% +%:%965=1012%:% +%:%966=1013%:% +%:%967=1014%:% +%:%968=1015%:% +%:%969=1016%:% +%:%970=1017%:% +%:%971=1018%:% +%:%971=1019%:% +%:%972=1020%:% +%:%973=1021%:% +%:%974=1022%:% +%:%975=1023%:% +%:%976=1024%:% +%:%977=1025%:% +%:%978=1026%:% +%:%979=1027%:% +%:%980=1028%:% +%:%981=1029%:% +%:%982=1030%:% +%:%983=1031%:% +%:%984=1032%:% +%:%985=1033%:% +%:%986=1034%:% +%:%987=1035%:% +%:%988=1036%:% +%:%989=1037%:% +%:%990=1038%:% +%:%991=1039%:% +%:%992=1040%:% +%:%993=1041%:% +%:%994=1042%:% +%:%995=1043%:% +%:%996=1044%:% +%:%997=1045%:% +%:%998=1046%:% +%:%999=1047%:% +%:%1000=1048%:% +%:%1001=1049%:% +%:%1002=1050%:% +%:%1003=1051%:% +%:%1004=1052%:% +%:%1005=1053%:% +%:%1006=1054%:% +%:%1007=1055%:% +%:%1008=1056%:% +%:%1008=1057%:% +%:%1008=1058%:% +%:%1009=1059%:% +%:%1009=1060%:% +%:%1009=1061%:% +%:%1010=1062%:% +%:%1011=1063%:% +%:%1011=1064%:% +%:%1012=1065%:% +%:%1013=1066%:% +%:%1014=1067%:% +%:%1015=1068%:% +%:%1016=1069%:% +%:%1017=1070%:% +%:%1018=1071%:% +%:%1019=1072%:% +%:%1020=1073%:% +%:%1021=1074%:% +%:%1022=1075%:% +%:%1023=1076%:% +%:%1024=1077%:% +%:%1025=1078%:% +%:%1026=1079%:% +%:%1027=1080%:% +%:%1028=1081%:% +%:%1029=1082%:% +%:%1030=1083%:% +%:%1031=1084%:% +%:%1032=1085%:% +%:%1033=1086%:% +%:%1034=1087%:% +%:%1035=1088%:% +%:%1036=1089%:% +%:%1037=1090%:% +%:%1038=1091%:% +%:%1039=1092%:% +%:%1040=1093%:% +%:%1041=1094%:% +%:%1042=1095%:% +%:%1043=1096%:% +%:%1044=1097%:% +%:%1045=1098%:% +%:%1045=1099%:% +%:%1046=1100%:% +%:%1047=1101%:% +%:%1048=1102%:% +%:%1048=1103%:% +%:%1048=1104%:% +%:%1048=1105%:% +%:%1049=1106%:% +%:%1050=1107%:% +%:%1051=1108%:% +%:%1052=1109%:% +%:%1053=1110%:% +%:%1054=1111%:% +%:%1055=1112%:% +%:%1056=1113%:% +%:%1057=1114%:% +%:%1058=1115%:% +%:%1059=1116%:% +%:%1060=1117%:% +%:%1061=1118%:% +%:%1062=1119%:% +%:%1063=1120%:% +%:%1064=1121%:% +%:%1065=1122%:% +%:%1066=1123%:% +%:%1067=1124%:% +%:%1068=1125%:% +%:%1069=1126%:% +%:%1070=1127%:% +%:%1071=1128%:% +%:%1072=1129%:% +%:%1073=1130%:% +%:%1074=1131%:% +%:%1075=1132%:% +%:%1076=1133%:% +%:%1077=1134%:% +%:%1078=1135%:% +%:%1079=1136%:% +%:%1080=1137%:% +%:%1081=1138%:% +%:%1082=1139%:% +%:%1083=1140%:% +%:%1084=1141%:% +%:%1085=1142%:% +%:%1094=1146%:% +%:%1106=1150%:% +%:%1107=1151%:% +%:%1108=1152%:% +%:%1109=1153%:% +%:%1110=1154%:% +%:%1111=1155%:% +%:%1112=1156%:% +%:%1113=1157%:% +%:%1114=1158%:% +%:%1115=1159%:% +%:%1116=1160%:% +%:%1117=1161%:% +%:%1118=1162%:% +%:%1119=1163%:% +%:%1120=1164%:% +%:%1121=1165%:% +%:%1122=1166%:% +%:%1123=1167%:% +%:%1124=1168%:% +%:%1125=1169%:% +%:%1126=1170%:% +%:%1127=1171%:% +%:%1128=1172%:% +%:%1129=1173%:% +%:%1130=1174%:% +%:%1131=1175%:% +%:%1132=1176%:% +%:%1133=1177%:% +%:%1134=1178%:% +%:%1135=1179%:% +%:%1136=1180%:% +%:%1137=1181%:% +%:%1138=1182%:% +%:%1139=1183%:% +%:%1140=1184%:% +%:%1141=1185%:% +%:%1142=1186%:% +%:%1143=1187%:% +%:%1144=1188%:% +%:%1145=1189%:% +%:%1146=1190%:% +%:%1147=1191%:% +%:%1148=1192%:% +%:%1149=1193%:% +%:%1150=1194%:% +%:%1151=1195%:% +%:%1152=1196%:% +%:%1153=1197%:% +%:%1154=1198%:% +%:%1155=1199%:% +%:%1156=1200%:% +%:%1157=1201%:% +%:%1158=1202%:% +%:%1159=1203%:% +%:%1160=1204%:% +%:%1161=1205%:% +%:%1162=1206%:% +%:%1163=1207%:% +%:%1164=1208%:% +%:%1165=1209%:% +%:%1166=1210%:% +%:%1167=1211%:% +%:%1168=1212%:% +%:%1169=1213%:% +%:%1170=1214%:% +%:%1171=1215%:% +%:%1172=1216%:% +%:%1173=1217%:% +%:%1174=1218%:% +%:%1175=1219%:% +%:%1176=1220%:% +%:%1177=1221%:% +%:%1178=1222%:% +%:%1179=1223%:% +%:%1180=1224%:% +%:%1181=1225%:% +%:%1182=1226%:% +%:%1183=1227%:% +%:%1184=1228%:% +%:%1185=1229%:% +%:%1186=1230%:% +%:%1187=1231%:% +%:%1188=1232%:% +%:%1189=1233%:% +%:%1190=1234%:% +%:%1191=1235%:% +%:%1192=1236%:% +%:%1193=1237%:% +%:%1194=1238%:% +%:%1195=1239%:% +%:%1196=1240%:% +%:%1197=1241%:% +%:%1198=1242%:% +%:%1199=1243%:% +%:%1200=1244%:% +%:%1201=1245%:% +%:%1202=1246%:% +%:%1203=1247%:% +%:%1204=1248%:% +%:%1205=1249%:% +%:%1206=1250%:% +%:%1207=1251%:% +%:%1208=1252%:% +%:%1209=1253%:% +%:%1210=1254%:% +%:%1211=1255%:% +%:%1212=1256%:% +%:%1213=1257%:% +%:%1214=1258%:% +%:%1215=1259%:% +%:%1216=1260%:% +%:%1217=1261%:% +%:%1218=1262%:% +%:%1219=1263%:% +%:%1220=1264%:% +%:%1221=1265%:% +%:%1221=1266%:% +%:%1222=1267%:% +%:%1223=1268%:% +%:%1224=1269%:% +%:%1225=1270%:% +%:%1226=1271%:% +%:%1227=1272%:% +%:%1228=1273%:% +%:%1229=1274%:% +%:%1230=1275%:% +%:%1231=1276%:% +%:%1232=1277%:% +%:%1232=1278%:% +%:%1233=1279%:% +%:%1234=1280%:% +%:%1235=1281%:% +%:%1236=1282%:% +%:%1237=1283%:% +%:%1238=1284%:% +%:%1239=1285%:% +%:%1240=1286%:% +%:%1241=1287%:% +%:%1242=1288%:% +%:%1243=1289%:% +%:%1244=1290%:% +%:%1245=1291%:% +%:%1246=1292%:% +%:%1247=1293%:% +%:%1248=1294%:% +%:%1249=1295%:% +%:%1250=1296%:% +%:%1251=1297%:% +%:%1252=1298%:% +%:%1253=1299%:% +%:%1254=1300%:% +%:%1255=1301%:% +%:%1256=1302%:% +%:%1257=1303%:% +%:%1258=1304%:% +%:%1259=1305%:% +%:%1260=1306%:% +%:%1261=1307%:% +%:%1262=1308%:% +%:%1263=1309%:% +%:%1264=1310%:% +%:%1265=1311%:% +%:%1266=1312%:% +%:%1267=1313%:% +%:%1268=1314%:% +%:%1269=1315%:% +%:%1270=1316%:% +%:%1271=1317%:% +%:%1272=1318%:% +%:%1273=1319%:% +%:%1274=1320%:% +%:%1275=1321%:% +%:%1276=1322%:% +%:%1277=1323%:% +%:%1278=1324%:% +%:%1279=1325%:% +%:%1280=1326%:% +%:%1281=1327%:% +%:%1282=1328%:% +%:%1283=1329%:% +%:%1284=1330%:% +%:%1285=1331%:% +%:%1286=1332%:% +%:%1286=1333%:% +%:%1286=1334%:% +%:%1287=1335%:% +%:%1288=1336%:% +%:%1288=1337%:% +%:%1288=1338%:% +%:%1288=1339%:% +%:%1289=1340%:% +%:%1290=1341%:% +%:%1291=1342%:% +%:%1292=1343%:% +%:%1293=1344%:% +%:%1293=1345%:% +%:%1294=1346%:% +%:%1295=1347%:% +%:%1296=1348%:% +%:%1297=1349%:% +%:%1298=1350%:% +%:%1299=1351%:% +%:%1300=1352%:% +%:%1301=1353%:% +%:%1302=1354%:% +%:%1303=1355%:% +%:%1304=1356%:% +%:%1305=1357%:% +%:%1306=1358%:% +%:%1307=1359%:% +%:%1308=1360%:% +%:%1309=1361%:% +%:%1310=1362%:% +%:%1311=1363%:% +%:%1312=1364%:% +%:%1313=1365%:% +%:%1314=1366%:% +%:%1315=1367%:% +%:%1316=1368%:% +%:%1317=1369%:% +%:%1318=1370%:% +%:%1319=1371%:% +%:%1320=1372%:% +%:%1321=1373%:% +%:%1322=1374%:% +%:%1323=1375%:% +%:%1324=1376%:% +%:%1325=1377%:% +%:%1326=1378%:% +%:%1327=1379%:% +%:%1328=1380%:% +%:%1329=1381%:% +%:%1330=1382%:% +%:%1331=1383%:% +%:%1332=1384%:% +%:%1333=1385%:% +%:%1334=1386%:% +%:%1335=1387%:% +%:%1336=1388%:% +%:%1337=1389%:% +%:%1338=1390%:% +%:%1339=1391%:% +%:%1340=1392%:% +%:%1341=1393%:% +%:%1341=1394%:% +%:%1342=1395%:% +%:%1342=1396%:% +%:%1343=1397%:% +%:%1343=1398%:% +%:%1344=1399%:% +%:%1345=1400%:% +%:%1346=1401%:% +%:%1347=1402%:% +%:%1348=1403%:% +%:%1349=1404%:% +%:%1350=1405%:% +%:%1350=1406%:% +%:%1351=1407%:% +%:%1351=1408%:% +%:%1352=1409%:% +%:%1352=1410%:% +%:%1353=1411%:% +%:%1354=1412%:% +%:%1355=1413%:% +%:%1356=1414%:% +%:%1357=1415%:% +%:%1358=1416%:% +%:%1359=1417%:% +%:%1360=1418%:% +%:%1361=1419%:% +%:%1362=1420%:% +%:%1363=1421%:% +%:%1364=1422%:% +%:%1365=1423%:% +%:%1366=1424%:% +%:%1367=1425%:% +%:%1368=1426%:% +%:%1369=1427%:% +%:%1370=1428%:% +%:%1371=1429%:% +%:%1372=1430%:% +%:%1373=1431%:% +%:%1374=1432%:% +%:%1375=1433%:% +%:%1376=1434%:% +%:%1377=1435%:% +%:%1378=1436%:% +%:%1379=1437%:% +%:%1380=1438%:% +%:%1381=1439%:% +%:%1382=1440%:% +%:%1383=1441%:% +%:%1383=1442%:% +%:%1384=1443%:% +%:%1385=1444%:% +%:%1386=1445%:% +%:%1387=1446%:% +%:%1388=1447%:% +%:%1389=1448%:% +%:%1390=1449%:% +%:%1390=1450%:% +%:%1391=1451%:% +%:%1392=1452%:% +%:%1393=1453%:% +%:%1393=1454%:% +%:%1394=1455%:% +%:%1395=1456%:% +%:%1396=1457%:% +%:%1396=1458%:% +%:%1397=1459%:% +%:%1397=1460%:% +%:%1398=1461%:% +%:%1398=1462%:% +%:%1399=1463%:% +%:%1400=1464%:% +%:%1401=1465%:% +%:%1401=1466%:% +%:%1402=1467%:% +%:%1402=1468%:% +%:%1403=1469%:% +%:%1403=1470%:% +%:%1403=1471%:% +%:%1403=1472%:% +%:%1404=1473%:% +%:%1404=1474%:% +%:%1405=1475%:% +%:%1406=1476%:% +%:%1406=1478%:% +%:%1406=1479%:% +%:%1406=1480%:% +%:%1406=1481%:% +%:%1406=1482%:% +%:%1407=1483%:% +%:%1407=1484%:% +%:%1408=1485%:% +%:%1408=1486%:% +%:%1409=1487%:% +%:%1409=1488%:% +%:%1410=1489%:% +%:%1411=1490%:% +%:%1412=1491%:% +%:%1413=1492%:% +%:%1413=1493%:% +%:%1414=1494%:% +%:%1414=1495%:% +%:%1415=1496%:% +%:%1416=1497%:% +%:%1416=1498%:% +%:%1417=1499%:% +%:%1418=1500%:% +%:%1419=1501%:% +%:%1420=1502%:% +%:%1421=1503%:% +%:%1422=1504%:% +%:%1423=1505%:% +%:%1424=1506%:% +%:%1425=1507%:% +%:%1426=1508%:% +%:%1427=1509%:% +%:%1428=1510%:% +%:%1429=1511%:% +%:%1430=1512%:% +%:%1431=1513%:% +%:%1432=1514%:% +%:%1433=1515%:% +%:%1434=1516%:% +%:%1435=1517%:% +%:%1436=1518%:% +%:%1437=1519%:% +%:%1438=1520%:% +%:%1439=1521%:% +%:%1440=1522%:% +%:%1441=1523%:% +%:%1442=1524%:% +%:%1443=1525%:% +%:%1444=1526%:% +%:%1445=1527%:% +%:%1446=1528%:% +%:%1447=1529%:% +%:%1448=1530%:% +%:%1449=1531%:% +%:%1450=1532%:% +%:%1451=1533%:% +%:%1452=1534%:% +%:%1453=1535%:% +%:%1454=1536%:% +%:%1455=1537%:% +%:%1456=1538%:% +%:%1457=1539%:% +%:%1458=1540%:% +%:%1459=1541%:% +%:%1460=1542%:% +%:%1461=1543%:% +%:%1462=1544%:% +%:%1463=1545%:% +%:%1464=1546%:% +%:%1465=1547%:% +%:%1466=1548%:% +%:%1467=1549%:% +%:%1476=1553%:% +%:%1488=1560%:% +%:%1489=1561%:% +%:%1490=1562%:% +%:%1491=1563%:% +%:%1492=1564%:% +%:%1493=1565%:% +%:%1494=1566%:% +%:%1503=1571%:% +%:%1515=1575%:% +%:%1516=1576%:% +%:%1517=1577%:% +%:%1518=1578%:% +%:%1519=1579%:% +%:%1520=1580%:% +%:%1521=1581%:% +%:%1522=1582%:% +%:%1523=1583%:% +%:%1524=1584%:% +%:%1525=1585%:% +%:%1526=1586%:% +%:%1527=1587%:% +%:%1528=1588%:% +%:%1529=1589%:% +%:%1530=1590%:% +%:%1531=1591%:% +%:%1532=1592%:% +%:%1533=1593%:% +%:%1534=1594%:% +%:%1535=1595%:% +%:%1536=1596%:% +%:%1537=1597%:% +%:%1538=1598%:% +%:%1539=1599%:% +%:%1540=1600%:% +%:%1541=1601%:% +%:%1542=1602%:% +%:%1543=1603%:% +%:%1544=1604%:% +%:%1545=1605%:% +%:%1546=1606%:% +%:%1547=1607%:% +%:%1548=1608%:% +%:%1549=1609%:% +%:%1550=1610%:% +%:%1551=1611%:% +%:%1552=1612%:% +%:%1553=1613%:% +%:%1554=1614%:% +%:%1555=1615%:% +%:%1556=1616%:% +%:%1557=1617%:% +%:%1558=1618%:% +%:%1559=1619%:% +%:%1560=1620%:% +%:%1561=1621%:% +%:%1562=1622%:% +%:%1563=1623%:% +%:%1564=1624%:% +%:%1565=1625%:% +%:%1566=1626%:% +%:%1567=1627%:% +%:%1568=1628%:% +%:%1569=1629%:% +%:%1570=1630%:% +%:%1571=1631%:% +%:%1572=1632%:% +%:%1573=1633%:% +%:%1574=1634%:% +%:%1575=1635%:% +%:%1576=1636%:% +%:%1577=1637%:% +%:%1578=1638%:% +%:%1579=1639%:% +%:%1580=1640%:% +%:%1581=1641%:% +%:%1582=1642%:% +%:%1583=1643%:% +%:%1584=1644%:% +%:%1585=1645%:% +%:%1586=1646%:% +%:%1587=1647%:% +%:%1588=1648%:% +%:%1589=1649%:% +%:%1590=1650%:% +%:%1591=1651%:% +%:%1592=1652%:% +%:%1593=1653%:% +%:%1594=1654%:% +%:%1595=1655%:% +%:%1596=1656%:% +%:%1597=1657%:% +%:%1598=1658%:% +%:%1599=1659%:% +%:%1600=1660%:% +%:%1601=1661%:% +%:%1602=1662%:% +%:%1603=1663%:% +%:%1604=1664%:% +%:%1604=1780%:% +%:%1605=1781%:% +%:%1606=1782%:% +%:%1607=1783%:% +%:%1608=1784%:% +%:%1608=1945%:% +%:%1609=1946%:% +%:%1610=1947%:% +%:%1611=1948%:% +%:%1612=1949%:% +%:%1613=1950%:% +%:%1614=1951%:% +%:%1615=1952%:% +%:%1616=1953%:% +%:%1617=1954%:% +%:%1618=1955%:% +%:%1619=1956%:% +%:%1620=1957%:% +%:%1621=1958%:% +%:%1622=1959%:% +%:%1623=1960%:% +%:%1624=1961%:% +%:%1625=1962%:% +%:%1626=1963%:% +%:%1627=1964%:% +%:%1627=1965%:% +%:%1628=1966%:% +%:%1628=1967%:% +%:%1628=1968%:% +%:%1629=1969%:% +%:%1629=1970%:% +%:%1630=1971%:% +%:%1631=1972%:% +%:%1632=1973%:% +%:%1633=1974%:% +%:%1634=1975%:% +%:%1635=1976%:% +%:%1636=1977%:% +%:%1637=1978%:% +%:%1638=1979%:% +%:%1639=1980%:% +%:%1640=1981%:% +%:%1641=1982%:% +%:%1642=1983%:% +%:%1643=1984%:% +%:%1644=1985%:% +%:%1645=1986%:% +%:%1646=1987%:% +%:%1646=1988%:% +%:%1647=1989%:% +%:%1648=1990%:% +%:%1649=1991%:% +%:%1649=1992%:% +%:%1649=1993%:% +%:%1650=1994%:% +%:%1651=1995%:% +%:%1652=1996%:% +%:%1652=1997%:% +%:%1653=1998%:% +%:%1653=1999%:% +%:%1654=2000%:% +%:%1655=2001%:% +%:%1656=2002%:% +%:%1657=2003%:% +%:%1658=2004%:% +%:%1659=2005%:% +%:%1660=2006%:% +%:%1661=2007%:% +%:%1662=2008%:% +%:%1663=2009%:% +%:%1664=2010%:% +%:%1665=2011%:% +%:%1666=2012%:% +%:%1667=2013%:% +%:%1668=2014%:% +%:%1669=2015%:% +%:%1670=2016%:% +%:%1679=2021%:% +%:%1691=2025%:% +%:%1692=2026%:% +%:%1693=2027%:% +%:%1694=2028%:% +%:%1695=2029%:% +%:%1696=2030%:% +%:%1697=2031%:% +%:%1698=2032%:% +%:%1699=2033%:% +%:%1700=2034%:% +%:%1701=2035%:% +%:%1702=2036%:% +%:%1703=2037%:% +%:%1704=2038%:% +%:%1705=2039%:% +%:%1706=2040%:% +%:%1707=2041%:% +%:%1708=2042%:% +%:%1709=2043%:% +%:%1710=2044%:% +%:%1711=2045%:% +%:%1712=2046%:% +%:%1713=2047%:% +%:%1714=2048%:% +%:%1715=2049%:% +%:%1716=2050%:% +%:%1717=2051%:% +%:%1718=2052%:% +%:%1719=2053%:% +%:%1720=2054%:% +%:%1721=2055%:% +%:%1722=2056%:% +%:%1723=2057%:% +%:%1724=2058%:% +%:%1725=2059%:% +%:%1726=2060%:% +%:%1727=2061%:% +%:%1728=2062%:% +%:%1729=2063%:% +%:%1730=2064%:% +%:%1731=2065%:% +%:%1732=2066%:% +%:%1733=2067%:% +%:%1734=2068%:% +%:%1735=2069%:% +%:%1736=2070%:% +%:%1737=2071%:% +%:%1738=2072%:% +%:%1739=2073%:% +%:%1740=2074%:% +%:%1741=2075%:% +%:%1742=2076%:% +%:%1743=2077%:% +%:%1756=2083%:% +%:%1759=2084%:% +%:%1760=2085%:% \ No newline at end of file