# HG changeset patch # User Christian Urban # Date 1641945519 0 # Node ID 0efa7ffd96fffae556a4b3c45c3bee49225f88f3 # Parent c807202896458adc9917ceb022077776dd06b6ae# Parent f5866f1d6a59742dbab24d282c476589ff428642 updated diff -r c80720289645 -r 0efa7ffd96ff thys2/Journal/Paper.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/thys2/Journal/Paper.tex Tue Jan 11 23:58:39 2022 +0000 @@ -0,0 +1,3369 @@ +% +\begin{isabellebody}% +\setisabellecontext{Paper}% +% +\isadelimtheory +% +\endisadelimtheory +% +\isatagtheory +% +\endisatagtheory +{\isafoldtheory}% +% +\isadelimtheory +% +\endisadelimtheory +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Core of the proof% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +This paper builds on previous work by Ausaf and Urban using +regular expression'd bit-coded derivatives to do lexing that +is both fast and satisfies the POSIX specification. +In their work, a bit-coded algorithm introduced by Sulzmann and Lu +was formally verified in Isabelle, by a very clever use of +flex function and retrieve to carefully mimic the way a value is +built up by the injection funciton. + +In the previous work, Ausaf and Urban established the below equality: +\begin{lemma} +\isa{{\normalsize{}If\,}\ v\ {\isacharcolon}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}s\ {\normalsize \,then\,}\ Some\ {\isacharparenleft}{\kern0pt}flex\ r\ id\ s\ v{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ decode\ {\isacharparenleft}{\kern0pt}retrieve\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}\ r{\isachardot}{\kern0pt}} +\end{lemma} + +This lemma establishes a link with the lexer without bit-codes. + +With it we get the correctness of bit-coded algorithm. +\begin{lemma} +\isa{lexer\mbox{$_b$}\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s} +\end{lemma} + +However what is not certain is whether we can add simplification +to the bit-coded algorithm, without breaking the correct lexing output. + + +The reason that we do need to add a simplification phase +after each derivative step of $\textit{blexer}$ is +because it produces intermediate +regular expressions that can grow exponentially. +For example, the regular expression $(a+aa)^*$ after taking +derivative against just 10 $a$s will have size 8192. + +%TODO: add figure for this? + + +Therefore, we insert a simplification phase +after each derivation step, as defined below: +\begin{lemma} +\isa{blexer{\isacharunderscore}{\kern0pt}simp\ r\ s\ {\isasymequiv}\ \textrm{if}\ nullable\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}\ \textrm{then}\ decode\ {\isacharparenleft}{\kern0pt}mkeps\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ r\ \textrm{else}\ None} +\end{lemma} + +The simplification function is given as follows: + +\begin{center} + \begin{tabular}{lcl} + \isa{bsimp\ {\isacharparenleft}{\kern0pt}ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}ASEQ\ bs\ {\isacharparenleft}{\kern0pt}bsimp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}bsimp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp\ {\isacharparenleft}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ rs{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharparenleft}{\kern0pt}distinctBy\ {\isacharparenleft}{\kern0pt}flts\ {\isacharparenleft}{\kern0pt}map\ bsimp\ rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ erase\ {\isasymemptyset}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp\ AZERO} & $\dn$ & \isa{AZERO}\\ + +\end{tabular} +\end{center} + +And the two helper functions are: +\begin{center} + \begin{tabular}{lcl} + \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs\isactrlsub {\isadigit{1}}\ {\isacharbrackleft}{\kern0pt}r{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}ASEQ\ bs\isactrlsub {\isadigit{1}}\ {\isacharparenleft}{\kern0pt}bsimp\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}bsimp\ r{\isadigit{2}}{\isachardot}{\kern0pt}{\isadigit{0}}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharbrackleft}{\kern0pt}r{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharparenleft}{\kern0pt}distinctBy\ {\isacharparenleft}{\kern0pt}flts\ {\isacharparenleft}{\kern0pt}map\ bsimp\ rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ erase\ {\isasymemptyset}{\isacharparenright}{\kern0pt}}\\ + \isa{bsimp{\isacharunderscore}{\kern0pt}AALTs\ bs{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharparenleft}{\kern0pt}v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vb\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vc{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{AZERO}\\ + +\end{tabular} +\end{center} + + +This might sound trivial in the case of producing a YES/NO answer, +but once we require a lexing output to be produced (which is required +in applications like compiler front-end, malicious attack domain extraction, +etc.), it is not straightforward if we still extract what is needed according +to the POSIX standard. + + + + + +By simplification, we mean specifically the following rules: + +\begin{center} + \begin{tabular}{lcl} + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{ASEQ\ bs\ AZERO\ r\isactrlsub {\isadigit{2}}\ {\isasymleadsto}\ AZERO}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ AZERO\ {\isasymleadsto}\ AZERO}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{ASEQ\ bs\ {\isacharparenleft}{\kern0pt}AONE\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymleadsto}\ fuse\ {\isacharparenleft}{\kern0pt}bs\ {\isacharat}{\kern0pt}\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{r\isactrlsub {\isadigit{1}}\ {\isasymleadsto}\ r\isactrlsub {\isadigit{2}}}}{\mbox{ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{3}}\ {\isasymleadsto}\ ASEQ\ bs\ r\isactrlsub {\isadigit{2}}\ r\isactrlsub {\isadigit{3}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{r\isactrlsub {\isadigit{3}}\ {\isasymleadsto}\ r\isactrlsub {\isadigit{4}}}}{\mbox{ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{3}}\ {\isasymleadsto}\ ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{4}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{r\ {\isasymleadsto}\ r{\isacharprime}{\kern0pt}}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}r{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}r{\isacharprime}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}AZERO{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}AALTs\ bs\isactrlsub {\isadigit{1}}\ rs\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ map\ {\isacharparenleft}{\kern0pt}fuse\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ rs\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ {\isacharparenleft}{\kern0pt}bs\ {\isacharat}{\kern0pt}\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ rs\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}map\ {\isacharparenleft}{\kern0pt}fuse\ bs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ rs{\isacharparenright}{\kern0pt}}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymleadsto}\ AZERO}}}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{AALTs\ bs\ {\isacharbrackleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isasymleadsto}\ fuse\ bs\ r\isactrlsub {\isadigit{1}}}}}\\ + \isa{\mbox{}\inferrule{\mbox{a\isactrlsub {\isadigit{1}}\mbox{$^\downarrow$}\ {\isacharequal}{\kern0pt}\ a\isactrlsub {\isadigit{2}}\mbox{$^\downarrow$}}}{\mbox{AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}a\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}a\isactrlsub {\isadigit{2}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub c{\isacharparenright}{\kern0pt}\ {\isasymleadsto}\ AALTs\ bs\ {\isacharparenleft}{\kern0pt}rs\isactrlsub a\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}a\isactrlsub {\isadigit{1}}{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ rs\isactrlsub b\ {\isacharat}{\kern0pt}\ rs\isactrlsub c{\isacharparenright}{\kern0pt}}}}\\ + + \end{tabular} +\end{center} + + +And these can be made compact by the following simplification function: + +where the function $\mathit{bsimp_AALTs}$ + +The core idea of the proof is that two regular expressions, +if "isomorphic" up to a finite number of rewrite steps, will +remain "isomorphic" when we take the same sequence of +derivatives on both of them. +This can be expressed by the following rewrite relation lemma: +\begin{lemma} +\isa{{\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}\ {\isasymleadsto}{\isacharasterisk}{\kern0pt}\ bders{\isacharunderscore}{\kern0pt}simp\ r\ s} +\end{lemma} + +This isomorphic relation implies a property that leads to the +correctness result: +if two (nullable) regular expressions are "rewritable" in many steps +from one another, +then a call to function $\textit{bmkeps}$ gives the same +bit-sequence : +\begin{lemma} +\isa{{\normalsize{}If\,}\ \mbox{r{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isasymleadsto}{\isacharasterisk}{\kern0pt}\ r{\isadigit{2}}{\isachardot}{\kern0pt}{\isadigit{0}}}\ {\normalsize \,and\,}\ \mbox{nullable\mbox{$_b$}\ r{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}}\ {\normalsize \,then\,}\ mkeps\mbox{$_b$}\ r{\isadigit{1}}{\isachardot}{\kern0pt}{\isadigit{0}}\ {\isacharequal}{\kern0pt}\ mkeps\mbox{$_b$}\ r{\isadigit{2}}{\isachardot}{\kern0pt}{\isadigit{0}}{\isachardot}{\kern0pt}} +\end{lemma} + +Given the same bit-sequence, the decode function +will give out the same value, which is the output +of both lexers: +\begin{lemma} +\isa{lexer\mbox{$_b$}\ r\ s\ {\isasymequiv}\ \textrm{if}\ nullable\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}\ \textrm{then}\ decode\ {\isacharparenleft}{\kern0pt}mkeps\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}\mbox{$\bbslash$}s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ r\ \textrm{else}\ None} +\end{lemma} + +\begin{lemma} +\isa{blexer{\isacharunderscore}{\kern0pt}simp\ r\ s\ {\isasymequiv}\ \textrm{if}\ nullable\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}\ \textrm{then}\ decode\ {\isacharparenleft}{\kern0pt}mkeps\mbox{$_b$}\ {\isacharparenleft}{\kern0pt}bders{\isacharunderscore}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\mbox{$^\uparrow$}{\isacharparenright}{\kern0pt}\ s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ r\ \textrm{else}\ None} +\end{lemma} + +And that yields the correctness result: +\begin{lemma} +\isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ blexer{\isacharunderscore}{\kern0pt}simp\ r\ s} +\end{lemma} + +The nice thing about the above% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Additional Simp Rules?% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +One question someone would ask is: +can we add more "atomic" simplification/rewriting rules, +so the simplification is even more aggressive, making +the intermediate results smaller, and therefore more space-efficient? +For example, one might want to do open up alternatives who is a child +of a sequence: + +\begin{center} + \begin{tabular}{lcl} + \isa{ASEQ\ bs\ {\isacharparenleft}{\kern0pt}AALTs\ bs{\isadigit{1}}\ rs{\isacharparenright}{\kern0pt}\ r\ {\isasymleadsto}{\isacharquery}{\kern0pt}\ AALTs\ {\isacharparenleft}{\kern0pt}bs\ {\isacharat}{\kern0pt}\ bs{\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}map\ {\isacharparenleft}{\kern0pt}{\isasymlambda}r{\isacharprime}{\kern0pt}{\isachardot}{\kern0pt}\ ASEQ\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharprime}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ rs{\isacharparenright}{\kern0pt}}\\ + \end{tabular} +\end{center} + +This rule allows us to simplify \mbox{\isa{a\ {\isacharplus}{\kern0pt}\ b\ {\isasymcdot}\ c\ {\isacharplus}{\kern0pt}\ a\ {\isasymcdot}\ c}}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Introduction% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em +derivative} \isa{r{\isacharbackslash}{\kern0pt}c} of a regular expression \isa{r} w.r.t.\ +a character~\isa{c}, and showed that it gave a simple solution to the +problem of matching a string \isa{s} with a regular expression \isa{r}: if the derivative of \isa{r} w.r.t.\ (in succession) all the +characters of the string matches the empty string, then \isa{r} +matches \isa{s} (and {\em vice versa}). The derivative has the +property (which may almost be regarded as its specification) that, for +every string \isa{s} and regular expression \isa{r} and character +\isa{c}, one has \isa{cs\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} if and only if \mbox{\isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}}. +The beauty of Brzozowski's derivatives is that +they are neatly expressible in any functional language, and easily +definable and reasoned about in theorem provers---the definitions just +consist of inductive datatypes and simple recursive functions. A +mechanised correctness proof of Brzozowski's matcher in for example HOL4 +has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in +Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}. +And another one in Coq is given by Coquand and Siles \cite{Coquand2012}. + +If a regular expression matches a string, then in general there is more +than one way of how the string is matched. There are two commonly used +disambiguation strategies to generate a unique answer: one is called +GREEDY matching \cite{Frisch2004} and the other is POSIX +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. +For example consider the string \isa{xy} and the regular expression +\mbox{\isa{{\isacharparenleft}{\kern0pt}x\ {\isacharplus}{\kern0pt}\ y\ {\isacharplus}{\kern0pt}\ xy{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}}}. Either the string can be +matched in two `iterations' by the single letter-regular expressions +\isa{x} and \isa{y}, or directly in one iteration by \isa{xy}. The +first case corresponds to GREEDY matching, which first matches with the +left-most symbol and only matches the next symbol in case of a mismatch +(this is greedy in the sense of preferring instant gratification to +delayed repletion). The second case is POSIX matching, which prefers the +longest match. + +In the context of lexing, where an input string needs to be split up +into a sequence of tokens, POSIX is the more natural disambiguation +strategy for what programmers consider basic syntactic building blocks +in their programs. These building blocks are often specified by some +regular expressions, say \isa{r\isactrlbsub key\isactrlesub } and \isa{r\isactrlbsub id\isactrlesub } for recognising keywords and identifiers, +respectively. There are a few underlying (informal) rules behind +tokenising a string in a POSIX \cite{POSIX} fashion: + +\begin{itemize} +\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}): +The longest initial substring matched by any regular expression is taken as +next token.\smallskip + +\item[$\bullet$] \emph{Priority Rule:} +For a particular longest initial substring, the first (leftmost) regular expression +that can match determines the token.\smallskip + +\item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall +not match an empty string unless this is the only match for the repetition.\smallskip + +\item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to +be longer than no match at all. +\end{itemize} + +\noindent Consider for example a regular expression \isa{r\isactrlbsub key\isactrlesub } for recognising keywords such as \isa{if}, +\isa{then} and so on; and \isa{r\isactrlbsub id\isactrlesub } +recognising identifiers (say, a single character followed by +characters or numbers). Then we can form the regular expression +\isa{{\isacharparenleft}{\kern0pt}r\isactrlbsub key\isactrlesub \ {\isacharplus}{\kern0pt}\ r\isactrlbsub id\isactrlesub {\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}} +and use POSIX matching to tokenise strings, say \isa{iffoo} and +\isa{if}. For \isa{iffoo} we obtain by the Longest Match Rule +a single identifier token, not a keyword followed by an +identifier. For \isa{if} we obtain by the Priority Rule a keyword +token, not an identifier token---even if \isa{r\isactrlbsub id\isactrlesub } +matches also. By the Star Rule we know \isa{{\isacharparenleft}{\kern0pt}r\isactrlbsub key\isactrlesub \ {\isacharplus}{\kern0pt}\ r\isactrlbsub id\isactrlesub {\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}} matches \isa{iffoo}, +respectively \isa{if}, in exactly one `iteration' of the star. The +Empty String Rule is for cases where, for example, the regular expression +\isa{{\isacharparenleft}{\kern0pt}a\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}} matches against the +string \isa{bc}. Then the longest initial matched substring is the +empty string, which is matched by both the whole regular expression +and the parenthesised subexpression. + + +One limitation of Brzozowski's matcher is that it only generates a +YES/NO answer for whether a string is being matched by a regular +expression. Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher +to allow generation not just of a YES/NO answer but of an actual +matching, called a [lexical] {\em value}. Assuming a regular +expression matches a string, values encode the information of +\emph{how} the string is matched by the regular expression---that is, +which part of the string is matched by which part of the regular +expression. For this consider again the string \isa{xy} and +the regular expression \mbox{\isa{{\isacharparenleft}{\kern0pt}x\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}y\ {\isacharplus}{\kern0pt}\ xy{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}}} +(this time fully parenthesised). We can view this regular expression +as tree and if the string \isa{xy} is matched by two Star +`iterations', then the \isa{x} is matched by the left-most +alternative in this tree and the \isa{y} by the right-left alternative. This +suggests to record this matching as + +\begin{center} +\isa{Stars\ {\isacharbrackleft}{\kern0pt}Left\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}{\isacharcomma}{\kern0pt}\ Right\ {\isacharparenleft}{\kern0pt}Left\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharbrackright}{\kern0pt}} +\end{center} + +\noindent where \isa{Stars}, \isa{Left}, \isa{Right} and \isa{Char} are constructors for values. \isa{Stars} records how many +iterations were used; \isa{Left}, respectively \isa{Right}, which +alternative is used. This `tree view' leads naturally to the idea that +regular expressions act as types and values as inhabiting those types +(see, for example, \cite{HosoyaVouillonPierce2005}). The value for +matching \isa{xy} in a single `iteration', i.e.~the POSIX value, +would look as follows + +\begin{center} +\isa{Stars\ {\isacharbrackleft}{\kern0pt}Seq\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharbrackright}{\kern0pt}} +\end{center} + +\noindent where \isa{Stars} has only a single-element list for the +single iteration and \isa{Seq} indicates that \isa{xy} is matched +by a sequence regular expression. + +%, which we will in what follows +%write more formally as \isa{x\ {\isasymcdot}\ y}. + + +Sulzmann and Lu give a simple algorithm to calculate a value that +appears to be the value associated with POSIX matching. The challenge +then is to specify that value, in an algorithm-independent fashion, +and to show that Sulzmann and Lu's derivative-based algorithm does +indeed calculate a value that is correct according to the +specification. The answer given by Sulzmann and Lu +\cite{Sulzmann2014} is to define a relation (called an ``order +relation'') on the set of values of \isa{r}, and to show that (once +a string to be matched is chosen) there is a maximum element and that +it is computed by their derivative-based algorithm. This proof idea is +inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY +regular expression matching algorithm. However, we were not able to +establish transitivity and totality for the ``order relation'' by +Sulzmann and Lu. There are some inherent problems with their approach +(of which some of the proofs are not published in +\cite{Sulzmann2014}); perhaps more importantly, we give in this paper +a simple inductive (and algorithm-independent) definition of what we +call being a {\em POSIX value} for a regular expression \isa{r} and +a string \isa{s}; we show that the algorithm by Sulzmann and Lu +computes such a value and that such a value is unique. Our proofs are +both done by hand and checked in Isabelle/HOL. The experience of +doing our proofs has been that this mechanical checking was absolutely +essential: this subject area has hidden snares. This was also noted by +Kuklewicz \cite{Kuklewicz} who found that nearly all POSIX matching +implementations are ``buggy'' \cite[Page 203]{Sulzmann2014} and by +Grathwohl et al \cite[Page 36]{CrashCourse2014} who wrote: + +\begin{quote} +\it{}``The POSIX strategy is more complicated than the greedy because of +the dependence on information about the length of matched strings in the +various subexpressions.'' +\end{quote} + + + +\noindent {\bf Contributions:} We have implemented in Isabelle/HOL the +derivative-based regular expression matching algorithm of +Sulzmann and Lu \cite{Sulzmann2014}. We have proved the correctness of this +algorithm according to our specification of what a POSIX value is (inspired +by work of Vansummeren \cite{Vansummeren2006}). Sulzmann +and Lu sketch in \cite{Sulzmann2014} an informal correctness proof: but to +us it contains unfillable gaps.\footnote{An extended version of +\cite{Sulzmann2014} is available at the website of its first author; this +extended version already includes remarks in the appendix that their +informal proof contains gaps, and possible fixes are not fully worked out.} +Our specification of a POSIX value consists of a simple inductive definition +that given a string and a regular expression uniquely determines this value. +We also show that our definition is equivalent to an ordering +of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2010}. + +%Derivatives as calculated by Brzozowski's method are usually more complex +%regular expressions than the initial one; various optimisations are +%possible. We prove the correctness when simplifications of \isa{\isactrlbold {\isadigit{0}}\ {\isacharplus}{\kern0pt}\ r}, +%\isa{r\ {\isacharplus}{\kern0pt}\ \isactrlbold {\isadigit{0}}}, \isa{\isactrlbold {\isadigit{1}}\ {\isasymcdot}\ r} and \isa{r\ {\isasymcdot}\ \isactrlbold {\isadigit{1}}} to +%\isa{r} are applied. + +We extend our results to ??? Bitcoded version??% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Preliminaries% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +\noindent Strings in Isabelle/HOL are lists of characters with +the empty string being represented by the empty list, written \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}, and list-cons being written as \isa{\underline{\hspace{2mm}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}\underline{\hspace{2mm}}}. Often +we use the usual bracket notation for lists also for strings; for +example a string consisting of just a single character \isa{c} is +written \isa{{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}}. We use the usual definitions for +\emph{prefixes} and \emph{strict prefixes} of strings. By using the +type \isa{char} for characters we have a supply of finitely many +characters roughly corresponding to the ASCII character set. Regular +expressions are defined as usual as the elements of the following +inductive datatype: + + \begin{center} + \isa{r\ {\isacharcolon}{\kern0pt}{\isacharequal}{\kern0pt}} + \isa{\isactrlbold {\isadigit{0}}} $\mid$ + \isa{\isactrlbold {\isadigit{1}}} $\mid$ + \isa{c} $\mid$ + \isa{r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}} $\mid$ + \isa{r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} $\mid$ + \isa{r\isactrlsup {\isasymstar}} + \end{center} + + \noindent where \isa{\isactrlbold {\isadigit{0}}} stands for the regular expression that does + not match any string, \isa{\isactrlbold {\isadigit{1}}} for the regular expression that matches + only the empty string and \isa{c} for matching a character literal. The + language of a regular expression is also defined as usual by the + recursive function \isa{L} with the six clauses: + + \begin{center} + \begin{tabular}{l@ {\hspace{4mm}}rcl} + \textit{(1)} & \isa{L{\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isasymemptyset}}\\ + \textit{(2)} & \isa{L{\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \textit{(3)} & \isa{L{\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \textit{(4)} & \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(5)} & \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymunion}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(6)} & \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isasymstar}}\\ + \end{tabular} + \end{center} + + \noindent In clause \textit{(4)} we use the operation \isa{\underline{\hspace{2mm}}\ {\isacharat}{\kern0pt}\ \underline{\hspace{2mm}}} for the concatenation of two languages (it is also list-append for + strings). We use the star-notation for regular expressions and for + languages (in the last clause above). The star for languages is defined + inductively by two clauses: \isa{{\isacharparenleft}{\kern0pt}i{\isacharparenright}{\kern0pt}} the empty string being in + the star of a language and \isa{{\isacharparenleft}{\kern0pt}ii{\isacharparenright}{\kern0pt}} if \isa{s\isactrlsub {\isadigit{1}}} is in a + language and \isa{s\isactrlsub {\isadigit{2}}} in the star of this language, then also \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}} is in the star of this language. It will also be convenient + to use the following notion of a \emph{semantic derivative} (or \emph{left + quotient}) of a language defined as + % + \begin{center} + \isa{Der\ c\ A\ {\isasymequiv}\ {\isacharbraceleft}{\kern0pt}s\ \mbox{\boldmath$\mid$}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\ {\isasymin}\ A{\isacharbraceright}{\kern0pt}}\;. + \end{center} + + \noindent + For semantic derivatives we have the following equations (for example + mechanically proved in \cite{Krauss2011}): + % + \begin{equation}\label{SemDer} + \begin{array}{lcl} + \isa{Der\ c\ {\isasymemptyset}} & \dn & \isa{{\isasymemptyset}}\\ + \isa{Der\ c\ {\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}} & \dn & \isa{{\isasymemptyset}}\\ + \isa{Der\ c\ {\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}d{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}} & \dn & \isa{\textrm{if}\ c\ {\isacharequal}{\kern0pt}\ d\ \textrm{then}\ {\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \textrm{else}\ {\isasymemptyset}}\\ + \isa{Der\ c\ {\isacharparenleft}{\kern0pt}A\ {\isasymunion}\ B{\isacharparenright}{\kern0pt}} & \dn & \isa{Der\ c\ A\ {\isasymunion}\ Der\ c\ B}\\ + \isa{Der\ c\ {\isacharparenleft}{\kern0pt}A\ {\isacharat}{\kern0pt}\ B{\isacharparenright}{\kern0pt}} & \dn & \isa{{\isacharparenleft}{\kern0pt}Der\ c\ A\ {\isacharat}{\kern0pt}\ B{\isacharparenright}{\kern0pt}\ {\isasymunion}\ {\isacharparenleft}{\kern0pt}\textrm{if}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymin}\ A\ \textrm{then}\ Der\ c\ B\ \textrm{else}\ {\isasymemptyset}{\isacharparenright}{\kern0pt}}\\ + \isa{Der\ c\ {\isacharparenleft}{\kern0pt}A{\isasymstar}{\isacharparenright}{\kern0pt}} & \dn & \isa{Der\ c\ A\ {\isacharat}{\kern0pt}\ A{\isasymstar}} + \end{array} + \end{equation} + + + \noindent \emph{\Brz's derivatives} of regular expressions + \cite{Brzozowski1964} can be easily defined by two recursive functions: + the first is from regular expressions to booleans (implementing a test + when a regular expression can match the empty string), and the second + takes a regular expression and a character to a (derivative) regular + expression: + + \begin{center} + \begin{tabular}{lcl} + \isa{nullable\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{False}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{True}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{False}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{nullable\ r\isactrlsub {\isadigit{1}}\ {\isasymor}\ nullable\ r\isactrlsub {\isadigit{2}}}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{nullable\ r\isactrlsub {\isadigit{1}}\ {\isasymand}\ nullable\ r\isactrlsub {\isadigit{2}}}\\ + \isa{nullable\ {\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{True}\medskip\\ + +% \end{tabular} +% \end{center} + +% \begin{center} +% \begin{tabular}{lcl} + + \isa{\isactrlbold {\isadigit{0}}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\isactrlbold {\isadigit{0}}}\\ + \isa{\isactrlbold {\isadigit{1}}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\isactrlbold {\isadigit{0}}}\\ + \isa{d{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\textrm{if}\ c\ {\isacharequal}{\kern0pt}\ d\ \textrm{then}\ \isactrlbold {\isadigit{1}}\ \textrm{else}\ \isactrlbold {\isadigit{0}}}\\ + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}\\ + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{\textrm{if}\ nullable\ r\isactrlsub {\isadigit{1}}\ \textrm{then}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ \textrm{else}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}\\ + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsup {\isasymstar}} + \end{tabular} + \end{center} + + \noindent + We may extend this definition to give derivatives w.r.t.~strings: + + \begin{center} + \begin{tabular}{lcl} + \isa{r{\isacharbackslash}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{r}\\ + \isa{r{\isacharbackslash}{\kern0pt}{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}s}\\ + \end{tabular} + \end{center} + + \noindent Given the equations in \eqref{SemDer}, it is a relatively easy + exercise in mechanical reasoning to establish that + + \begin{proposition}\label{derprop}\mbox{}\\ + \begin{tabular}{ll} + \textit{(1)} & \isa{nullable\ r} if and only if + \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}, and \\ + \textit{(2)} & \isa{L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ Der\ c\ {\isacharparenleft}{\kern0pt}L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}. + \end{tabular} + \end{proposition} + + \noindent With this in place it is also very routine to prove that the + regular expression matcher defined as + % + \begin{center} + \isa{match\ r\ s\ {\isasymequiv}\ nullable\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}s{\isacharparenright}{\kern0pt}} + \end{center} + + \noindent gives a positive answer if and only if \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}. + Consequently, this regular expression matching algorithm satisfies the + usual specification for regular expression matching. While the matcher + above calculates a provably correct YES/NO answer for whether a regular + expression matches a string or not, the novel idea of Sulzmann and Lu + \cite{Sulzmann2014} is to append another phase to this algorithm in order + to calculate a [lexical] value. We will explain the details next.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{POSIX Regular Expression Matching\label{posixsec}% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +There have been many previous works that use values for encoding + \emph{how} a regular expression matches a string. + The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to + define a function on values that mirrors (but inverts) the + construction of the derivative on regular expressions. \emph{Values} + are defined as the inductive datatype + + \begin{center} + \isa{v\ {\isacharcolon}{\kern0pt}{\isacharequal}{\kern0pt}} + \isa{Empty} $\mid$ + \isa{Char\ c} $\mid$ + \isa{Left\ v} $\mid$ + \isa{Right\ v} $\mid$ + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} $\mid$ + \isa{Stars\ vs} + \end{center} + + \noindent where we use \isa{vs} to stand for a list of + values. (This is similar to the approach taken by Frisch and + Cardelli for GREEDY matching \cite{Frisch2004}, and Sulzmann and Lu + for POSIX matching \cite{Sulzmann2014}). The string underlying a + value can be calculated by the \isa{flat} function, written + \isa{{\isacharbar}{\kern0pt}\underline{\hspace{2mm}}{\isacharbar}{\kern0pt}} and defined as: + + \begin{center} + \begin{tabular}[t]{lcl} + \isa{{\isacharbar}{\kern0pt}Empty{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Char\ c{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Left\ v{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Right\ v{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}} + \end{tabular}\hspace{14mm} + \begin{tabular}[t]{lcl} + \isa{{\isacharbar}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\\ + \isa{{\isacharbar}{\kern0pt}Stars\ {\isacharparenleft}{\kern0pt}v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}{\isacharbar}{\kern0pt}} & $\dn$ & \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}Stars\ vs{\isacharbar}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent We will sometimes refer to the underlying string of a + value as \emph{flattened value}. We will also overload our notation and + use \isa{{\isacharbar}{\kern0pt}vs{\isacharbar}{\kern0pt}} for flattening a list of values and concatenating + the resulting strings. + + Sulzmann and Lu define + inductively an \emph{inhabitation relation} that associates values to + regular expressions. We define this relation as + follows:\footnote{Note that the rule for \isa{Stars} differs from + our earlier paper \cite{AusafDyckhoffUrban2016}. There we used the + original definition by Sulzmann and Lu which does not require that + the values \isa{v\ {\isasymin}\ vs} flatten to a non-empty + string. The reason for introducing the more restricted version of + lexical values is convenience later on when reasoning about an + ordering relation for values.} + + \begin{center} + \begin{tabular}{c@ {\hspace{12mm}}c}\label{prfintros} + \\[-8mm] + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{Empty\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{1}}}}} & + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{Char\ c\ {\isacharcolon}{\kern0pt}\ c}}}\\[4mm] + \isa{\mbox{}\inferrule{\mbox{v\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}{\mbox{Left\ v\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}}} & + \isa{\mbox{}\inferrule{\mbox{v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}{\mbox{Right\ v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}}}\\[4mm] + \isa{\mbox{}\inferrule{\mbox{v\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}\\\ \mbox{v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}}{\mbox{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}}} & + \isa{\mbox{}\inferrule{\mbox{{\isasymforall}v{\isasymin}vs{\isachardot}{\kern0pt}\ v\ {\isacharcolon}{\kern0pt}\ r\ {\isasymand}\ {\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}}{\mbox{Stars\ vs\ {\isacharcolon}{\kern0pt}\ r\isactrlsup {\isasymstar}}}} + \end{tabular} + \end{center} + + \noindent where in the clause for \isa{Stars} we use the + notation \isa{v\ {\isasymin}\ vs} for indicating that \isa{v} is a + member in the list \isa{vs}. We require in this rule that every + value in \isa{vs} flattens to a non-empty string. The idea is that + \isa{Stars}-values satisfy the informal Star Rule (see Introduction) + where the $^\star$ does not match the empty string unless this is + the only match for the repetition. Note also that no values are + associated with the regular expression \isa{\isactrlbold {\isadigit{0}}}, and that the + only value associated with the regular expression \isa{\isactrlbold {\isadigit{1}}} is + \isa{Empty}. It is routine to establish how values ``inhabiting'' + a regular expression correspond to the language of a regular + expression, namely + + \begin{proposition}\label{inhabs} + \isa{L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbraceleft}{\kern0pt}{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ \mbox{\boldmath$\mid$}\ v\ {\isacharcolon}{\kern0pt}\ r{\isacharbraceright}{\kern0pt}} + \end{proposition} + + \noindent + Given a regular expression \isa{r} and a string \isa{s}, we define the + set of all \emph{Lexical Values} inhabited by \isa{r} with the underlying string + being \isa{s}:\footnote{Okui and Suzuki refer to our lexical values + as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic + values} by Cardelli and Frisch \cite{Frisch2004} is related, but not identical + to our lexical values.} + + \begin{center} + \isa{LV\ r\ s\ {\isasymequiv}\ {\isacharbraceleft}{\kern0pt}v\ \mbox{\boldmath$\mid$}\ v\ {\isacharcolon}{\kern0pt}\ r\ {\isasymand}\ {\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ s{\isacharbraceright}{\kern0pt}} + \end{center} + + \noindent The main property of \isa{LV\ r\ s} is that it is alway finite. + + \begin{proposition} + \isa{finite\ {\isacharparenleft}{\kern0pt}LV\ r\ s{\isacharparenright}{\kern0pt}} + \end{proposition} + + \noindent This finiteness property does not hold in general if we + remove the side-condition about \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} in the + \isa{Stars}-rule above. For example using Sulzmann and Lu's + less restrictive definition, \isa{LV\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} would contain + infinitely many values, but according to our more restricted + definition only a single value, namely \isa{LV\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbraceleft}{\kern0pt}Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}. + + If a regular expression \isa{r} matches a string \isa{s}, then + generally the set \isa{LV\ r\ s} is not just a singleton set. In + case of POSIX matching the problem is to calculate the unique lexical value + that satisfies the (informal) POSIX rules from the Introduction. + Graphically the POSIX value calculation algorithm by Sulzmann and Lu + can be illustrated by the picture in Figure~\ref{Sulz} where the + path from the left to the right involving \isa{derivatives}/\isa{nullable} is the first phase of the algorithm + (calculating successive \Brz's derivatives) and \isa{mkeps}/\isa{inj}, the path from right to left, the second + phase. This picture shows the steps required when a regular + expression, say \isa{r\isactrlsub {\isadigit{1}}}, matches the string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}}. We first build the three derivatives (according to + \isa{a}, \isa{b} and \isa{c}). We then use \isa{nullable} + to find out whether the resulting derivative regular expression + \isa{r\isactrlsub {\isadigit{4}}} can match the empty string. If yes, we call the + function \isa{mkeps} that produces a value \isa{v\isactrlsub {\isadigit{4}}} + for how \isa{r\isactrlsub {\isadigit{4}}} can match the empty string (taking into + account the POSIX constraints in case there are several ways). This + function is defined by the clauses: + +\begin{figure}[t] +\begin{center} +\begin{tikzpicture}[scale=2,node distance=1.3cm, + every node/.style={minimum size=6mm}] +\node (r1) {\isa{r\isactrlsub {\isadigit{1}}}}; +\node (r2) [right=of r1]{\isa{r\isactrlsub {\isadigit{2}}}}; +\draw[->,line width=1mm](r1)--(r2) node[above,midway] {\isa{\underline{\hspace{2mm}}{\isacharbackslash}{\kern0pt}a}}; +\node (r3) [right=of r2]{\isa{r\isactrlsub {\isadigit{3}}}}; +\draw[->,line width=1mm](r2)--(r3) node[above,midway] {\isa{\underline{\hspace{2mm}}{\isacharbackslash}{\kern0pt}b}}; +\node (r4) [right=of r3]{\isa{r\isactrlsub {\isadigit{4}}}}; +\draw[->,line width=1mm](r3)--(r4) node[above,midway] {\isa{\underline{\hspace{2mm}}{\isacharbackslash}{\kern0pt}c}}; +\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\isa{nullable}}}; +\node (v4) [below=of r4]{\isa{v\isactrlsub {\isadigit{4}}}}; +\draw[->,line width=1mm](r4) -- (v4); +\node (v3) [left=of v4] {\isa{v\isactrlsub {\isadigit{3}}}}; +\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\isa{inj\ r\isactrlsub {\isadigit{3}}\ c}}; +\node (v2) [left=of v3]{\isa{v\isactrlsub {\isadigit{2}}}}; +\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\isa{inj\ r\isactrlsub {\isadigit{2}}\ b}}; +\node (v1) [left=of v2] {\isa{v\isactrlsub {\isadigit{1}}}}; +\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\isa{inj\ r\isactrlsub {\isadigit{1}}\ a}}; +\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\isa{mkeps}}}; +\end{tikzpicture} +\end{center} +\mbox{}\\[-13mm] + +\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, +matching the string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}}. The first phase (the arrows from +left to right) is \Brz's matcher building successive derivatives. If the +last regular expression is \isa{nullable}, then the functions of the +second phase are called (the top-down and right-to-left arrows): first +\isa{mkeps} calculates a value \isa{v\isactrlsub {\isadigit{4}}} witnessing +how the empty string has been recognised by \isa{r\isactrlsub {\isadigit{4}}}. After +that the function \isa{inj} ``injects back'' the characters of the string into +the values. +\label{Sulz}} +\end{figure} + + \begin{center} + \begin{tabular}{lcl} + \isa{mkeps\ \isactrlbold {\isadigit{1}}} & $\dn$ & \isa{Empty}\\ + \isa{mkeps\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{mkeps\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{\textrm{if}\ nullable\ r\isactrlsub {\isadigit{1}}\ \textrm{then}\ Left\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ \textrm{else}\ Right\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{mkeps\ {\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent Note that this function needs only to be partially defined, + namely only for regular expressions that are nullable. In case \isa{nullable} fails, the string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}} cannot be matched by \isa{r\isactrlsub {\isadigit{1}}} and the null value \isa{None} is returned. Note also how this function + makes some subtle choices leading to a POSIX value: for example if an + alternative regular expression, say \isa{r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}, can + match the empty string and furthermore \isa{r\isactrlsub {\isadigit{1}}} can match the + empty string, then we return a \isa{Left}-value. The \isa{Right}-value will only be returned if \isa{r\isactrlsub {\isadigit{1}}} cannot match the empty + string. + + The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is + the construction of a value for how \isa{r\isactrlsub {\isadigit{1}}} can match the + string \isa{{\isacharbrackleft}{\kern0pt}a{\isacharcomma}{\kern0pt}\ b{\isacharcomma}{\kern0pt}\ c{\isacharbrackright}{\kern0pt}} from the value how the last derivative, \isa{r\isactrlsub {\isadigit{4}}} in Fig.~\ref{Sulz}, can match the empty string. Sulzmann and + Lu achieve this by stepwise ``injecting back'' the characters into the + values thus inverting the operation of building derivatives, but on the level + of values. The corresponding function, called \isa{inj}, takes three + arguments, a regular expression, a character and a value. For example in + the first (or right-most) \isa{inj}-step in Fig.~\ref{Sulz} the regular + expression \isa{r\isactrlsub {\isadigit{3}}}, the character \isa{c} from the last + derivative step and \isa{v\isactrlsub {\isadigit{4}}}, which is the value corresponding + to the derivative regular expression \isa{r\isactrlsub {\isadigit{4}}}. The result is + the new value \isa{v\isactrlsub {\isadigit{3}}}. The final result of the algorithm is + the value \isa{v\isactrlsub {\isadigit{1}}}. The \isa{inj} function is defined by recursion on regular + expressions and by analysing the shape of values (corresponding to + the derivative regular expressions). + % + \begin{center} + \begin{tabular}{l@ {\hspace{5mm}}lcl} + \textit{(1)} & \isa{inj\ d\ c\ {\isacharparenleft}{\kern0pt}Empty{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Char\ d}\\ + \textit{(2)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Left\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{Left\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}\\ + \textit{(3)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Right\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{Right\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(4)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Seq\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(5)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Left\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Seq\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(6)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Right\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Seq\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \textit{(7)} & \isa{inj\ {\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Seq\ v\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}} & $\dn$ + & \isa{Stars\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent To better understand what is going on in this definition it + might be instructive to look first at the three sequence cases (clauses + \textit{(4)} -- \textit{(6)}). In each case we need to construct an ``injected value'' for + \isa{r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}. This must be a value of the form \isa{Seq\ \underline{\hspace{2mm}}\ \underline{\hspace{2mm}}}\,. Recall the clause of the \isa{derivative}-function + for sequence regular expressions: + + \begin{center} + \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} $\dn$ \isa{\textrm{if}\ nullable\ r\isactrlsub {\isadigit{1}}\ \textrm{then}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ \textrm{else}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} + \end{center} + + \noindent Consider first the \isa{else}-branch where the derivative is \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}. The corresponding value must therefore + be of the form \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}}, which matches the left-hand + side in clause~\textit{(4)} of \isa{inj}. In the \isa{if}-branch the derivative is an + alternative, namely \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}. This means we either have to consider a \isa{Left}- or + \isa{Right}-value. In case of the \isa{Left}-value we know further it + must be a value for a sequence regular expression. Therefore the pattern + we match in the clause \textit{(5)} is \isa{Left\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}, + while in \textit{(6)} it is just \isa{Right\ v\isactrlsub {\isadigit{2}}}. One more interesting + point is in the right-hand side of clause \textit{(6)}: since in this case the + regular expression \isa{r\isactrlsub {\isadigit{1}}} does not ``contribute'' to + matching the string, that means it only matches the empty string, we need to + call \isa{mkeps} in order to construct a value for how \isa{r\isactrlsub {\isadigit{1}}} + can match this empty string. A similar argument applies for why we can + expect in the left-hand side of clause \textit{(7)} that the value is of the form + \isa{Seq\ v\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}}---the derivative of a star is \isa{{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsup {\isasymstar}}. Finally, the reason for why we can ignore the second argument + in clause \textit{(1)} of \isa{inj} is that it will only ever be called in cases + where \isa{c\ {\isacharequal}{\kern0pt}\ d}, but the usual linearity restrictions in patterns do + not allow us to build this constraint explicitly into our function + definition.\footnote{Sulzmann and Lu state this clause as \isa{inj\ c\ c\ {\isacharparenleft}{\kern0pt}Empty{\isacharparenright}{\kern0pt}} $\dn$ \isa{Char\ c}, + but our deviation is harmless.} + + The idea of the \isa{inj}-function to ``inject'' a character, say + \isa{c}, into a value can be made precise by the first part of the + following lemma, which shows that the underlying string of an injected + value has a prepended character \isa{c}; the second part shows that + the underlying string of an \isa{mkeps}-value is always the empty + string (given the regular expression is nullable since otherwise + \isa{mkeps} might not be defined). + + \begin{lemma}\mbox{}\smallskip\\\label{Prf_injval_flat} + \begin{tabular}{ll} + (1) & \isa{{\normalsize{}If\,}\ v\ {\isacharcolon}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c\ {\normalsize \,then\,}\ {\isacharbar}{\kern0pt}inj\ r\ c\ v{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}{\isachardot}{\kern0pt}}\\ + (2) & \isa{{\normalsize{}If\,}\ nullable\ r\ {\normalsize \,then\,}\ {\isacharbar}{\kern0pt}mkeps\ r{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isachardot}{\kern0pt}} + \end{tabular} + \end{lemma} + + \begin{proof} + Both properties are by routine inductions: the first one can, for example, + be proved by induction over the definition of \isa{derivatives}; the second by + an induction on \isa{r}. There are no interesting cases.\qed + \end{proof} + + Having defined the \isa{mkeps} and \isa{inj} function we can extend + \Brz's matcher so that a value is constructed (assuming the + regular expression matches the string). The clauses of the Sulzmann and Lu lexer are + + \begin{center} + \begin{tabular}{lcl} + \isa{lexer\ r\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{\textrm{if}\ nullable\ r\ \textrm{then}\ Some\ {\isacharparenleft}{\kern0pt}mkeps\ r{\isacharparenright}{\kern0pt}\ \textrm{else}\ None}\\ + \isa{lexer\ r\ {\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{case} \isa{lexer\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ s} \isa{of}\\ + & & \phantom{$|$} \isa{None} \isa{{\isasymRightarrow}} \isa{None}\\ + & & $|$ \isa{Some\ v} \isa{{\isasymRightarrow}} \isa{Some\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ v{\isacharparenright}{\kern0pt}} + \end{tabular} + \end{center} + + \noindent If the regular expression does not match the string, \isa{None} is + returned. If the regular expression \emph{does} + match the string, then \isa{Some} value is returned. One important + virtue of this algorithm is that it can be implemented with ease in any + functional programming language and also in Isabelle/HOL. In the remaining + part of this section we prove that this algorithm is correct. + + The well-known idea of POSIX matching is informally defined by some + rules such as the Longest Match and Priority Rules (see + Introduction); as correctly argued in \cite{Sulzmann2014}, this + needs formal specification. Sulzmann and Lu define an ``ordering + relation'' between values and argue that there is a maximum value, + as given by the derivative-based algorithm. In contrast, we shall + introduce a simple inductive definition that specifies directly what + a \emph{POSIX value} is, incorporating the POSIX-specific choices + into the side-conditions of our rules. Our definition is inspired by + the matching relation given by Vansummeren~\cite{Vansummeren2006}. + The relation we define is ternary and + written as \mbox{\isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}}, relating + strings, regular expressions and values; the inductive rules are given in + Figure~\ref{POSIXrules}. + We can prove that given a string \isa{s} and regular expression \isa{r}, the POSIX value \isa{v} is uniquely determined by \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}. + + % + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ \isactrlbold {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Empty}}}\isa{P}\isa{\isactrlbold {\isadigit{1}}} \qquad + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}c{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Char\ c}}}\isa{P}\isa{c}\medskip\\ + \isa{\mbox{}\inferrule{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}}{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Left\ v}}}\isa{P{\isacharplus}{\kern0pt}L}\qquad + \isa{\mbox{}\inferrule{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\\ \mbox{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}}{\mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Right\ v}}}\isa{P{\isacharplus}{\kern0pt}R}\medskip\\ + $\mprset{flushleft} + \inferrule + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} \qquad + \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{2}}} \\\\ + \isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}} + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}}}$\isa{PS}\\ + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}}}\isa{P{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}\medskip\\ + $\mprset{flushleft} + \inferrule + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} \qquad + \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Stars\ vs} \qquad + \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} \\\\ + \isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}}} + {\isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Stars\ {\isacharparenleft}{\kern0pt}v\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}}}$\isa{P{\isasymstar}} + \end{tabular} + \end{center} + \caption{Our inductive definition of POSIX values.}\label{POSIXrules} + \end{figure} + + + + \begin{theorem}\mbox{}\smallskip\\\label{posixdeterm} + \begin{tabular}{ll} + (1) & If \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} then \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} and \isa{{\isacharbar}{\kern0pt}v{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ s}.\\ + (2) & \isa{{\normalsize{}If\,}\ \mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\ {\normalsize \,and\,}\ \mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}\ {\normalsize \,then\,}\ v\ {\isacharequal}{\kern0pt}\ v{\isacharprime}{\kern0pt}{\isachardot}{\kern0pt}} + \end{tabular} + \end{theorem} + + \begin{proof} Both by induction on the definition of \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}. + The second parts follows by a case analysis of \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}} and + the first part.\qed + \end{proof} + + \noindent + We claim that our \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} relation captures the idea behind the four + informal POSIX rules shown in the Introduction: Consider for example the + rules \isa{P{\isacharplus}{\kern0pt}L} and \isa{P{\isacharplus}{\kern0pt}R} where the POSIX value for a string + and an alternative regular expression, that is \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}, + is specified---it is always a \isa{Left}-value, \emph{except} when the + string to be matched is not in the language of \isa{r\isactrlsub {\isadigit{1}}}; only then it + is a \isa{Right}-value (see the side-condition in \isa{P{\isacharplus}{\kern0pt}R}). + Interesting is also the rule for sequence regular expressions (\isa{PS}). The first two premises state that \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} + are the POSIX values for \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} and \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} + respectively. Consider now the third premise and note that the POSIX value + of this rule should match the string \mbox{\isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}}}. According to the + Longest Match Rule, we want that the \isa{s\isactrlsub {\isadigit{1}}} is the longest initial + split of \mbox{\isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}}} such that \isa{s\isactrlsub {\isadigit{2}}} is still recognised + by \isa{r\isactrlsub {\isadigit{2}}}. Let us assume, contrary to the third premise, that there + \emph{exist} an \isa{s\isactrlsub {\isadigit{3}}} and \isa{s\isactrlsub {\isadigit{4}}} such that \isa{s\isactrlsub {\isadigit{2}}} + can be split up into a non-empty string \isa{s\isactrlsub {\isadigit{3}}} and a possibly empty + string \isa{s\isactrlsub {\isadigit{4}}}. Moreover the longer string \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}} can be + matched by \isa{r\isactrlsub {\isadigit{1}}} and the shorter \isa{s\isactrlsub {\isadigit{4}}} can still be + matched by \isa{r\isactrlsub {\isadigit{2}}}. In this case \isa{s\isactrlsub {\isadigit{1}}} would \emph{not} be the + longest initial split of \mbox{\isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}}} and therefore \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} cannot be a POSIX value for \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}. + The main point is that our side-condition ensures the Longest + Match Rule is satisfied. + + A similar condition is imposed on the POSIX value in the \isa{P{\isasymstar}}-rule. Also there we want that \isa{s\isactrlsub {\isadigit{1}}} is the longest initial + split of \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}} and furthermore the corresponding value + \isa{v} cannot be flattened to the empty string. In effect, we require + that in each ``iteration'' of the star, some non-empty substring needs to + be ``chipped'' away; only in case of the empty string we accept \isa{Stars\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} as the POSIX value. Indeed we can show that our POSIX values + are lexical values which exclude those \isa{Stars} that contain subvalues + that flatten to the empty string. + + \begin{lemma}\label{LVposix} + \isa{{\normalsize{}If\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\ {\normalsize \,then\,}\ v\ {\isasymin}\ LV\ r\ s{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By routine induction on \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}.\qed + \end{proof} + + \noindent + Next is the lemma that shows the function \isa{mkeps} calculates + the POSIX value for the empty string and a nullable regular expression. + + \begin{lemma}\label{lemmkeps} + \isa{{\normalsize{}If\,}\ nullable\ r\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ mkeps\ r{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By routine induction on \isa{r}.\qed + \end{proof} + + \noindent + The central lemma for our POSIX relation is that the \isa{inj}-function + preserves POSIX values. + + \begin{lemma}\label{Posix2} + \isa{{\normalsize{}If\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\ c\ v{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By induction on \isa{r}. We explain two cases. + + \begin{itemize} + \item[$\bullet$] Case \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}. There are + two subcases, namely \isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}} \mbox{\isa{v\ {\isacharequal}{\kern0pt}\ Left\ v{\isacharprime}{\kern0pt}}} and \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}; and \isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}} \isa{v\ {\isacharequal}{\kern0pt}\ Right\ v{\isacharprime}{\kern0pt}}, \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}} and \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}. In \isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}} we + know \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}, from which we can infer \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{1}}\ c\ v{\isacharprime}{\kern0pt}} by induction hypothesis and hence \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ c\ {\isacharparenleft}{\kern0pt}Left\ v{\isacharprime}{\kern0pt}{\isacharparenright}{\kern0pt}} as needed. Similarly + in subcase \isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}} where, however, in addition we have to use + Proposition~\ref{derprop}(2) in order to infer \isa{c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} from \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}.\smallskip + + \item[$\bullet$] Case \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}}. There are three subcases: + + \begin{quote} + \begin{description} + \item[\isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}}] \isa{v\ {\isacharequal}{\kern0pt}\ Left\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} and \isa{nullable\ r\isactrlsub {\isadigit{1}}} + \item[\isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}}] \isa{v\ {\isacharequal}{\kern0pt}\ Right\ v\isactrlsub {\isadigit{1}}} and \isa{nullable\ r\isactrlsub {\isadigit{1}}} + \item[\isa{{\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}}] \isa{v\ {\isacharequal}{\kern0pt}\ Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} and \isa{{\isasymnot}\ nullable\ r\isactrlsub {\isadigit{1}}} + \end{description} + \end{quote} + + \noindent For \isa{{\isacharparenleft}{\kern0pt}a{\isacharparenright}{\kern0pt}} we know \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} and + \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{2}}} as well as + % + \[\isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\] + + \noindent From the latter we can infer by Proposition~\ref{derprop}(2): + % + \[\isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymand}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\] + + \noindent We can use the induction hypothesis for \isa{r\isactrlsub {\isadigit{1}}} to obtain + \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}}. Putting this all together allows us to infer + \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ v\isactrlsub {\isadigit{2}}}. The case \isa{{\isacharparenleft}{\kern0pt}c{\isacharparenright}{\kern0pt}} + is similar. + + For \isa{{\isacharparenleft}{\kern0pt}b{\isacharparenright}{\kern0pt}} we know \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} and + \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}. From the former + we have \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{1}}} by induction hypothesis + for \isa{r\isactrlsub {\isadigit{2}}}. From the latter we can infer + % + \[\isa{{\isasymnexists}s\isactrlsub {\isadigit{3}}\ s\isactrlsub {\isadigit{4}}{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ s\isactrlsub {\isadigit{3}}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{4}}\ {\isacharequal}{\kern0pt}\ c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\ {\isasymand}\ s\isactrlsub {\isadigit{3}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymand}\ s\isactrlsub {\isadigit{4}}\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\] + + \noindent By Lemma~\ref{lemmkeps} we know \isa{{\isacharparenleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ mkeps\ r\isactrlsub {\isadigit{1}}} + holds. Putting this all together, we can conclude with \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ {\isacharparenleft}{\kern0pt}mkeps\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}inj\ r\isactrlsub {\isadigit{2}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}, as required. + + Finally suppose \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\isactrlsup {\isasymstar}}. This case is very similar to the + sequence case, except that we need to also ensure that \isa{{\isacharbar}{\kern0pt}inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isasymnoteq}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}. This follows from \isa{{\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ inj\ r\isactrlsub {\isadigit{1}}\ c\ v\isactrlsub {\isadigit{1}}} (which in turn follows from \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}} and the induction hypothesis).\qed + \end{itemize} + \end{proof} + + \noindent + With Lemma~\ref{Posix2} in place, it is completely routine to establish + that the Sulzmann and Lu lexer satisfies our specification (returning + the null value \isa{None} iff the string is not in the language of the regular expression, + and returning a unique POSIX value iff the string \emph{is} in the language): + + \begin{theorem}\mbox{}\smallskip\\\label{lexercorrect} + \begin{tabular}{ll} + (1) & \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} if and only if \isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ None}\\ + (2) & \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} if and only if \isa{{\isasymexists}v{\isachardot}{\kern0pt}\ lexer\ r\ s\ {\isacharequal}{\kern0pt}\ Some\ v\ {\isasymand}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\ + \end{tabular} + \end{theorem} + + \begin{proof} + By induction on \isa{s} using Lemma~\ref{lemmkeps} and \ref{Posix2}.\qed + \end{proof} + + \noindent In \textit{(2)} we further know by Theorem~\ref{posixdeterm} that the + value returned by the lexer must be unique. A simple corollary + of our two theorems is: + + \begin{corollary}\mbox{}\smallskip\\\label{lexercorrectcor} + \begin{tabular}{ll} + (1) & \isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ None} if and only if \isa{{\isasymnexists}v{\isachardot}{\kern0pt}a{\isachardot}{\kern0pt}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\ + (2) & \isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ Some\ v} if and only if \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}\\ + \end{tabular} + \end{corollary} + + \noindent This concludes our correctness proof. Note that we have + not changed the algorithm of Sulzmann and Lu,\footnote{All + deviations we introduced are harmless.} but introduced our own + specification for what a correct result---a POSIX value---should be. + In the next section we show that our specification coincides with + another one given by Okui and Suzuki using a different technique.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Ordering of Values according to Okui and Suzuki% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +While in the previous section we have defined POSIX values directly + in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}), + Sulzmann and Lu took a different approach in \cite{Sulzmann2014}: + they introduced an ordering for values and identified POSIX values + as the maximal elements. An extended version of \cite{Sulzmann2014} + is available at the website of its first author; this includes more + details of their proofs, but which are evidently not in final form + yet. Unfortunately, we were not able to verify claims that their + ordering has properties such as being transitive or having maximal + elements. + + Okui and Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} described + another ordering of values, which they use to establish the + correctness of their automata-based algorithm for POSIX matching. + Their ordering resembles some aspects of the one given by Sulzmann + and Lu, but overall is quite different. To begin with, Okui and + Suzuki identify POSIX values as minimal, rather than maximal, + elements in their ordering. A more substantial difference is that + the ordering by Okui and Suzuki uses \emph{positions} in order to + identify and compare subvalues. Positions are lists of natural + numbers. This allows them to quite naturally formalise the Longest + Match and Priority rules of the informal POSIX standard. Consider + for example the value \isa{v} + + \begin{center} + \isa{v\ {\isasymequiv}\ Stars\ {\isacharbrackleft}{\kern0pt}Seq\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharcomma}{\kern0pt}\ Char\ z{\isacharbrackright}{\kern0pt}} + \end{center} + + \noindent + At position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharcomma}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}} of this value is the + subvalue \isa{Char\ y} and at position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}} the + subvalue \isa{Char\ z}. At the `root' position, or empty list + \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}}, is the whole value \isa{v}. Positions such as \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharcomma}{\kern0pt}{\isadigit{1}}{\isacharcomma}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}} or \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{2}}{\isacharbrackright}{\kern0pt}} are outside of \isa{v}. If it exists, the subvalue of \isa{v} at a position \isa{p}, written \isa{v\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub }, can be recursively defined by + + \begin{center} + \begin{tabular}{r@ {\hspace{0mm}}lcl} + \isa{v} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\isactrlesub } & \isa{{\isasymequiv}}& \isa{v}\\ + \isa{Left\ v} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{0}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}}& \isa{v\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub }\\ + \isa{Right\ v} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{1}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}} & + \isa{v\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub }\\ + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{0}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}} & + \isa{v\isactrlsub {\isadigit{1}}\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub } \\ + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} & \isa{{\isasymdownharpoonleft}\isactrlbsub {\isadigit{1}}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } + & \isa{{\isasymequiv}} & + \isa{v\isactrlsub {\isadigit{2}}\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub } \\ + \isa{Stars\ vs} & \isa{{\isasymdownharpoonleft}\isactrlbsub n{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}ps\isactrlesub } & \isa{{\isasymequiv}}& \isa{vs\ensuremath{_{[\mathit{n}]}}\mbox{$\downharpoonleft$}\isactrlbsub ps\isactrlesub }\\ + \end{tabular} + \end{center} + + \noindent In the last clause we use Isabelle's notation \isa{vs\ensuremath{_{[\mathit{n}]}}} for the + \isa{n}th element in a list. The set of positions inside a value \isa{v}, + written \isa{Pos\ v}, is given by + + \begin{center} + \begin{tabular}{lcl} + \isa{Pos\ {\isacharparenleft}{\kern0pt}Empty{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Char\ c{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Left\ v{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{0}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Right\ v{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} + & \isa{{\isasymequiv}} + & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{0}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharbraceleft}{\kern0pt}{\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{2}}{\isacharbraceright}{\kern0pt}}\\ + \isa{Pos\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}} & \isa{{\isasymequiv}} & \isa{{\isacharbraceleft}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymunion}\ {\isacharparenleft}{\kern0pt}{\isasymUnion}n\ {\isacharless}{\kern0pt}\ len\ vs\ {\isacharbraceleft}{\kern0pt}n\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\ \mbox{\boldmath$\mid$}\ ps\ {\isasymin}\ Pos\ vs\ensuremath{_{[\mathit{n}]}}{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent + whereby \isa{len} in the last clause stands for the length of a list. Clearly + for every position inside a value there exists a subvalue at that position. + + + To help understanding the ordering of Okui and Suzuki, consider again + the earlier value + \isa{v} and compare it with the following \isa{w}: + + \begin{center} + \begin{tabular}{l} + \isa{v\ {\isasymequiv}\ Stars\ {\isacharbrackleft}{\kern0pt}Seq\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Char\ y{\isacharparenright}{\kern0pt}{\isacharcomma}{\kern0pt}\ Char\ z{\isacharbrackright}{\kern0pt}}\\ + \isa{w\ {\isasymequiv}\ Stars\ {\isacharbrackleft}{\kern0pt}Char\ x{\isacharcomma}{\kern0pt}\ Char\ y{\isacharcomma}{\kern0pt}\ Char\ z{\isacharbrackright}{\kern0pt}} + \end{tabular} + \end{center} + + \noindent Both values match the string \isa{xyz}, that means if + we flatten these values at their respective root position, we obtain + \isa{xyz}. However, at position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}}, \isa{v} matches + \isa{xy} whereas \isa{w} matches only the shorter \isa{x}. So + according to the Longest Match Rule, we should prefer \isa{v}, + rather than \isa{w} as POSIX value for string \isa{xyz} (and + corresponding regular expression). In order to + formalise this idea, Okui and Suzuki introduce a measure for + subvalues at position \isa{p}, called the \emph{norm} of \isa{v} + at position \isa{p}. We can define this measure in Isabelle as an + integer as follows + + \begin{center} + \isa{{\isasymparallel}v{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isasymequiv}\ \textrm{if}\ p\ {\isasymin}\ Pos\ v\ \textrm{then}\ len\ {\isacharbar}{\kern0pt}v\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub {\isacharbar}{\kern0pt}\ \textrm{else}\ {\isacharminus}{\kern0pt}\ {\isadigit{1}}} + \end{center} + + \noindent where we take the length of the flattened value at + position \isa{p}, provided the position is inside \isa{v}; if + not, then the norm is \isa{{\isacharminus}{\kern0pt}{\isadigit{1}}}. The default for outside + positions is crucial for the POSIX requirement of preferring a + \isa{Left}-value over a \isa{Right}-value (if they can match the + same string---see the Priority Rule from the Introduction). For this + consider + + \begin{center} + \isa{v\ {\isasymequiv}\ Left\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}} \qquad and \qquad \isa{w\ {\isasymequiv}\ Right\ {\isacharparenleft}{\kern0pt}Char\ x{\isacharparenright}{\kern0pt}} + \end{center} + + \noindent Both values match \isa{x}. At position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}} + the norm of \isa{v} is \isa{{\isadigit{1}}} (the subvalue matches \isa{x}), + but the norm of \isa{w} is \isa{{\isacharminus}{\kern0pt}{\isadigit{1}}} (the position is outside + \isa{w} according to how we defined the `inside' positions of + \isa{Left}- and \isa{Right}-values). Of course at position + \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}}, the norms \isa{{\isasymparallel}v{\isasymparallel}\isactrlbsub {\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}\isactrlesub } and \isa{{\isasymparallel}w{\isasymparallel}\isactrlbsub {\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}\isactrlesub } are reversed, but the point is that subvalues + will be analysed according to lexicographically ordered + positions. According to this ordering, the position \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{0}}{\isacharbrackright}{\kern0pt}} + takes precedence over \isa{{\isacharbrackleft}{\kern0pt}{\isadigit{1}}{\isacharbrackright}{\kern0pt}} and thus also \isa{v} will be + preferred over \isa{w}. The lexicographic ordering of positions, written + \isa{\underline{\hspace{2mm}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ \underline{\hspace{2mm}}}, can be conveniently formalised + by three inference rules + + \begin{center} + \begin{tabular}{ccc} + \isa{\mbox{}\inferrule{\mbox{}}{\mbox{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps}}}\hspace{1cm} & + \isa{\mbox{}\inferrule{\mbox{p\isactrlsub {\isadigit{1}}\ {\isacharless}{\kern0pt}\ p\isactrlsub {\isadigit{2}}}}{\mbox{p\isactrlsub {\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{2}}}}}\hspace{1cm} & + \isa{\mbox{}\inferrule{\mbox{ps\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ ps\isactrlsub {\isadigit{2}}}}{\mbox{p\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}ps\isactrlsub {\isadigit{2}}}}} + \end{tabular} + \end{center} + + With the norm and lexicographic order in place, + we can state the key definition of Okui and Suzuki + \cite{OkuiSuzuki2010}: a value \isa{v\isactrlsub {\isadigit{1}}} is \emph{smaller at position \isa{p}} than + \isa{v\isactrlsub {\isadigit{2}}}, written \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub p\isactrlesub \ v\isactrlsub {\isadigit{2}}}, + if and only if $(i)$ the norm at position \isa{p} is + greater in \isa{v\isactrlsub {\isadigit{1}}} (that is the string \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub {\isacharbar}{\kern0pt}} is longer + than \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}\mbox{$\downharpoonleft$}\isactrlbsub p\isactrlesub {\isacharbar}{\kern0pt}}) and $(ii)$ all subvalues at + positions that are inside \isa{v\isactrlsub {\isadigit{1}}} or \isa{v\isactrlsub {\isadigit{2}}} and that are + lexicographically smaller than \isa{p}, we have the same norm, namely + + \begin{center} + \begin{tabular}{c} + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub p\isactrlesub \ v\isactrlsub {\isadigit{2}}} + \isa{{\isasymequiv}} + $\begin{cases} + (i) & \isa{{\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p\isactrlesub } \quad\text{and}\smallskip \\ + (ii) & \isa{{\isasymforall}q{\isasymin}Pos\ v\isactrlsub {\isadigit{1}}\ {\isasymunion}\ Pos\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}\ q\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p\ {\isasymlongrightarrow}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub q\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub q\isactrlesub } + \end{cases}$ + \end{tabular} + \end{center} + + \noindent The position \isa{p} in this definition acts as the + \emph{first distinct position} of \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}}, where both values match strings of different length + \cite{OkuiSuzuki2010}. Since at \isa{p} the values \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} match different strings, the + ordering is irreflexive. Derived from the definition above + are the following two orderings: + + \begin{center} + \begin{tabular}{l} + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}\ {\isasymequiv}\ {\isasymexists}p{\isachardot}{\kern0pt}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\isactrlbsub p\isactrlesub \ v\isactrlsub {\isadigit{2}}}\\ + \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}\ {\isasymequiv}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}\ {\isasymor}\ v\isactrlsub {\isadigit{1}}\ {\isacharequal}{\kern0pt}\ v\isactrlsub {\isadigit{2}}} + \end{tabular} + \end{center} + + While we encountered a number of obstacles for establishing properties like + transitivity for the ordering of Sulzmann and Lu (and which we failed + to overcome), it is relatively straightforward to establish this + property for the orderings + \isa{\underline{\hspace{2mm}}\ {\isasymprec}\ \underline{\hspace{2mm}}} and \isa{\underline{\hspace{2mm}}\ \mbox{$\preccurlyeq$}\ \underline{\hspace{2mm}}} + by Okui and Suzuki. + + \begin{lemma}[Transitivity]\label{transitivity} + \isa{{\normalsize{}If\,}\ \mbox{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}}\ {\normalsize \,and\,}\ \mbox{v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ v\isactrlsub {\isadigit{3}}}\ {\normalsize \,then\,}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{3}}{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} From the assumption we obtain two positions \isa{p} + and \isa{q}, where the values \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} (respectively \isa{v\isactrlsub {\isadigit{2}}} and \isa{v\isactrlsub {\isadigit{3}}}) are `distinct'. Since \isa{{\isasymprec}\isactrlbsub lex\isactrlesub } is trichotomous, we need to consider + three cases, namely \isa{p\ {\isacharequal}{\kern0pt}\ q}, \isa{p\ {\isasymprec}\isactrlbsub lex\isactrlesub \ q} and + \isa{q\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p}. Let us look at the first case. Clearly + \isa{{\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p\isactrlesub } and \isa{{\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p\isactrlesub } imply \isa{{\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p\isactrlesub \ {\isacharless}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p\isactrlesub }. It remains to show + that for a \isa{p{\isacharprime}{\kern0pt}\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}\ {\isasymunion}\ Pos\ v\isactrlsub {\isadigit{3}}} + with \isa{p{\isacharprime}{\kern0pt}\ {\isasymprec}\isactrlbsub lex\isactrlesub \ p} that \isa{{\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub } holds. Suppose \isa{p{\isacharprime}{\kern0pt}\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}}, then we can infer from the first assumption that \isa{{\isasymparallel}v\isactrlsub {\isadigit{1}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub }. But this means + that \isa{p{\isacharprime}{\kern0pt}} must be in \isa{Pos\ v\isactrlsub {\isadigit{2}}} too (the norm + cannot be \isa{{\isacharminus}{\kern0pt}{\isadigit{1}}} given \isa{p{\isacharprime}{\kern0pt}\ {\isasymin}\ Pos\ v\isactrlsub {\isadigit{1}}}). + Hence we can use the second assumption and + infer \isa{{\isasymparallel}v\isactrlsub {\isadigit{2}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub \ {\isacharequal}{\kern0pt}\ {\isasymparallel}v\isactrlsub {\isadigit{3}}{\isasymparallel}\isactrlbsub p{\isacharprime}{\kern0pt}\isactrlesub }, + which concludes this case with \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{3}}}. The reasoning in the other cases is similar.\qed + \end{proof} + + \noindent + The proof for $\preccurlyeq$ is similar and omitted. + It is also straightforward to show that \isa{{\isasymprec}} and + $\preccurlyeq$ are partial orders. Okui and Suzuki furthermore show that they + are linear orderings for lexical values \cite{OkuiSuzuki2010} of a given + regular expression and given string, but we have not formalised this in Isabelle. It is + not essential for our results. What we are going to show below is + that for a given \isa{r} and \isa{s}, the orderings have a unique + minimal element on the set \isa{LV\ r\ s}, which is the POSIX value + we defined in the previous section. We start with two properties that + show how the length of a flattened value relates to the \isa{{\isasymprec}}-ordering. + + \begin{proposition}\mbox{}\smallskip\\\label{ordlen} + \begin{tabular}{@ {}ll} + (1) & + \isa{{\normalsize{}If\,}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}\ {\normalsize \,then\,}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isasymle}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}{\isachardot}{\kern0pt}}\\ + (2) & + \isa{{\normalsize{}If\,}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharless}{\kern0pt}\ len\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\normalsize \,then\,}\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}} + \end{tabular} + \end{proposition} + + \noindent Both properties follow from the definition of the ordering. Note that + \textit{(2)} entails that a value, say \isa{v\isactrlsub {\isadigit{2}}}, whose underlying + string is a strict prefix of another flattened value, say \isa{v\isactrlsub {\isadigit{1}}}, then + \isa{v\isactrlsub {\isadigit{1}}} must be smaller than \isa{v\isactrlsub {\isadigit{2}}}. For our proofs it + will be useful to have the following properties---in each case the underlying strings + of the compared values are the same: + + \begin{proposition}\mbox{}\smallskip\\\label{ordintros} + \begin{tabular}{ll} + \textit{(1)} & + \isa{{\normalsize{}If\,}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\normalsize \,then\,}\ Left\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Right\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}}\\ + \textit{(2)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Left\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Left\ v\isactrlsub {\isadigit{2}}} \;iff\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(3)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Right\ v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Right\ v\isactrlsub {\isadigit{2}}} \;iff\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}}\\ + \textit{(4)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Seq\ v\ v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ Seq\ v\ w\isactrlsub {\isadigit{2}}} \;iff\; + \isa{v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ w\isactrlsub {\isadigit{2}}}\\ + \textit{(5)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;and\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ w\isactrlsub {\isadigit{1}}} \;then\; + \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}\ {\isasymprec}\ Seq\ w\isactrlsub {\isadigit{1}}\ w\isactrlsub {\isadigit{2}}}\\ + \textit{(6)} & If + \isa{{\isacharbar}{\kern0pt}vs\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}vs\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;then\; + \isa{Stars\ {\isacharparenleft}{\kern0pt}vs\ {\isacharat}{\kern0pt}\ vs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymprec}\ Stars\ {\isacharparenleft}{\kern0pt}vs\ {\isacharat}{\kern0pt}\ vs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} \;iff\; + \isa{Stars\ vs\isactrlsub {\isadigit{1}}\ {\isasymprec}\ Stars\ vs\isactrlsub {\isadigit{2}}}\\ + + \textit{(7)} & If + \isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \;and\; + \isa{v\isactrlsub {\isadigit{1}}\ {\isasymprec}\ v\isactrlsub {\isadigit{2}}} \;then\; + \isa{Stars\ {\isacharparenleft}{\kern0pt}v\isactrlsub {\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isasymprec}\ Stars\ {\isacharparenleft}{\kern0pt}v\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{proposition} + + \noindent One might prefer that statements \textit{(4)} and \textit{(5)} + (respectively \textit{(6)} and \textit{(7)}) + are combined into a single \textit{iff}-statement (like the ones for \isa{Left} and \isa{Right}). Unfortunately this cannot be done easily: such + a single statement would require an additional assumption about the + two values \isa{Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}} and \isa{Seq\ w\isactrlsub {\isadigit{1}}\ w\isactrlsub {\isadigit{2}}} + being inhabited by the same regular expression. The + complexity of the proofs involved seems to not justify such a + `cleaner' single statement. The statements given are just the properties that + allow us to establish our theorems without any difficulty. The proofs + for Proposition~\ref{ordintros} are routine. + + + Next we establish how Okui and Suzuki's orderings relate to our + definition of POSIX values. Given a \isa{POSIX} value \isa{v\isactrlsub {\isadigit{1}}} + for \isa{r} and \isa{s}, then any other lexical value \isa{v\isactrlsub {\isadigit{2}}} in \isa{LV\ r\ s} is greater or equal than \isa{v\isactrlsub {\isadigit{1}}}, namely: + + + \begin{theorem}\label{orderone} + \isa{{\normalsize{}If\,}\ \mbox{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}}\ {\normalsize \,and\,}\ \mbox{v\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ r\ s}\ {\normalsize \,then\,}\ v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}{\isachardot}{\kern0pt}} + \end{theorem} + + \begin{proof} By induction on our POSIX rules. By + Theorem~\ref{posixdeterm} and the definition of \isa{LV}, it is clear + that \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} have the same + underlying string \isa{s}. The three base cases are + straightforward: for example for \isa{v\isactrlsub {\isadigit{1}}\ {\isacharequal}{\kern0pt}\ Empty}, we have + that \isa{v\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ \isactrlbold {\isadigit{1}}\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} must also be of the form + \mbox{\isa{v\isactrlsub {\isadigit{2}}\ {\isacharequal}{\kern0pt}\ Empty}}. Therefore we have \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}. The inductive cases for + \isa{r} being of the form \isa{r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}} and + \isa{r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} are as follows: + + + \begin{itemize} + + \item[$\bullet$] Case \isa{P{\isacharplus}{\kern0pt}L} with \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Left\ w\isactrlsub {\isadigit{1}}}: In this case the value + \isa{v\isactrlsub {\isadigit{2}}} is either of the + form \isa{Left\ w\isactrlsub {\isadigit{2}}} or \isa{Right\ w\isactrlsub {\isadigit{2}}}. In the + latter case we can immediately conclude with \mbox{\isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}} since a \isa{Left}-value with the + same underlying string \isa{s} is always smaller than a + \isa{Right}-value by Proposition~\ref{ordintros}\textit{(1)}. + In the former case we have \isa{w\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ r\isactrlsub {\isadigit{1}}\ s} and can use the induction hypothesis to infer + \isa{w\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ w\isactrlsub {\isadigit{2}}}. Because \isa{w\isactrlsub {\isadigit{1}}} and \isa{w\isactrlsub {\isadigit{2}}} have the same underlying string + \isa{s}, we can conclude with \isa{Left\ w\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ Left\ w\isactrlsub {\isadigit{2}}} using + Proposition~\ref{ordintros}\textit{(2)}.\smallskip + + \item[$\bullet$] Case \isa{P{\isacharplus}{\kern0pt}R} with \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Right\ w\isactrlsub {\isadigit{1}}}: This case similar to the previous + case, except that we additionally know \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}. This is needed when \isa{v\isactrlsub {\isadigit{2}}} is of the form + \mbox{\isa{Left\ w\isactrlsub {\isadigit{2}}}}. Since \mbox{\isa{{\isacharbar}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}w\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}} \isa{{\isacharequal}{\kern0pt}\ s}} and \isa{w\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}}, we can derive a contradiction for \mbox{\isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}} using + Proposition~\ref{inhabs}. So also in this case \mbox{\isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}}.\smallskip + + \item[$\bullet$] Case \isa{PS} with \isa{{\isacharparenleft}{\kern0pt}s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Seq\ w\isactrlsub {\isadigit{1}}\ w\isactrlsub {\isadigit{2}}}: We can assume \isa{v\isactrlsub {\isadigit{2}}\ {\isacharequal}{\kern0pt}\ Seq\ u\isactrlsub {\isadigit{1}}\ u\isactrlsub {\isadigit{2}}} with \isa{u\isactrlsub {\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{1}}} and \mbox{\isa{u\isactrlsub {\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}}. We have \isa{s\isactrlsub {\isadigit{1}}\ {\isacharat}{\kern0pt}\ s\isactrlsub {\isadigit{2}}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}\ {\isacharat}{\kern0pt}\ {\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{2}}{\isacharbar}{\kern0pt}}. By the side-condition of the + \isa{PS}-rule we know that either \isa{s\isactrlsub {\isadigit{1}}\ {\isacharequal}{\kern0pt}\ {\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}} or that \isa{{\isacharbar}{\kern0pt}u\isactrlsub {\isadigit{1}}{\isacharbar}{\kern0pt}} is a strict prefix of + \isa{s\isactrlsub {\isadigit{1}}}. In the latter case we can infer \isa{w\isactrlsub {\isadigit{1}}\ {\isasymprec}\ u\isactrlsub {\isadigit{1}}} by + Proposition~\ref{ordlen}\textit{(2)} and from this \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}} by Proposition~\ref{ordintros}\textit{(5)} + (as noted above \isa{v\isactrlsub {\isadigit{1}}} and \isa{v\isactrlsub {\isadigit{2}}} must have the + same underlying string). + In the former case we know + \isa{u\isactrlsub {\isadigit{1}}\ {\isasymin}\ LV\ r\isactrlsub {\isadigit{1}}\ s\isactrlsub {\isadigit{1}}} and \isa{u\isactrlsub {\isadigit{2}}\ {\isasymin}\ LV\ r\isactrlsub {\isadigit{2}}\ s\isactrlsub {\isadigit{2}}}. With this we can use the + induction hypotheses to infer \isa{w\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ u\isactrlsub {\isadigit{1}}} and \isa{w\isactrlsub {\isadigit{2}}\ \mbox{$\preccurlyeq$}\ u\isactrlsub {\isadigit{2}}}. By + Proposition~\ref{ordintros}\textit{(4,5)} we can again infer + \isa{v\isactrlsub {\isadigit{1}}\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{2}}}. + + \end{itemize} + + \noindent The case for \isa{P{\isasymstar}} is similar to the \isa{PS}-case and omitted.\qed + \end{proof} + + \noindent This theorem shows that our \isa{POSIX} value for a + regular expression \isa{r} and string \isa{s} is in fact a + minimal element of the values in \isa{LV\ r\ s}. By + Proposition~\ref{ordlen}\textit{(2)} we also know that any value in + \isa{LV\ r\ s{\isacharprime}{\kern0pt}}, with \isa{s{\isacharprime}{\kern0pt}} being a strict prefix, cannot be + smaller than \isa{v\isactrlsub {\isadigit{1}}}. The next theorem shows the + opposite---namely any minimal element in \isa{LV\ r\ s} must be a + \isa{POSIX} value. This can be established by induction on \isa{r}, but the proof can be drastically simplified by using the fact + from the previous section about the existence of a \isa{POSIX} value + whenever a string \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}. + + + \begin{theorem} + \isa{{\normalsize{}If\,}\ \mbox{v\isactrlsub {\isadigit{1}}\ {\isasymin}\ LV\ r\ s}\ {\normalsize \,and\,}\ \mbox{{\isasymforall}v\isactrlsub {\isadigit{2}}{\isasymin}LV\ r\ s{\isachardot}{\kern0pt}\ v\isactrlsub {\isadigit{2}}\ \mbox{$\not\prec$}\ v\isactrlsub {\isadigit{1}}}\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub {\isadigit{1}}{\isachardot}{\kern0pt}} + \end{theorem} + + \begin{proof} + If \isa{v\isactrlsub {\isadigit{1}}\ {\isasymin}\ LV\ r\ s} then + \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}} by Proposition~\ref{inhabs}. Hence by Theorem~\ref{lexercorrect}(2) + there exists a + \isa{POSIX} value \isa{v\isactrlsub P} with \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\isactrlsub P} + and by Lemma~\ref{LVposix} we also have \mbox{\isa{v\isactrlsub P\ {\isasymin}\ LV\ r\ s}}. + By Theorem~\ref{orderone} we therefore have + \isa{v\isactrlsub P\ \mbox{$\preccurlyeq$}\ v\isactrlsub {\isadigit{1}}}. If \isa{v\isactrlsub P\ {\isacharequal}{\kern0pt}\ v\isactrlsub {\isadigit{1}}} then + we are done. Otherwise we have \isa{v\isactrlsub P\ {\isasymprec}\ v\isactrlsub {\isadigit{1}}}, which + however contradicts the second assumption about \isa{v\isactrlsub {\isadigit{1}}} being the smallest + element in \isa{LV\ r\ s}. So we are done in this case too.\qed + \end{proof} + + \noindent + From this we can also show + that if \isa{LV\ r\ s} is non-empty (or equivalently \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}) then + it has a unique minimal element: + + \begin{corollary} + \isa{{\normalsize{}If\,}\ LV\ r\ s\ {\isasymnoteq}\ {\isasymemptyset}\ {\normalsize \,then\,}\ {\isasymexists}{\isacharbang}{\kern0pt}vmin{\isachardot}{\kern0pt}\ vmin\ {\isasymin}\ LV\ r\ s\ {\isasymand}\ {\isacharparenleft}{\kern0pt}{\isasymforall}v{\isasymin}LV\ r\ s{\isachardot}{\kern0pt}\ vmin\ \mbox{$\preccurlyeq$}\ v{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}} + \end{corollary} + + + + \noindent To sum up, we have shown that the (unique) minimal elements + of the ordering by Okui and Suzuki are exactly the \isa{POSIX} + values we defined inductively in Section~\ref{posixsec}. This provides + an independent confirmation that our ternary relation formalises the + informal POSIX rules.% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Bitcoded Lexing% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +Incremental calculation of the value. To simplify the proof we first define the function +\isa{flex} which calculates the ``iterated'' injection function. With this we can +rewrite the lexer as + +\begin{center} +\isa{lexer\ r\ s\ {\isacharequal}{\kern0pt}\ {\isacharparenleft}{\kern0pt}\textrm{if}\ nullable\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}s{\isacharparenright}{\kern0pt}\ \textrm{then}\ Some\ {\isacharparenleft}{\kern0pt}flex\ r\ id\ s\ {\isacharparenleft}{\kern0pt}mkeps\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}s{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ \textrm{else}\ None{\isacharparenright}{\kern0pt}} +\end{center}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{Optimisations% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +Derivatives as calculated by \Brz's method are usually more complex + regular expressions than the initial one; the result is that the + derivative-based matching and lexing algorithms are often abysmally slow. + However, various optimisations are possible, such as the simplifications + of \isa{\isactrlbold {\isadigit{0}}\ {\isacharplus}{\kern0pt}\ r}, \isa{r\ {\isacharplus}{\kern0pt}\ \isactrlbold {\isadigit{0}}}, \isa{\isactrlbold {\isadigit{1}}\ {\isasymcdot}\ r} and + \isa{r\ {\isasymcdot}\ \isactrlbold {\isadigit{1}}} to \isa{r}. These simplifications can speed up the + algorithms considerably, as noted in \cite{Sulzmann2014}. One of the + advantages of having a simple specification and correctness proof is that + the latter can be refined to prove the correctness of such simplification + steps. While the simplification of regular expressions according to + rules like + + \begin{equation}\label{Simpl} + \begin{array}{lcllcllcllcl} + \isa{\isactrlbold {\isadigit{0}}\ {\isacharplus}{\kern0pt}\ r} & \isa{{\isasymRightarrow}} & \isa{r} \hspace{8mm}%\\ + \isa{r\ {\isacharplus}{\kern0pt}\ \isactrlbold {\isadigit{0}}} & \isa{{\isasymRightarrow}} & \isa{r} \hspace{8mm}%\\ + \isa{\isactrlbold {\isadigit{1}}\ {\isasymcdot}\ r} & \isa{{\isasymRightarrow}} & \isa{r} \hspace{8mm}%\\ + \isa{r\ {\isasymcdot}\ \isactrlbold {\isadigit{1}}} & \isa{{\isasymRightarrow}} & \isa{r} + \end{array} + \end{equation} + + \noindent is well understood, there is an obstacle with the POSIX value + calculation algorithm by Sulzmann and Lu: if we build a derivative regular + expression and then simplify it, we will calculate a POSIX value for this + simplified derivative regular expression, \emph{not} for the original (unsimplified) + derivative regular expression. Sulzmann and Lu \cite{Sulzmann2014} overcome this obstacle by + not just calculating a simplified regular expression, but also calculating + a \emph{rectification function} that ``repairs'' the incorrect value. + + The rectification functions can be (slightly clumsily) implemented in + Isabelle/HOL as follows using some auxiliary functions: + + \begin{center} + \begin{tabular}{lcl} + \isa{F\isactrlbsub Right\isactrlesub \ f\ v} & $\dn$ & \isa{Right\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Left\isactrlesub \ f\ v} & $\dn$ & \isa{Left\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}}\\ + + \isa{F\isactrlbsub Alt\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}Right\ v{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Right\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Alt\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}Left\ v{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Left\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}}\\ + + \isa{F\isactrlbsub Seq{\isadigit{1}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ v} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Seq{\isadigit{2}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ v} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}\\ + \isa{F\isactrlbsub Seq\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}Seq\ v\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\medskip\\ + %\end{tabular} + % + %\begin{tabular}{lcl} + \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharcomma}{\kern0pt}\ \underline{\hspace{2mm}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Right\isactrlesub \ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{0}}{\isacharcomma}{\kern0pt}\ \underline{\hspace{2mm}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Left\isactrlesub \ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Alt\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Seq{\isadigit{1}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}\isactrlbold {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Seq{\isadigit{2}}\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F\isactrlbsub Seq\isactrlesub \ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent + The functions \isa{simp\isactrlbsub Alt\isactrlesub } and \isa{simp\isactrlbsub Seq\isactrlesub } encode the simplification rules + in \eqref{Simpl} and compose the rectification functions (simplifications can occur + deep inside the regular expression). The main simplification function is then + + \begin{center} + \begin{tabular}{lcl} + \isa{simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{simp\isactrlbsub Alt\isactrlesub \ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} & $\dn$ & \isa{simp\isactrlbsub Seq\isactrlesub \ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}}\\ + \isa{simp\ r} & $\dn$ & \isa{{\isacharparenleft}{\kern0pt}r{\isacharcomma}{\kern0pt}\ id{\isacharparenright}{\kern0pt}}\\ + \end{tabular} + \end{center} + + \noindent where \isa{id} stands for the identity function. The + function \isa{simp} returns a simplified regular expression and a corresponding + rectification function. Note that we do not simplify under stars: this + seems to slow down the algorithm, rather than speed it up. The optimised + lexer is then given by the clauses: + + \begin{center} + \begin{tabular}{lcl} + \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} & $\dn$ & \isa{\textrm{if}\ nullable\ r\ \textrm{then}\ Some\ {\isacharparenleft}{\kern0pt}mkeps\ r{\isacharparenright}{\kern0pt}\ \textrm{else}\ None}\\ + \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ {\isacharparenleft}{\kern0pt}c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s{\isacharparenright}{\kern0pt}} & $\dn$ & + \isa{let\ {\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharcomma}{\kern0pt}\ f\isactrlsub r{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ simp\ {\isacharparenleft}{\kern0pt}r}$\backslash$\isa{c{\isacharparenright}{\kern0pt}\ in}\\ + & & \isa{case} \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\isactrlsub s\ s} \isa{of}\\ + & & \phantom{$|$} \isa{None} \isa{{\isasymRightarrow}} \isa{None}\\ + & & $|$ \isa{Some\ v} \isa{{\isasymRightarrow}} \isa{Some\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ {\isacharparenleft}{\kern0pt}f\isactrlsub r\ v{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}} + \end{tabular} + \end{center} + + \noindent + In the second clause we first calculate the derivative \isa{r{\isacharbackslash}{\kern0pt}c} + and then simpli + +text \isa{\ \ Incremental\ calculation\ of\ the\ value{\isachardot}{\kern0pt}\ To\ simplify\ the\ proof\ we\ first\ define\ the\ function\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ flex{\isacharbraceright}{\kern0pt}\ which\ calculates\ the\ {\isacharbackquote}{\kern0pt}{\isacharbackquote}{\kern0pt}iterated{\isacharprime}{\kern0pt}{\isacharprime}{\kern0pt}\ injection\ function{\isachardot}{\kern0pt}\ With\ this\ we\ can\ rewrite\ the\ lexer\ as\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ lexer{\isacharunderscore}{\kern0pt}flex{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}v\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{7}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ code{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{7}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ areg{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}{\isacharequal}{\kern0pt}{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}AZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}AONE\ bs{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ACHAR\ bs\ c{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}AALT\ bs\ r{\isadigit{1}}\ r{\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ASEQ\ bs\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}mid{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ASTAR\ bs\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ intern{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ erase{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ Some\ simple\ facts\ about\ erase\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ erase{\isacharunderscore}{\kern0pt}bder{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ erase{\isacharunderscore}{\kern0pt}intern{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bnullable{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}medskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ \ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharpercent}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{5}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bder{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{6}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{3}}{\isacharparenright}{\kern0pt}{\isacharbrackleft}{\kern0pt}of\ bs\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}\ {\isachardoublequote}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ bmkeps{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{4}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}medskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharbrackleft}{\kern0pt}mode{\isacharequal}{\kern0pt}IfThen{\isacharbrackright}{\kern0pt}\ bder{\isacharunderscore}{\kern0pt}retrieve{\isacharbraceright}{\kern0pt}\ \ By\ induction\ on\ {\isasymopen}r{\isasymclose}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}{\isacharbrackleft}{\kern0pt}Main\ Lemma{\isacharbrackright}{\kern0pt}{\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharbrackleft}{\kern0pt}mode{\isacharequal}{\kern0pt}IfThen{\isacharbrackright}{\kern0pt}\ MAIN{\isacharunderscore}{\kern0pt}decode{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ Definition\ of\ the\ bitcoded\ lexer\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ blexer{\isacharunderscore}{\kern0pt}def{\isacharbraceright}{\kern0pt}\ \ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ blexer{\isacharunderscore}{\kern0pt}correctness{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ \ } + +section \isa{Optimisations} + +text \isa{\ \ Derivatives\ as\ calculated\ by\ {\isacharbackslash}{\kern0pt}Brz{\isacharprime}{\kern0pt}s\ method\ are\ usually\ more\ complex\ regular\ expressions\ than\ the\ initial\ one{\isacharsemicolon}{\kern0pt}\ the\ result\ is\ that\ the\ derivative{\isacharminus}{\kern0pt}based\ matching\ and\ lexing\ algorithms\ are\ often\ abysmally\ slow{\isachardot}{\kern0pt}\ However{\isacharcomma}{\kern0pt}\ various\ optimisations\ are\ possible{\isacharcomma}{\kern0pt}\ such\ as\ the\ simplifications\ of\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ ZERO\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ r\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ ONE\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ r\ ONE{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ to\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ These\ simplifications\ can\ speed\ up\ the\ algorithms\ considerably{\isacharcomma}{\kern0pt}\ as\ noted\ in\ {\isacharbackslash}{\kern0pt}cite{\isacharbraceleft}{\kern0pt}Sulzmann{\isadigit{2}}{\isadigit{0}}{\isadigit{1}}{\isadigit{4}}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ One\ of\ the\ advantages\ of\ having\ a\ simple\ specification\ and\ correctness\ proof\ is\ that\ the\ latter\ can\ be\ refined\ to\ prove\ the\ correctness\ of\ such\ simplification\ steps{\isachardot}{\kern0pt}\ While\ the\ simplification\ of\ regular\ expressions\ according\ to\ rules\ like\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}equation{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}label{\isacharbraceleft}{\kern0pt}Simpl{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}array{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcllcllcllcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ ZERO\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}hspace{\isacharbraceleft}{\kern0pt}{\isadigit{8}}mm{\isacharbraceright}{\kern0pt}{\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}ALT\ r\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}hspace{\isacharbraceleft}{\kern0pt}{\isadigit{8}}mm{\isacharbraceright}{\kern0pt}{\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ ONE\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}hspace{\isacharbraceleft}{\kern0pt}{\isadigit{8}}mm{\isacharbraceright}{\kern0pt}{\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}SEQ\ r\ ONE{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \ {\isacharampersand}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}array{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}equation{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ is\ well\ understood{\isacharcomma}{\kern0pt}\ there\ is\ an\ obstacle\ with\ the\ POSIX\ value\ calculation\ algorithm\ by\ Sulzmann\ and\ Lu{\isacharcolon}{\kern0pt}\ if\ we\ build\ a\ derivative\ regular\ expression\ and\ then\ simplify\ it{\isacharcomma}{\kern0pt}\ we\ will\ calculate\ a\ POSIX\ value\ for\ this\ simplified\ derivative\ regular\ expression{\isacharcomma}{\kern0pt}\ {\isacharbackslash}{\kern0pt}emph{\isacharbraceleft}{\kern0pt}not{\isacharbraceright}{\kern0pt}\ for\ the\ original\ {\isacharparenleft}{\kern0pt}unsimplified{\isacharparenright}{\kern0pt}\ derivative\ regular\ expression{\isachardot}{\kern0pt}\ Sulzmann\ and\ Lu\ {\isacharbackslash}{\kern0pt}cite{\isacharbraceleft}{\kern0pt}Sulzmann{\isadigit{2}}{\isadigit{0}}{\isadigit{1}}{\isadigit{4}}{\isacharbraceright}{\kern0pt}\ overcome\ this\ obstacle\ by\ not\ just\ calculating\ a\ simplified\ regular\ expression{\isacharcomma}{\kern0pt}\ but\ also\ calculating\ a\ {\isacharbackslash}{\kern0pt}emph{\isacharbraceleft}{\kern0pt}rectification\ function{\isacharbraceright}{\kern0pt}\ that\ {\isacharbackquote}{\kern0pt}{\isacharbackquote}{\kern0pt}repairs{\isacharprime}{\kern0pt}{\isacharprime}{\kern0pt}\ the\ incorrect\ value{\isachardot}{\kern0pt}\ \ The\ rectification\ functions\ can\ be\ {\isacharparenleft}{\kern0pt}slightly\ clumsily{\isacharparenright}{\kern0pt}\ implemented\ \ in\ Isabelle{\isacharslash}{\kern0pt}HOL\ as\ follows\ using\ some\ auxiliary\ functions{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}RIGHT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Right\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}LEFT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Left\ {\isacharparenleft}{\kern0pt}f\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}ALT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Right\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}ALT{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Left\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ \ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{1}}{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{2}}{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ {\isacharparenleft}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}Seq\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{1}}\ v\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}f\isactrlsub {\isadigit{2}}\ v\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isasymclose}{\isacharbackslash}{\kern0pt}medskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharpercent}{\kern0pt}\ {\isacharpercent}{\kern0pt}{\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}ZERO{\isacharcomma}{\kern0pt}\ DUMMY{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}RIGHT\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}ZERO{\isacharcomma}{\kern0pt}\ DUMMY{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}LEFT\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}ALT\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}ONE{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{1}}\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}ONE{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ{\isadigit{2}}\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}SEQ\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharcomma}{\kern0pt}\ F{\isacharunderscore}{\kern0pt}SEQ\ f\isactrlsub {\isadigit{1}}\ f\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ The\ functions\ {\isasymopen}simp\isactrlbsub Alt\isactrlesub {\isasymclose}\ and\ {\isasymopen}simp\isactrlbsub Seq\isactrlesub {\isasymclose}\ encode\ the\ simplification\ rules\ in\ {\isacharbackslash}{\kern0pt}eqref{\isacharbraceleft}{\kern0pt}Simpl{\isacharbraceright}{\kern0pt}\ and\ compose\ the\ rectification\ functions\ {\isacharparenleft}{\kern0pt}simplifications\ can\ occur\ deep\ inside\ the\ regular\ expression{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ The\ main\ simplification\ function\ is\ then\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}ALT\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}SEQ\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp{\isacharunderscore}{\kern0pt}SEQ\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}simp\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharparenleft}{\kern0pt}r{\isacharcomma}{\kern0pt}\ id{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ where\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}id{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ stands\ for\ the\ identity\ function{\isachardot}{\kern0pt}\ The\ function\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ simp{\isacharbraceright}{\kern0pt}\ returns\ a\ simplified\ regular\ expression\ and\ a\ corresponding\ rectification\ function{\isachardot}{\kern0pt}\ Note\ that\ we\ do\ not\ simplify\ under\ stars{\isacharcolon}{\kern0pt}\ this\ seems\ to\ slow\ down\ the\ algorithm{\isacharcomma}{\kern0pt}\ rather\ than\ speed\ it\ up{\isachardot}{\kern0pt}\ The\ optimised\ lexer\ is\ then\ given\ by\ the\ clauses{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}lcl{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ slexer{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}rhs{\isacharparenright}{\kern0pt}\ slexer{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ {\isacharparenleft}{\kern0pt}lhs{\isacharparenright}{\kern0pt}\ slexer{\isachardot}{\kern0pt}simps{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}dn{\isachardollar}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}let\ {\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharcomma}{\kern0pt}\ f\isactrlsub r{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ simp\ {\isacharparenleft}{\kern0pt}r\ {\isasymclose}{\isachardollar}{\kern0pt}{\isacharbackslash}{\kern0pt}backslash{\isachardollar}{\kern0pt}{\isasymopen}\ c{\isacharparenright}{\kern0pt}\ in{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isasymopen}case{\isasymclose}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer\ r\isactrlsub s\ s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymopen}of{\isasymclose}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharbackslash}{\kern0pt}phantom{\isacharbraceleft}{\kern0pt}{\isachardollar}{\kern0pt}{\isacharbar}{\kern0pt}{\isachardollar}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ \ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ None{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isachardollar}{\kern0pt}{\isacharbar}{\kern0pt}{\isachardollar}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}Some\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ {\isasymopen}{\isasymRightarrow}{\isasymclose}\ {\isasymopen}Some\ {\isacharparenleft}{\kern0pt}inj\ r\ c\ {\isacharparenleft}{\kern0pt}f\isactrlsub r\ v{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isasymclose}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}center{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ In\ the\ second\ clause\ we\ first\ calculate\ the\ derivative\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}der\ c\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ then\ simplify\ the\ result{\isachardot}{\kern0pt}\ This\ gives\ us\ a\ simplified\ derivative\ {\isasymopen}r\isactrlsub s{\isasymclose}\ and\ a\ rectification\ function\ {\isasymopen}f\isactrlsub r{\isasymclose}{\isachardot}{\kern0pt}\ The\ lexer\ is\ then\ recursively\ called\ with\ the\ simplified\ derivative{\isacharcomma}{\kern0pt}\ but\ before\ we\ inject\ the\ character\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ c{\isacharbraceright}{\kern0pt}\ into\ the\ value\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ v{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ we\ need\ to\ rectify\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ v{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}that\ is\ construct\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}f\isactrlsub r\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ Before\ we\ can\ establish\ the\ correctness\ of\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ we\ need\ to\ show\ that\ simplification\ preserves\ the\ language\ and\ simplification\ preserves\ our\ POSIX\ relation\ once\ the\ value\ is\ rectified\ {\isacharparenleft}{\kern0pt}recall\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ {\isachardoublequote}{\kern0pt}simp{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ generates\ a\ {\isacharparenleft}{\kern0pt}regular\ expression{\isacharcomma}{\kern0pt}\ rectification\ function{\isacharparenright}{\kern0pt}\ pair{\isacharparenright}{\kern0pt}{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}smallskip{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}label{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}{\isacharbraceleft}{\kern0pt}ll{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ L{\isacharunderscore}{\kern0pt}fst{\isacharunderscore}{\kern0pt}simp{\isacharbrackleft}{\kern0pt}symmetric{\isacharbrackright}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharbackslash}{\kern0pt}{\isacharbackslash}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isacharampersand}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm{\isacharbrackleft}{\kern0pt}mode{\isacharequal}{\kern0pt}IfThen{\isacharbrackright}{\kern0pt}\ Posix{\isacharunderscore}{\kern0pt}simp{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}tabular{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}lemma{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ Both\ are\ by\ induction\ on\ {\isasymopen}r{\isasymclose}{\isachardot}{\kern0pt}\ There\ is\ no\ interesting\ case\ for\ the\ first\ statement{\isachardot}{\kern0pt}\ For\ the\ second\ statement{\isacharcomma}{\kern0pt}\ of\ interest\ are\ the\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\ {\isacharequal}{\kern0pt}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\ {\isacharequal}{\kern0pt}\ SEQ\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ cases{\isachardot}{\kern0pt}\ In\ each\ case\ we\ have\ to\ analyse\ four\ subcases\ whether\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ equals\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ ZERO{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}respectively\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ ONE{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ For\ example\ for\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\ {\isacharequal}{\kern0pt}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ consider\ the\ subcase\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymnoteq}\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ By\ assumption\ we\ know\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ From\ this\ we\ can\ infer\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ by\ IH\ also\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ r\isactrlsub {\isadigit{2}}\ {\isasymrightarrow}\ {\isacharparenleft}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ Given\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ ZERO{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ we\ know\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}L\ {\isacharparenleft}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isacharbraceleft}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ By\ the\ first\ statement\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}L\ r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ is\ the\ empty\ set{\isacharcomma}{\kern0pt}\ meaning\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymnotin}\ L\ r\isactrlsub {\isadigit{1}}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ Taking\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ and\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ together\ gives\ by\ the\ {\isacharbackslash}{\kern0pt}mbox{\isacharbraceleft}{\kern0pt}{\isasymopen}P{\isacharplus}{\kern0pt}R{\isasymclose}{\isacharbraceright}{\kern0pt}{\isacharminus}{\kern0pt}rule\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}\ {\isasymrightarrow}\ Right\ {\isacharparenleft}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ In\ turn\ this\ gives\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}ALT\ r\isactrlsub {\isadigit{1}}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ as\ we\ need\ to\ show{\isachardot}{\kern0pt}\ The\ other\ cases\ are\ similar{\isachardot}{\kern0pt}{\isacharbackslash}{\kern0pt}qed\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}noindent\ We\ can\ now\ prove\ relatively\ straightforwardly\ that\ the\ optimised\ lexer\ produces\ the\ expected\ result{\isacharcolon}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}thm\ slexer{\isacharunderscore}{\kern0pt}correctness{\isacharbraceright}{\kern0pt}\ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}theorem{\isacharbraceright}{\kern0pt}\ \ {\isacharbackslash}{\kern0pt}begin{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ By\ induction\ on\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ s{\isacharbraceright}{\kern0pt}\ generalising\ over\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ The\ case\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ is\ trivial{\isachardot}{\kern0pt}\ For\ the\ cons{\isacharminus}{\kern0pt}case\ suppose\ the\ string\ is\ of\ the\ form\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}c\ {\isacharhash}{\kern0pt}\ s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ By\ induction\ hypothesis\ we\ know\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ holds\ for\ all\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ r{\isacharbraceright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}in\ particular\ for\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ being\ the\ derivative\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}der\ c\ r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ Let\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}r\isactrlsub s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ be\ the\ simplified\ derivative\ regular\ expression{\isacharcomma}{\kern0pt}\ that\ is\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharcomma}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}f\isactrlsub r{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ be\ the\ rectification\ function{\isacharcomma}{\kern0pt}\ that\ is\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ \ We\ distinguish\ the\ cases\ whether\ {\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ L\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ or\ not{\isachardot}{\kern0pt}\ In\ the\ first\ case\ we\ have\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ a\ value\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ so\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ Some\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ der\ c\ r\ {\isasymrightarrow}\ v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ hold{\isachardot}{\kern0pt}\ By\ Lemma{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ we\ can\ also\ infer\ from{\isachartilde}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isacharasterisk}{\kern0pt}{\isacharparenright}{\kern0pt}\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ L\ r\isactrlsub s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ holds{\isachardot}{\kern0pt}\ \ Hence\ we\ know\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ that\ there\ exists\ a\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ with\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ Some\ v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ r\isactrlsub s\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ From\ the\ latter\ we\ know\ by\ Lemma{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{2}}{\isacharparenright}{\kern0pt}\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymin}\ der\ c\ r\ {\isasymrightarrow}\ {\isacharparenleft}{\kern0pt}f\isactrlsub r\ v{\isacharprime}{\kern0pt}{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ holds{\isachardot}{\kern0pt}\ By\ the\ uniqueness\ of\ the\ POSIX\ relation\ {\isacharparenleft}{\kern0pt}Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}posixdeterm{\isacharbraceright}{\kern0pt}{\isacharparenright}{\kern0pt}\ we\ can\ infer\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ v{\isacharbraceright}{\kern0pt}\ is\ equal\ to\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}f\isactrlsub r\ v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isacharminus}{\kern0pt}{\isacharminus}{\kern0pt}{\isacharminus}{\kern0pt}that\ is\ the\ rectification\ function\ applied\ to\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isacharprime}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ produces\ the\ original\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}v{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ \ Now\ the\ case\ follows\ by\ the\ definitions\ of\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ lexer{\isacharbraceright}{\kern0pt}\ and\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}const\ slexer{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ \ In\ the\ second\ case\ where\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymnotin}\ L\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ we\ have\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ {\isacharparenleft}{\kern0pt}der\ c\ r{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}\ \ We\ also\ know\ by\ Lemma{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}slexeraux{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ that\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}s\ {\isasymnotin}\ L\ r\isactrlsub s{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ Hence\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}\ by\ Theorem{\isachartilde}{\kern0pt}{\isacharbackslash}{\kern0pt}ref{\isacharbraceleft}{\kern0pt}lexercorrect{\isacharbraceright}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isadigit{1}}{\isacharparenright}{\kern0pt}\ and\ by\ IH\ then\ also\ {\isacharat}{\kern0pt}{\isacharbraceleft}{\kern0pt}term\ {\isachardoublequote}{\kern0pt}slexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None{\isachardoublequote}{\kern0pt}{\isacharbraceright}{\kern0pt}{\isachardot}{\kern0pt}\ With\ this\ we\ can\ conclude\ in\ this\ case\ too{\isachardot}{\kern0pt}{\isacharbackslash}{\kern0pt}qed\ \ {\isacharbackslash}{\kern0pt}end{\isacharbraceleft}{\kern0pt}proof{\isacharbraceright}{\kern0pt}\ \ } +fy the result. This gives us a simplified derivative + \isa{r\isactrlsub s} and a rectification function \isa{f\isactrlsub r}. The lexer + is then recursively called with the simplified derivative, but before + we inject the character \isa{c} into the value \isa{v}, we need to rectify + \isa{v} (that is construct \isa{f\isactrlsub r\ v}). Before we can establish the correctness + of \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}}, we need to show that simplification preserves the language + and simplification preserves our POSIX relation once the value is rectified + (recall \isa{simp} generates a (regular expression, rectification function) pair): + + \begin{lemma}\mbox{}\smallskip\\\label{slexeraux} + \begin{tabular}{ll} + (1) & \isa{L{\isacharparenleft}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ L{\isacharparenleft}{\kern0pt}r{\isacharparenright}{\kern0pt}}\\ + (2) & \isa{{\normalsize{}If\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ fst\ {\isacharparenleft}{\kern0pt}simp\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v\ {\normalsize \,then\,}\ {\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ r{\isacharparenright}{\kern0pt}\ v{\isachardot}{\kern0pt}} + \end{tabular} + \end{lemma} + + \begin{proof} Both are by induction on \isa{r}. There is no + interesting case for the first statement. For the second statement, + of interest are the \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}} and \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isasymcdot}\ r\isactrlsub {\isadigit{2}}} cases. In each case we have to analyse four subcases whether + \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} and \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}} equals \isa{\isactrlbold {\isadigit{0}}} (respectively \isa{\isactrlbold {\isadigit{1}}}). For example for \isa{r\ {\isacharequal}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}}, consider the subcase \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ \isactrlbold {\isadigit{0}}} and + \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymnoteq}\ \isactrlbold {\isadigit{0}}}. By assumption we know \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v}. From this we can infer \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} + and by IH also (*) \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v}. Given \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ \isactrlbold {\isadigit{0}}} + we know \isa{L{\isacharparenleft}{\kern0pt}fst\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharequal}{\kern0pt}\ {\isasymemptyset}}. By the first statement + \isa{L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}} is the empty set, meaning (**) \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}{\isacharparenright}{\kern0pt}}. + Taking (*) and (**) together gives by the \mbox{\isa{P{\isacharplus}{\kern0pt}R}}-rule + \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ Right\ {\isacharparenleft}{\kern0pt}snd\ {\isacharparenleft}{\kern0pt}simp\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ v{\isacharparenright}{\kern0pt}}. In turn this + gives \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\ {\isacharplus}{\kern0pt}\ r\isactrlsub {\isadigit{2}}{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v} as we need to show. + The other cases are similar.\qed + \end{proof} + + \noindent We can now prove relatively straightforwardly that the + optimised lexer produces the expected result: + + \begin{theorem} + \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s} + \end{theorem} + + \begin{proof} By induction on \isa{s} generalising over \isa{r}. The case \isa{{\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}} is trivial. For the cons-case suppose the + string is of the form \isa{c\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}s}. By induction hypothesis we + know \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\ s\ {\isacharequal}{\kern0pt}\ lexer\ r\ s} holds for all \isa{r} (in + particular for \isa{r} being the derivative \isa{r{\isacharbackslash}{\kern0pt}c}). Let \isa{r\isactrlsub s} be the simplified derivative regular expression, that is \isa{fst\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}, and \isa{f\isactrlsub r} be the rectification + function, that is \isa{snd\ {\isacharparenleft}{\kern0pt}simp\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}}. We distinguish the cases + whether (*) \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}} or not. In the first case we + have by Theorem~\ref{lexercorrect}(2) a value \isa{v} so that \isa{lexer\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ Some\ v} and \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v} hold. + By Lemma~\ref{slexeraux}(1) we can also infer from~(*) that \isa{s\ {\isasymin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharparenright}{\kern0pt}} holds. Hence we know by Theorem~\ref{lexercorrect}(2) that + there exists a \isa{v{\isacharprime}{\kern0pt}} with \isa{lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ Some\ v{\isacharprime}{\kern0pt}} and + \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r\isactrlsub s{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ v{\isacharprime}{\kern0pt}}. From the latter we know by + Lemma~\ref{slexeraux}(2) that \isa{{\isacharparenleft}{\kern0pt}s{\isacharcomma}{\kern0pt}\ r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymrightarrow}\ f\isactrlsub r\ v{\isacharprime}{\kern0pt}} holds. + By the uniqueness of the POSIX relation (Theorem~\ref{posixdeterm}) we + can infer that \isa{v} is equal to \isa{f\isactrlsub r\ v{\isacharprime}{\kern0pt}}---that is the + rectification function applied to \isa{v{\isacharprime}{\kern0pt}} + produces the original \isa{v}. Now the case follows by the + definitions of \isa{lexer} and \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}}. + + In the second case where \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}} we have that + \isa{lexer\ {\isacharparenleft}{\kern0pt}r{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ s\ {\isacharequal}{\kern0pt}\ None} by Theorem~\ref{lexercorrect}(1). We + also know by Lemma~\ref{slexeraux}(1) that \isa{s\ {\isasymnotin}\ L{\isacharparenleft}{\kern0pt}r\isactrlsub s{\isacharparenright}{\kern0pt}}. Hence + \isa{lexer\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None} by Theorem~\ref{lexercorrect}(1) and + by IH then also \isa{lexer\isactrlsup {\isacharplus}{\kern0pt}\ r\isactrlsub s\ s\ {\isacharequal}{\kern0pt}\ None}. With this we can + conclude in this case too.\qed + + \end{proof}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimdocument +% +\endisadelimdocument +% +\isatagdocument +% +\isamarkupsection{HERE% +} +\isamarkuptrue% +% +\endisatagdocument +{\isafolddocument}% +% +\isadelimdocument +% +\endisadelimdocument +% +\begin{isamarkuptext}% +\begin{lemma} + \isa{{\normalsize{}If\,}\ v\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c\ {\normalsize \,then\,}\ retrieve\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}\ v\ {\isacharequal}{\kern0pt}\ retrieve\ r\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isacharparenright}{\kern0pt}{\isachardot}{\kern0pt}} + \end{lemma} + + \begin{proof} + By induction on the definition of \isa{r\mbox{$^\downarrow$}}. The cases for rule 1) and 2) are + straightforward as \isa{\isactrlbold {\isadigit{0}}{\isacharbackslash}{\kern0pt}c} and \isa{\isactrlbold {\isadigit{1}}{\isacharbackslash}{\kern0pt}c} are both equal to + \isa{\isactrlbold {\isadigit{0}}}. This means \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{0}}} cannot hold. Similarly in case of rule 3) + where \isa{r} is of the form \isa{ACHAR\ d} with \isa{c\ {\isacharequal}{\kern0pt}\ d}. Then by assumption + we know \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{1}}}, which implies \isa{v\ {\isacharequal}{\kern0pt}\ Empty}. The equation follows by + simplification of left- and right-hand side. In case \isa{c\ {\isasymnoteq}\ d} we have again + \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{0}}}, which cannot hold. + + For rule 4a) we have again \isa{v\ {\isacharcolon}{\kern0pt}\ \isactrlbold {\isadigit{0}}}. The property holds by IH for rule 4b). + The induction hypothesis is + \[ + \isa{retrieve\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}\ v\ {\isacharequal}{\kern0pt}\ retrieve\ r\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isacharparenright}{\kern0pt}} + \] + which is what left- and right-hand side simplify to. The slightly more interesting case + is for 4c). By assumption we have + \isa{v\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isacharplus}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}AALTs\ bs\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}}. This means we + have either (*) \isa{v{\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{1}}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} with \isa{v\ {\isacharequal}{\kern0pt}\ Left\ v{\isadigit{1}}} or + (**) \isa{v{\isadigit{2}}\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}AALTs\ bs\ {\isacharparenleft}{\kern0pt}r\isactrlsub {\isadigit{2}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}rs{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} with \isa{v\ {\isacharequal}{\kern0pt}\ Right\ v{\isadigit{2}}}. + The former case is straightforward by simplification. The second case is \ldots TBD. + + Rule 5) TBD. + + Finally for rule 6) the reasoning is as follows: By assumption we have + \isa{v\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c{\isacharparenright}{\kern0pt}\ {\isasymcdot}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}}. This means we also have + \isa{v\ {\isacharequal}{\kern0pt}\ Seq\ v{\isadigit{1}}\ v{\isadigit{2}}}, \isa{v{\isadigit{1}}\ {\isacharcolon}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}{\isacharbackslash}{\kern0pt}c} and \isa{v{\isadigit{2}}\ {\isacharequal}{\kern0pt}\ Stars\ vs}. + We want to prove + \begin{align} + & \isa{retrieve\ {\isacharparenleft}{\kern0pt}ASEQ\ bs\ {\isacharparenleft}{\kern0pt}fuse\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}ASTAR\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v}\\ + &= \isa{retrieve\ {\isacharparenleft}{\kern0pt}ASTAR\ bs\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}{\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\isactrlsup {\isasymstar}{\isacharparenright}{\kern0pt}\ c\ v{\isacharparenright}{\kern0pt}} + \end{align} + The right-hand side \isa{inj}-expression is equal to + \isa{Stars\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isadigit{1}}\mbox{$\,$}{\isacharcolon}{\kern0pt}{\isacharcolon}{\kern0pt}\mbox{$\,$}vs{\isacharparenright}{\kern0pt}}, which means the \isa{retrieve}-expression + simplifies to + \[ + \isa{bs\ {\isacharat}{\kern0pt}\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}\ {\isacharat}{\kern0pt}\ retrieve\ r\ {\isacharparenleft}{\kern0pt}inj\ {\isacharparenleft}{\kern0pt}r\mbox{$^\downarrow$}{\isacharparenright}{\kern0pt}\ c\ v{\isadigit{1}}{\isacharparenright}{\kern0pt}\ {\isacharat}{\kern0pt}\ retrieve\ {\isacharparenleft}{\kern0pt}ASTAR\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}} + \] + The left-hand side (3) above simplifies to + \[ + \isa{bs\ {\isacharat}{\kern0pt}\ retrieve\ {\isacharparenleft}{\kern0pt}fuse\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}r\mbox{$\bbslash$}c{\isacharparenright}{\kern0pt}{\isacharparenright}{\kern0pt}\ v{\isadigit{1}}\ {\isacharat}{\kern0pt}\ retrieve\ {\isacharparenleft}{\kern0pt}ASTAR\ {\isacharbrackleft}{\kern0pt}{\isacharbrackright}{\kern0pt}\ r{\isacharparenright}{\kern0pt}\ {\isacharparenleft}{\kern0pt}Stars\ vs{\isacharparenright}{\kern0pt}} + \] + We can move out the \isa{fuse\ {\isacharbrackleft}{\kern0pt}Z{\isacharbrackright}{\kern0pt}} and then use the IH to show that left-hand side + and right-hand side are equal. This completes the proof. + \end{proof} + + + + \bibliographystyle{plain} + \bibliography{root}% +\end{isamarkuptext}\isamarkuptrue% +% +\isadelimtheory +% +\endisadelimtheory +% +\isatagtheory +% +\endisatagtheory +{\isafoldtheory}% +% +\isadelimtheory +\isanewline +% +\endisadelimtheory +\isanewline +\isanewline +% +\end{isabellebody}% +\endinput +%:%file=Paper.tex%:% +%:%50=134%:% +%:%62=136%:% +%:%63=137%:% +%:%64=138%:% +%:%65=139%:% +%:%66=140%:% +%:%67=141%:% +%:%68=142%:% +%:%69=143%:% +%:%70=144%:% +%:%71=145%:% +%:%72=146%:% +%:%73=147%:% +%:%74=148%:% +%:%75=149%:% +%:%76=150%:% +%:%77=151%:% +%:%78=152%:% +%:%79=153%:% +%:%80=154%:% +%:%81=155%:% +%:%82=156%:% +%:%83=157%:% +%:%84=158%:% +%:%85=159%:% +%:%86=160%:% +%:%87=161%:% +%:%88=162%:% +%:%89=163%:% +%:%90=164%:% +%:%91=165%:% +%:%92=166%:% +%:%93=167%:% +%:%94=168%:% +%:%95=169%:% +%:%96=170%:% +%:%97=171%:% +%:%98=172%:% +%:%99=173%:% +%:%100=174%:% +%:%101=175%:% +%:%102=176%:% +%:%103=177%:% +%:%104=178%:% +%:%105=179%:% +%:%106=180%:% +%:%107=181%:% +%:%108=182%:% +%:%109=183%:% +%:%110=184%:% +%:%111=185%:% +%:%112=186%:% +%:%113=187%:% +%:%114=188%:% +%:%115=189%:% +%:%116=190%:% +%:%117=191%:% +%:%118=192%:% +%:%119=193%:% +%:%120=194%:% +%:%121=195%:% +%:%122=196%:% +%:%123=197%:% +%:%124=198%:% +%:%125=199%:% +%:%126=200%:% +%:%127=201%:% +%:%128=202%:% +%:%129=203%:% +%:%130=204%:% +%:%131=205%:% +%:%132=206%:% +%:%133=207%:% +%:%134=208%:% +%:%135=209%:% +%:%136=210%:% +%:%137=211%:% +%:%138=212%:% +%:%139=213%:% +%:%140=214%:% +%:%141=215%:% +%:%142=216%:% +%:%143=217%:% +%:%144=218%:% +%:%145=219%:% +%:%146=220%:% +%:%147=221%:% +%:%148=222%:% +%:%149=223%:% +%:%150=224%:% +%:%151=225%:% +%:%152=226%:% +%:%153=227%:% +%:%154=228%:% +%:%155=229%:% +%:%156=230%:% +%:%157=231%:% +%:%158=232%:% +%:%159=233%:% +%:%160=234%:% +%:%161=235%:% +%:%162=236%:% +%:%163=237%:% +%:%164=238%:% +%:%165=239%:% +%:%166=240%:% +%:%167=241%:% +%:%168=242%:% +%:%169=243%:% +%:%170=244%:% +%:%171=245%:% +%:%172=246%:% +%:%173=247%:% +%:%174=248%:% +%:%175=249%:% +%:%176=250%:% +%:%177=251%:% +%:%178=252%:% +%:%179=253%:% +%:%180=254%:% +%:%181=255%:% +%:%182=256%:% +%:%183=257%:% +%:%184=258%:% +%:%185=259%:% +%:%186=260%:% +%:%187=261%:% +%:%188=262%:% +%:%189=263%:% +%:%190=264%:% +%:%191=265%:% +%:%192=266%:% +%:%193=267%:% +%:%194=268%:% +%:%203=272%:% +%:%215=276%:% +%:%216=277%:% +%:%217=278%:% +%:%218=279%:% +%:%219=280%:% +%:%220=281%:% +%:%221=282%:% +%:%222=283%:% +%:%223=284%:% +%:%224=285%:% +%:%225=286%:% +%:%226=287%:% +%:%227=288%:% +%:%228=289%:% +%:%237=297%:% +%:%249=303%:% +%:%250=304%:% +%:%251=305%:% +%:%252=306%:% +%:%252=307%:% +%:%253=308%:% +%:%254=309%:% +%:%255=310%:% +%:%256=311%:% +%:%257=312%:% +%:%258=313%:% +%:%259=314%:% +%:%260=315%:% +%:%261=316%:% +%:%262=317%:% +%:%263=318%:% +%:%264=319%:% +%:%265=320%:% +%:%266=321%:% +%:%267=322%:% +%:%268=323%:% +%:%269=324%:% +%:%270=325%:% +%:%271=326%:% +%:%272=327%:% +%:%273=328%:% +%:%274=329%:% +%:%275=330%:% +%:%276=331%:% +%:%277=332%:% +%:%278=333%:% +%:%279=334%:% +%:%280=335%:% +%:%281=336%:% +%:%282=337%:% +%:%283=338%:% +%:%284=339%:% +%:%285=340%:% +%:%286=341%:% +%:%287=342%:% +%:%288=343%:% +%:%289=344%:% +%:%290=345%:% +%:%291=346%:% +%:%292=347%:% +%:%293=348%:% +%:%294=349%:% +%:%295=350%:% +%:%296=351%:% +%:%297=352%:% +%:%298=353%:% +%:%299=354%:% +%:%300=355%:% +%:%301=356%:% +%:%302=357%:% +%:%303=358%:% +%:%304=359%:% +%:%305=360%:% +%:%306=361%:% +%:%307=362%:% +%:%308=363%:% +%:%309=364%:% +%:%310=365%:% +%:%311=366%:% +%:%312=367%:% +%:%313=368%:% +%:%314=369%:% +%:%315=370%:% +%:%316=371%:% +%:%316=372%:% +%:%317=373%:% +%:%318=374%:% +%:%319=375%:% +%:%320=376%:% +%:%321=377%:% +%:%322=378%:% +%:%323=379%:% +%:%324=380%:% +%:%325=381%:% +%:%326=382%:% +%:%327=383%:% +%:%328=384%:% +%:%329=385%:% +%:%330=386%:% +%:%331=387%:% +%:%332=388%:% +%:%333=389%:% +%:%334=390%:% +%:%335=391%:% +%:%336=392%:% +%:%337=393%:% +%:%338=394%:% +%:%339=395%:% +%:%340=396%:% +%:%341=397%:% +%:%342=398%:% +%:%343=399%:% +%:%344=400%:% +%:%345=401%:% +%:%346=402%:% +%:%347=403%:% +%:%348=404%:% +%:%349=405%:% +%:%350=406%:% +%:%351=407%:% +%:%352=408%:% +%:%353=409%:% +%:%354=410%:% +%:%355=411%:% +%:%356=412%:% +%:%357=413%:% +%:%358=414%:% +%:%359=415%:% +%:%360=416%:% +%:%361=417%:% +%:%362=418%:% +%:%363=419%:% +%:%364=420%:% +%:%365=421%:% +%:%366=422%:% +%:%367=423%:% +%:%368=424%:% +%:%369=425%:% +%:%370=426%:% +%:%371=427%:% +%:%372=428%:% +%:%373=429%:% +%:%374=430%:% +%:%375=431%:% +%:%376=432%:% +%:%377=433%:% +%:%378=434%:% +%:%379=435%:% +%:%380=436%:% +%:%381=437%:% +%:%382=438%:% +%:%383=439%:% +%:%384=440%:% +%:%385=441%:% +%:%386=442%:% +%:%387=443%:% +%:%388=444%:% +%:%389=445%:% +%:%390=446%:% +%:%391=447%:% +%:%392=448%:% +%:%393=449%:% +%:%394=450%:% +%:%395=451%:% +%:%396=452%:% +%:%397=453%:% +%:%398=454%:% +%:%399=455%:% +%:%400=456%:% +%:%401=457%:% +%:%402=458%:% +%:%403=459%:% +%:%404=460%:% +%:%405=461%:% +%:%406=462%:% +%:%407=463%:% +%:%408=464%:% +%:%409=465%:% +%:%410=466%:% +%:%411=467%:% +%:%412=468%:% +%:%413=469%:% +%:%414=470%:% +%:%415=471%:% +%:%416=472%:% +%:%417=473%:% +%:%418=474%:% +%:%419=475%:% +%:%420=476%:% +%:%421=477%:% +%:%430=484%:% +%:%442=486%:% +%:%443=487%:% +%:%443=488%:% +%:%444=489%:% +%:%445=490%:% +%:%446=491%:% +%:%447=492%:% +%:%448=493%:% +%:%449=494%:% +%:%450=495%:% +%:%451=496%:% +%:%452=497%:% +%:%453=498%:% +%:%454=499%:% +%:%455=500%:% +%:%456=501%:% +%:%457=502%:% +%:%458=503%:% +%:%459=504%:% +%:%460=505%:% +%:%461=506%:% +%:%462=507%:% +%:%463=508%:% +%:%464=509%:% +%:%465=510%:% +%:%466=511%:% +%:%467=512%:% +%:%468=513%:% +%:%469=514%:% +%:%470=515%:% +%:%471=516%:% +%:%472=517%:% +%:%473=518%:% +%:%474=519%:% +%:%475=520%:% +%:%476=521%:% +%:%477=522%:% +%:%478=523%:% +%:%479=524%:% +%:%480=525%:% +%:%481=526%:% +%:%482=527%:% +%:%482=528%:% +%:%483=529%:% +%:%484=530%:% +%:%485=531%:% +%:%486=532%:% +%:%487=533%:% +%:%487=534%:% +%:%488=535%:% +%:%489=536%:% +%:%490=537%:% +%:%491=538%:% +%:%492=539%:% +%:%493=540%:% +%:%494=541%:% +%:%495=542%:% +%:%496=543%:% +%:%497=544%:% +%:%498=545%:% +%:%499=546%:% +%:%500=547%:% +%:%501=548%:% +%:%502=549%:% +%:%503=550%:% +%:%504=551%:% +%:%505=552%:% +%:%506=553%:% +%:%507=554%:% +%:%508=555%:% +%:%509=556%:% +%:%510=557%:% +%:%511=558%:% +%:%512=559%:% +%:%513=560%:% +%:%514=561%:% +%:%515=562%:% +%:%516=563%:% +%:%517=564%:% +%:%518=565%:% +%:%519=566%:% +%:%520=567%:% +%:%521=568%:% +%:%522=569%:% +%:%523=570%:% +%:%524=571%:% +%:%525=572%:% +%:%526=573%:% +%:%527=574%:% +%:%528=575%:% +%:%529=576%:% +%:%530=577%:% +%:%531=578%:% +%:%532=579%:% +%:%533=580%:% +%:%534=581%:% +%:%535=582%:% +%:%536=583%:% +%:%537=584%:% +%:%538=585%:% +%:%539=586%:% +%:%540=587%:% +%:%541=588%:% +%:%542=589%:% +%:%543=590%:% +%:%544=591%:% +%:%545=592%:% +%:%546=593%:% +%:%547=594%:% +%:%548=595%:% +%:%549=596%:% +%:%550=597%:% +%:%551=598%:% +%:%552=599%:% +%:%553=600%:% +%:%554=601%:% +%:%555=602%:% +%:%556=603%:% +%:%557=604%:% +%:%558=605%:% +%:%559=606%:% +%:%560=607%:% +%:%561=608%:% +%:%562=609%:% +%:%563=610%:% +%:%564=611%:% +%:%565=612%:% +%:%566=613%:% +%:%567=614%:% +%:%568=615%:% +%:%569=616%:% +%:%570=617%:% +%:%571=618%:% +%:%572=619%:% +%:%573=620%:% +%:%574=621%:% +%:%575=622%:% +%:%576=623%:% +%:%585=627%:% +%:%597=631%:% +%:%598=632%:% +%:%599=633%:% +%:%600=634%:% +%:%601=635%:% +%:%602=636%:% +%:%603=637%:% +%:%604=638%:% +%:%605=639%:% +%:%606=640%:% +%:%607=641%:% +%:%608=642%:% +%:%609=643%:% +%:%610=644%:% +%:%611=645%:% +%:%612=646%:% +%:%613=647%:% +%:%614=648%:% +%:%615=649%:% +%:%616=650%:% +%:%617=651%:% +%:%618=652%:% +%:%619=653%:% +%:%620=654%:% +%:%621=655%:% +%:%622=656%:% +%:%623=657%:% +%:%624=658%:% +%:%625=659%:% +%:%626=660%:% +%:%627=661%:% +%:%628=662%:% +%:%629=663%:% +%:%630=664%:% +%:%631=665%:% +%:%632=666%:% +%:%633=667%:% +%:%634=668%:% +%:%635=669%:% +%:%636=670%:% +%:%637=671%:% +%:%638=672%:% +%:%639=673%:% +%:%640=674%:% +%:%641=675%:% +%:%642=676%:% +%:%643=677%:% +%:%644=678%:% +%:%645=679%:% +%:%646=680%:% +%:%647=681%:% +%:%648=682%:% +%:%649=683%:% +%:%650=684%:% +%:%651=685%:% +%:%652=686%:% +%:%653=687%:% +%:%654=688%:% +%:%655=689%:% +%:%656=690%:% +%:%657=691%:% +%:%658=692%:% +%:%659=693%:% +%:%660=694%:% +%:%661=695%:% +%:%662=696%:% +%:%663=697%:% +%:%664=698%:% +%:%665=699%:% +%:%666=700%:% +%:%667=701%:% +%:%668=702%:% +%:%669=703%:% +%:%670=704%:% +%:%671=705%:% +%:%672=706%:% +%:%673=707%:% +%:%674=708%:% +%:%675=709%:% +%:%676=710%:% +%:%677=711%:% +%:%678=712%:% +%:%679=713%:% +%:%680=714%:% +%:%681=715%:% +%:%682=716%:% +%:%683=717%:% +%:%684=718%:% +%:%685=719%:% +%:%686=720%:% +%:%687=721%:% +%:%688=722%:% +%:%689=723%:% +%:%690=724%:% +%:%691=725%:% +%:%692=726%:% +%:%693=727%:% +%:%694=728%:% +%:%695=729%:% +%:%696=730%:% +%:%697=731%:% +%:%698=732%:% +%:%699=733%:% +%:%700=734%:% +%:%701=735%:% +%:%702=736%:% +%:%703=737%:% +%:%704=738%:% +%:%705=739%:% +%:%706=740%:% +%:%707=741%:% +%:%708=742%:% +%:%709=743%:% +%:%710=744%:% +%:%711=745%:% +%:%711=746%:% +%:%712=747%:% +%:%712=748%:% +%:%713=749%:% +%:%714=750%:% +%:%714=751%:% +%:%715=752%:% +%:%716=753%:% +%:%717=754%:% +%:%718=755%:% +%:%719=756%:% +%:%720=757%:% +%:%721=758%:% +%:%722=759%:% +%:%723=760%:% +%:%724=761%:% +%:%725=762%:% +%:%726=763%:% +%:%727=764%:% +%:%728=765%:% +%:%729=766%:% +%:%730=767%:% +%:%731=768%:% +%:%732=769%:% +%:%733=770%:% +%:%734=771%:% +%:%735=772%:% +%:%736=773%:% +%:%737=774%:% +%:%738=775%:% +%:%739=776%:% +%:%740=777%:% +%:%741=778%:% +%:%742=779%:% +%:%743=780%:% +%:%744=781%:% +%:%745=782%:% +%:%746=783%:% +%:%747=784%:% +%:%748=785%:% +%:%749=786%:% +%:%750=787%:% +%:%751=788%:% +%:%752=789%:% +%:%753=790%:% +%:%754=791%:% +%:%755=792%:% +%:%756=793%:% +%:%757=794%:% +%:%758=795%:% +%:%759=796%:% +%:%760=797%:% +%:%761=798%:% +%:%762=799%:% +%:%763=800%:% +%:%764=801%:% +%:%765=802%:% +%:%766=803%:% +%:%767=804%:% +%:%768=805%:% +%:%769=806%:% +%:%770=807%:% +%:%770=808%:% +%:%770=809%:% +%:%771=810%:% +%:%772=811%:% +%:%773=812%:% +%:%774=813%:% +%:%775=814%:% +%:%776=815%:% +%:%777=816%:% +%:%778=817%:% +%:%779=818%:% +%:%779=819%:% +%:%780=820%:% +%:%781=821%:% +%:%782=822%:% +%:%783=823%:% +%:%784=824%:% +%:%785=825%:% +%:%786=826%:% +%:%787=827%:% +%:%788=828%:% +%:%789=829%:% +%:%790=830%:% +%:%791=831%:% +%:%792=832%:% +%:%793=833%:% +%:%794=834%:% +%:%795=835%:% +%:%796=836%:% +%:%797=837%:% +%:%798=838%:% +%:%799=839%:% +%:%800=840%:% +%:%801=841%:% +%:%802=842%:% +%:%803=843%:% +%:%804=844%:% +%:%805=845%:% +%:%806=846%:% +%:%807=847%:% +%:%808=848%:% +%:%809=849%:% +%:%810=850%:% +%:%811=851%:% +%:%812=852%:% +%:%813=853%:% +%:%814=854%:% +%:%814=855%:% +%:%815=856%:% +%:%816=857%:% +%:%817=858%:% +%:%818=859%:% +%:%819=860%:% +%:%820=861%:% +%:%821=862%:% +%:%821=863%:% +%:%822=864%:% +%:%823=865%:% +%:%824=866%:% +%:%824=867%:% +%:%825=868%:% +%:%826=869%:% +%:%827=870%:% +%:%828=871%:% +%:%829=872%:% +%:%830=873%:% +%:%831=874%:% +%:%832=875%:% +%:%833=876%:% +%:%834=877%:% +%:%835=878%:% +%:%835=879%:% +%:%836=880%:% +%:%837=881%:% +%:%838=882%:% +%:%839=883%:% +%:%839=884%:% +%:%840=885%:% +%:%841=886%:% +%:%842=887%:% +%:%843=888%:% +%:%844=889%:% +%:%845=890%:% +%:%846=891%:% +%:%847=892%:% +%:%848=893%:% +%:%849=894%:% +%:%850=895%:% +%:%851=896%:% +%:%852=897%:% +%:%853=898%:% +%:%854=899%:% +%:%855=900%:% +%:%856=901%:% +%:%857=902%:% +%:%858=903%:% +%:%859=904%:% +%:%860=905%:% +%:%861=906%:% +%:%862=907%:% +%:%863=908%:% +%:%864=909%:% +%:%865=910%:% +%:%866=911%:% +%:%867=912%:% +%:%868=913%:% +%:%869=914%:% +%:%870=915%:% +%:%871=916%:% +%:%872=917%:% +%:%873=918%:% +%:%874=919%:% +%:%875=920%:% +%:%876=921%:% +%:%877=922%:% +%:%878=923%:% +%:%879=924%:% +%:%880=925%:% +%:%881=926%:% +%:%882=927%:% +%:%883=928%:% +%:%884=929%:% +%:%885=930%:% +%:%886=931%:% +%:%887=932%:% +%:%888=933%:% +%:%889=934%:% +%:%890=935%:% +%:%891=936%:% +%:%892=937%:% +%:%893=938%:% +%:%894=939%:% +%:%895=940%:% +%:%896=941%:% +%:%897=942%:% +%:%897=943%:% +%:%898=944%:% +%:%899=945%:% +%:%900=946%:% +%:%901=947%:% +%:%902=948%:% +%:%903=949%:% +%:%904=950%:% +%:%905=951%:% +%:%906=952%:% +%:%907=953%:% +%:%908=954%:% +%:%909=955%:% +%:%910=956%:% +%:%911=957%:% +%:%912=958%:% +%:%913=959%:% +%:%914=960%:% +%:%915=961%:% +%:%916=962%:% +%:%917=963%:% +%:%918=964%:% +%:%919=965%:% +%:%920=966%:% +%:%921=967%:% +%:%922=968%:% +%:%923=969%:% +%:%924=970%:% +%:%925=971%:% +%:%926=972%:% +%:%927=973%:% +%:%928=974%:% +%:%929=975%:% +%:%930=976%:% +%:%930=977%:% +%:%931=978%:% +%:%932=979%:% +%:%933=980%:% +%:%934=981%:% +%:%935=982%:% +%:%936=983%:% +%:%937=984%:% +%:%938=985%:% +%:%939=986%:% +%:%940=987%:% +%:%941=988%:% +%:%942=989%:% +%:%943=990%:% +%:%944=991%:% +%:%945=992%:% +%:%946=993%:% +%:%947=994%:% +%:%948=995%:% +%:%949=996%:% +%:%950=997%:% +%:%951=998%:% +%:%952=999%:% +%:%953=1000%:% +%:%954=1001%:% +%:%955=1002%:% +%:%956=1003%:% +%:%957=1004%:% +%:%958=1005%:% +%:%959=1006%:% +%:%960=1007%:% +%:%960=1008%:% +%:%961=1009%:% +%:%962=1010%:% +%:%963=1011%:% +%:%964=1012%:% +%:%965=1013%:% +%:%966=1014%:% +%:%967=1015%:% +%:%968=1016%:% +%:%968=1017%:% +%:%969=1018%:% +%:%970=1019%:% +%:%971=1020%:% +%:%972=1021%:% +%:%973=1022%:% +%:%974=1023%:% +%:%975=1024%:% +%:%976=1025%:% +%:%977=1026%:% +%:%978=1027%:% +%:%979=1028%:% +%:%980=1029%:% +%:%981=1030%:% +%:%982=1031%:% +%:%983=1032%:% +%:%984=1033%:% +%:%985=1034%:% +%:%986=1035%:% +%:%987=1036%:% +%:%988=1037%:% +%:%989=1038%:% +%:%990=1039%:% +%:%991=1040%:% +%:%992=1041%:% +%:%993=1042%:% +%:%994=1043%:% +%:%995=1044%:% +%:%996=1045%:% +%:%997=1046%:% +%:%998=1047%:% +%:%999=1048%:% +%:%1000=1049%:% +%:%1001=1050%:% +%:%1002=1051%:% +%:%1003=1052%:% +%:%1004=1053%:% +%:%1005=1054%:% +%:%1005=1055%:% +%:%1005=1056%:% +%:%1006=1057%:% +%:%1006=1058%:% +%:%1006=1059%:% +%:%1007=1060%:% +%:%1008=1061%:% +%:%1008=1062%:% +%:%1009=1063%:% +%:%1010=1064%:% +%:%1011=1065%:% +%:%1012=1066%:% +%:%1013=1067%:% +%:%1014=1068%:% +%:%1015=1069%:% +%:%1016=1070%:% +%:%1017=1071%:% +%:%1018=1072%:% +%:%1019=1073%:% +%:%1020=1074%:% +%:%1021=1075%:% +%:%1022=1076%:% +%:%1023=1077%:% +%:%1024=1078%:% +%:%1025=1079%:% +%:%1026=1080%:% +%:%1027=1081%:% +%:%1028=1082%:% +%:%1029=1083%:% +%:%1030=1084%:% +%:%1031=1085%:% +%:%1032=1086%:% +%:%1033=1087%:% +%:%1034=1088%:% +%:%1035=1089%:% +%:%1036=1090%:% +%:%1037=1091%:% +%:%1038=1092%:% +%:%1039=1093%:% +%:%1040=1094%:% +%:%1041=1095%:% +%:%1042=1096%:% +%:%1042=1097%:% +%:%1043=1098%:% +%:%1044=1099%:% +%:%1045=1100%:% +%:%1045=1101%:% +%:%1045=1102%:% +%:%1045=1103%:% +%:%1046=1104%:% +%:%1047=1105%:% +%:%1048=1106%:% +%:%1049=1107%:% +%:%1050=1108%:% +%:%1051=1109%:% +%:%1052=1110%:% +%:%1053=1111%:% +%:%1054=1112%:% +%:%1055=1113%:% +%:%1056=1114%:% +%:%1057=1115%:% +%:%1058=1116%:% +%:%1059=1117%:% +%:%1060=1118%:% +%:%1061=1119%:% +%:%1062=1120%:% +%:%1063=1121%:% +%:%1064=1122%:% +%:%1065=1123%:% +%:%1066=1124%:% +%:%1067=1125%:% +%:%1068=1126%:% +%:%1069=1127%:% +%:%1070=1128%:% +%:%1071=1129%:% +%:%1072=1130%:% +%:%1073=1131%:% +%:%1074=1132%:% +%:%1075=1133%:% +%:%1076=1134%:% +%:%1077=1135%:% +%:%1078=1136%:% +%:%1079=1137%:% +%:%1080=1138%:% +%:%1081=1139%:% +%:%1082=1140%:% +%:%1091=1144%:% +%:%1103=1148%:% +%:%1104=1149%:% +%:%1105=1150%:% +%:%1106=1151%:% +%:%1107=1152%:% +%:%1108=1153%:% +%:%1109=1154%:% +%:%1110=1155%:% +%:%1111=1156%:% +%:%1112=1157%:% +%:%1113=1158%:% +%:%1114=1159%:% +%:%1115=1160%:% +%:%1116=1161%:% +%:%1117=1162%:% +%:%1118=1163%:% +%:%1119=1164%:% +%:%1120=1165%:% +%:%1121=1166%:% +%:%1122=1167%:% +%:%1123=1168%:% +%:%1124=1169%:% +%:%1125=1170%:% +%:%1126=1171%:% +%:%1127=1172%:% +%:%1128=1173%:% +%:%1129=1174%:% +%:%1130=1175%:% +%:%1131=1176%:% +%:%1132=1177%:% +%:%1133=1178%:% +%:%1134=1179%:% +%:%1135=1180%:% +%:%1136=1181%:% +%:%1137=1182%:% +%:%1138=1183%:% +%:%1139=1184%:% +%:%1140=1185%:% +%:%1141=1186%:% +%:%1142=1187%:% +%:%1143=1188%:% +%:%1144=1189%:% +%:%1145=1190%:% +%:%1146=1191%:% +%:%1147=1192%:% +%:%1148=1193%:% +%:%1149=1194%:% +%:%1150=1195%:% +%:%1151=1196%:% +%:%1152=1197%:% +%:%1153=1198%:% +%:%1154=1199%:% +%:%1155=1200%:% +%:%1156=1201%:% +%:%1157=1202%:% +%:%1158=1203%:% +%:%1159=1204%:% +%:%1160=1205%:% +%:%1161=1206%:% +%:%1162=1207%:% +%:%1163=1208%:% +%:%1164=1209%:% +%:%1165=1210%:% +%:%1166=1211%:% +%:%1167=1212%:% +%:%1168=1213%:% +%:%1169=1214%:% +%:%1170=1215%:% +%:%1171=1216%:% +%:%1172=1217%:% +%:%1173=1218%:% +%:%1174=1219%:% +%:%1175=1220%:% +%:%1176=1221%:% +%:%1177=1222%:% +%:%1178=1223%:% +%:%1179=1224%:% +%:%1180=1225%:% +%:%1181=1226%:% +%:%1182=1227%:% +%:%1183=1228%:% +%:%1184=1229%:% +%:%1185=1230%:% +%:%1186=1231%:% +%:%1187=1232%:% +%:%1188=1233%:% +%:%1189=1234%:% +%:%1190=1235%:% +%:%1191=1236%:% +%:%1192=1237%:% +%:%1193=1238%:% +%:%1194=1239%:% +%:%1195=1240%:% +%:%1196=1241%:% +%:%1197=1242%:% +%:%1198=1243%:% +%:%1199=1244%:% +%:%1200=1245%:% +%:%1201=1246%:% +%:%1202=1247%:% +%:%1203=1248%:% +%:%1204=1249%:% +%:%1205=1250%:% +%:%1206=1251%:% +%:%1207=1252%:% +%:%1208=1253%:% +%:%1209=1254%:% +%:%1210=1255%:% +%:%1211=1256%:% +%:%1212=1257%:% +%:%1213=1258%:% +%:%1214=1259%:% +%:%1215=1260%:% +%:%1216=1261%:% +%:%1217=1262%:% +%:%1218=1263%:% +%:%1218=1264%:% +%:%1219=1265%:% +%:%1220=1266%:% +%:%1221=1267%:% +%:%1222=1268%:% +%:%1223=1269%:% +%:%1224=1270%:% +%:%1225=1271%:% +%:%1226=1272%:% +%:%1227=1273%:% +%:%1228=1274%:% +%:%1229=1275%:% +%:%1229=1276%:% +%:%1230=1277%:% +%:%1231=1278%:% +%:%1232=1279%:% +%:%1233=1280%:% +%:%1234=1281%:% +%:%1235=1282%:% +%:%1236=1283%:% +%:%1237=1284%:% +%:%1238=1285%:% +%:%1239=1286%:% +%:%1240=1287%:% +%:%1241=1288%:% +%:%1242=1289%:% +%:%1243=1290%:% +%:%1244=1291%:% +%:%1245=1292%:% +%:%1246=1293%:% +%:%1247=1294%:% +%:%1248=1295%:% +%:%1249=1296%:% +%:%1250=1297%:% +%:%1251=1298%:% +%:%1252=1299%:% +%:%1253=1300%:% +%:%1254=1301%:% +%:%1255=1302%:% +%:%1256=1303%:% +%:%1257=1304%:% +%:%1258=1305%:% +%:%1259=1306%:% +%:%1260=1307%:% +%:%1261=1308%:% +%:%1262=1309%:% +%:%1263=1310%:% +%:%1264=1311%:% +%:%1265=1312%:% +%:%1266=1313%:% +%:%1267=1314%:% +%:%1268=1315%:% +%:%1269=1316%:% +%:%1270=1317%:% +%:%1271=1318%:% +%:%1272=1319%:% +%:%1273=1320%:% +%:%1274=1321%:% +%:%1275=1322%:% +%:%1276=1323%:% +%:%1277=1324%:% +%:%1278=1325%:% +%:%1279=1326%:% +%:%1280=1327%:% +%:%1281=1328%:% +%:%1282=1329%:% +%:%1283=1330%:% +%:%1283=1331%:% +%:%1283=1332%:% +%:%1284=1333%:% +%:%1285=1334%:% +%:%1285=1335%:% +%:%1285=1336%:% +%:%1285=1337%:% +%:%1286=1338%:% +%:%1287=1339%:% +%:%1288=1340%:% +%:%1289=1341%:% +%:%1290=1342%:% +%:%1290=1343%:% +%:%1291=1344%:% +%:%1292=1345%:% +%:%1293=1346%:% +%:%1294=1347%:% +%:%1295=1348%:% +%:%1296=1349%:% +%:%1297=1350%:% +%:%1298=1351%:% +%:%1299=1352%:% +%:%1300=1353%:% +%:%1301=1354%:% +%:%1302=1355%:% +%:%1303=1356%:% +%:%1304=1357%:% +%:%1305=1358%:% +%:%1306=1359%:% +%:%1307=1360%:% +%:%1308=1361%:% +%:%1309=1362%:% +%:%1310=1363%:% +%:%1311=1364%:% +%:%1312=1365%:% +%:%1313=1366%:% +%:%1314=1367%:% +%:%1315=1368%:% +%:%1316=1369%:% +%:%1317=1370%:% +%:%1318=1371%:% +%:%1319=1372%:% +%:%1320=1373%:% +%:%1321=1374%:% +%:%1322=1375%:% +%:%1323=1376%:% +%:%1324=1377%:% +%:%1325=1378%:% +%:%1326=1379%:% +%:%1327=1380%:% +%:%1328=1381%:% +%:%1329=1382%:% +%:%1330=1383%:% +%:%1331=1384%:% +%:%1332=1385%:% +%:%1333=1386%:% +%:%1334=1387%:% +%:%1335=1388%:% +%:%1336=1389%:% +%:%1337=1390%:% +%:%1338=1391%:% +%:%1338=1392%:% +%:%1339=1393%:% +%:%1339=1394%:% +%:%1340=1395%:% +%:%1340=1396%:% +%:%1341=1397%:% +%:%1342=1398%:% +%:%1343=1399%:% +%:%1344=1400%:% +%:%1345=1401%:% +%:%1346=1402%:% +%:%1347=1403%:% +%:%1347=1404%:% +%:%1348=1405%:% +%:%1348=1406%:% +%:%1349=1407%:% +%:%1349=1408%:% +%:%1350=1409%:% +%:%1351=1410%:% +%:%1352=1411%:% +%:%1353=1412%:% +%:%1354=1413%:% +%:%1355=1414%:% +%:%1356=1415%:% +%:%1357=1416%:% +%:%1358=1417%:% +%:%1359=1418%:% +%:%1360=1419%:% +%:%1361=1420%:% +%:%1362=1421%:% +%:%1363=1422%:% +%:%1364=1423%:% +%:%1365=1424%:% +%:%1366=1425%:% +%:%1367=1426%:% +%:%1368=1427%:% +%:%1369=1428%:% +%:%1370=1429%:% +%:%1371=1430%:% +%:%1372=1431%:% +%:%1373=1432%:% +%:%1374=1433%:% +%:%1375=1434%:% +%:%1376=1435%:% +%:%1377=1436%:% +%:%1378=1437%:% +%:%1379=1438%:% +%:%1380=1439%:% +%:%1380=1440%:% +%:%1381=1441%:% +%:%1382=1442%:% +%:%1383=1443%:% +%:%1384=1444%:% +%:%1385=1445%:% +%:%1386=1446%:% +%:%1387=1447%:% +%:%1387=1448%:% +%:%1388=1449%:% +%:%1389=1450%:% +%:%1390=1451%:% +%:%1390=1452%:% +%:%1391=1453%:% +%:%1392=1454%:% +%:%1393=1455%:% +%:%1393=1456%:% +%:%1394=1457%:% +%:%1394=1458%:% +%:%1395=1459%:% +%:%1395=1460%:% +%:%1396=1461%:% +%:%1397=1462%:% +%:%1398=1463%:% +%:%1398=1464%:% +%:%1399=1465%:% +%:%1399=1466%:% +%:%1400=1467%:% +%:%1400=1468%:% +%:%1400=1469%:% +%:%1400=1470%:% +%:%1401=1471%:% +%:%1401=1472%:% +%:%1402=1473%:% +%:%1403=1474%:% +%:%1403=1476%:% +%:%1403=1477%:% +%:%1403=1478%:% +%:%1403=1479%:% +%:%1403=1480%:% +%:%1404=1481%:% +%:%1404=1482%:% +%:%1405=1483%:% +%:%1405=1484%:% +%:%1406=1485%:% +%:%1406=1486%:% +%:%1407=1487%:% +%:%1408=1488%:% +%:%1409=1489%:% +%:%1410=1490%:% +%:%1410=1491%:% +%:%1411=1492%:% +%:%1411=1493%:% +%:%1412=1494%:% +%:%1413=1495%:% +%:%1413=1496%:% +%:%1414=1497%:% +%:%1415=1498%:% +%:%1416=1499%:% +%:%1417=1500%:% +%:%1418=1501%:% +%:%1419=1502%:% +%:%1420=1503%:% +%:%1421=1504%:% +%:%1422=1505%:% +%:%1423=1506%:% +%:%1424=1507%:% +%:%1425=1508%:% +%:%1426=1509%:% +%:%1427=1510%:% +%:%1428=1511%:% +%:%1429=1512%:% +%:%1430=1513%:% +%:%1431=1514%:% +%:%1432=1515%:% +%:%1433=1516%:% +%:%1434=1517%:% +%:%1435=1518%:% +%:%1436=1519%:% +%:%1437=1520%:% +%:%1438=1521%:% +%:%1439=1522%:% +%:%1440=1523%:% +%:%1441=1524%:% +%:%1442=1525%:% +%:%1443=1526%:% +%:%1444=1527%:% +%:%1445=1528%:% +%:%1446=1529%:% +%:%1447=1530%:% +%:%1448=1531%:% +%:%1449=1532%:% +%:%1450=1533%:% +%:%1451=1534%:% +%:%1452=1535%:% +%:%1453=1536%:% +%:%1454=1537%:% +%:%1455=1538%:% +%:%1456=1539%:% +%:%1457=1540%:% +%:%1458=1541%:% +%:%1459=1542%:% +%:%1460=1543%:% +%:%1461=1544%:% +%:%1462=1545%:% +%:%1463=1546%:% +%:%1464=1547%:% +%:%1473=1551%:% +%:%1485=1558%:% +%:%1486=1559%:% +%:%1487=1560%:% +%:%1488=1561%:% +%:%1489=1562%:% +%:%1490=1563%:% +%:%1491=1564%:% +%:%1500=1569%:% +%:%1512=1573%:% +%:%1513=1574%:% +%:%1514=1575%:% +%:%1515=1576%:% +%:%1516=1577%:% +%:%1517=1578%:% +%:%1518=1579%:% +%:%1519=1580%:% +%:%1520=1581%:% +%:%1521=1582%:% +%:%1522=1583%:% +%:%1523=1584%:% +%:%1524=1585%:% +%:%1525=1586%:% +%:%1526=1587%:% +%:%1527=1588%:% +%:%1528=1589%:% +%:%1529=1590%:% +%:%1530=1591%:% +%:%1531=1592%:% +%:%1532=1593%:% +%:%1533=1594%:% +%:%1534=1595%:% +%:%1535=1596%:% +%:%1536=1597%:% +%:%1537=1598%:% +%:%1538=1599%:% +%:%1539=1600%:% +%:%1540=1601%:% +%:%1541=1602%:% +%:%1542=1603%:% +%:%1543=1604%:% +%:%1544=1605%:% +%:%1545=1606%:% +%:%1546=1607%:% +%:%1547=1608%:% +%:%1548=1609%:% +%:%1549=1610%:% +%:%1550=1611%:% +%:%1551=1612%:% +%:%1552=1613%:% +%:%1553=1614%:% +%:%1554=1615%:% +%:%1555=1616%:% +%:%1556=1617%:% +%:%1557=1618%:% +%:%1558=1619%:% +%:%1559=1620%:% +%:%1560=1621%:% +%:%1561=1622%:% +%:%1562=1623%:% +%:%1563=1624%:% +%:%1564=1625%:% +%:%1565=1626%:% +%:%1566=1627%:% +%:%1567=1628%:% +%:%1568=1629%:% +%:%1569=1630%:% +%:%1570=1631%:% +%:%1571=1632%:% +%:%1572=1633%:% +%:%1573=1634%:% +%:%1574=1635%:% +%:%1575=1636%:% +%:%1576=1637%:% +%:%1577=1638%:% +%:%1578=1639%:% +%:%1579=1640%:% +%:%1580=1641%:% +%:%1581=1642%:% +%:%1582=1643%:% +%:%1583=1644%:% +%:%1584=1645%:% +%:%1585=1646%:% +%:%1586=1647%:% +%:%1587=1648%:% +%:%1588=1649%:% +%:%1589=1650%:% +%:%1590=1651%:% +%:%1591=1652%:% +%:%1592=1653%:% +%:%1593=1654%:% +%:%1594=1655%:% +%:%1595=1656%:% +%:%1596=1657%:% +%:%1597=1658%:% +%:%1598=1659%:% +%:%1599=1660%:% +%:%1600=1661%:% +%:%1601=1662%:% +%:%1601=1778%:% +%:%1602=1779%:% +%:%1603=1780%:% +%:%1604=1781%:% +%:%1605=1782%:% +%:%1605=1943%:% +%:%1606=1944%:% +%:%1607=1945%:% +%:%1608=1946%:% +%:%1609=1947%:% +%:%1610=1948%:% +%:%1611=1949%:% +%:%1612=1950%:% +%:%1613=1951%:% +%:%1614=1952%:% +%:%1615=1953%:% +%:%1616=1954%:% +%:%1617=1955%:% +%:%1618=1956%:% +%:%1619=1957%:% +%:%1620=1958%:% +%:%1621=1959%:% +%:%1622=1960%:% +%:%1623=1961%:% +%:%1624=1962%:% +%:%1624=1963%:% +%:%1625=1964%:% +%:%1625=1965%:% +%:%1625=1966%:% +%:%1626=1967%:% +%:%1626=1968%:% +%:%1627=1969%:% +%:%1628=1970%:% +%:%1629=1971%:% +%:%1630=1972%:% +%:%1631=1973%:% +%:%1632=1974%:% +%:%1633=1975%:% +%:%1634=1976%:% +%:%1635=1977%:% +%:%1636=1978%:% +%:%1637=1979%:% +%:%1638=1980%:% +%:%1639=1981%:% +%:%1640=1982%:% +%:%1641=1983%:% +%:%1642=1984%:% +%:%1643=1985%:% +%:%1643=1986%:% +%:%1644=1987%:% +%:%1645=1988%:% +%:%1646=1989%:% +%:%1646=1990%:% +%:%1646=1991%:% +%:%1647=1992%:% +%:%1648=1993%:% +%:%1649=1994%:% +%:%1649=1995%:% +%:%1650=1996%:% +%:%1650=1997%:% +%:%1651=1998%:% +%:%1652=1999%:% +%:%1653=2000%:% +%:%1654=2001%:% +%:%1655=2002%:% +%:%1656=2003%:% +%:%1657=2004%:% +%:%1658=2005%:% +%:%1659=2006%:% +%:%1660=2007%:% +%:%1661=2008%:% +%:%1662=2009%:% +%:%1663=2010%:% +%:%1664=2011%:% +%:%1665=2012%:% +%:%1666=2013%:% +%:%1667=2014%:% +%:%1676=2019%:% +%:%1688=2023%:% +%:%1689=2024%:% +%:%1690=2025%:% +%:%1691=2026%:% +%:%1692=2027%:% +%:%1693=2028%:% +%:%1694=2029%:% +%:%1695=2030%:% +%:%1696=2031%:% +%:%1697=2032%:% +%:%1698=2033%:% +%:%1699=2034%:% +%:%1700=2035%:% +%:%1701=2036%:% +%:%1702=2037%:% +%:%1703=2038%:% +%:%1704=2039%:% +%:%1705=2040%:% +%:%1706=2041%:% +%:%1707=2042%:% +%:%1708=2043%:% +%:%1709=2044%:% +%:%1710=2045%:% +%:%1711=2046%:% +%:%1712=2047%:% +%:%1713=2048%:% +%:%1714=2049%:% +%:%1715=2050%:% +%:%1716=2051%:% +%:%1717=2052%:% +%:%1718=2053%:% +%:%1719=2054%:% +%:%1720=2055%:% +%:%1721=2056%:% +%:%1722=2057%:% +%:%1723=2058%:% +%:%1724=2059%:% +%:%1725=2060%:% +%:%1726=2061%:% +%:%1727=2062%:% +%:%1728=2063%:% +%:%1729=2064%:% +%:%1730=2065%:% +%:%1731=2066%:% +%:%1732=2067%:% +%:%1733=2068%:% +%:%1734=2069%:% +%:%1735=2070%:% +%:%1736=2071%:% +%:%1737=2072%:% +%:%1738=2073%:% +%:%1739=2074%:% +%:%1740=2075%:% +%:%1753=2081%:% +%:%1756=2082%:% +%:%1757=2083%:% diff -r c80720289645 -r 0efa7ffd96ff thys2/Journal/Paper.thy --- a/thys2/Journal/Paper.thy Tue Jan 11 23:55:13 2022 +0000 +++ b/thys2/Journal/Paper.thy Tue Jan 11 23:58:39 2022 +0000 @@ -50,10 +50,10 @@ Cons ("_\<^latex>\\\mbox{$\\,$}\::\<^latex>\\\mbox{$\\,$}\_" [75,73] 73) and ZERO ("\<^bold>0" 81) and - ONE ("\<^bold>1" 81) and + ONE ("\<^bold>1" 81) and CH ("_" [1000] 80) and - ALT ("_ + _" [77,77] 78) and - SEQ ("_ \ _" [77,77] 78) and + ALT ("_ + _" [77,77] 77) and + SEQ ("_ \ _" [78,78] 78) and STAR ("_\<^sup>\" [79] 78) and val.Void ("Empty" 78) and @@ -286,8 +286,10 @@ \end{tabular} \end{center} -This rule allows us to simplify \mbox{@{term "ALT (SEQ (ALT a b) c) (SEQ a c)"}} - +This rule allows us to simplify \mbox{@{term "(ALT (SEQ (ALT a b) c) (SEQ a c))"}} + into \mbox{@{term "ALT (SEQ a c) (SEQ b c)"}}, +which cannot be done under the rrewrite rule because only alternatives which are +children of another alternative can be spilled out. \ (*This rule allows us to simplify \mbox{@{term "ALT (SEQ (ALT a b) c) (SEQ a c)"}} diff -r c80720289645 -r 0efa7ffd96ff thys2/Journal/Paper.thy~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/thys2/Journal/Paper.thy~ Tue Jan 11 23:58:39 2022 +0000 @@ -0,0 +1,2313 @@ +(*<*) +theory Paper +imports + "../Lexer" + "../Simplifying" + "../Positions" + + "../SizeBound" + "HOL-Library.LaTeXsugar" +begin + +lemma Suc_0_fold: + "Suc 0 = 1" +by simp + + + +declare [[show_question_marks = false]] + +syntax (latex output) + "_Collect" :: "pttrn => bool => 'a set" ("(1{_ \<^latex>\\\mbox{\\boldmath$\\mid$}\ _})") + "_CollectIn" :: "pttrn => 'a set => bool => 'a set" ("(1{_ \ _ |e _})") + +syntax + "_Not_Ex" :: "idts \ bool \ bool" ("(3\_.a./ _)" [0, 10] 10) + "_Not_Ex1" :: "pttrn \ bool \ bool" ("(3\!_.a./ _)" [0, 10] 10) + + +abbreviation + "der_syn r c \ der c r" + +abbreviation + "ders_syn r s \ ders s r" + + abbreviation + "bder_syn r c \ bder c r" + +abbreviation + "bders_syn r s \ bders r s" + + +abbreviation + "nprec v1 v2 \ \(v1 :\val v2)" + + + + +notation (latex output) + If ("(\<^latex>\\\textrm{\if\<^latex>\}\ (_)/ \<^latex>\\\textrm{\then\<^latex>\}\ (_)/ \<^latex>\\\textrm{\else\<^latex>\}\ (_))" 10) and + Cons ("_\<^latex>\\\mbox{$\\,$}\::\<^latex>\\\mbox{$\\,$}\_" [75,73] 73) and + + ZERO ("\<^bold>0" 81) and + ONE ("\<^bold>1" 81) and + CH ("_" [1000] 80) and + ALT ("_ + _" [77,77] 78) and + SEQ ("_ \ _" [77,77] 78) and + STAR ("_\<^sup>\" [79] 78) and + + val.Void ("Empty" 78) and + val.Char ("Char _" [1000] 78) and + val.Left ("Left _" [79] 78) and + val.Right ("Right _" [1000] 78) and + val.Seq ("Seq _ _" [79,79] 78) and + val.Stars ("Stars _" [79] 78) and + + L ("L'(_')" [10] 78) and + LV ("LV _ _" [80,73] 78) and + der_syn ("_\\_" [79, 1000] 76) and + ders_syn ("_\\_" [79, 1000] 76) and + flat ("|_|" [75] 74) and + flats ("|_|" [72] 74) and + Sequ ("_ @ _" [78,77] 63) and + injval ("inj _ _ _" [79,77,79] 76) and + mkeps ("mkeps _" [79] 76) and + length ("len _" [73] 73) and + intlen ("len _" [73] 73) and + set ("_" [73] 73) and + + Prf ("_ : _" [75,75] 75) and + Posix ("'(_, _') \ _" [63,75,75] 75) and + + lexer ("lexer _ _" [78,78] 77) and + F_RIGHT ("F\<^bsub>Right\<^esub> _") and + F_LEFT ("F\<^bsub>Left\<^esub> _") and + F_ALT ("F\<^bsub>Alt\<^esub> _ _") and + F_SEQ1 ("F\<^bsub>Seq1\<^esub> _ _") and + F_SEQ2 ("F\<^bsub>Seq2\<^esub> _ _") and + F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and + simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and + simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and + slexer ("lexer\<^sup>+" 1000) and + + at ("_\<^latex>\\\mbox{$\\downharpoonleft$}\\<^bsub>_\<^esub>") and + lex_list ("_ \\<^bsub>lex\<^esub> _") and + PosOrd ("_ \\<^bsub>_\<^esub> _" [77,77,77] 77) and + PosOrd_ex ("_ \ _" [77,77] 77) and + PosOrd_ex_eq ("_ \<^latex>\\\mbox{$\\preccurlyeq$}\ _" [77,77] 77) and + pflat_len ("\_\\<^bsub>_\<^esub>") and + nprec ("_ \<^latex>\\\mbox{$\\not\\prec$}\ _" [77,77] 77) and + + bder_syn ("_\<^latex>\\\mbox{$\\bbslash$}\_" [79, 1000] 76) and + bders_syn ("_\<^latex>\\\mbox{$\\bbslash$}\_" [79, 1000] 76) and + intern ("_\<^latex>\\\mbox{$^\\uparrow$}\" [900] 80) and + erase ("_\<^latex>\\\mbox{$^\\downarrow$}\" [1000] 74) and + bnullable ("nullable\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and + bmkeps ("mkeps\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and + blexer ("lexer\<^latex>\\\mbox{$_b$}\ _ _" [77, 77] 80) and + code ("code _" [79] 74) and + + DUMMY ("\<^latex>\\\underline{\\hspace{2mm}}\") + + +definition + "match r s \ nullable (ders s r)" + + +lemma LV_STAR_ONE_empty: + shows "LV (STAR ONE) [] = {Stars []}" +by(auto simp add: LV_def elim: Prf.cases intro: Prf.intros) + + + +(* +comments not implemented + +p9. The condition "not exists s3 s4..." appears often enough (in particular in +the proof of Lemma 3) to warrant a definition. + +*) + + +(*>*) + +section\Core of the proof\ +text \ +This paper builds on previous work by Ausaf and Urban using +regular expression'd bit-coded derivatives to do lexing that +is both fast and satisfies the POSIX specification. +In their work, a bit-coded algorithm introduced by Sulzmann and Lu +was formally verified in Isabelle, by a very clever use of +flex function and retrieve to carefully mimic the way a value is +built up by the injection funciton. + +In the previous work, Ausaf and Urban established the below equality: +\begin{lemma} +@{thm [mode=IfThen] MAIN_decode} +\end{lemma} + +This lemma establishes a link with the lexer without bit-codes. + +With it we get the correctness of bit-coded algorithm. +\begin{lemma} +@{thm [mode=IfThen] blexer_correctness} +\end{lemma} + +However what is not certain is whether we can add simplification +to the bit-coded algorithm, without breaking the correct lexing output. + + +The reason that we do need to add a simplification phase +after each derivative step of $\textit{blexer}$ is +because it produces intermediate +regular expressions that can grow exponentially. +For example, the regular expression $(a+aa)^*$ after taking +derivative against just 10 $a$s will have size 8192. + +%TODO: add figure for this? + + +Therefore, we insert a simplification phase +after each derivation step, as defined below: +\begin{lemma} +@{thm blexer_simp_def} +\end{lemma} + +The simplification function is given as follows: + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bsimp.simps(2)} & $\dn$ & @{thm (rhs) bsimp.simps(2)}\\ + @{thm (lhs) bsimp.simps(3)} & $\dn$ & @{thm (rhs) bsimp.simps(3)}\\ + +\end{tabular} +\end{center} + +And the two helper functions are: +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bsimp_AALTs.simps(2)[of "bs\<^sub>1" "r" ]} & $\dn$ & @{thm (rhs) bsimp.simps(1)[of "bs\<^sub>1" "r" ]}\\ + @{thm (lhs) bsimp_AALTs.simps(2)} & $\dn$ & @{thm (rhs) bsimp.simps(2)}\\ + @{thm (lhs) bsimp_AALTs.simps(3)} & $\dn$ & @{thm (rhs) bsimp.simps(3)}\\ + +\end{tabular} +\end{center} + + +This might sound trivial in the case of producing a YES/NO answer, +but once we require a lexing output to be produced (which is required +in applications like compiler front-end, malicious attack domain extraction, +etc.), it is not straightforward if we still extract what is needed according +to the POSIX standard. + + + + + +By simplification, we mean specifically the following rules: + +\begin{center} + \begin{tabular}{lcl} + @{thm[mode=Axiom] rrewrite.intros(1)[of "bs" "r\<^sub>2"]}\\ + @{thm[mode=Axiom] rrewrite.intros(2)[of "bs" "r\<^sub>1"]}\\ + @{thm[mode=Axiom] rrewrite.intros(3)[of "bs" "bs\<^sub>1" "r\<^sub>1"]}\\ + @{thm[mode=Rule] rrewrite.intros(4)[of "r\<^sub>1" "r\<^sub>2" "bs" "r\<^sub>3"]}\\ + @{thm[mode=Rule] rrewrite.intros(5)[of "r\<^sub>3" "r\<^sub>4" "bs" "r\<^sub>1"]}\\ + @{thm[mode=Rule] rrewrite.intros(6)[of "r" "r'" "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Axiom] rrewrite.intros(7)[of "bs" "rs\<^sub>a" "rs\<^sub>b"]}\\ + @{thm[mode=Axiom] rrewrite.intros(8)[of "bs" "rs\<^sub>a" "bs\<^sub>1" "rs\<^sub>1" "rs\<^sub>b"]}\\ + @{thm[mode=Axiom] rrewrite.intros(9)[of "bs" "bs\<^sub>1" "rs"]}\\ + @{thm[mode=Axiom] rrewrite.intros(10)[of "bs" ]}\\ + @{thm[mode=Axiom] rrewrite.intros(11)[of "bs" "r\<^sub>1"]}\\ + @{thm[mode=Rule] rrewrite.intros(12)[of "a\<^sub>1" "a\<^sub>2" "bs" "rs\<^sub>a" "rs\<^sub>b" "rs\<^sub>c"]}\\ + + \end{tabular} +\end{center} + + +And these can be made compact by the following simplification function: + +where the function $\mathit{bsimp_AALTs}$ + +The core idea of the proof is that two regular expressions, +if "isomorphic" up to a finite number of rewrite steps, will +remain "isomorphic" when we take the same sequence of +derivatives on both of them. +This can be expressed by the following rewrite relation lemma: +\begin{lemma} +@{thm [mode=IfThen] central} +\end{lemma} + +This isomorphic relation implies a property that leads to the +correctness result: +if two (nullable) regular expressions are "rewritable" in many steps +from one another, +then a call to function $\textit{bmkeps}$ gives the same +bit-sequence : +\begin{lemma} +@{thm [mode=IfThen] rewrites_bmkeps} +\end{lemma} + +Given the same bit-sequence, the decode function +will give out the same value, which is the output +of both lexers: +\begin{lemma} +@{thm blexer_def} +\end{lemma} + +\begin{lemma} +@{thm blexer_simp_def} +\end{lemma} + +And that yields the correctness result: +\begin{lemma} +@{thm blexersimp_correctness} +\end{lemma} + +The nice thing about the above +\ + + +section \ Additional Simp Rules?\ + + +text \ +One question someone would ask is: +can we add more "atomic" simplification/rewriting rules, +so the simplification is even more aggressive, making +the intermediate results smaller, and therefore more space-efficient? +For example, one might want to do open up alternatives who is a child +of a sequence: + +\begin{center} + \begin{tabular}{lcl} + @{thm[mode=Rule] aggressive.intros(1)[of "bs" "bs1" "rs" "r"]}\\ + \end{tabular} +\end{center} + +This rule allows us to simplify \mbox{@{term "(ALT (SEQ (ALT a b) c) (SEQ a c))"}} + into \mbox{@{term "ALT (SEQ a c) (SEQ b c)"}}, +which is cannot be done under the \mbox{ \} rule because only alternatives which are +children of another alternative can be spilled out. +\ + +(*This rule allows us to simplify \mbox{@{term "ALT (SEQ (ALT a b) c) (SEQ a c)"}} + into \mbox{@{term "ALT (SEQ a c) (SEQ b c)"}}, +which is cannot be done under the \ rule because only alternatives which are +children of another alternative can be spilled out.*) +section \Introduction\ + + +text \ + + +Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em +derivative} @{term "der c r"} of a regular expression \r\ w.r.t.\ +a character~\c\, and showed that it gave a simple solution to the +problem of matching a string @{term s} with a regular expression @{term +r}: if the derivative of @{term r} w.r.t.\ (in succession) all the +characters of the string matches the empty string, then @{term r} +matches @{term s} (and {\em vice versa}). The derivative has the +property (which may almost be regarded as its specification) that, for +every string @{term s} and regular expression @{term r} and character +@{term c}, one has @{term "cs \ L(r)"} if and only if \mbox{@{term "s \ L(der c r)"}}. +The beauty of Brzozowski's derivatives is that +they are neatly expressible in any functional language, and easily +definable and reasoned about in theorem provers---the definitions just +consist of inductive datatypes and simple recursive functions. A +mechanised correctness proof of Brzozowski's matcher in for example HOL4 +has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in +Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}. +And another one in Coq is given by Coquand and Siles \cite{Coquand2012}. + +If a regular expression matches a string, then in general there is more +than one way of how the string is matched. There are two commonly used +disambiguation strategies to generate a unique answer: one is called +GREEDY matching \cite{Frisch2004} and the other is POSIX +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. +For example consider the string @{term xy} and the regular expression +\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be +matched in two `iterations' by the single letter-regular expressions +@{term x} and @{term y}, or directly in one iteration by @{term xy}. The +first case corresponds to GREEDY matching, which first matches with the +left-most symbol and only matches the next symbol in case of a mismatch +(this is greedy in the sense of preferring instant gratification to +delayed repletion). The second case is POSIX matching, which prefers the +longest match. + +In the context of lexing, where an input string needs to be split up +into a sequence of tokens, POSIX is the more natural disambiguation +strategy for what programmers consider basic syntactic building blocks +in their programs. These building blocks are often specified by some +regular expressions, say \r\<^bsub>key\<^esub>\ and \r\<^bsub>id\<^esub>\ for recognising keywords and identifiers, +respectively. There are a few underlying (informal) rules behind +tokenising a string in a POSIX \cite{POSIX} fashion: + +\begin{itemize} +\item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}): +The longest initial substring matched by any regular expression is taken as +next token.\smallskip + +\item[$\bullet$] \emph{Priority Rule:} +For a particular longest initial substring, the first (leftmost) regular expression +that can match determines the token.\smallskip + +\item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall +not match an empty string unless this is the only match for the repetition.\smallskip + +\item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to +be longer than no match at all. +\end{itemize} + +\noindent Consider for example a regular expression \r\<^bsub>key\<^esub>\ for recognising keywords such as \if\, +\then\ and so on; and \r\<^bsub>id\<^esub>\ +recognising identifiers (say, a single character followed by +characters or numbers). Then we can form the regular expression +\(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\\ +and use POSIX matching to tokenise strings, say \iffoo\ and +\if\. For \iffoo\ we obtain by the Longest Match Rule +a single identifier token, not a keyword followed by an +identifier. For \if\ we obtain by the Priority Rule a keyword +token, not an identifier token---even if \r\<^bsub>id\<^esub>\ +matches also. By the Star Rule we know \(r\<^bsub>key\<^esub> + +r\<^bsub>id\<^esub>)\<^sup>\\ matches \iffoo\, +respectively \if\, in exactly one `iteration' of the star. The +Empty String Rule is for cases where, for example, the regular expression +\(a\<^sup>\)\<^sup>\\ matches against the +string \bc\. Then the longest initial matched substring is the +empty string, which is matched by both the whole regular expression +and the parenthesised subexpression. + + +One limitation of Brzozowski's matcher is that it only generates a +YES/NO answer for whether a string is being matched by a regular +expression. Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher +to allow generation not just of a YES/NO answer but of an actual +matching, called a [lexical] {\em value}. Assuming a regular +expression matches a string, values encode the information of +\emph{how} the string is matched by the regular expression---that is, +which part of the string is matched by which part of the regular +expression. For this consider again the string \xy\ and +the regular expression \mbox{\(x + (y + xy))\<^sup>\\} +(this time fully parenthesised). We can view this regular expression +as tree and if the string \xy\ is matched by two Star +`iterations', then the \x\ is matched by the left-most +alternative in this tree and the \y\ by the right-left alternative. This +suggests to record this matching as + +\begin{center} +@{term "Stars [Left(Char x), Right(Left(Char y))]"} +\end{center} + +\noindent where @{const Stars}, \Left\, \Right\ and \Char\ are constructors for values. \Stars\ records how many +iterations were used; \Left\, respectively \Right\, which +alternative is used. This `tree view' leads naturally to the idea that +regular expressions act as types and values as inhabiting those types +(see, for example, \cite{HosoyaVouillonPierce2005}). The value for +matching \xy\ in a single `iteration', i.e.~the POSIX value, +would look as follows + +\begin{center} +@{term "Stars [Seq (Char x) (Char y)]"} +\end{center} + +\noindent where @{const Stars} has only a single-element list for the +single iteration and @{const Seq} indicates that @{term xy} is matched +by a sequence regular expression. + +%, which we will in what follows +%write more formally as @{term "SEQ x y"}. + + +Sulzmann and Lu give a simple algorithm to calculate a value that +appears to be the value associated with POSIX matching. The challenge +then is to specify that value, in an algorithm-independent fashion, +and to show that Sulzmann and Lu's derivative-based algorithm does +indeed calculate a value that is correct according to the +specification. The answer given by Sulzmann and Lu +\cite{Sulzmann2014} is to define a relation (called an ``order +relation'') on the set of values of @{term r}, and to show that (once +a string to be matched is chosen) there is a maximum element and that +it is computed by their derivative-based algorithm. This proof idea is +inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY +regular expression matching algorithm. However, we were not able to +establish transitivity and totality for the ``order relation'' by +Sulzmann and Lu. There are some inherent problems with their approach +(of which some of the proofs are not published in +\cite{Sulzmann2014}); perhaps more importantly, we give in this paper +a simple inductive (and algorithm-independent) definition of what we +call being a {\em POSIX value} for a regular expression @{term r} and +a string @{term s}; we show that the algorithm by Sulzmann and Lu +computes such a value and that such a value is unique. Our proofs are +both done by hand and checked in Isabelle/HOL. The experience of +doing our proofs has been that this mechanical checking was absolutely +essential: this subject area has hidden snares. This was also noted by +Kuklewicz \cite{Kuklewicz} who found that nearly all POSIX matching +implementations are ``buggy'' \cite[Page 203]{Sulzmann2014} and by +Grathwohl et al \cite[Page 36]{CrashCourse2014} who wrote: + +\begin{quote} +\it{}``The POSIX strategy is more complicated than the greedy because of +the dependence on information about the length of matched strings in the +various subexpressions.'' +\end{quote} + + + +\noindent {\bf Contributions:} We have implemented in Isabelle/HOL the +derivative-based regular expression matching algorithm of +Sulzmann and Lu \cite{Sulzmann2014}. We have proved the correctness of this +algorithm according to our specification of what a POSIX value is (inspired +by work of Vansummeren \cite{Vansummeren2006}). Sulzmann +and Lu sketch in \cite{Sulzmann2014} an informal correctness proof: but to +us it contains unfillable gaps.\footnote{An extended version of +\cite{Sulzmann2014} is available at the website of its first author; this +extended version already includes remarks in the appendix that their +informal proof contains gaps, and possible fixes are not fully worked out.} +Our specification of a POSIX value consists of a simple inductive definition +that given a string and a regular expression uniquely determines this value. +We also show that our definition is equivalent to an ordering +of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2010}. + +%Derivatives as calculated by Brzozowski's method are usually more complex +%regular expressions than the initial one; various optimisations are +%possible. We prove the correctness when simplifications of @{term "ALT ZERO r"}, +%@{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to +%@{term r} are applied. + +We extend our results to ??? Bitcoded version?? + +\ + + + + +section \Preliminaries\ + +text \\noindent Strings in Isabelle/HOL are lists of characters with +the empty string being represented by the empty list, written @{term +"[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}. Often +we use the usual bracket notation for lists also for strings; for +example a string consisting of just a single character @{term c} is +written @{term "[c]"}. We use the usual definitions for +\emph{prefixes} and \emph{strict prefixes} of strings. By using the +type @{type char} for characters we have a supply of finitely many +characters roughly corresponding to the ASCII character set. Regular +expressions are defined as usual as the elements of the following +inductive datatype: + + \begin{center} + \r :=\ + @{const "ZERO"} $\mid$ + @{const "ONE"} $\mid$ + @{term "CH c"} $\mid$ + @{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$ + @{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$ + @{term "STAR r"} + \end{center} + + \noindent where @{const ZERO} stands for the regular expression that does + not match any string, @{const ONE} for the regular expression that matches + only the empty string and @{term c} for matching a character literal. The + language of a regular expression is also defined as usual by the + recursive function @{term L} with the six clauses: + + \begin{center} + \begin{tabular}{l@ {\hspace{4mm}}rcl} + \textit{(1)} & @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ + \textit{(2)} & @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ + \textit{(3)} & @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ + \textit{(4)} & @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & + @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + \textit{(5)} & @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & + @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + \textit{(6)} & @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ + \end{tabular} + \end{center} + + \noindent In clause \textit{(4)} we use the operation @{term "DUMMY ;; + DUMMY"} for the concatenation of two languages (it is also list-append for + strings). We use the star-notation for regular expressions and for + languages (in the last clause above). The star for languages is defined + inductively by two clauses: \(i)\ the empty string being in + the star of a language and \(ii)\ if @{term "s\<^sub>1"} is in a + language and @{term "s\<^sub>2"} in the star of this language, then also @{term + "s\<^sub>1 @ s\<^sub>2"} is in the star of this language. It will also be convenient + to use the following notion of a \emph{semantic derivative} (or \emph{left + quotient}) of a language defined as + % + \begin{center} + @{thm Der_def}\;. + \end{center} + + \noindent + For semantic derivatives we have the following equations (for example + mechanically proved in \cite{Krauss2011}): + % + \begin{equation}\label{SemDer} + \begin{array}{lcl} + @{thm (lhs) Der_null} & \dn & @{thm (rhs) Der_null}\\ + @{thm (lhs) Der_empty} & \dn & @{thm (rhs) Der_empty}\\ + @{thm (lhs) Der_char} & \dn & @{thm (rhs) Der_char}\\ + @{thm (lhs) Der_union} & \dn & @{thm (rhs) Der_union}\\ + @{thm (lhs) Der_Sequ} & \dn & @{thm (rhs) Der_Sequ}\\ + @{thm (lhs) Der_star} & \dn & @{thm (rhs) Der_star} + \end{array} + \end{equation} + + + \noindent \emph{\Brz's derivatives} of regular expressions + \cite{Brzozowski1964} can be easily defined by two recursive functions: + the first is from regular expressions to booleans (implementing a test + when a regular expression can match the empty string), and the second + takes a regular expression and a character to a (derivative) regular + expression: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ + @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ + @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ + @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ + +% \end{tabular} +% \end{center} + +% \begin{center} +% \begin{tabular}{lcl} + + @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ + @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ + @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ + @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} + \end{tabular} + \end{center} + + \noindent + We may extend this definition to give derivatives w.r.t.~strings: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\ + @{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\ + \end{tabular} + \end{center} + + \noindent Given the equations in \eqref{SemDer}, it is a relatively easy + exercise in mechanical reasoning to establish that + + \begin{proposition}\label{derprop}\mbox{}\\ + \begin{tabular}{ll} + \textit{(1)} & @{thm (lhs) nullable_correctness} if and only if + @{thm (rhs) nullable_correctness}, and \\ + \textit{(2)} & @{thm[mode=IfThen] der_correctness}. + \end{tabular} + \end{proposition} + + \noindent With this in place it is also very routine to prove that the + regular expression matcher defined as + % + \begin{center} + @{thm match_def} + \end{center} + + \noindent gives a positive answer if and only if @{term "s \ L r"}. + Consequently, this regular expression matching algorithm satisfies the + usual specification for regular expression matching. While the matcher + above calculates a provably correct YES/NO answer for whether a regular + expression matches a string or not, the novel idea of Sulzmann and Lu + \cite{Sulzmann2014} is to append another phase to this algorithm in order + to calculate a [lexical] value. We will explain the details next. + +\ + +section \POSIX Regular Expression Matching\label{posixsec}\ + +text \ + + There have been many previous works that use values for encoding + \emph{how} a regular expression matches a string. + The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to + define a function on values that mirrors (but inverts) the + construction of the derivative on regular expressions. \emph{Values} + are defined as the inductive datatype + + \begin{center} + \v :=\ + @{const "Void"} $\mid$ + @{term "val.Char c"} $\mid$ + @{term "Left v"} $\mid$ + @{term "Right v"} $\mid$ + @{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ + @{term "Stars vs"} + \end{center} + + \noindent where we use @{term vs} to stand for a list of + values. (This is similar to the approach taken by Frisch and + Cardelli for GREEDY matching \cite{Frisch2004}, and Sulzmann and Lu + for POSIX matching \cite{Sulzmann2014}). The string underlying a + value can be calculated by the @{const flat} function, written + @{term "flat DUMMY"} and defined as: + + \begin{center} + \begin{tabular}[t]{lcl} + @{thm (lhs) flat.simps(1)} & $\dn$ & @{thm (rhs) flat.simps(1)}\\ + @{thm (lhs) flat.simps(2)} & $\dn$ & @{thm (rhs) flat.simps(2)}\\ + @{thm (lhs) flat.simps(3)} & $\dn$ & @{thm (rhs) flat.simps(3)}\\ + @{thm (lhs) flat.simps(4)} & $\dn$ & @{thm (rhs) flat.simps(4)} + \end{tabular}\hspace{14mm} + \begin{tabular}[t]{lcl} + @{thm (lhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) flat.simps(6)} & $\dn$ & @{thm (rhs) flat.simps(6)}\\ + @{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\ + \end{tabular} + \end{center} + + \noindent We will sometimes refer to the underlying string of a + value as \emph{flattened value}. We will also overload our notation and + use @{term "flats vs"} for flattening a list of values and concatenating + the resulting strings. + + Sulzmann and Lu define + inductively an \emph{inhabitation relation} that associates values to + regular expressions. We define this relation as + follows:\footnote{Note that the rule for @{term Stars} differs from + our earlier paper \cite{AusafDyckhoffUrban2016}. There we used the + original definition by Sulzmann and Lu which does not require that + the values @{term "v \ set vs"} flatten to a non-empty + string. The reason for introducing the more restricted version of + lexical values is convenience later on when reasoning about an + ordering relation for values.} + + \begin{center} + \begin{tabular}{c@ {\hspace{12mm}}c}\label{prfintros} + \\[-8mm] + @{thm[mode=Axiom] Prf.intros(4)} & + @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm] + @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} & + @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm] + @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} & + @{thm[mode=Rule] Prf.intros(6)[of "vs"]} + \end{tabular} + \end{center} + + \noindent where in the clause for @{const "Stars"} we use the + notation @{term "v \ set vs"} for indicating that \v\ is a + member in the list \vs\. We require in this rule that every + value in @{term vs} flattens to a non-empty string. The idea is that + @{term "Stars"}-values satisfy the informal Star Rule (see Introduction) + where the $^\star$ does not match the empty string unless this is + the only match for the repetition. Note also that no values are + associated with the regular expression @{term ZERO}, and that the + only value associated with the regular expression @{term ONE} is + @{term Void}. It is routine to establish how values ``inhabiting'' + a regular expression correspond to the language of a regular + expression, namely + + \begin{proposition}\label{inhabs} + @{thm L_flat_Prf} + \end{proposition} + + \noindent + Given a regular expression \r\ and a string \s\, we define the + set of all \emph{Lexical Values} inhabited by \r\ with the underlying string + being \s\:\footnote{Okui and Suzuki refer to our lexical values + as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic + values} by Cardelli and Frisch \cite{Frisch2004} is related, but not identical + to our lexical values.} + + \begin{center} + @{thm LV_def} + \end{center} + + \noindent The main property of @{term "LV r s"} is that it is alway finite. + + \begin{proposition} + @{thm LV_finite} + \end{proposition} + + \noindent This finiteness property does not hold in general if we + remove the side-condition about @{term "flat v \ []"} in the + @{term Stars}-rule above. For example using Sulzmann and Lu's + less restrictive definition, @{term "LV (STAR ONE) []"} would contain + infinitely many values, but according to our more restricted + definition only a single value, namely @{thm LV_STAR_ONE_empty}. + + If a regular expression \r\ matches a string \s\, then + generally the set @{term "LV r s"} is not just a singleton set. In + case of POSIX matching the problem is to calculate the unique lexical value + that satisfies the (informal) POSIX rules from the Introduction. + Graphically the POSIX value calculation algorithm by Sulzmann and Lu + can be illustrated by the picture in Figure~\ref{Sulz} where the + path from the left to the right involving @{term + derivatives}/@{const nullable} is the first phase of the algorithm + (calculating successive \Brz's derivatives) and @{const + mkeps}/\inj\, the path from right to left, the second + phase. This picture shows the steps required when a regular + expression, say \r\<^sub>1\, matches the string @{term + "[a,b,c]"}. We first build the three derivatives (according to + @{term a}, @{term b} and @{term c}). We then use @{const nullable} + to find out whether the resulting derivative regular expression + @{term "r\<^sub>4"} can match the empty string. If yes, we call the + function @{const mkeps} that produces a value @{term "v\<^sub>4"} + for how @{term "r\<^sub>4"} can match the empty string (taking into + account the POSIX constraints in case there are several ways). This + function is defined by the clauses: + +\begin{figure}[t] +\begin{center} +\begin{tikzpicture}[scale=2,node distance=1.3cm, + every node/.style={minimum size=6mm}] +\node (r1) {@{term "r\<^sub>1"}}; +\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; +\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; +\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; +\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; +\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; +\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; +\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; +\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; +\draw[->,line width=1mm](r4) -- (v4); +\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; +\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\inj r\<^sub>3 c\}; +\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; +\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\inj r\<^sub>2 b\}; +\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; +\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\inj r\<^sub>1 a\}; +\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; +\end{tikzpicture} +\end{center} +\mbox{}\\[-13mm] + +\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, +matching the string @{term "[a,b,c]"}. The first phase (the arrows from +left to right) is \Brz's matcher building successive derivatives. If the +last regular expression is @{term nullable}, then the functions of the +second phase are called (the top-down and right-to-left arrows): first +@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing +how the empty string has been recognised by @{term "r\<^sub>4"}. After +that the function @{term inj} ``injects back'' the characters of the string into +the values. +\label{Sulz}} +\end{figure} + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ + @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ + \end{tabular} + \end{center} + + \noindent Note that this function needs only to be partially defined, + namely only for regular expressions that are nullable. In case @{const + nullable} fails, the string @{term "[a,b,c]"} cannot be matched by @{term + "r\<^sub>1"} and the null value @{term "None"} is returned. Note also how this function + makes some subtle choices leading to a POSIX value: for example if an + alternative regular expression, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can + match the empty string and furthermore @{term "r\<^sub>1"} can match the + empty string, then we return a \Left\-value. The \Right\-value will only be returned if @{term "r\<^sub>1"} cannot match the empty + string. + + The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is + the construction of a value for how @{term "r\<^sub>1"} can match the + string @{term "[a,b,c]"} from the value how the last derivative, @{term + "r\<^sub>4"} in Fig.~\ref{Sulz}, can match the empty string. Sulzmann and + Lu achieve this by stepwise ``injecting back'' the characters into the + values thus inverting the operation of building derivatives, but on the level + of values. The corresponding function, called @{term inj}, takes three + arguments, a regular expression, a character and a value. For example in + the first (or right-most) @{term inj}-step in Fig.~\ref{Sulz} the regular + expression @{term "r\<^sub>3"}, the character @{term c} from the last + derivative step and @{term "v\<^sub>4"}, which is the value corresponding + to the derivative regular expression @{term "r\<^sub>4"}. The result is + the new value @{term "v\<^sub>3"}. The final result of the algorithm is + the value @{term "v\<^sub>1"}. The @{term inj} function is defined by recursion on regular + expressions and by analysing the shape of values (corresponding to + the derivative regular expressions). + % + \begin{center} + \begin{tabular}{l@ {\hspace{5mm}}lcl} + \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ + \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & + @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ + \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & + @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ + & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ + \end{tabular} + \end{center} + + \noindent To better understand what is going on in this definition it + might be instructive to look first at the three sequence cases (clauses + \textit{(4)} -- \textit{(6)}). In each case we need to construct an ``injected value'' for + @{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term + "Seq DUMMY DUMMY"}\,. Recall the clause of the \derivative\-function + for sequence regular expressions: + + \begin{center} + @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} $\dn$ @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} + \end{center} + + \noindent Consider first the \else\-branch where the derivative is @{term + "SEQ (der c r\<^sub>1) r\<^sub>2"}. The corresponding value must therefore + be of the form @{term "Seq v\<^sub>1 v\<^sub>2"}, which matches the left-hand + side in clause~\textit{(4)} of @{term inj}. In the \if\-branch the derivative is an + alternative, namely @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c + r\<^sub>2)"}. This means we either have to consider a \Left\- or + \Right\-value. In case of the \Left\-value we know further it + must be a value for a sequence regular expression. Therefore the pattern + we match in the clause \textit{(5)} is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, + while in \textit{(6)} it is just @{term "Right v\<^sub>2"}. One more interesting + point is in the right-hand side of clause \textit{(6)}: since in this case the + regular expression \r\<^sub>1\ does not ``contribute'' to + matching the string, that means it only matches the empty string, we need to + call @{const mkeps} in order to construct a value for how @{term "r\<^sub>1"} + can match this empty string. A similar argument applies for why we can + expect in the left-hand side of clause \textit{(7)} that the value is of the form + @{term "Seq v (Stars vs)"}---the derivative of a star is @{term "SEQ (der c r) + (STAR r)"}. Finally, the reason for why we can ignore the second argument + in clause \textit{(1)} of @{term inj} is that it will only ever be called in cases + where @{term "c=d"}, but the usual linearity restrictions in patterns do + not allow us to build this constraint explicitly into our function + definition.\footnote{Sulzmann and Lu state this clause as @{thm (lhs) + injval.simps(1)[of "c" "c"]} $\dn$ @{thm (rhs) injval.simps(1)[of "c"]}, + but our deviation is harmless.} + + The idea of the @{term inj}-function to ``inject'' a character, say + @{term c}, into a value can be made precise by the first part of the + following lemma, which shows that the underlying string of an injected + value has a prepended character @{term c}; the second part shows that + the underlying string of an @{const mkeps}-value is always the empty + string (given the regular expression is nullable since otherwise + \mkeps\ might not be defined). + + \begin{lemma}\mbox{}\smallskip\\\label{Prf_injval_flat} + \begin{tabular}{ll} + (1) & @{thm[mode=IfThen] Prf_injval_flat}\\ + (2) & @{thm[mode=IfThen] mkeps_flat} + \end{tabular} + \end{lemma} + + \begin{proof} + Both properties are by routine inductions: the first one can, for example, + be proved by induction over the definition of @{term derivatives}; the second by + an induction on @{term r}. There are no interesting cases.\qed + \end{proof} + + Having defined the @{const mkeps} and \inj\ function we can extend + \Brz's matcher so that a value is constructed (assuming the + regular expression matches the string). The clauses of the Sulzmann and Lu lexer are + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) lexer.simps(1)} & $\dn$ & @{thm (rhs) lexer.simps(1)}\\ + @{thm (lhs) lexer.simps(2)} & $\dn$ & \case\ @{term "lexer (der c r) s"} \of\\\ + & & \phantom{$|$} @{term "None"} \\\ @{term None}\\ + & & $|$ @{term "Some v"} \\\ @{term "Some (injval r c v)"} + \end{tabular} + \end{center} + + \noindent If the regular expression does not match the string, @{const None} is + returned. If the regular expression \emph{does} + match the string, then @{const Some} value is returned. One important + virtue of this algorithm is that it can be implemented with ease in any + functional programming language and also in Isabelle/HOL. In the remaining + part of this section we prove that this algorithm is correct. + + The well-known idea of POSIX matching is informally defined by some + rules such as the Longest Match and Priority Rules (see + Introduction); as correctly argued in \cite{Sulzmann2014}, this + needs formal specification. Sulzmann and Lu define an ``ordering + relation'' between values and argue that there is a maximum value, + as given by the derivative-based algorithm. In contrast, we shall + introduce a simple inductive definition that specifies directly what + a \emph{POSIX value} is, incorporating the POSIX-specific choices + into the side-conditions of our rules. Our definition is inspired by + the matching relation given by Vansummeren~\cite{Vansummeren2006}. + The relation we define is ternary and + written as \mbox{@{term "s \ r \ v"}}, relating + strings, regular expressions and values; the inductive rules are given in + Figure~\ref{POSIXrules}. + We can prove that given a string @{term s} and regular expression @{term + r}, the POSIX value @{term v} is uniquely determined by @{term "s \ r \ v"}. + + % + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + @{thm[mode=Axiom] Posix.intros(1)}\P\@{term "ONE"} \qquad + @{thm[mode=Axiom] Posix.intros(2)}\P\@{term "c"}\medskip\\ + @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\P+L\\qquad + @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\P+R\\medskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad + @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ + @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} + {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\PS\\\ + @{thm[mode=Axiom] Posix.intros(7)}\P[]\\medskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ + @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} + {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\P\\ + \end{tabular} + \end{center} + \caption{Our inductive definition of POSIX values.}\label{POSIXrules} + \end{figure} + + + + \begin{theorem}\mbox{}\smallskip\\\label{posixdeterm} + \begin{tabular}{ll} + (1) & If @{thm (prem 1) Posix1(1)} then @{thm (concl) + Posix1(1)} and @{thm (concl) Posix1(2)}.\\ + (2) & @{thm[mode=IfThen] Posix_determ(1)[of _ _ "v" "v'"]} + \end{tabular} + \end{theorem} + + \begin{proof} Both by induction on the definition of @{term "s \ r \ v"}. + The second parts follows by a case analysis of @{term "s \ r \ v'"} and + the first part.\qed + \end{proof} + + \noindent + We claim that our @{term "s \ r \ v"} relation captures the idea behind the four + informal POSIX rules shown in the Introduction: Consider for example the + rules \P+L\ and \P+R\ where the POSIX value for a string + and an alternative regular expression, that is @{term "(s, ALT r\<^sub>1 r\<^sub>2)"}, + is specified---it is always a \Left\-value, \emph{except} when the + string to be matched is not in the language of @{term "r\<^sub>1"}; only then it + is a \Right\-value (see the side-condition in \P+R\). + Interesting is also the rule for sequence regular expressions (\PS\). The first two premises state that @{term "v\<^sub>1"} and @{term "v\<^sub>2"} + are the POSIX values for @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} + respectively. Consider now the third premise and note that the POSIX value + of this rule should match the string \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}}. According to the + Longest Match Rule, we want that the @{term "s\<^sub>1"} is the longest initial + split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} such that @{term "s\<^sub>2"} is still recognised + by @{term "r\<^sub>2"}. Let us assume, contrary to the third premise, that there + \emph{exist} an @{term "s\<^sub>3"} and @{term "s\<^sub>4"} such that @{term "s\<^sub>2"} + can be split up into a non-empty string @{term "s\<^sub>3"} and a possibly empty + string @{term "s\<^sub>4"}. Moreover the longer string @{term "s\<^sub>1 @ s\<^sub>3"} can be + matched by \r\<^sub>1\ and the shorter @{term "s\<^sub>4"} can still be + matched by @{term "r\<^sub>2"}. In this case @{term "s\<^sub>1"} would \emph{not} be the + longest initial split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} and therefore @{term "Seq v\<^sub>1 + v\<^sub>2"} cannot be a POSIX value for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}. + The main point is that our side-condition ensures the Longest + Match Rule is satisfied. + + A similar condition is imposed on the POSIX value in the \P\\-rule. Also there we want that @{term "s\<^sub>1"} is the longest initial + split of @{term "s\<^sub>1 @ s\<^sub>2"} and furthermore the corresponding value + @{term v} cannot be flattened to the empty string. In effect, we require + that in each ``iteration'' of the star, some non-empty substring needs to + be ``chipped'' away; only in case of the empty string we accept @{term + "Stars []"} as the POSIX value. Indeed we can show that our POSIX values + are lexical values which exclude those \Stars\ that contain subvalues + that flatten to the empty string. + + \begin{lemma}\label{LVposix} + @{thm [mode=IfThen] Posix_LV} + \end{lemma} + + \begin{proof} + By routine induction on @{thm (prem 1) Posix_LV}.\qed + \end{proof} + + \noindent + Next is the lemma that shows the function @{term "mkeps"} calculates + the POSIX value for the empty string and a nullable regular expression. + + \begin{lemma}\label{lemmkeps} + @{thm[mode=IfThen] Posix_mkeps} + \end{lemma} + + \begin{proof} + By routine induction on @{term r}.\qed + \end{proof} + + \noindent + The central lemma for our POSIX relation is that the \inj\-function + preserves POSIX values. + + \begin{lemma}\label{Posix2} + @{thm[mode=IfThen] Posix_injval} + \end{lemma} + + \begin{proof} + By induction on \r\. We explain two cases. + + \begin{itemize} + \item[$\bullet$] Case @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are + two subcases, namely \(a)\ \mbox{@{term "v = Left v'"}} and @{term + "s \ der c r\<^sub>1 \ v'"}; and \(b)\ @{term "v = Right v'"}, @{term + "s \ L (der c r\<^sub>1)"} and @{term "s \ der c r\<^sub>2 \ v'"}. In \(a)\ we + know @{term "s \ der c r\<^sub>1 \ v'"}, from which we can infer @{term "(c # s) + \ r\<^sub>1 \ injval r\<^sub>1 c v'"} by induction hypothesis and hence @{term "(c # + s) \ ALT r\<^sub>1 r\<^sub>2 \ injval (ALT r\<^sub>1 r\<^sub>2) c (Left v')"} as needed. Similarly + in subcase \(b)\ where, however, in addition we have to use + Proposition~\ref{derprop}(2) in order to infer @{term "c # s \ L r\<^sub>1"} from @{term + "s \ L (der c r\<^sub>1)"}.\smallskip + + \item[$\bullet$] Case @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases: + + \begin{quote} + \begin{description} + \item[\(a)\] @{term "v = Left (Seq v\<^sub>1 v\<^sub>2)"} and @{term "nullable r\<^sub>1"} + \item[\(b)\] @{term "v = Right v\<^sub>1"} and @{term "nullable r\<^sub>1"} + \item[\(c)\] @{term "v = Seq v\<^sub>1 v\<^sub>2"} and @{term "\ nullable r\<^sub>1"} + \end{description} + \end{quote} + + \noindent For \(a)\ we know @{term "s\<^sub>1 \ der c r\<^sub>1 \ v\<^sub>1"} and + @{term "s\<^sub>2 \ r\<^sub>2 \ v\<^sub>2"} as well as + % + \[@{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \ s\<^sub>1 @ s\<^sub>3 \ L (der c r\<^sub>1) \ s\<^sub>4 \ L r\<^sub>2)"}\] + + \noindent From the latter we can infer by Proposition~\ref{derprop}(2): + % + \[@{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \ (c # s\<^sub>1) @ s\<^sub>3 \ L r\<^sub>1 \ s\<^sub>4 \ L r\<^sub>2)"}\] + + \noindent We can use the induction hypothesis for \r\<^sub>1\ to obtain + @{term "(c # s\<^sub>1) \ r\<^sub>1 \ injval r\<^sub>1 c v\<^sub>1"}. Putting this all together allows us to infer + @{term "((c # s\<^sub>1) @ s\<^sub>2) \ SEQ r\<^sub>1 r\<^sub>2 \ Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"}. The case \(c)\ + is similar. + + For \(b)\ we know @{term "s \ der c r\<^sub>2 \ v\<^sub>1"} and + @{term "s\<^sub>1 @ s\<^sub>2 \ L (SEQ (der c r\<^sub>1) r\<^sub>2)"}. From the former + we have @{term "(c # s) \ r\<^sub>2 \ (injval r\<^sub>2 c v\<^sub>1)"} by induction hypothesis + for @{term "r\<^sub>2"}. From the latter we can infer + % + \[@{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = c # s \ s\<^sub>3 \ L r\<^sub>1 \ s\<^sub>4 \ L r\<^sub>2)"}\] + + \noindent By Lemma~\ref{lemmkeps} we know @{term "[] \ r\<^sub>1 \ (mkeps r\<^sub>1)"} + holds. Putting this all together, we can conclude with @{term "(c # + s) \ SEQ r\<^sub>1 r\<^sub>2 \ Seq (mkeps r\<^sub>1) (injval r\<^sub>2 c v\<^sub>1)"}, as required. + + Finally suppose @{term "r = STAR r\<^sub>1"}. This case is very similar to the + sequence case, except that we need to also ensure that @{term "flat (injval r\<^sub>1 + c v\<^sub>1) \ []"}. This follows from @{term "(c # s\<^sub>1) + \ r\<^sub>1 \ injval r\<^sub>1 c v\<^sub>1"} (which in turn follows from @{term "s\<^sub>1 \ der c + r\<^sub>1 \ v\<^sub>1"} and the induction hypothesis).\qed + \end{itemize} + \end{proof} + + \noindent + With Lemma~\ref{Posix2} in place, it is completely routine to establish + that the Sulzmann and Lu lexer satisfies our specification (returning + the null value @{term "None"} iff the string is not in the language of the regular expression, + and returning a unique POSIX value iff the string \emph{is} in the language): + + \begin{theorem}\mbox{}\smallskip\\\label{lexercorrect} + \begin{tabular}{ll} + (1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\ + (2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\ + \end{tabular} + \end{theorem} + + \begin{proof} + By induction on @{term s} using Lemma~\ref{lemmkeps} and \ref{Posix2}.\qed + \end{proof} + + \noindent In \textit{(2)} we further know by Theorem~\ref{posixdeterm} that the + value returned by the lexer must be unique. A simple corollary + of our two theorems is: + + \begin{corollary}\mbox{}\smallskip\\\label{lexercorrectcor} + \begin{tabular}{ll} + (1) & @{thm (lhs) lexer_correctness(2)} if and only if @{thm (rhs) lexer_correctness(2)}\\ + (2) & @{thm (lhs) lexer_correctness(1)} if and only if @{thm (rhs) lexer_correctness(1)}\\ + \end{tabular} + \end{corollary} + + \noindent This concludes our correctness proof. Note that we have + not changed the algorithm of Sulzmann and Lu,\footnote{All + deviations we introduced are harmless.} but introduced our own + specification for what a correct result---a POSIX value---should be. + In the next section we show that our specification coincides with + another one given by Okui and Suzuki using a different technique. + +\ + +section \Ordering of Values according to Okui and Suzuki\ + +text \ + + While in the previous section we have defined POSIX values directly + in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}), + Sulzmann and Lu took a different approach in \cite{Sulzmann2014}: + they introduced an ordering for values and identified POSIX values + as the maximal elements. An extended version of \cite{Sulzmann2014} + is available at the website of its first author; this includes more + details of their proofs, but which are evidently not in final form + yet. Unfortunately, we were not able to verify claims that their + ordering has properties such as being transitive or having maximal + elements. + + Okui and Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} described + another ordering of values, which they use to establish the + correctness of their automata-based algorithm for POSIX matching. + Their ordering resembles some aspects of the one given by Sulzmann + and Lu, but overall is quite different. To begin with, Okui and + Suzuki identify POSIX values as minimal, rather than maximal, + elements in their ordering. A more substantial difference is that + the ordering by Okui and Suzuki uses \emph{positions} in order to + identify and compare subvalues. Positions are lists of natural + numbers. This allows them to quite naturally formalise the Longest + Match and Priority rules of the informal POSIX standard. Consider + for example the value @{term v} + + \begin{center} + @{term "v == Stars [Seq (Char x) (Char y), Char z]"} + \end{center} + + \noindent + At position \[0,1]\ of this value is the + subvalue \Char y\ and at position \[1]\ the + subvalue @{term "Char z"}. At the `root' position, or empty list + @{term "[]"}, is the whole value @{term v}. Positions such as \[0,1,0]\ or \[2]\ are outside of \v\. If it exists, the subvalue of @{term v} at a position \p\, written @{term "at v p"}, can be recursively defined by + + \begin{center} + \begin{tabular}{r@ {\hspace{0mm}}lcl} + @{term v} & \\\<^bsub>[]\<^esub>\ & \\\& @{thm (rhs) at.simps(1)}\\ + @{term "Left v"} & \\\<^bsub>0::ps\<^esub>\ & \\\& @{thm (rhs) at.simps(2)}\\ + @{term "Right v"} & \\\<^bsub>1::ps\<^esub>\ & \\\ & + @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & \\\<^bsub>0::ps\<^esub>\ & \\\ & + @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & \\\<^bsub>1::ps\<^esub>\ + & \\\ & + @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\ + @{term "Stars vs"} & \\\<^bsub>n::ps\<^esub>\ & \\\& @{thm (rhs) at.simps(6)}\\ + \end{tabular} + \end{center} + + \noindent In the last clause we use Isabelle's notation @{term "vs ! n"} for the + \n\th element in a list. The set of positions inside a value \v\, + written @{term "Pos v"}, is given by + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) Pos.simps(1)} & \\\ & @{thm (rhs) Pos.simps(1)}\\ + @{thm (lhs) Pos.simps(2)} & \\\ & @{thm (rhs) Pos.simps(2)}\\ + @{thm (lhs) Pos.simps(3)} & \\\ & @{thm (rhs) Pos.simps(3)}\\ + @{thm (lhs) Pos.simps(4)} & \\\ & @{thm (rhs) Pos.simps(4)}\\ + @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + & \\\ + & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + @{thm (lhs) Pos_stars} & \\\ & @{thm (rhs) Pos_stars}\\ + \end{tabular} + \end{center} + + \noindent + whereby \len\ in the last clause stands for the length of a list. Clearly + for every position inside a value there exists a subvalue at that position. + + + To help understanding the ordering of Okui and Suzuki, consider again + the earlier value + \v\ and compare it with the following \w\: + + \begin{center} + \begin{tabular}{l} + @{term "v == Stars [Seq (Char x) (Char y), Char z]"}\\ + @{term "w == Stars [Char x, Char y, Char z]"} + \end{tabular} + \end{center} + + \noindent Both values match the string \xyz\, that means if + we flatten these values at their respective root position, we obtain + \xyz\. However, at position \[0]\, \v\ matches + \xy\ whereas \w\ matches only the shorter \x\. So + according to the Longest Match Rule, we should prefer \v\, + rather than \w\ as POSIX value for string \xyz\ (and + corresponding regular expression). In order to + formalise this idea, Okui and Suzuki introduce a measure for + subvalues at position \p\, called the \emph{norm} of \v\ + at position \p\. We can define this measure in Isabelle as an + integer as follows + + \begin{center} + @{thm pflat_len_def} + \end{center} + + \noindent where we take the length of the flattened value at + position \p\, provided the position is inside \v\; if + not, then the norm is \-1\. The default for outside + positions is crucial for the POSIX requirement of preferring a + \Left\-value over a \Right\-value (if they can match the + same string---see the Priority Rule from the Introduction). For this + consider + + \begin{center} + @{term "v == Left (Char x)"} \qquad and \qquad @{term "w == Right (Char x)"} + \end{center} + + \noindent Both values match \x\. At position \[0]\ + the norm of @{term v} is \1\ (the subvalue matches \x\), + but the norm of \w\ is \-1\ (the position is outside + \w\ according to how we defined the `inside' positions of + \Left\- and \Right\-values). Of course at position + \[1]\, the norms @{term "pflat_len v [1]"} and @{term + "pflat_len w [1]"} are reversed, but the point is that subvalues + will be analysed according to lexicographically ordered + positions. According to this ordering, the position \[0]\ + takes precedence over \[1]\ and thus also \v\ will be + preferred over \w\. The lexicographic ordering of positions, written + @{term "DUMMY \lex DUMMY"}, can be conveniently formalised + by three inference rules + + \begin{center} + \begin{tabular}{ccc} + @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and + ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]} + \end{tabular} + \end{center} + + With the norm and lexicographic order in place, + we can state the key definition of Okui and Suzuki + \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller at position \p\} than + @{term "v\<^sub>2"}, written @{term "v\<^sub>1 \val p v\<^sub>2"}, + if and only if $(i)$ the norm at position \p\ is + greater in @{term "v\<^sub>1"} (that is the string @{term "flat (at v\<^sub>1 p)"} is longer + than @{term "flat (at v\<^sub>2 p)"}) and $(ii)$ all subvalues at + positions that are inside @{term "v\<^sub>1"} or @{term "v\<^sub>2"} and that are + lexicographically smaller than \p\, we have the same norm, namely + + \begin{center} + \begin{tabular}{c} + @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + \\\ + $\begin{cases} + (i) & @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} \quad\text{and}\smallskip \\ + (ii) & @{term "(\q \ Pos v\<^sub>1 \ Pos v\<^sub>2. q \lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"} + \end{cases}$ + \end{tabular} + \end{center} + + \noindent The position \p\ in this definition acts as the + \emph{first distinct position} of \v\<^sub>1\ and \v\<^sub>2\, where both values match strings of different length + \cite{OkuiSuzuki2010}. Since at \p\ the values \v\<^sub>1\ and \v\<^sub>2\ match different strings, the + ordering is irreflexive. Derived from the definition above + are the following two orderings: + + \begin{center} + \begin{tabular}{l} + @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + \end{tabular} + \end{center} + + While we encountered a number of obstacles for establishing properties like + transitivity for the ordering of Sulzmann and Lu (and which we failed + to overcome), it is relatively straightforward to establish this + property for the orderings + @{term "DUMMY :\val DUMMY"} and @{term "DUMMY :\val DUMMY"} + by Okui and Suzuki. + + \begin{lemma}[Transitivity]\label{transitivity} + @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} + \end{lemma} + + \begin{proof} From the assumption we obtain two positions \p\ + and \q\, where the values \v\<^sub>1\ and \v\<^sub>2\ (respectively \v\<^sub>2\ and \v\<^sub>3\) are `distinct'. Since \\\<^bsub>lex\<^esub>\ is trichotomous, we need to consider + three cases, namely @{term "p = q"}, @{term "p \lex q"} and + @{term "q \lex p"}. Let us look at the first case. Clearly + @{term "pflat_len v\<^sub>2 p < pflat_len v\<^sub>1 p"} and @{term + "pflat_len v\<^sub>3 p < pflat_len v\<^sub>2 p"} imply @{term + "pflat_len v\<^sub>3 p < pflat_len v\<^sub>1 p"}. It remains to show + that for a @{term "p' \ Pos v\<^sub>1 \ Pos v\<^sub>3"} + with @{term "p' \lex p"} that @{term "pflat_len v\<^sub>1 + p' = pflat_len v\<^sub>3 p'"} holds. Suppose @{term "p' \ Pos + v\<^sub>1"}, then we can infer from the first assumption that @{term + "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>2 p'"}. But this means + that @{term "p'"} must be in @{term "Pos v\<^sub>2"} too (the norm + cannot be \-1\ given @{term "p' \ Pos v\<^sub>1"}). + Hence we can use the second assumption and + infer @{term "pflat_len v\<^sub>2 p' = pflat_len v\<^sub>3 p'"}, + which concludes this case with @{term "v\<^sub>1 :\val + v\<^sub>3"}. The reasoning in the other cases is similar.\qed + \end{proof} + + \noindent + The proof for $\preccurlyeq$ is similar and omitted. + It is also straightforward to show that \\\ and + $\preccurlyeq$ are partial orders. Okui and Suzuki furthermore show that they + are linear orderings for lexical values \cite{OkuiSuzuki2010} of a given + regular expression and given string, but we have not formalised this in Isabelle. It is + not essential for our results. What we are going to show below is + that for a given \r\ and \s\, the orderings have a unique + minimal element on the set @{term "LV r s"}, which is the POSIX value + we defined in the previous section. We start with two properties that + show how the length of a flattened value relates to the \\\-ordering. + + \begin{proposition}\mbox{}\smallskip\\\label{ordlen} + \begin{tabular}{@ {}ll} + (1) & + @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + (2) & + @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + \end{tabular} + \end{proposition} + + \noindent Both properties follow from the definition of the ordering. Note that + \textit{(2)} entails that a value, say @{term "v\<^sub>2"}, whose underlying + string is a strict prefix of another flattened value, say @{term "v\<^sub>1"}, then + @{term "v\<^sub>1"} must be smaller than @{term "v\<^sub>2"}. For our proofs it + will be useful to have the following properties---in each case the underlying strings + of the compared values are the same: + + \begin{proposition}\mbox{}\smallskip\\\label{ordintros} + \begin{tabular}{ll} + \textit{(1)} & + @{thm [mode=IfThen] PosOrd_Left_Right[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + \textit{(2)} & If + @{thm (prem 1) PosOrd_Left_eq[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \;then\; + @{thm (lhs) PosOrd_Left_eq[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \;iff\; + @{thm (rhs) PosOrd_Left_eq[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + \textit{(3)} & If + @{thm (prem 1) PosOrd_Right_eq[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \;then\; + @{thm (lhs) PosOrd_Right_eq[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \;iff\; + @{thm (rhs) PosOrd_Right_eq[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + \textit{(4)} & If + @{thm (prem 1) PosOrd_Seq_eq[where ?v2.0="v\<^sub>2" and ?w2.0="w\<^sub>2"]} \;then\; + @{thm (lhs) PosOrd_Seq_eq[where ?v2.0="v\<^sub>2" and ?w2.0="w\<^sub>2"]} \;iff\; + @{thm (rhs) PosOrd_Seq_eq[where ?v2.0="v\<^sub>2" and ?w2.0="w\<^sub>2"]}\\ + \textit{(5)} & If + @{thm (prem 2) PosOrd_SeqI1[simplified, where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and + ?w1.0="w\<^sub>1" and ?w2.0="w\<^sub>2"]} \;and\; + @{thm (prem 1) PosOrd_SeqI1[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and + ?w1.0="w\<^sub>1" and ?w2.0="w\<^sub>2"]} \;then\; + @{thm (concl) PosOrd_SeqI1[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and + ?w1.0="w\<^sub>1" and ?w2.0="w\<^sub>2"]}\\ + \textit{(6)} & If + @{thm (prem 1) PosOrd_Stars_append_eq[where ?vs1.0="vs\<^sub>1" and ?vs2.0="vs\<^sub>2"]} \;then\; + @{thm (lhs) PosOrd_Stars_append_eq[where ?vs1.0="vs\<^sub>1" and ?vs2.0="vs\<^sub>2"]} \;iff\; + @{thm (rhs) PosOrd_Stars_append_eq[where ?vs1.0="vs\<^sub>1" and ?vs2.0="vs\<^sub>2"]}\\ + + \textit{(7)} & If + @{thm (prem 2) PosOrd_StarsI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and + ?vs1.0="vs\<^sub>1" and ?vs2.0="vs\<^sub>2"]} \;and\; + @{thm (prem 1) PosOrd_StarsI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and + ?vs1.0="vs\<^sub>1" and ?vs2.0="vs\<^sub>2"]} \;then\; + @{thm (concl) PosOrd_StarsI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and + ?vs1.0="vs\<^sub>1" and ?vs2.0="vs\<^sub>2"]}\\ + \end{tabular} + \end{proposition} + + \noindent One might prefer that statements \textit{(4)} and \textit{(5)} + (respectively \textit{(6)} and \textit{(7)}) + are combined into a single \textit{iff}-statement (like the ones for \Left\ and \Right\). Unfortunately this cannot be done easily: such + a single statement would require an additional assumption about the + two values @{term "Seq v\<^sub>1 v\<^sub>2"} and @{term "Seq w\<^sub>1 w\<^sub>2"} + being inhabited by the same regular expression. The + complexity of the proofs involved seems to not justify such a + `cleaner' single statement. The statements given are just the properties that + allow us to establish our theorems without any difficulty. The proofs + for Proposition~\ref{ordintros} are routine. + + + Next we establish how Okui and Suzuki's orderings relate to our + definition of POSIX values. Given a \POSIX\ value \v\<^sub>1\ + for \r\ and \s\, then any other lexical value \v\<^sub>2\ in @{term "LV r s"} is greater or equal than \v\<^sub>1\, namely: + + + \begin{theorem}\label{orderone} + @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + \end{theorem} + + \begin{proof} By induction on our POSIX rules. By + Theorem~\ref{posixdeterm} and the definition of @{const LV}, it is clear + that \v\<^sub>1\ and \v\<^sub>2\ have the same + underlying string @{term s}. The three base cases are + straightforward: for example for @{term "v\<^sub>1 = Void"}, we have + that @{term "v\<^sub>2 \ LV ONE []"} must also be of the form + \mbox{@{term "v\<^sub>2 = Void"}}. Therefore we have @{term + "v\<^sub>1 :\val v\<^sub>2"}. The inductive cases for + \r\ being of the form @{term "ALT r\<^sub>1 r\<^sub>2"} and + @{term "SEQ r\<^sub>1 r\<^sub>2"} are as follows: + + + \begin{itemize} + + \item[$\bullet$] Case \P+L\ with @{term "s \ (ALT r\<^sub>1 r\<^sub>2) + \ (Left w\<^sub>1)"}: In this case the value + @{term "v\<^sub>2"} is either of the + form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the + latter case we can immediately conclude with \mbox{@{term "v\<^sub>1 + :\val v\<^sub>2"}} since a \Left\-value with the + same underlying string \s\ is always smaller than a + \Right\-value by Proposition~\ref{ordintros}\textit{(1)}. + In the former case we have @{term "w\<^sub>2 + \ LV r\<^sub>1 s"} and can use the induction hypothesis to infer + @{term "w\<^sub>1 :\val w\<^sub>2"}. Because @{term + "w\<^sub>1"} and @{term "w\<^sub>2"} have the same underlying string + \s\, we can conclude with @{term "Left w\<^sub>1 + :\val Left w\<^sub>2"} using + Proposition~\ref{ordintros}\textit{(2)}.\smallskip + + \item[$\bullet$] Case \P+R\ with @{term "s \ (ALT r\<^sub>1 r\<^sub>2) + \ (Right w\<^sub>1)"}: This case similar to the previous + case, except that we additionally know @{term "s \ L + r\<^sub>1"}. This is needed when @{term "v\<^sub>2"} is of the form + \mbox{@{term "Left w\<^sub>2"}}. Since \mbox{@{term "flat v\<^sub>2 = flat + w\<^sub>2"} \= s\} and @{term "\ w\<^sub>2 : + r\<^sub>1"}, we can derive a contradiction for \mbox{@{term "s \ L + r\<^sub>1"}} using + Proposition~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1 + :\val v\<^sub>2"}}.\smallskip + + \item[$\bullet$] Case \PS\ with @{term "(s\<^sub>1 @ + s\<^sub>2) \ (SEQ r\<^sub>1 r\<^sub>2) \ (Seq + w\<^sub>1 w\<^sub>2)"}: We can assume @{term "v\<^sub>2 = Seq + (u\<^sub>1) (u\<^sub>2)"} with @{term "\ u\<^sub>1 : + r\<^sub>1"} and \mbox{@{term "\ u\<^sub>2 : + r\<^sub>2"}}. We have @{term "s\<^sub>1 @ s\<^sub>2 = (flat + u\<^sub>1) @ (flat u\<^sub>2)"}. By the side-condition of the + \PS\-rule we know that either @{term "s\<^sub>1 = flat + u\<^sub>1"} or that @{term "flat u\<^sub>1"} is a strict prefix of + @{term "s\<^sub>1"}. In the latter case we can infer @{term + "w\<^sub>1 :\val u\<^sub>1"} by + Proposition~\ref{ordlen}\textit{(2)} and from this @{term "v\<^sub>1 + :\val v\<^sub>2"} by Proposition~\ref{ordintros}\textit{(5)} + (as noted above @{term "v\<^sub>1"} and @{term "v\<^sub>2"} must have the + same underlying string). + In the former case we know + @{term "u\<^sub>1 \ LV r\<^sub>1 s\<^sub>1"} and @{term + "u\<^sub>2 \ LV r\<^sub>2 s\<^sub>2"}. With this we can use the + induction hypotheses to infer @{term "w\<^sub>1 :\val + u\<^sub>1"} and @{term "w\<^sub>2 :\val u\<^sub>2"}. By + Proposition~\ref{ordintros}\textit{(4,5)} we can again infer + @{term "v\<^sub>1 :\val + v\<^sub>2"}. + + \end{itemize} + + \noindent The case for \P\\ is similar to the \PS\-case and omitted.\qed + \end{proof} + + \noindent This theorem shows that our \POSIX\ value for a + regular expression \r\ and string @{term s} is in fact a + minimal element of the values in \LV r s\. By + Proposition~\ref{ordlen}\textit{(2)} we also know that any value in + \LV r s'\, with @{term "s'"} being a strict prefix, cannot be + smaller than \v\<^sub>1\. The next theorem shows the + opposite---namely any minimal element in @{term "LV r s"} must be a + \POSIX\ value. This can be established by induction on \r\, but the proof can be drastically simplified by using the fact + from the previous section about the existence of a \POSIX\ value + whenever a string @{term "s \ L r"}. + + + \begin{theorem} + @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} + \end{theorem} + + \begin{proof} + If @{thm (prem 1) PosOrd_Posix[where ?v1.0="v\<^sub>1"]} then + @{term "s \ L r"} by Proposition~\ref{inhabs}. Hence by Theorem~\ref{lexercorrect}(2) + there exists a + \POSIX\ value @{term "v\<^sub>P"} with @{term "s \ r \ v\<^sub>P"} + and by Lemma~\ref{LVposix} we also have \mbox{@{term "v\<^sub>P \ LV r s"}}. + By Theorem~\ref{orderone} we therefore have + @{term "v\<^sub>P :\val v\<^sub>1"}. If @{term "v\<^sub>P = v\<^sub>1"} then + we are done. Otherwise we have @{term "v\<^sub>P :\val v\<^sub>1"}, which + however contradicts the second assumption about @{term "v\<^sub>1"} being the smallest + element in @{term "LV r s"}. So we are done in this case too.\qed + \end{proof} + + \noindent + From this we can also show + that if @{term "LV r s"} is non-empty (or equivalently @{term "s \ L r"}) then + it has a unique minimal element: + + \begin{corollary} + @{thm [mode=IfThen] Least_existence1} + \end{corollary} + + + + \noindent To sum up, we have shown that the (unique) minimal elements + of the ordering by Okui and Suzuki are exactly the \POSIX\ + values we defined inductively in Section~\ref{posixsec}. This provides + an independent confirmation that our ternary relation formalises the + informal POSIX rules. + +\ + +section \Bitcoded Lexing\ + + + + +text \ + +Incremental calculation of the value. To simplify the proof we first define the function +@{const flex} which calculates the ``iterated'' injection function. With this we can +rewrite the lexer as + +\begin{center} +@{thm lexer_flex} +\end{center} + + +\ + +section \Optimisations\ + +text \ + + Derivatives as calculated by \Brz's method are usually more complex + regular expressions than the initial one; the result is that the + derivative-based matching and lexing algorithms are often abysmally slow. + However, various optimisations are possible, such as the simplifications + of @{term "ALT ZERO r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and + @{term "SEQ r ONE"} to @{term r}. These simplifications can speed up the + algorithms considerably, as noted in \cite{Sulzmann2014}. One of the + advantages of having a simple specification and correctness proof is that + the latter can be refined to prove the correctness of such simplification + steps. While the simplification of regular expressions according to + rules like + + \begin{equation}\label{Simpl} + \begin{array}{lcllcllcllcl} + @{term "ALT ZERO r"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "ALT r ZERO"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "SEQ ONE r"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "SEQ r ONE"} & \\\ & @{term r} + \end{array} + \end{equation} + + \noindent is well understood, there is an obstacle with the POSIX value + calculation algorithm by Sulzmann and Lu: if we build a derivative regular + expression and then simplify it, we will calculate a POSIX value for this + simplified derivative regular expression, \emph{not} for the original (unsimplified) + derivative regular expression. Sulzmann and Lu \cite{Sulzmann2014} overcome this obstacle by + not just calculating a simplified regular expression, but also calculating + a \emph{rectification function} that ``repairs'' the incorrect value. + + The rectification functions can be (slightly clumsily) implemented in + Isabelle/HOL as follows using some auxiliary functions: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & \Right (f v)\\\ + @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & \Left (f v)\\\ + + @{thm (lhs) F_ALT.simps(1)} & $\dn$ & \Right (f\<^sub>2 v)\\\ + @{thm (lhs) F_ALT.simps(2)} & $\dn$ & \Left (f\<^sub>1 v)\\\ + + @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & \Seq (f\<^sub>1 ()) (f\<^sub>2 v)\\\ + @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & \Seq (f\<^sub>1 v) (f\<^sub>2 ())\\\ + @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & \Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)\\medskip\\ + %\end{tabular} + % + %\begin{tabular}{lcl} + @{term "simp_ALT (ZERO, DUMMY) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_RIGHT f\<^sub>2)"}\\ + @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (ZERO, DUMMY)"} & $\dn$ & @{term "(r\<^sub>1, F_LEFT f\<^sub>1)"}\\ + @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(ALT r\<^sub>1 r\<^sub>2, F_ALT f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (ONE, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_SEQ1 f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (ONE, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>1, F_SEQ2 f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(SEQ r\<^sub>1 r\<^sub>2, F_SEQ f\<^sub>1 f\<^sub>2)"}\\ + \end{tabular} + \end{center} + + \noindent + The functions \simp\<^bsub>Alt\<^esub>\ and \simp\<^bsub>Seq\<^esub>\ encode the simplification rules + in \eqref{Simpl} and compose the rectification functions (simplifications can occur + deep inside the regular expression). The main simplification function is then + + \begin{center} + \begin{tabular}{lcl} + @{term "simp (ALT r\<^sub>1 r\<^sub>2)"} & $\dn$ & @{term "simp_ALT (simp r\<^sub>1) (simp r\<^sub>2)"}\\ + @{term "simp (SEQ r\<^sub>1 r\<^sub>2)"} & $\dn$ & @{term "simp_SEQ (simp r\<^sub>1) (simp r\<^sub>2)"}\\ + @{term "simp r"} & $\dn$ & @{term "(r, id)"}\\ + \end{tabular} + \end{center} + + \noindent where @{term "id"} stands for the identity function. The + function @{const simp} returns a simplified regular expression and a corresponding + rectification function. Note that we do not simplify under stars: this + seems to slow down the algorithm, rather than speed it up. The optimised + lexer is then given by the clauses: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) slexer.simps(1)} & $\dn$ & @{thm (rhs) slexer.simps(1)}\\ + @{thm (lhs) slexer.simps(2)} & $\dn$ & + \let (r\<^sub>s, f\<^sub>r) = simp (r \$\backslash$\ c) in\\\ + & & \case\ @{term "slexer r\<^sub>s s"} \of\\\ + & & \phantom{$|$} @{term "None"} \\\ @{term None}\\ + & & $|$ @{term "Some v"} \\\ \Some (inj r c (f\<^sub>r v))\ + \end{tabular} + \end{center} + + \noindent + In the second clause we first calculate the derivative @{term "der c r"} + and then simpli + +text \ + +Incremental calculation of the value. To simplify the proof we first define the function +@{const flex} which calculates the ``iterated'' injection function. With this we can +rewrite the lexer as + +\begin{center} +@{thm lexer_flex} +\end{center} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ + @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ + @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ + @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ + @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ + @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} +\end{tabular} +\end{center} + +\begin{center} +\begin{tabular}{lcl} + @{term areg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALT bs r1 r2"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} +\end{tabular} +\end{center} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) intern.simps(1)} & $\dn$ & @{thm (rhs) intern.simps(1)}\\ + @{thm (lhs) intern.simps(2)} & $\dn$ & @{thm (rhs) intern.simps(2)}\\ + @{thm (lhs) intern.simps(3)} & $\dn$ & @{thm (rhs) intern.simps(3)}\\ + @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) intern.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) intern.simps(6)} & $\dn$ & @{thm (rhs) intern.simps(6)}\\ +\end{tabular} +\end{center} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) erase.simps(1)} & $\dn$ & @{thm (rhs) erase.simps(1)}\\ + @{thm (lhs) erase.simps(2)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(2)[of bs]}\\ + @{thm (lhs) erase.simps(3)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(3)[of bs]}\\ + @{thm (lhs) erase.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) erase.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) erase.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) erase.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) erase.simps(6)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(6)[of bs]}\\ +\end{tabular} +\end{center} + +Some simple facts about erase + +\begin{lemma}\mbox{}\\ +@{thm erase_bder}\\ +@{thm erase_intern} +\end{lemma} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bnullable.simps(1)} & $\dn$ & @{thm (rhs) bnullable.simps(1)}\\ + @{thm (lhs) bnullable.simps(2)} & $\dn$ & @{thm (rhs) bnullable.simps(2)}\\ + @{thm (lhs) bnullable.simps(3)} & $\dn$ & @{thm (rhs) bnullable.simps(3)}\\ + @{thm (lhs) bnullable.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bnullable.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bnullable.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bnullable.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bnullable.simps(6)} & $\dn$ & @{thm (rhs) bnullable.simps(6)}\medskip\\ + +% \end{tabular} +% \end{center} + +% \begin{center} +% \begin{tabular}{lcl} + + @{thm (lhs) bder.simps(1)} & $\dn$ & @{thm (rhs) bder.simps(1)}\\ + @{thm (lhs) bder.simps(2)} & $\dn$ & @{thm (rhs) bder.simps(2)}\\ + @{thm (lhs) bder.simps(3)} & $\dn$ & @{thm (rhs) bder.simps(3)}\\ + @{thm (lhs) bder.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bder.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bder.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bder.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bder.simps(6)} & $\dn$ & @{thm (rhs) bder.simps(6)} + \end{tabular} + \end{center} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bmkeps.simps(1)} & $\dn$ & @{thm (rhs) bmkeps.simps(1)}\\ + @{thm (lhs) bmkeps.simps(2)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bmkeps.simps(2)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bmkeps.simps(3)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bmkeps.simps(3)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bmkeps.simps(4)} & $\dn$ & @{thm (rhs) bmkeps.simps(4)}\medskip\\ +\end{tabular} +\end{center} + + +@{thm [mode=IfThen] bder_retrieve} + +By induction on \r\ + +\begin{theorem}[Main Lemma]\mbox{}\\ +@{thm [mode=IfThen] MAIN_decode} +\end{theorem} + +\noindent +Definition of the bitcoded lexer + +@{thm blexer_def} + + +\begin{theorem} +@{thm blexer_correctness} +\end{theorem} + +\ + +section \Optimisations\ + +text \ + + Derivatives as calculated by \Brz's method are usually more complex + regular expressions than the initial one; the result is that the + derivative-based matching and lexing algorithms are often abysmally slow. + However, various optimisations are possible, such as the simplifications + of @{term "ALT ZERO r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and + @{term "SEQ r ONE"} to @{term r}. These simplifications can speed up the + algorithms considerably, as noted in \cite{Sulzmann2014}. One of the + advantages of having a simple specification and correctness proof is that + the latter can be refined to prove the correctness of such simplification + steps. While the simplification of regular expressions according to + rules like + + \begin{equation}\label{Simpl} + \begin{array}{lcllcllcllcl} + @{term "ALT ZERO r"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "ALT r ZERO"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "SEQ ONE r"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "SEQ r ONE"} & \\\ & @{term r} + \end{array} + \end{equation} + + \noindent is well understood, there is an obstacle with the POSIX value + calculation algorithm by Sulzmann and Lu: if we build a derivative regular + expression and then simplify it, we will calculate a POSIX value for this + simplified derivative regular expression, \emph{not} for the original (unsimplified) + derivative regular expression. Sulzmann and Lu \cite{Sulzmann2014} overcome this obstacle by + not just calculating a simplified regular expression, but also calculating + a \emph{rectification function} that ``repairs'' the incorrect value. + + The rectification functions can be (slightly clumsily) implemented in + Isabelle/HOL as follows using some auxiliary functions: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & \Right (f v)\\\ + @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & \Left (f v)\\\ + + @{thm (lhs) F_ALT.simps(1)} & $\dn$ & \Right (f\<^sub>2 v)\\\ + @{thm (lhs) F_ALT.simps(2)} & $\dn$ & \Left (f\<^sub>1 v)\\\ + + @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & \Seq (f\<^sub>1 ()) (f\<^sub>2 v)\\\ + @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & \Seq (f\<^sub>1 v) (f\<^sub>2 ())\\\ + @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & \Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)\\medskip\\ + %\end{tabular} + % + %\begin{tabular}{lcl} + @{term "simp_ALT (ZERO, DUMMY) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_RIGHT f\<^sub>2)"}\\ + @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (ZERO, DUMMY)"} & $\dn$ & @{term "(r\<^sub>1, F_LEFT f\<^sub>1)"}\\ + @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(ALT r\<^sub>1 r\<^sub>2, F_ALT f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (ONE, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_SEQ1 f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (ONE, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>1, F_SEQ2 f\<^sub>1 f\<^sub>2)"}\\ + @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(SEQ r\<^sub>1 r\<^sub>2, F_SEQ f\<^sub>1 f\<^sub>2)"}\\ + \end{tabular} + \end{center} + + \noindent + The functions \simp\<^bsub>Alt\<^esub>\ and \simp\<^bsub>Seq\<^esub>\ encode the simplification rules + in \eqref{Simpl} and compose the rectification functions (simplifications can occur + deep inside the regular expression). The main simplification function is then + + \begin{center} + \begin{tabular}{lcl} + @{term "simp (ALT r\<^sub>1 r\<^sub>2)"} & $\dn$ & @{term "simp_ALT (simp r\<^sub>1) (simp r\<^sub>2)"}\\ + @{term "simp (SEQ r\<^sub>1 r\<^sub>2)"} & $\dn$ & @{term "simp_SEQ (simp r\<^sub>1) (simp r\<^sub>2)"}\\ + @{term "simp r"} & $\dn$ & @{term "(r, id)"}\\ + \end{tabular} + \end{center} + + \noindent where @{term "id"} stands for the identity function. The + function @{const simp} returns a simplified regular expression and a corresponding + rectification function. Note that we do not simplify under stars: this + seems to slow down the algorithm, rather than speed it up. The optimised + lexer is then given by the clauses: + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) slexer.simps(1)} & $\dn$ & @{thm (rhs) slexer.simps(1)}\\ + @{thm (lhs) slexer.simps(2)} & $\dn$ & + \let (r\<^sub>s, f\<^sub>r) = simp (r \$\backslash$\ c) in\\\ + & & \case\ @{term "slexer r\<^sub>s s"} \of\\\ + & & \phantom{$|$} @{term "None"} \\\ @{term None}\\ + & & $|$ @{term "Some v"} \\\ \Some (inj r c (f\<^sub>r v))\ + \end{tabular} + \end{center} + + \noindent + In the second clause we first calculate the derivative @{term "der c r"} + and then simplify the result. This gives us a simplified derivative + \r\<^sub>s\ and a rectification function \f\<^sub>r\. The lexer + is then recursively called with the simplified derivative, but before + we inject the character @{term c} into the value @{term v}, we need to rectify + @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness + of @{term "slexer"}, we need to show that simplification preserves the language + and simplification preserves our POSIX relation once the value is rectified + (recall @{const "simp"} generates a (regular expression, rectification function) pair): + + \begin{lemma}\mbox{}\smallskip\\\label{slexeraux} + \begin{tabular}{ll} + (1) & @{thm L_fst_simp[symmetric]}\\ + (2) & @{thm[mode=IfThen] Posix_simp} + \end{tabular} + \end{lemma} + + \begin{proof} Both are by induction on \r\. There is no + interesting case for the first statement. For the second statement, + of interest are the @{term "r = ALT r\<^sub>1 r\<^sub>2"} and @{term "r = SEQ r\<^sub>1 + r\<^sub>2"} cases. In each case we have to analyse four subcases whether + @{term "fst (simp r\<^sub>1)"} and @{term "fst (simp r\<^sub>2)"} equals @{const + ZERO} (respectively @{const ONE}). For example for @{term "r = ALT + r\<^sub>1 r\<^sub>2"}, consider the subcase @{term "fst (simp r\<^sub>1) = ZERO"} and + @{term "fst (simp r\<^sub>2) \ ZERO"}. By assumption we know @{term "s \ + fst (simp (ALT r\<^sub>1 r\<^sub>2)) \ v"}. From this we can infer @{term "s \ fst (simp r\<^sub>2) \ v"} + and by IH also (*) @{term "s \ r\<^sub>2 \ (snd (simp r\<^sub>2) v)"}. Given @{term "fst (simp r\<^sub>1) = ZERO"} + we know @{term "L (fst (simp r\<^sub>1)) = {}"}. By the first statement + @{term "L r\<^sub>1"} is the empty set, meaning (**) @{term "s \ L r\<^sub>1"}. + Taking (*) and (**) together gives by the \mbox{\P+R\}-rule + @{term "s \ ALT r\<^sub>1 r\<^sub>2 \ Right (snd (simp r\<^sub>2) v)"}. In turn this + gives @{term "s \ ALT r\<^sub>1 r\<^sub>2 \ snd (simp (ALT r\<^sub>1 r\<^sub>2)) v"} as we need to show. + The other cases are similar.\qed + \end{proof} + + \noindent We can now prove relatively straightforwardly that the + optimised lexer produces the expected result: + + \begin{theorem} + @{thm slexer_correctness} + \end{theorem} + + \begin{proof} By induction on @{term s} generalising over @{term + r}. The case @{term "[]"} is trivial. For the cons-case suppose the + string is of the form @{term "c # s"}. By induction hypothesis we + know @{term "slexer r s = lexer r s"} holds for all @{term r} (in + particular for @{term "r"} being the derivative @{term "der c + r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, that is @{term + "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification + function, that is @{term "snd (simp (der c r))"}. We distinguish the cases + whether (*) @{term "s \ L (der c r)"} or not. In the first case we + have by Theorem~\ref{lexercorrect}(2) a value @{term "v"} so that @{term + "lexer (der c r) s = Some v"} and @{term "s \ der c r \ v"} hold. + By Lemma~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s + \ L r\<^sub>s"} holds. Hence we know by Theorem~\ref{lexercorrect}(2) that + there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and + @{term "s \ r\<^sub>s \ v'"}. From the latter we know by + Lemma~\ref{slexeraux}(2) that @{term "s \ der c r \ (f\<^sub>r v')"} holds. + By the uniqueness of the POSIX relation (Theorem~\ref{posixdeterm}) we + can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the + rectification function applied to @{term "v'"} + produces the original @{term "v"}. Now the case follows by the + definitions of @{const lexer} and @{const slexer}. + + In the second case where @{term "s \ L (der c r)"} we have that + @{term "lexer (der c r) s = None"} by Theorem~\ref{lexercorrect}(1). We + also know by Lemma~\ref{slexeraux}(1) that @{term "s \ L r\<^sub>s"}. Hence + @{term "lexer r\<^sub>s s = None"} by Theorem~\ref{lexercorrect}(1) and + by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can + conclude in this case too.\qed + + \end{proof} + +\ +fy the result. This gives us a simplified derivative + \r\<^sub>s\ and a rectification function \f\<^sub>r\. The lexer + is then recursively called with the simplified derivative, but before + we inject the character @{term c} into the value @{term v}, we need to rectify + @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness + of @{term "slexer"}, we need to show that simplification preserves the language + and simplification preserves our POSIX relation once the value is rectified + (recall @{const "simp"} generates a (regular expression, rectification function) pair): + + \begin{lemma}\mbox{}\smallskip\\\label{slexeraux} + \begin{tabular}{ll} + (1) & @{thm L_fst_simp[symmetric]}\\ + (2) & @{thm[mode=IfThen] Posix_simp} + \end{tabular} + \end{lemma} + + \begin{proof} Both are by induction on \r\. There is no + interesting case for the first statement. For the second statement, + of interest are the @{term "r = ALT r\<^sub>1 r\<^sub>2"} and @{term "r = SEQ r\<^sub>1 + r\<^sub>2"} cases. In each case we have to analyse four subcases whether + @{term "fst (simp r\<^sub>1)"} and @{term "fst (simp r\<^sub>2)"} equals @{const + ZERO} (respectively @{const ONE}). For example for @{term "r = ALT + r\<^sub>1 r\<^sub>2"}, consider the subcase @{term "fst (simp r\<^sub>1) = ZERO"} and + @{term "fst (simp r\<^sub>2) \ ZERO"}. By assumption we know @{term "s \ + fst (simp (ALT r\<^sub>1 r\<^sub>2)) \ v"}. From this we can infer @{term "s \ fst (simp r\<^sub>2) \ v"} + and by IH also (*) @{term "s \ r\<^sub>2 \ (snd (simp r\<^sub>2) v)"}. Given @{term "fst (simp r\<^sub>1) = ZERO"} + we know @{term "L (fst (simp r\<^sub>1)) = {}"}. By the first statement + @{term "L r\<^sub>1"} is the empty set, meaning (**) @{term "s \ L r\<^sub>1"}. + Taking (*) and (**) together gives by the \mbox{\P+R\}-rule + @{term "s \ ALT r\<^sub>1 r\<^sub>2 \ Right (snd (simp r\<^sub>2) v)"}. In turn this + gives @{term "s \ ALT r\<^sub>1 r\<^sub>2 \ snd (simp (ALT r\<^sub>1 r\<^sub>2)) v"} as we need to show. + The other cases are similar.\qed + \end{proof} + + \noindent We can now prove relatively straightforwardly that the + optimised lexer produces the expected result: + + \begin{theorem} + @{thm slexer_correctness} + \end{theorem} + + \begin{proof} By induction on @{term s} generalising over @{term + r}. The case @{term "[]"} is trivial. For the cons-case suppose the + string is of the form @{term "c # s"}. By induction hypothesis we + know @{term "slexer r s = lexer r s"} holds for all @{term r} (in + particular for @{term "r"} being the derivative @{term "der c + r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, that is @{term + "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification + function, that is @{term "snd (simp (der c r))"}. We distinguish the cases + whether (*) @{term "s \ L (der c r)"} or not. In the first case we + have by Theorem~\ref{lexercorrect}(2) a value @{term "v"} so that @{term + "lexer (der c r) s = Some v"} and @{term "s \ der c r \ v"} hold. + By Lemma~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s + \ L r\<^sub>s"} holds. Hence we know by Theorem~\ref{lexercorrect}(2) that + there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and + @{term "s \ r\<^sub>s \ v'"}. From the latter we know by + Lemma~\ref{slexeraux}(2) that @{term "s \ der c r \ (f\<^sub>r v')"} holds. + By the uniqueness of the POSIX relation (Theorem~\ref{posixdeterm}) we + can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the + rectification function applied to @{term "v'"} + produces the original @{term "v"}. Now the case follows by the + definitions of @{const lexer} and @{const slexer}. + + In the second case where @{term "s \ L (der c r)"} we have that + @{term "lexer (der c r) s = None"} by Theorem~\ref{lexercorrect}(1). We + also know by Lemma~\ref{slexeraux}(1) that @{term "s \ L r\<^sub>s"}. Hence + @{term "lexer r\<^sub>s s = None"} by Theorem~\ref{lexercorrect}(1) and + by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can + conclude in this case too.\qed + + \end{proof} + +\ + + +section \HERE\ + +text \ + + \begin{lemma} + @{thm [mode=IfThen] bder_retrieve} + \end{lemma} + + \begin{proof} + By induction on the definition of @{term "erase r"}. The cases for rule 1) and 2) are + straightforward as @{term "der c ZERO"} and @{term "der c ONE"} are both equal to + @{term ZERO}. This means @{term "\ v : ZERO"} cannot hold. Similarly in case of rule 3) + where @{term r} is of the form @{term "ACHAR d"} with @{term "c = d"}. Then by assumption + we know @{term "\ v : ONE"}, which implies @{term "v = Void"}. The equation follows by + simplification of left- and right-hand side. In case @{term "c \ d"} we have again + @{term "\ v : ZERO"}, which cannot hold. + + For rule 4a) we have again @{term "\ v : ZERO"}. The property holds by IH for rule 4b). + The induction hypothesis is + \[ + @{term "retrieve (bder c r) v = retrieve r (injval (erase r) c v)"} + \] + which is what left- and right-hand side simplify to. The slightly more interesting case + is for 4c). By assumption we have + @{term "\ v : ALT (der c (erase r\<^sub>1)) (der c (erase (AALTs bs (r\<^sub>2 # rs))))"}. This means we + have either (*) @{term "\ v1 : der c (erase r\<^sub>1)"} with @{term "v = Left v1"} or + (**) @{term "\ v2 : der c (erase (AALTs bs (r\<^sub>2 # rs)))"} with @{term "v = Right v2"}. + The former case is straightforward by simplification. The second case is \ldots TBD. + + Rule 5) TBD. + + Finally for rule 6) the reasoning is as follows: By assumption we have + @{term "\ v : SEQ (der c (erase r)) (STAR (erase r))"}. This means we also have + @{term "v = Seq v1 v2"}, @{term "\ v1 : der c (erase r)"} and @{term "v2 = Stars vs"}. + We want to prove + \begin{align} + & @{term "retrieve (ASEQ bs (fuse [Z] (bder c r)) (ASTAR [] r)) v"}\\ + &= @{term "retrieve (ASTAR bs r) (injval (STAR (erase r)) c v)"} + \end{align} + The right-hand side @{term inj}-expression is equal to + @{term "Stars (injval (erase r) c v1 # vs)"}, which means the @{term retrieve}-expression + simplifies to + \[ + @{term "bs @ [Z] @ retrieve r (injval (erase r) c v1) @ retrieve (ASTAR [] r) (Stars vs)"} + \] + The left-hand side (3) above simplifies to + \[ + @{term "bs @ retrieve (fuse [Z] (bder c r)) v1 @ retrieve (ASTAR [] r) (Stars vs)"} + \] + We can move out the @{term "fuse [Z]"} and then use the IH to show that left-hand side + and right-hand side are equal. This completes the proof. + \end{proof} + + + + \bibliographystyle{plain} + \bibliography{root} + +\ +(*<*) +end +(*>*) + + + +(* + +\begin{center} + \begin{tabular} + @{thm[mode=Rule] aggressive.intros(1)[of "bs" "bs1" "rs" "r"]} + end{tabular} +\end{center} + + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ + @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ + @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ + @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ + @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ + @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} +\end{tabular} +\end{center} + +\begin{center} +\begin{tabular}{lcl} + @{term areg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALT bs r1 r2"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} +\end{tabular} +\end{center} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) intern.simps(1)} & $\dn$ & @{thm (rhs) intern.simps(1)}\\ + @{thm (lhs) intern.simps(2)} & $\dn$ & @{thm (rhs) intern.simps(2)}\\ + @{thm (lhs) intern.simps(3)} & $\dn$ & @{thm (rhs) intern.simps(3)}\\ + @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) intern.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) intern.simps(6)} & $\dn$ & @{thm (rhs) intern.simps(6)}\\ +\end{tabular} +\end{center} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) erase.simps(1)} & $\dn$ & @{thm (rhs) erase.simps(1)}\\ + @{thm (lhs) erase.simps(2)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(2)[of bs]}\\ + @{thm (lhs) erase.simps(3)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(3)[of bs]}\\ + @{thm (lhs) erase.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) erase.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) erase.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) erase.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) erase.simps(6)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(6)[of bs]}\\ +\end{tabular} +\end{center} + +Some simple facts about erase + +\begin{lemma}\mbox{}\\ +@{thm erase_bder}\\ +@{thm erase_intern} +\end{lemma} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bnullable.simps(1)} & $\dn$ & @{thm (rhs) bnullable.simps(1)}\\ + @{thm (lhs) bnullable.simps(2)} & $\dn$ & @{thm (rhs) bnullable.simps(2)}\\ + @{thm (lhs) bnullable.simps(3)} & $\dn$ & @{thm (rhs) bnullable.simps(3)}\\ + @{thm (lhs) bnullable.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bnullable.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bnullable.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bnullable.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bnullable.simps(6)} & $\dn$ & @{thm (rhs) bnullable.simps(6)}\medskip\\ + +% \end{tabular} +% \end{center} + +% \begin{center} +% \begin{tabular}{lcl} + + @{thm (lhs) bder.simps(1)} & $\dn$ & @{thm (rhs) bder.simps(1)}\\ + @{thm (lhs) bder.simps(2)} & $\dn$ & @{thm (rhs) bder.simps(2)}\\ + @{thm (lhs) bder.simps(3)} & $\dn$ & @{thm (rhs) bder.simps(3)}\\ + @{thm (lhs) bder.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bder.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bder.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bder.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bder.simps(6)} & $\dn$ & @{thm (rhs) bder.simps(6)} + \end{tabular} + \end{center} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bmkeps.simps(1)} & $\dn$ & @{thm (rhs) bmkeps.simps(1)}\\ + @{thm (lhs) bmkeps.simps(2)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bmkeps.simps(2)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bmkeps.simps(3)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bmkeps.simps(3)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bmkeps.simps(4)} & $\dn$ & @{thm (rhs) bmkeps.simps(4)}\medskip\\ +\end{tabular} +\end{center} + + +@{thm [mode=IfThen] bder_retrieve} + +By induction on \r\ + +\begin{theorem}[Main Lemma]\mbox{}\\ +@{thm [mode=IfThen] MAIN_decode} +\end{theorem} + +\noindent +Definition of the bitcoded lexer + +@{thm blexer_def} + + +\begin{theorem} +@{thm blexer_correctness} +\end{theorem} + + + + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ + @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ + @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ + @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ + @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ + @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} +\end{tabular} +\end{center} + +\begin{center} +\begin{tabular}{lcl} + @{term areg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALT bs r1 r2"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} +\end{tabular} +\end{center} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) intern.simps(1)} & $\dn$ & @{thm (rhs) intern.simps(1)}\\ + @{thm (lhs) intern.simps(2)} & $\dn$ & @{thm (rhs) intern.simps(2)}\\ + @{thm (lhs) intern.simps(3)} & $\dn$ & @{thm (rhs) intern.simps(3)}\\ + @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) intern.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) intern.simps(6)} & $\dn$ & @{thm (rhs) intern.simps(6)}\\ +\end{tabular} +\end{center} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) erase.simps(1)} & $\dn$ & @{thm (rhs) erase.simps(1)}\\ + @{thm (lhs) erase.simps(2)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(2)[of bs]}\\ + @{thm (lhs) erase.simps(3)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(3)[of bs]}\\ + @{thm (lhs) erase.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) erase.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) erase.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) erase.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) erase.simps(6)[of bs]} & $\dn$ & @{thm (rhs) erase.simps(6)[of bs]}\\ +\end{tabular} +\end{center} + +Some simple facts about erase + +\begin{lemma}\mbox{}\\ +@{thm erase_bder}\\ +@{thm erase_intern} +\end{lemma} + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bnullable.simps(1)} & $\dn$ & @{thm (rhs) bnullable.simps(1)}\\ + @{thm (lhs) bnullable.simps(2)} & $\dn$ & @{thm (rhs) bnullable.simps(2)}\\ + @{thm (lhs) bnullable.simps(3)} & $\dn$ & @{thm (rhs) bnullable.simps(3)}\\ + @{thm (lhs) bnullable.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bnullable.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bnullable.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bnullable.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bnullable.simps(6)} & $\dn$ & @{thm (rhs) bnullable.simps(6)}\medskip\\ + +% \end{tabular} +% \end{center} + +% \begin{center} +% \begin{tabular}{lcl} + + @{thm (lhs) bder.simps(1)} & $\dn$ & @{thm (rhs) bder.simps(1)}\\ + @{thm (lhs) bder.simps(2)} & $\dn$ & @{thm (rhs) bder.simps(2)}\\ + @{thm (lhs) bder.simps(3)} & $\dn$ & @{thm (rhs) bder.simps(3)}\\ + @{thm (lhs) bder.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bder.simps(4)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bder.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bder.simps(5)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bder.simps(6)} & $\dn$ & @{thm (rhs) bder.simps(6)} + \end{tabular} + \end{center} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bmkeps.simps(1)} & $\dn$ & @{thm (rhs) bmkeps.simps(1)}\\ + @{thm (lhs) bmkeps.simps(2)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bmkeps.simps(2)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bmkeps.simps(3)[of bs "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bmkeps.simps(3)[of bs "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bmkeps.simps(4)} & $\dn$ & @{thm (rhs) bmkeps.simps(4)}\medskip\\ +\end{tabular} +\end{center} + + +@{thm [mode=IfThen] bder_retrieve} + +By induction on \r\ + +\begin{theorem}[Main Lemma]\mbox{}\\ +@{thm [mode=IfThen] MAIN_decode} +\end{theorem} + +\noindent +Definition of the bitcoded lexer + +@{thm blexer_def} + + +\begin{theorem} +@{thm blexer_correctness} +\end{theorem} + +\ +\*) \ No newline at end of file diff -r c80720289645 -r 0efa7ffd96ff thys2/Journal/session_graph.pdf Binary file thys2/Journal/session_graph.pdf has changed