diff -r 3cbcd7cda0a9 -r 0497408a3598 ChengsongTanPhdThesis/Chapters/ChapterBitcoded1.tex
--- a/ChengsongTanPhdThesis/Chapters/ChapterBitcoded1.tex	Wed Jul 13 08:27:28 2022 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,506 +0,0 @@
-% Chapter Template
-
-% Main chapter title
-\chapter{Correctness of Bit-coded Algorithm without Simplification}
-
-\label{ChapterBitcoded1} % Change X to a consecutive number; for referencing this chapter elsewhere, use \ref{ChapterX}
-%Then we illustrate how the algorithm without bitcodes falls short for such aggressive 
-%simplifications and therefore introduce our version of the bitcoded algorithm and 
-%its correctness proof in 
-%Chapter 3\ref{Chapter3}. 
-
-\section*{Bit-coded Algorithm}
-Bits and bitcodes (lists of bits) are defined as:
-
-\begin{center}
-		$b ::=   1 \mid  0 \qquad
-bs ::= [] \mid b::bs    
-$
-\end{center}
-
-\noindent
-The $1$ and $0$ are not in bold in order to avoid 
-confusion with the regular expressions $\ZERO$ and $\ONE$. Bitcodes (or
-bit-lists) can be used to encode values (or potentially incomplete values) in a
-compact form. This can be straightforwardly seen in the following
-coding function from values to bitcodes: 
-
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{code}(\Empty)$ & $\dn$ & $[]$\\
-  $\textit{code}(\Char\,c)$ & $\dn$ & $[]$\\
-  $\textit{code}(\Left\,v)$ & $\dn$ & $0 :: code(v)$\\
-  $\textit{code}(\Right\,v)$ & $\dn$ & $1 :: code(v)$\\
-  $\textit{code}(\Seq\,v_1\,v_2)$ & $\dn$ & $code(v_1) \,@\, code(v_2)$\\
-  $\textit{code}(\Stars\,[])$ & $\dn$ & $[0]$\\
-  $\textit{code}(\Stars\,(v\!::\!vs))$ & $\dn$ & $1 :: code(v) \;@\;
-                                                 code(\Stars\,vs)$
-\end{tabular}    
-\end{center} 
-
-\noindent
-Here $\textit{code}$ encodes a value into a bitcodes by converting
-$\Left$ into $0$, $\Right$ into $1$, and marks the start of a non-empty
-star iteration by $1$. The border where a local star terminates
-is marked by $0$. This coding is lossy, as it throws away the information about
-characters, and also does not encode the ``boundary'' between two
-sequence values. Moreover, with only the bitcode we cannot even tell
-whether the $1$s and $0$s are for $\Left/\Right$ or $\Stars$. The
-reason for choosing this compact way of storing information is that the
-relatively small size of bits can be easily manipulated and ``moved
-around'' in a regular expression. In order to recover values, we will 
-need the corresponding regular expression as an extra information. This
-means the decoding function is defined as:
-
-
-%\begin{definition}[Bitdecoding of Values]\mbox{}
-\begin{center}
-\begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
-  $\textit{decode}'\,bs\,(\ONE)$ & $\dn$ & $(\Empty, bs)$\\
-  $\textit{decode}'\,bs\,(c)$ & $\dn$ & $(\Char\,c, bs)$\\
-  $\textit{decode}'\,(0\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
-     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
-       (\Left\,v, bs_1)$\\
-  $\textit{decode}'\,(1\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
-     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
-       (\Right\,v, bs_1)$\\                           
-  $\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
-        $\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
-  & &   $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$\\
-  & &   \hspace{35mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
-  $\textit{decode}'\,(0\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
-  $\textit{decode}'\,(1\!::\!bs)\,(r^*)$ & $\dn$ & 
-         $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
-  & &   $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$\\
-  & &   \hspace{35mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\bigskip\\
-  
-  $\textit{decode}\,bs\,r$ & $\dn$ &
-     $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
-  & & $\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;
-       \textit{else}\;\textit{None}$                       
-\end{tabular}    
-\end{center}    
-%\end{definition}
-
-Sulzmann and Lu's integrated the bitcodes into regular expressions to
-create annotated regular expressions \cite{Sulzmann2014}.
-\emph{Annotated regular expressions} are defined by the following
-grammar:%\comment{ALTS should have  an $as$ in  the definitions, not  just $a_1$ and $a_2$}
-
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{a}$ & $::=$  & $\ZERO$\\
-                  & $\mid$ & $_{bs}\ONE$\\
-                  & $\mid$ & $_{bs}{\bf c}$\\
-                  & $\mid$ & $_{bs}\sum\,as$\\
-                  & $\mid$ & $_{bs}a_1\cdot a_2$\\
-                  & $\mid$ & $_{bs}a^*$
-\end{tabular}    
-\end{center}  
-%(in \textit{ALTS})
-
-\noindent
-where $bs$ stands for bitcodes, $a$  for $\mathbf{a}$nnotated regular
-expressions and $as$ for a list of annotated regular expressions.
-The alternative constructor($\sum$) has been generalized to 
-accept a list of annotated regular expressions rather than just 2.
-We will show that these bitcodes encode information about
-the (POSIX) value that should be generated by the Sulzmann and Lu
-algorithm.
-
-
-To do lexing using annotated regular expressions, we shall first
-transform the usual (un-annotated) regular expressions into annotated
-regular expressions. This operation is called \emph{internalisation} and
-defined as follows:
-
-%\begin{definition}
-\begin{center}
-\begin{tabular}{lcl}
-  $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\
-  $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\
-  $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\
-  $(r_1 + r_2)^\uparrow$ & $\dn$ &
-  $_{[]}\sum[\textit{fuse}\,[0]\,r_1^\uparrow,\,
-  \textit{fuse}\,[1]\,r_2^\uparrow]$\\
-  $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
-         $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\
-  $(r^*)^\uparrow$ & $\dn$ &
-         $_{[]}(r^\uparrow)^*$\\
-\end{tabular}    
-\end{center}    
-%\end{definition}
-
-\noindent
-We use up arrows here to indicate that the basic un-annotated regular
-expressions are ``lifted up'' into something slightly more complex. In the
-fourth clause, $\textit{fuse}$ is an auxiliary function that helps to
-attach bits to the front of an annotated regular expression. Its
-definition is as follows:
-
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\
-  $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ &
-     $_{bs @ bs'}\ONE$\\
-  $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ &
-     $_{bs@bs'}{\bf c}$\\
-  $\textit{fuse}\;bs\,_{bs'}\sum\textit{as}$ & $\dn$ &
-     $_{bs@bs'}\sum\textit{as}$\\
-  $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ &
-     $_{bs@bs'}a_1 \cdot a_2$\\
-  $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ &
-     $_{bs @ bs'}a^*$
-\end{tabular}    
-\end{center}  
-
-\noindent
-After internalising the regular expression, we perform successive
-derivative operations on the annotated regular expressions. This
-derivative operation is the same as what we had previously for the
-basic regular expressions, except that we beed to take care of
-the bitcodes:
-
-
-\iffalse
- %\begin{definition}{bder}
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-  $(\textit{ZERO})\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
-  $(\textit{ONE}\;bs)\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
-  $(\textit{CHAR}\;bs\,d)\,\backslash c$ & $\dn$ &
-        $\textit{if}\;c=d\; \;\textit{then}\;
-         \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
-  $(\textit{ALTS}\;bs\,as)\,\backslash c$ & $\dn$ &
-  $\textit{ALTS}\;bs\,(map (\backslash c) as)$\\
-  $(\textit{SEQ}\;bs\,a_1\,a_2)\,\backslash c$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a_1$\\
-					       & &$\textit{then}\;\textit{ALTS}\,bs\,List((\textit{SEQ}\,[]\,(a_1\,\backslash c)\,a_2),$\\
-					       & &$\phantom{\textit{then}\;\textit{ALTS}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c)))$\\
-  & &$\textit{else}\;\textit{SEQ}\,bs\,(a_1\,\backslash c)\,a_2$\\
-  $(\textit{STAR}\,bs\,a)\,\backslash c$ & $\dn$ &
-      $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\,\backslash c))\,
-       (\textit{STAR}\,[]\,r)$
-\end{tabular}    
-\end{center}    
-%\end{definition}
-
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-  $(\textit{ZERO})\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
-  $(_{bs}\textit{ONE})\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
-  $(_{bs}\textit{CHAR}\;d)\,\backslash c$ & $\dn$ &
-        $\textit{if}\;c=d\; \;\textit{then}\;
-         _{bs}\textit{ONE}\;\textit{else}\;\textit{ZERO}$\\  
-  $(_{bs}\textit{ALTS}\;\textit{as})\,\backslash c$ & $\dn$ &
-  $_{bs}\textit{ALTS}\;(\textit{as}.\textit{map}(\backslash c))$\\
-  $(_{bs}\textit{SEQ}\;a_1\,a_2)\,\backslash c$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a_1$\\
-					       & &$\textit{then}\;_{bs}\textit{ALTS}\,List((_{[]}\textit{SEQ}\,(a_1\,\backslash c)\,a_2),$\\
-					       & &$\phantom{\textit{then}\;_{bs}\textit{ALTS}\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c)))$\\
-  & &$\textit{else}\;_{bs}\textit{SEQ}\,(a_1\,\backslash c)\,a_2$\\
-  $(_{bs}\textit{STAR}\,a)\,\backslash c$ & $\dn$ &
-      $_{bs}\textit{SEQ}\;(\textit{fuse}\, [0] \; r\,\backslash c )\,
-       (_{bs}\textit{STAR}\,[]\,r)$
-\end{tabular}    
-\end{center}    
-%\end{definition}
-\fi
-
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-  $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\  
-  $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\  
-  $(_{bs}{\bf d})\,\backslash c$ & $\dn$ &
-        $\textit{if}\;c=d\; \;\textit{then}\;
-         _{bs}\ONE\;\textit{else}\;\ZERO$\\  
-  $(_{bs}\sum \;\textit{as})\,\backslash c$ & $\dn$ &
-  $_{bs}\sum\;(\textit{map} (\_\backslash c) as )$\\
-  $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a_1$\\
-					       & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
-					       & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
-  & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\
-  $(_{bs}a^*)\,\backslash c$ & $\dn$ &
-      $_{bs}(\textit{fuse}\, [0] \; r\,\backslash c)\cdot
-       (_{[]}r^*))$
-\end{tabular}    
-\end{center}    
-
-%\end{definition}
-\noindent
-For instance, when we do derivative of  $_{bs}a^*$ with respect to c,
-we need to unfold it into a sequence,
-and attach an additional bit $0$ to the front of $r \backslash c$
-to indicate one more star iteration. Also the sequence clause
-is more subtle---when $a_1$ is $\textit{bnullable}$ (here
-\textit{bnullable} is exactly the same as $\textit{nullable}$, except
-that it is for annotated regular expressions, therefore we omit the
-definition). Assume that $\textit{bmkeps}$ correctly extracts the bitcode for how
-$a_1$ matches the string prior to character $c$ (more on this later),
-then the right branch of alternative, which is $\textit{fuse} \; \bmkeps \;  a_1 (a_2
-\backslash c)$ will collapse the regular expression $a_1$(as it has
-already been fully matched) and store the parsing information at the
-head of the regular expression $a_2 \backslash c$ by fusing to it. The
-bitsequence $\textit{bs}$, which was initially attached to the
-first element of the sequence $a_1 \cdot a_2$, has
-now been elevated to the top-level of $\sum$, as this information will be
-needed whichever way the sequence is matched---no matter whether $c$ belongs
-to $a_1$ or $ a_2$. After building these derivatives and maintaining all
-the lexing information, we complete the lexing by collecting the
-bitcodes using a generalised version of the $\textit{mkeps}$ function
-for annotated regular expressions, called $\textit{bmkeps}$:
-
-
-%\begin{definition}[\textit{bmkeps}]\mbox{}
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\
-  $\textit{bmkeps}\,(_{bs}\sum a::\textit{as})$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a$\\
-  & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
-  & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{bs}\sum \textit{as})$\\
-  $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ &
-     $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
-  $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ &
-     $bs \,@\, [0]$
-\end{tabular}    
-\end{center}    
-%\end{definition}
-
-\noindent
-This function completes the value information by travelling along the
-path of the regular expression that corresponds to a POSIX value and
-collecting all the bitcodes, and using $S$ to indicate the end of star
-iterations. If we take the bitcodes produced by $\textit{bmkeps}$ and
-decode them, we get the value we expect. The corresponding lexing
-algorithm looks as follows:
-
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{blexer}\;r\,s$ & $\dn$ &
-      $\textit{let}\;a = (r^\uparrow)\backslash s\;\textit{in}$\\                
-  & & $\;\;\textit{if}\; \textit{bnullable}(a)$\\
-  & & $\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,a)\,r$\\
-  & & $\;\;\textit{else}\;\textit{None}$
-\end{tabular}
-\end{center}
-
-\noindent
-In this definition $\_\backslash s$ is the  generalisation  of the derivative
-operation from characters to strings (just like the derivatives for un-annotated
-regular expressions).
-
-Now we introduce the simplifications, which is why we introduce the 
-bitcodes in the first place.
-
-\subsection*{Simplification Rules}
-
-This section introduces aggressive (in terms of size) simplification rules
-on annotated regular expressions
-to keep derivatives small. Such simplifications are promising
-as we have
-generated test data that show
-that a good tight bound can be achieved. We could only
-partially cover the search space as there are infinitely many regular
-expressions and strings. 
-
-One modification we introduced is to allow a list of annotated regular
-expressions in the $\sum$ constructor. This allows us to not just
-delete unnecessary $\ZERO$s and $\ONE$s from regular expressions, but
-also unnecessary ``copies'' of regular expressions (very similar to
-simplifying $r + r$ to just $r$, but in a more general setting). Another
-modification is that we use simplification rules inspired by Antimirov's
-work on partial derivatives. They maintain the idea that only the first
-``copy'' of a regular expression in an alternative contributes to the
-calculation of a POSIX value. All subsequent copies can be pruned away from
-the regular expression. A recursive definition of our  simplification function 
-that looks somewhat similar to our Scala code is given below:
-%\comment{Use $\ZERO$, $\ONE$ and so on. 
-%Is it $ALTS$ or $ALTS$?}\\
-
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-   
-  $\textit{simp} \; (_{bs}a_1\cdot a_2)$ & $\dn$ & $ (\textit{simp} \; a_1, \textit{simp}  \; a_2) \; \textit{match} $ \\
-   &&$\quad\textit{case} \; (\ZERO, \_) \Rightarrow  \ZERO$ \\
-   &&$\quad\textit{case} \; (\_, \ZERO) \Rightarrow  \ZERO$ \\
-   &&$\quad\textit{case} \;  (\ONE, a_2') \Rightarrow  \textit{fuse} \; bs \;  a_2'$ \\
-   &&$\quad\textit{case} \; (a_1', \ONE) \Rightarrow  \textit{fuse} \; bs \;  a_1'$ \\
-   &&$\quad\textit{case} \; (a_1', a_2') \Rightarrow   _{bs}a_1' \cdot a_2'$ \\
-
-  $\textit{simp} \; (_{bs}\sum \textit{as})$ & $\dn$ & $\textit{distinct}( \textit{flatten} ( \textit{map} \; simp \; as)) \; \textit{match} $ \\
-  &&$\quad\textit{case} \; [] \Rightarrow  \ZERO$ \\
-   &&$\quad\textit{case} \; a :: [] \Rightarrow  \textit{fuse bs a}$ \\
-   &&$\quad\textit{case} \;  as' \Rightarrow _{bs}\sum \textit{as'}$\\ 
-
-   $\textit{simp} \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$   
-\end{tabular}    
-\end{center}    
-
-\noindent
-The simplification does a pattern matching on the regular expression.
-When it detected that the regular expression is an alternative or
-sequence, it will try to simplify its child regular expressions
-recursively and then see if one of the children turns into $\ZERO$ or
-$\ONE$, which might trigger further simplification at the current level.
-The most involved part is the $\sum$ clause, where we use two
-auxiliary functions $\textit{flatten}$ and $\textit{distinct}$ to open up nested
-alternatives and reduce as many duplicates as possible. Function
-$\textit{distinct}$  keeps the first occurring copy only and removes all later ones
-when detected duplicates. Function $\textit{flatten}$ opens up nested $\sum$s.
-Its recursive definition is given below:
-
- \begin{center}
-  \begin{tabular}{@{}lcl@{}}
-  $\textit{flatten} \; (_{bs}\sum \textit{as}) :: \textit{as'}$ & $\dn$ & $(\textit{map} \;
-     (\textit{fuse}\;bs)\; \textit{as}) \; @ \; \textit{flatten} \; as' $ \\
-  $\textit{flatten} \; \ZERO :: as'$ & $\dn$ & $ \textit{flatten} \;  \textit{as'} $ \\
-    $\textit{flatten} \; a :: as'$ & $\dn$ & $a :: \textit{flatten} \; \textit{as'}$ \quad(otherwise) 
-\end{tabular}    
-\end{center}  
-
-\noindent
-Here $\textit{flatten}$ behaves like the traditional functional programming flatten
-function, except that it also removes $\ZERO$s. Or in terms of regular expressions, it
-removes parentheses, for example changing $a+(b+c)$ into $a+b+c$.
-
-Having defined the $\simp$ function,
-we can use the previous notation of  natural
-extension from derivative w.r.t.~character to derivative
-w.r.t.~string:%\comment{simp in  the [] case?}
-
-\begin{center}
-\begin{tabular}{lcl}
-$r \backslash_{simp} (c\!::\!s) $ & $\dn$ & $(r \backslash_{simp}\, c) \backslash_{simp}\, s$ \\
-$r \backslash_{simp} [\,] $ & $\dn$ & $r$
-\end{tabular}
-\end{center}
-
-\noindent
-to obtain an optimised version of the algorithm:
-
- \begin{center}
-\begin{tabular}{lcl}
-  $\textit{blexer\_simp}\;r\,s$ & $\dn$ &
-      $\textit{let}\;a = (r^\uparrow)\backslash_{simp}\, s\;\textit{in}$\\                
-  & & $\;\;\textit{if}\; \textit{bnullable}(a)$\\
-  & & $\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,a)\,r$\\
-  & & $\;\;\textit{else}\;\textit{None}$
-\end{tabular}
-\end{center}
-
-\noindent
-This algorithm keeps the regular expression size small, for example,
-with this simplification our previous $(a + aa)^*$ example's 8000 nodes
-will be reduced to just 6 and stays constant, no matter how long the
-input string is.
-
-
-
-
-
-
-
-
-
-
-
-%-----------------------------------
-%	SUBSECTION 1
-%-----------------------------------
-\section{Specifications of Some Helper Functions}
-Here we give some functions' definitions, 
-which we will use later.
-\begin{center}
-\begin{tabular}{ccc}
-$\retrieve \; \ACHAR \, \textit{bs} \, c \; \Char(c) = \textit{bs}$
-\end{tabular}
-\end{center}
-
-
-%----------------------------------------------------------------------------------------
-%	SECTION  correctness proof
-%----------------------------------------------------------------------------------------
-\section{Correctness of Bit-coded Algorithm (Without Simplification)}
-We now give the proof the correctness of the algorithm with bit-codes.
-
-Ausaf and Urban cleverly defined an auxiliary function called $\flex$,
-defined as
-\[
-\flex \; r \; f \; [] \; v \; = \; f\; v
-\flex \; r \; f \; c :: s \; v =  \flex r \; \lambda v. \, f (\inj \; r\; c\; v)\; s \; v
-\]
-which accumulates the characters that needs to be injected back, 
-and does the injection in a stack-like manner (last taken derivative first injected).
-$\flex$ is connected to the $\lexer$:
-\begin{lemma}
-$\flex \; r \; \textit{id}\; s \; \mkeps (r\backslash s) = \lexer \; r \; s$
-\end{lemma}
-$\flex$ provides us a bridge between $\lexer$ and $\blexer$.
-What is even better about $\flex$ is that it allows us to 
-directly operate on the value $\mkeps (r\backslash v)$,
-which is pivotal in the definition of  $\lexer $ and $\blexer$, but not visible as an argument.
-When the value created by $\mkeps$ becomes available, one can 
-prove some stepwise properties of lexing nicely:
-\begin{lemma}\label{flexStepwise}
-$\textit{flex} \; r \; f \; s@[c] \; v= \flex \; r \; f\; s \; (\inj \; (r\backslash s) \; c \; v) $
-\end{lemma}
-
-And for $\blexer$ we have a function with stepwise properties like $\flex$ as well,
-called $\retrieve$\ref{retrieveDef}.
-$\retrieve$ takes bit-codes from annotated regular expressions
-guided by a value.
-$\retrieve$ is connected to the $\blexer$ in the following way:
-\begin{lemma}\label{blexer_retrieve}
-$\blexer \; r \; s = \decode  \; (\retrieve \; (\internalise \; r) \; (\mkeps \; (r \backslash s) )) \; r$
-\end{lemma}
-If you take derivative of an annotated regular expression, 
-you can $\retrieve$ the same bit-codes as before the derivative took place,
-provided that you use the corresponding value:
-
-\begin{lemma}\label{retrieveStepwise}
-$\retrieve \; (r \backslash c)  \;  v= \retrieve \; r \; (\inj \; r\; c\; v)$
-\end{lemma}
-The other good thing about $\retrieve$ is that it can be connected to $\flex$:
-%centralLemma1
-\begin{lemma}\label{flex_retrieve}
-$\flex \; r \; \textit{id}\; s\; v = \decode \; (\retrieve \; (r\backslash s )\; v) \; r$
-\end{lemma}
-\begin{proof}
-By induction on $s$. The induction tactic is reverse induction on strings.
-$v$ is allowed to be arbitrary.
-The crucial point is to rewrite 
-\[
-\retrieve \; (r \backslash s@[c]) \; \mkeps (r \backslash s@[c]) 
-\]
-as
-\[
-\retrieve \; (r \backslash s) \; (\inj \; (r \backslash s) \; c\;  \mkeps (r \backslash s@[c]))
-\].
-This enables us to equate 
-\[
-\retrieve \; (r \backslash s@[c]) \; \mkeps (r \backslash s@[c]) 
-\] 
-with 
-\[
-\flex \; r \; \textit{id} \; s \; (\inj \; (r\backslash s) \; c\; (\mkeps (r\backslash s@[c])))
-\],
-which in turn can be rewritten as
-\[
-\flex \; r \; \textit{id} \; s@[c] \;  (\mkeps (r\backslash s@[c]))
-\].
-\end{proof}
-
-With the above lemma we can now link $\flex$ and $\blexer$.
-
-\begin{lemma}\label{flex_blexer}
-$\textit{flex} \; r \; \textit{id} \; s \; \mkeps(r \backslash s)  = \blexer \; r \; s$
-\end{lemma}
-\begin{proof}
-Using two of the above lemmas: \ref{flex_retrieve} and \ref{blexer_retrieve}.
-\end{proof}
-Finally 
-
-
-