% Chapter Template
% Main chapter title
\chapter{Bit-coded Algorithm of Sulzmann and Lu}
\label{Bitcoded1} % Change X to a consecutive number; for referencing this chapter elsewhere, use \ref{ChapterX}
%Then we illustrate how the algorithm without bitcodes falls short for such aggressive
%simplifications and therefore introduce our version of the bitcoded algorithm and
%its correctness proof in
%Chapter 3\ref{Chapter3}.
In this chapter, we describe the bit-coded algorithm
introduced by Sulzmann and Lu \parencite{Sulzmann2014} to address the problem of
regular expressions growing in size during lexing.
We have implemented their algorithm in Scala and found inefficiencies
in it, such as de-duplication not working properly and a redundant
fixpoint construction. We improve their algorithm and verify the
improved version with formal proofs.
\section{The Motivation Behind Using Bitcodes}
We first recapitulate the lexer algorithm from Chapter~\ref{Inj}:
\begin{center}
\begin{tabular}{lcl}
$\lexer \; r \; [] $ & $=$ & $\textit{if} \; (\nullable \; r)\; \textit{then}\; \Some(\mkeps \; r) \; \textit{else} \; \None$\\
$\lexer \; r \;(c::s)$ & $=$ & $\textit{case}\; (\lexer \; (r\backslash c) \; s) \;\textit{of}\; $\\
& & $\quad \phantom{\mid}\; \None \implies \None$\\
& & $\quad \mid \Some(v) \implies \Some(\inj \; r\; c\; v)$
\end{tabular}
\end{center}
\noindent
The algorithm recursively calls $\lexer$ on
each new character in the input,
and before starting a child call
it stores information about the previous lexing steps
on a stack, in the form of regular expressions
and characters: $r_0$, $c_0$, $r_1$, $c_1$, etc.
Each descent into a deeper recursive call of $\lexer$
causes a new pair $r_i, c_i$ to be pushed onto the call stack.
\begin{figure}[H]
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick]
%\draw (-6,-6) grid (6,6);
\node [ circle ] (r) at (-6, 5) {$r$};
%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
\node [circle, minimum size = 0.1, draw] (c1) at (-4, 5.4) {$c_1$};
%
%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
\node [minimum size = 0.5, circle, draw] (r1) at (-2, 5) {$r_1$};
\node [minimum width = 2cm, rectangle, draw] (stack) at (0, 3) {Stack};
\path
(r)
edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
\path (r1)
edge [bend right, dashed] node {saved} (stack);
\path (c1)
edge [bend right, dashed] node {} (stack);
\end{tikzpicture}
\caption{First Derivative Taken}
\end{figure}
\begin{figure}[H]
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick]
%\draw (-6,-6) grid (6,6);
\node [ circle ] (r) at (-6, 5) {$r$};
%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
\node [circle, minimum size = 0.1, ] (c1) at (-4, 5.4) {$c_1$};
%
%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
\node [minimum size = 0.5, circle, ] (r1) at (-2, 5) {$r_1$};
\node [circle, minimum size = 0.1, draw] (c2) at (0, 5.4) {$c_2$};
%
%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
\node [circle, draw] (r2) at (2, 5) {$r_2$};
\node [minimum width = 3cm, minimum height = 1cm, rectangle, draw] (stack) at (0, 2) {\large Stack};
\path
(r)
edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
\path (r2)
edge [bend right, dashed] node {} (stack);
\path (c2)
edge [bend right, dashed] node {} (stack);
\path (r1)
edge [] node {} (r2);
\end{tikzpicture}
\caption{Second Derivative Taken}
\end{figure}
\noindent
As the number of derivative steps increases,
the stack grows:
\begin{figure}[H]
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick]
%\draw (-6,-6) grid (6,6);
\node [ circle ] (r) at (-6, 5) {$r$};
%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
\node [circle, minimum size = 0.1, ] (c1) at (-4, 5.4) {$c_1$};
%
%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
\node [minimum size = 0.5, circle, ] (r1) at (-2, 5) {$r_1$};
\node [circle, minimum size = 0.1, ] (c2) at (0, 5.4) {$c_2$};
%
%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
\node [circle, ] (r2) at (2, 5) {$r_2$};
\node [minimum width = 4cm, minimum height = 2.5cm, rectangle, draw] (stack) at (0, 1) { \large Stack};
\node [] (ldots) at (3.5, 5) {};
%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
\node [minimum size = 0.5, circle, ] (rn) at (6, 5) {};
\node (rldots) at ($(ldots)!.4!(rn)$) {\ldots};
\path
(r)
edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
\path (rldots)
edge [bend left, dashed] node {} (stack);
\path (r1)
edge [] node {} (r2);
\path (r2)
edge [] node {} (ldots);
\path (ldots)
edge [bend left, dashed] node {} (stack);
\path (5.03, 4.9)
edge [bend left, dashed] node {} (stack);
\end{tikzpicture}
\caption{More Derivatives Taken}
\end{figure}
\begin{figure}[H]
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick]
%\draw (-6,-6) grid (6,6);
\node [radius = 0.5, circle, draw] (r) at (-6, 5) {$r$};
%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
\node [circle, minimum size = 0.1, draw] (c1) at (-4, 5.4) {$c_1$};
%
%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
\node [minimum size = 0.5, circle, draw] (r1) at (-2, 5) {$r_1$};
%
%\node (0, 6) (c2) circle [radius = 0.3] {$c_2$};
\node [circle, minimum size = 0.1, draw] (c2) at (0, 5.4) {$c_2$};
%
%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
\node [circle, draw] (r2) at (2, 5) {$r_2$};
%
%
\node [] (ldots) at (4.5, 5) {};
%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
\node [minimum size = 0.5, circle, draw] (rn) at (6, 5) {$r_n$};
\node at ($(ldots)!.4!(rn)$) {\ldots};
\node [minimum size = 6cm, rectangle, draw] (stack) at (0, 0) {\Huge Stack};
\path
(r)
edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
\path
(r1)
edge [] node {} (r2);
\path (r2)
edge [] node {} (ldots);
\path (r)
edge [dashed, bend right] node {} (stack);
\path (r1)
edge [dashed, ] node {} (stack);
\path (c1)
edge [dashed, bend right] node {} (stack);
\path (c2)
edge [dashed] node {} (stack);
\path (4.5, 5)
edge [dashed, bend left] node {} (stack);
\path (4.9, 5)
edge [dashed, bend left] node {} (stack);
\path (5.3, 5)
edge [dashed, bend left] node {} (stack);
\path (r2)
edge [dashed, ] node {} (stack);
\path (rn)
edge [dashed, bend left] node {} (stack);
\end{tikzpicture}
\caption{Before Injection Phase Starts}
\end{figure}
\noindent
After all derivatives have been taken, the stack has reached its maximum size.
The pairs of regular expressions and characters $r_i, c_{i+1}$
are then popped off and used in the injection phase.
\begin{figure}[H]
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick]
%\draw (-6,-6) grid (6,6);
\node [radius = 0.5, circle, ] (r) at (-6, 5) {$r$};
%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
\node [circle, minimum size = 0.1, ] (c1) at (-4, 5.4) {$c_1$};
%
%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
\node [minimum size = 0.5, circle, ] (r1) at (-2, 5) {$r_1$};
%
%\node (0, 6) (c2) circle [radius = 0.3] {$c_2$};
\node [circle, minimum size = 0.1, ] (c2) at (0, 5.4) {$c_2$};
%
%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
\node [circle, ] (r2) at (2, 5) {$r_2$};
%
%
\node [] (ldots) at (4.5, 5) {};
%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
\node [minimum size = 0.5, circle, ] (rn) at (6, 5) {$r_n$};
\node at ($(ldots)!.4!(rn)$) {\ldots};
\node [minimum size = 0.5, circle, ] (vn) at (6, -5) {$v_n$};
\node [] (ldots2) at (3.5, -5) {};
\node [minimum size = 0.5, circle, ] (v2) at (2, -5) {$v_2$};
\node at ($(ldots2)!.4!(v2)$) {\ldots};
\node [circle, ] (v1) at (-2, -5) {$v_1$};
\node [radius = 0.5, circle, ] (v) at (-6, -5) {$v$};
\node [minimum size = 6cm, rectangle, draw] (stack) at (0, 0) {\Huge Stack};
\path
(r)
edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
\path
(r1)
edge [] node {} (r2);
\path (r2)
edge [] node {} (ldots);
\path (rn)
edge [] node {$\mkeps$} (vn);
\path (vn)
edge [] node {} (ldots2);
\path (v2)
edge [] node {$\inj \; r_1 \; c_2\;v_2$} (v1);
\path (v1)
edge [] node {$\inj \; r \; c_1 \; v_1$} (v);
\path (stack)
edge [dashed] node {} (-4.2, -5.2);
\path (stack)
edge [dashed] node {} (-4, -5.2);
\path (stack)
edge [dashed] node {} (-0.1, -5.2);
\path (stack)
edge [dashed] node {} (0.2, -5.26);
\path (stack)
edge [dashed] node {} (3.2, -5);
\path (stack)
edge [dashed] node {} (2.7, -5);
\path (stack)
edge [dashed] node {} (3.7, -5);
\end{tikzpicture}
\caption{Stored $r_i, c_{i+1}$ Used by $\inj$}
\end{figure}
\noindent
Storing all $r_i, c_{i+1}$ pairs recursively
allows the algorithm to work in an elegant way, at the expense of
storing quite a bit of redundant information.
The stack seems to grow at least quadratically with respect
to the length of the input (not even taking into account the size blowup of the $r_i$),
which is inefficient and prone to stack overflows.
\section{Bitcoded Algorithm}
To address this,
Sulzmann and Lu defined a new datatype
called \emph{annotated regular expression},
which condenses all the partial lexing information
(that was originally stored in the $r_i, c_{i+1}$ pairs)
into bitcodes.
Annotated regular expressions
are defined as the following
Isabelle datatype\footnote{We use subscript notation to indicate
	that the bitcodes are auxiliary information that does not
	interfere with the structure of the regular expressions.}:
\begin{center}
\begin{tabular}{lcl}
$\textit{a}$ & $::=$ & $\ZERO$\\
& $\mid$ & $_{bs}\ONE$\\
& $\mid$ & $_{bs}{\bf c}$\\
& $\mid$ & $_{bs}\sum\,as$\\
& $\mid$ & $_{bs}a_1\cdot a_2$\\
& $\mid$ & $_{bs}a^*$
\end{tabular}
\end{center}
\noindent
where $bs$ stands for bitcodes, $a$ for $\mathbf{a}$nnotated regular
expressions and $as$ for lists of annotated regular expressions.
The alternative constructor, written $\sum$, has been generalised to
accept a list of annotated regular expressions rather than just two.
The reason for this generalisation is that when we open up nested
alternatives, there can be more than two elements at the same level
after de-duplication, which can no longer be stored in a binary
constructor.
Bits and bitcodes (lists of bits) are defined as:
\begin{center}
$b ::= S \mid Z \qquad
bs ::= [] \mid b::bs
$
\end{center}
\noindent
We use $S$ and $Z$ rather than $1$ and $0$ to avoid
confusion with the regular expressions $\ZERO$ and $\ONE$.
The idea is to use the attached bitcodes
to indicate which choice was made at a certain point
during lexing.
For example,
$(_{Z}a+_{S}b) \backslash a$ gives us
$_{Z}\ONE + \ZERO$. The $Z$ bitcode will
later be used to decode that the left branch was
selected at the point when the derivative of $a+b$ was taken.
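To make this more concrete, the following is a small Scala sketch of how
bits and annotated regular expressions can be represented (this mirrors the
spirit of our Scala implementation; the names \texttt{Bit}, \texttt{Rexp},
\texttt{ARexp} and their constructors are our own choices and are not part
of the Isabelle formalisation):
\begin{verbatim}
// Bits and (annotated) regular expressions as Scala 3 enums.
enum Bit:
  case Z, S
type Bits = List[Bit]

// Plain regular expressions, as in the earlier chapters.
enum Rexp:
  case ZERO, ONE
  case CHAR(c: Char)
  case ALT(r1: Rexp, r2: Rexp)
  case SEQ(r1: Rexp, r2: Rexp)
  case STAR(r: Rexp)

// Annotated regular expressions carrying bitcodes.
enum ARexp:
  case AZERO                                // 0 (no bitcodes attached)
  case AONE(bs: Bits)                       // bs 1
  case ACHAR(bs: Bits, c: Char)             // bs c
  case AALTS(bs: Bits, as: List[ARexp])     // bs Sum as (n-ary alternative)
  case ASEQ(bs: Bits, a1: ARexp, a2: ARexp) // bs a1 . a2
  case ASTAR(bs: Bits, a: ARexp)            // bs a*
\end{verbatim}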
\subsection{A Bird's Eye View of the Bit-coded Lexer}
Before we give out the rest of the functions and definitions
related to our
$\blexer$ (\emph{b}-itcoded lexer), we first provide a high-level
view of the algorithm, so the reader does not get lost in
the details.
\begin{figure}[H]
\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick]
%\draw (-6,-6) grid (6,6);
\node [circle, draw] (r0) at (-6, 2) {$r$};
\node [radius = 0.5, circle, draw] (r) at (-6, 5) {$_{bs}a$};
\path (r0)
edge [] node {$\internalise$} (r);
%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
\node [circle, minimum size = 0.1, draw] (c1) at (-4, 5.4) {$c_1$};
%
%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
\node [minimum size = 1.0cm, circle, draw] (r1) at (-2, 5) {$_{bs_1}a_1$};
%
%\node (0, 6) (c2) circle [radius = 0.3] {$c_2$};
\node [circle, minimum size = 0.1, draw] (c2) at (0, 5.4) {$c_2$};
%
%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
\node [circle, draw, minimum size = 1.4cm] (r2) at (2, 5) {$_{bs_2}a_2$};
%
%
\node [] (ldots) at (4.5, 5) {};
%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
\node [minimum size = 2.2cm, circle, draw] (rn) at (6, 5) {$_{bs_n}a_n$};
\node at ($(ldots)!.1!(rn)$) {\ldots};
\node [minimum size = 0.5, circle, ] (v) at (6, 2) {$v$};
%\node [] (v2) at (4, -5) {};
%
%\node [draw, cross out] (ldots2) at (5, -5) {};
%
%\node at ($(ldots2)!.4!(v2)$) {\ldots};
\node [align = center] (decode) at (6.6, 3.2) {$\bmkeps$\\$\decode$};
\path (c1)
edge [dashed, bend left] node {} (r0);
\path (c2)
edge [dashed, bend left] node {} (r0);
\path (r1)
edge [dashed, bend right] node {} (r2);
\path
(r)
edge [dashed, bend right] node[left] {} (r1);
\path
(r)
edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
\path
(r1)
edge [] node {} (r2);
\path (r2)
edge [] node {} (ldots);
\path (rn)
edge [] node {} (v);
\path (r0)
edge [dashed, bend right] node {} (decode);
%\path (v)
%edge [] node {} (ldots2);
\end{tikzpicture}
\caption{Bird's Eye View of $\blexer$}
\end{figure}
\noindent
The plain regular expression is first ``lifted'' to an annotated regular expression
by the function \emph{internalise}.
Then the annotated regular expression $_{bs}a$ goes
through successive derivatives with respect
to the input stream of characters $c_1, c_2$ etc.
Each time a derivative is taken, the bitcodes
are moved around, discarded or fused together
with the regular expressions they are attached to.
After all input has been consumed, the
bitcodes are collected by $\bmkeps$,
which traverses the nullable part of the regular expression
and collects the bitcodes along the way.
The collected bitcodes are then $\decode$d with the guidance
of the input regular expression $r$.
The most notable improvements of $\blexer$
over $\lexer$ are
\begin{itemize}
\item
The absence of a second injection phase.
\item
There is no need to store the pairs of
intermediate regular expressions $_{bs_i}a_i$ and characters $c_{i+1}$:
the information in $_{bs_i}a_i$ is passed
on without loss to its successor $_{bs_{i+1}}a_{i+1}$,
and the characters $c_{i+1}$ are already contained in the strings of $L\;r$\footnote{
They can be easily recovered if we decode in the correct order.}.
\item
Simplification works much better: one can maintain correctness
while applying quite strong simplifications, as we shall see.
\end{itemize}
\noindent
In the next section we will
give the operations needed in $\blexer$,
with some remarks on the idea behind their definitions.
\subsection{Operations in $\textit{Blexer}$}
The first operation we define for bit-coded regular expressions
moves bits into a regular expression.
Called $\fuse$, this operation attaches bitcodes
to the front of an annotated regular expression:
\begin{center}
\begin{tabular}{lcl}
$\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\
$\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ &
$_{bs @ bs'}\ONE$\\
$\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ &
$_{bs@bs'}{\bf c}$\\
$\textit{fuse}\;bs\,_{bs'}\sum\textit{as}$ & $\dn$ &
$_{bs@bs'}\sum\textit{as}$\\
$\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ &
$_{bs@bs'}a_1 \cdot a_2$\\
$\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ &
$_{bs @ bs'}a^*$
\end{tabular}
\end{center}
\noindent
With \emph{fuse} we are able to define the $\internalise$ function
that translates a ``standard'' regular expression into an
annotated regular expression.
This function will be applied before we start
with the derivative phase of the algorithm.
\begin{center}
\begin{tabular}{lcl}
$(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\
$(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\
$(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\
$(r_1 + r_2)^\uparrow$ & $\dn$ &
$_{[]}\sum[\textit{fuse}\,[Z]\,r_1^\uparrow,\,
\textit{fuse}\,[S]\,r_2^\uparrow]$\\
$(r_1\cdot r_2)^\uparrow$ & $\dn$ &
$_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\
$(r^*)^\uparrow$ & $\dn$ &
$_{[]}(r^\uparrow)^*$\\
\end{tabular}
\end{center}
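\noindent
Under the assumptions of the Scala sketch above, $\fuse$ and $\internalise$
can be rendered as follows (again only a sketch, not the formalised
definitions):
\begin{verbatim}
import Bit.*, Rexp.*, ARexp.*

// fuse attaches the bitcodes bs to the front of an annotated regex.
def fuse(bs: Bits, a: ARexp): ARexp = a match
  case AZERO             => AZERO
  case AONE(bs1)         => AONE(bs ++ bs1)
  case ACHAR(bs1, c)     => ACHAR(bs ++ bs1, c)
  case AALTS(bs1, as)    => AALTS(bs ++ bs1, as)
  case ASEQ(bs1, a1, a2) => ASEQ(bs ++ bs1, a1, a2)
  case ASTAR(bs1, a1)    => ASTAR(bs ++ bs1, a1)

// internalise (written with a postfix up arrow in the text) lifts a
// plain regex into an annotated one, marking the two branches of an
// alternative with Z and S respectively.
def internalise(r: Rexp): ARexp = r match
  case ZERO        => AZERO
  case ONE         => AONE(Nil)
  case CHAR(c)     => ACHAR(Nil, c)
  case ALT(r1, r2) => AALTS(Nil, List(fuse(List(Z), internalise(r1)),
                                      fuse(List(S), internalise(r2))))
  case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
  case STAR(r1)    => ASTAR(Nil, internalise(r1))
\end{verbatim}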
\noindent
We use a postfix up arrow
to denote the $\internalise$ operation,
for convenience.
The opposite of $\internalise$ is
$\erase$, which removes all the bitcodes
and transforms the alternative operator $\sum$ of annotated
regular expressions back into the binary alternative
of plain regular expressions.
\begin{center}
\begin{tabular}{lcl}
$\ZERO_\downarrow$ & $\dn$ & $\ZERO$\\
$( _{bs}\ONE )_\downarrow$ & $\dn$ & $\ONE$\\
$( _{bs}\mathbf{c} )_\downarrow$ & $\dn$ & $\mathbf{c}$\\
$( _{bs} a_1 \cdot a_2 )_\downarrow$ & $\dn$ &
$ (a_1) _\downarrow \cdot (a_2) _\downarrow$\\
$( _{bs} \sum [])_\downarrow $ & $\dn$ & $\ZERO $\\
$( _{bs} \sum [a] )_\downarrow$ & $\dn$ & $a_\downarrow$\\
$( _{bs} \sum [a_1, \; a_2])_\downarrow$ & $\dn$ & $ (a_1) _\downarrow + ( a_2 ) _\downarrow $\\
$(_{bs} \sum (a :: as))_\downarrow$ & $\dn$ & $ a_\downarrow + \; (_{[]} \sum as)_\downarrow$\\
$( _{bs} a^* )_\downarrow$ & $\dn$ & $(a_\downarrow)^*$
\end{tabular}
\end{center}
\noindent
We also abbreviate the $\erase\; a$ operation
as $a_\downarrow$, for conciseness.
Testing whether an annotated regular expression
contains the empty string in its language requires
a dedicated function $\bnullable$;
it simply erases the bitcodes before calling $\nullable$.
\begin{definition}
$\bnullable \; a \dn \nullable \; (a_\downarrow)$
\end{definition}
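\noindent
Continuing the Scala sketch, $\erase$ and $\bnullable$ might look as
follows ($\textit{nullable}$ is the function from Chapter~\ref{Inj},
repeated here so the snippet is self-contained):
\begin{verbatim}
// nullable on plain regular expressions (from the earlier chapter).
def nullable(r: Rexp): Boolean = r match
  case ZERO        => false
  case ONE         => true
  case CHAR(_)     => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_)     => true

// erase drops all bitcodes and turns the n-ary alternative back
// into nested binary alternatives.
def erase(a: ARexp): Rexp = a match
  case AZERO              => ZERO
  case AONE(_)            => ONE
  case ACHAR(_, c)        => CHAR(c)
  case AALTS(_, Nil)      => ZERO
  case AALTS(_, List(a1)) => erase(a1)
  case AALTS(_, a1 :: as) => ALT(erase(a1), erase(AALTS(Nil, as)))
  case ASEQ(_, a1, a2)    => SEQ(erase(a1), erase(a2))
  case ASTAR(_, a1)       => STAR(erase(a1))

def bnullable(a: ARexp): Boolean = nullable(erase(a))
\end{verbatim}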
The function for collecting
bitcodes from a
(b)nullable regular expression is called $\bmkeps$.
It is a variation of the $\textit{mkeps}$ function:
it follows the same path that $\mkeps$ takes when traversing the
$\nullable$ branches of a regular expression,
but returns bitcodes instead of a value.
\begin{center}
\begin{tabular}{lcl}
$\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\
$\textit{bmkeps}\,(_{bs}\sum a::\textit{as})$ & $\dn$ &
$\textit{if}\;\textit{bnullable}\,a$\\
& &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
& &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{[]}\sum \textit{as})$\\
$\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ &
$bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
$\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ &
$bs \,@\, [Z]$
\end{tabular}
\end{center}
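\noindent
In our Scala sketch, $\bmkeps$ would look like this (it is a partial
function, only meant to be called on $\bnullable$ expressions):
\begin{verbatim}
// bmkeps collects the bitcodes along the nullable path that mkeps
// would take; only defined on bnullable annotated regexes.
def bmkeps(a: ARexp): Bits = a match
  case AONE(bs)            => bs
  case AALTS(bs, a1 :: as) =>
    if bnullable(a1) then bs ++ bmkeps(a1)
    else bs ++ bmkeps(AALTS(Nil, as))
  case ASEQ(bs, a1, a2)    => bs ++ bmkeps(a1) ++ bmkeps(a2)
  case ASTAR(bs, _)        => bs ++ List(Z)
\end{verbatim}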
\noindent
$\bmkeps$ retrieves the information of a value $v$ in the form
of bitcodes: it travels along the
path of the regular expression that corresponds to a POSIX match,
collecting all the bitcodes, and attaches $Z$ to mark the end of star
iterations. \\
The bitcodes extracted by $\bmkeps$ need to be
$\decode$d (with the guidance of a plain regular expression):
%\begin{definition}[Bitdecoding of Values]\mbox{}
\begin{center}
\begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
$\textit{decode}'\,bs\,(\ONE)$ & $\dn$ & $(\Empty, bs)$\\
$\textit{decode}'\,bs\,(c)$ & $\dn$ & $(\Char\,c, bs)$\\
$\textit{decode}'\,(Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
$\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
(\Left\,v, bs_1)$\\
$\textit{decode}'\,(S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
$\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
(\Right\,v, bs_1)$\\
$\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
$\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
& & $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$\\
& & \hspace{35mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
$\textit{decode}'\,(Z\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
$\textit{decode}'\,(S\!::\!bs)\,(r^*)$ & $\dn$ &
$\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
& & $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$\\
& & \hspace{35mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\bigskip\\
$\textit{decode}\,bs\,r$ & $\dn$ &
$\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
& & $\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;
\textit{else}\;\textit{None}$
\end{tabular}
\end{center}
\noindent
The function $\decode'$ returns a pair consisting of
a partially decoded value and the leftover bit list that has not
been decoded yet.
The function $\decode$ succeeds if the leftover
bit-sequence is empty.
$\decode$ is terminating because $\decode'$ is terminating,
and $\decode'$ is terminating
because at least one of its parameters decreases in size
in every recursive call.\\
The reverse operation of $\decode$ is $\code$.
$\textit{code}$ encodes a value into a bitcode by converting
$\Left$ into $Z$ and $\Right$ into $S$; it marks the start of any non-empty
star iteration with $S$, and the point where the star iterations
terminate with $Z$.
This coding is lossy, as it throws away the information about
characters, and does not encode the ``boundary'' between two
sequence values. Moreover, from the bitcode alone we cannot even tell
whether the $S$s and $Z$s stand for $\Left/\Right$ or for $\Stars$.
\begin{center}
\begin{tabular}{lcl}
$\textit{code}(\Empty)$ & $\dn$ & $[]$\\
$\textit{code}(\Char\,c)$ & $\dn$ & $[]$\\
$\textit{code}(\Left\,v)$ & $\dn$ & $Z :: code(v)$\\
$\textit{code}(\Right\,v)$ & $\dn$ & $S :: code(v)$\\
$\textit{code}(\Seq\,v_1\,v_2)$ & $\dn$ & $code(v_1) \,@\, code(v_2)$\\
$\textit{code}(\Stars\,[])$ & $\dn$ & $[Z]$\\
$\textit{code}(\Stars\,(v\!::\!vs))$ & $\dn$ & $S :: code(v) \;@\;
code(\Stars\,vs)$
\end{tabular}
\end{center}
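\noindent
The value datatype from Chapter~\ref{Inj}, together with $\code$ and
$\decode$, can be sketched in Scala as follows (we rename $\Left$/$\Right$
to \texttt{LeftV}/\texttt{RightV} to avoid clashing with Scala's built-in
names; \texttt{decodeAux} plays the role of $\decode'$):
\begin{verbatim}
enum Val:
  case Empty
  case Chr(c: Char)
  case Sequ(v1: Val, v2: Val)
  case LeftV(v: Val)
  case RightV(v: Val)
  case Stars(vs: List[Val])
import Val.*

def code(v: Val): Bits = v match
  case Empty           => Nil
  case Chr(_)          => Nil
  case LeftV(v1)       => Z :: code(v1)
  case RightV(v1)      => S :: code(v1)
  case Sequ(v1, v2)    => code(v1) ++ code(v2)
  case Stars(Nil)      => List(Z)
  case Stars(v1 :: vs) => S :: (code(v1) ++ code(Stars(vs)))

// decodeAux returns the decoded value plus the leftover bits.
def decodeAux(bs: Bits, r: Rexp): (Val, Bits) = (bs, r) match
  case (_, ONE)     => (Empty, bs)
  case (_, CHAR(c)) => (Chr(c), bs)
  case (Z :: bs1, ALT(r1, _)) =>
    val (v, bs2) = decodeAux(bs1, r1); (LeftV(v), bs2)
  case (S :: bs1, ALT(_, r2)) =>
    val (v, bs2) = decodeAux(bs1, r2); (RightV(v), bs2)
  case (_, SEQ(r1, r2)) =>
    val (v1, bs1) = decodeAux(bs, r1)
    val (v2, bs2) = decodeAux(bs1, r2)
    (Sequ(v1, v2), bs2)
  case (Z :: bs1, STAR(_)) => (Stars(Nil), bs1)
  case (S :: bs1, STAR(r1)) =>
    val (v, bs2) = decodeAux(bs1, r1)
    decodeAux(bs2, STAR(r1)) match
      case (Stars(vs), bs3) => (Stars(v :: vs), bs3)
      case (_, bs3)         => (Stars(List(v)), bs3) // unreachable on well-formed bits

def decode(bs: Bits, r: Rexp): Option[Val] =
  val (v, rest) = decodeAux(bs, r)
  if rest.isEmpty then Some(v) else None
\end{verbatim}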
\noindent
Assume we have a value $v$ and a regular expression $r$
with $\vdash v:r$;
then $\decode$ and $\code$ are
reverse operations of one another:
\begin{lemma}
If $\vdash v : r$ then $\decode \; (\code \; v) \; r = \textit{Some}\; v$.
\end{lemma}
\begin{proof}
We prove a more general version of the lemma, on $\decode'$:
\[\vdash v : r \implies \decode' \; ((\code \; v) \,@\, ds) \; r = (v, ds) \]
Setting $ds$ to $[]$ and unfolding the definition of $\decode$,
we obtain the stated property.
\end{proof}
\noindent
Now we present the central part of this lexing algorithm,
the $\bder$ function (short for \emph{b}itcoded derivative):
\begin{center}
\begin{tabular}{@{}lcl@{}}
$(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\
$(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\
$(_{bs}{\bf d})\,\backslash c$ & $\dn$ &
$\textit{if}\;c=d\; \;\textit{then}\;
_{bs}\ONE\;\textit{else}\;\ZERO$\\
$(_{bs}\sum \;\textit{as})\,\backslash c$ & $\dn$ &
$_{bs}\sum\;(\textit{map} \; (\_\backslash c) \; as )$\\
$(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
$\textit{if}\;\textit{bnullable}\,a_1$\\
& &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
& &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
& &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\
$(_{bs}a^*)\,\backslash c$ & $\dn$ &
      $_{bs}(\textit{fuse}\, [S] \; (a\,\backslash c))\cdot
       (_{[]}a^*)$
\end{tabular}
\end{center}
\noindent
Most of the time we use the infix notation $(\_\backslash\_)$
to mean $\bder$, for brevity, when
there is no danger of confusion with derivatives of plain regular expressions.
For example, we write $( _{[]}r^* ) \backslash c$ instead of $\bder \;c \; _{[]}r^*$,
as the bitcodes at the front of $r^*$ indicate that it is
a bit-coded regular expression, not a plain one.\\
$\bder$ shows how annotated regular expressions are recursively traversed,
with the bitcodes being augmented and carried around
when a derivative is taken.
We now give the intuition behind some of the more involved cases of
$\bder$. \\
For example,
in the \emph{star} case,
a derivative of $_{bs}a^*$ means
that one more star iteration needs to be taken.
We therefore unfold it into a sequence
and attach an additional bit $S$ to the front of $a \backslash c$,
as a record that a new star iteration has been unfolded.
\noindent
\begin{center}
\begin{tabular}{@{}lcl@{}}
$(_{bs}a^*)\,\backslash c$ & $\dn$ &
      $_{bs}(\underbrace{\textit{fuse}\, [S] \; (a\,\backslash c)}_{\text{One more iteration}})\cdot
       (_{[]}a^*)$
\end{tabular}
\end{center}
\noindent
This information will be recovered later by the $\decode$ function.
The intuition is that the bit $S$ will be decoded at the right location,
because we accumulate bits from left to right (a rigorous proof will be given
later).
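To see how the $S$ bit ends up in the right place, consider the following
small hand-worked trace under the conventions of the Scala sketches in this
chapter (the names are from the sketches, not from the formalisation):
a single derivative of $a^*$ by $a$ produces the bit $S$, and decoding
$[S, Z]$ against $a^*$ gives a star value with exactly one iteration.
\begin{verbatim}
// A hypothetical trace for r = a*, input "a", under the sketches above:
//   internalise(STAR(CHAR('a')))        ==> ASTAR([], ACHAR([], 'a'))
//   taking the derivative by 'a'        ==> ASEQ([], AONE([S]),
//                                                ASTAR([], ACHAR([], 'a')))
//   bmkeps of the derivative            ==> List(S, Z)
//   decode(List(S, Z), STAR(CHAR('a'))) ==> Some(Stars(List(Chr('a'))))
\end{verbatim}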
%\begin{tikzpicture}[ > = stealth, % arrow head style
% shorten > = 1pt, % don't touch arrow head to node
% semithick % line style
% ]
%
% \tikzstyle{every state}=[
% draw = black,
% thin,
% fill = cyan!29,
% minimum size = 7mm
% ]
% \begin{scope}[node distance=1cm and 0cm, every node/.style=state]
% \node (k) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
% {$bs$
% \nodepart{two} $a^*$ };
% \node (l) [below =of k, rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
% { $bs$ + [Z]
% \nodepart{two} $(a\backslash c )\cdot a^*$ };
% \end{scope}
% \path[->]
% (k) edge (l);
%\end{tikzpicture}
%
%Pictorially the process looks like below.
%Like before, the red region denotes
%previous lexing information (stored as bitcodes in $bs$).
%\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
% \begin{scope}[node distance=1cm]
% \node (a) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
% {$bs$
% \nodepart{two} $a^*$ };
% \node (b) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
% { $bs$ + [Z]
% \nodepart{two} $(a\backslash c )\cdot a^*$ };
%%\caption{term 1 \ref{term:1}'s matching configuration}
% \end{scope}
%\end{tikzpicture}
\noindent
Another place the $\bder$ function differs
from derivatives on plain regular expressions
is the sequence case:
\begin{center}
\begin{tabular}{@{}lcl@{}}
$(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
$\textit{if}\;\textit{bnullable}\,a_1$\\
& &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
& &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
& &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$
\end{tabular}
\end{center}
\noindent
The difference is that (when $a_1$ is $\bnullable$)
we use $\bmkeps$ to extract the lexing information
from $a_1$ before collapsing
it (as it has been fully matched by the string prior to $c$),
and attach the collected bitcodes to the front of $a_2$
before throwing away $a_1$. We assume that $\bmkeps$
correctly extracts the bitcodes for how $a_1$
matches the string prior to $c$ (more on this later).
The bit-sequence $\textit{bs}$, which was initially
attached to the first element of the sequence
$a_1 \cdot a_2$, has now been
lifted to the top level of the $\sum$ constructor.
This is because this piece of information will be needed
whichever way the sequence is matched,
regardless of whether $c$ belongs to $a_1$ or $a_2$.
In the injection-based lexer, $r_1$ is immediately thrown away in
the subsequent derivatives of the right branch (when $r_1$ is $\nullable$),
despite $r_1$ potentially storing information about previous matches:
\begin{center}
	$(r_1 \cdot r_2 )\backslash c = (r_1 \backslash c) \cdot r_2 + r_2 \backslash c$
\end{center}
\noindent
This loss of information makes it necessary
to store on the stack ``snapshots'' of all the regular expressions
before their derivatives were
taken, so that the related information is available
once the child recursive
call finishes.\\
The remaining clauses of $\bder$ are rather similar to
those of $\der$. \\
Generalising the derivative operation with bitcodes to strings, we have
\begin{center}
\begin{tabular}{@{}lcl@{}}
$a\backslash_s [] $ & $\dn$ & $a$\\
$a\backslash_s (c :: s) $ & $\dn$ & $(a \backslash c) \backslash_s s$
\end{tabular}
\end{center}
\noindent
As we did earlier, we omit the $s$ subscript at $\backslash_s$ when there is no danger
of confusion.
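Putting the pieces of the Scala sketch together, $\bder$ and its lifting to
strings might look as follows (note the $S$ bit fused in the star case,
matching the definition above; again this is only a sketch):
\begin{verbatim}
// bder: bitcoded derivative with respect to a single character.
def bder(c: Char, a: ARexp): ARexp = a match
  case AZERO            => AZERO
  case AONE(_)          => AZERO
  case ACHAR(bs, d)     => if c == d then AONE(bs) else AZERO
  case AALTS(bs, as)    => AALTS(bs, as.map(bder(c, _)))
  case ASEQ(bs, a1, a2) =>
    if bnullable(a1) then
      AALTS(bs, List(ASEQ(Nil, bder(c, a1), a2),
                     fuse(bmkeps(a1), bder(c, a2))))
    else ASEQ(bs, bder(c, a1), a2)
  case ASTAR(bs, a1)    =>
    ASEQ(bs, fuse(List(S), bder(c, a1)), ASTAR(Nil, a1))

// bders: derivative with respect to a whole string.
def bders(a: ARexp, s: List[Char]): ARexp = s match
  case Nil     => a
  case c :: cs => bders(bder(c, a), cs)
\end{verbatim}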
\subsection{Putting Things Together}
Putting these operations together, we obtain a lexer that uses bit-coded regular expressions
as its internal data structures; we call it $\blexer$:
\begin{center}
\begin{tabular}{lcl}
$\textit{blexer}\;r\,s$ & $\dn$ &
$\textit{let}\;a = (r^\uparrow)\backslash s\;\textit{in}$\\
& & $\;\;\textit{if}\; \textit{bnullable}(a)$\\
& & $\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,a)\,r$\\
& & $\;\;\textit{else}\;\textit{None}$
\end{tabular}
\end{center}
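\noindent
In our Scala sketch the whole pipeline is then just a few lines (a sketch of
$\blexer$, not the verified Isabelle definition), followed by a usage
example based on the regular expression $(aa)^*\cdot bc$ discussed below:
\begin{verbatim}
def blexer(r: Rexp, s: String): Option[Val] =
  val a = bders(internalise(r), s.toList)
  if bnullable(a) then decode(bmkeps(a), r) else None

// For (aa)* . (b . c) and the string "aabc" we expect blexer to return
//   Some(Sequ(Stars(List(Sequ(Chr('a'), Chr('a')))),
//             Sequ(Chr('b'), Chr('c'))))
val r = SEQ(STAR(SEQ(CHAR('a'), CHAR('a'))), SEQ(CHAR('b'), CHAR('c')))
val result = blexer(r, "aabc")
\end{verbatim}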
\noindent
Ausaf and Urban formally proved the correctness of $\blexer$, namely
\begin{property}
$\blexer \;r \; s = \lexer \; r \; s$.
\end{property}
This was claimed, but not formalised, in Sulzmann and Lu's work.
We give the proof later, after we have introduced all
the needed auxiliary functions and definitions.
Before that, we first walk the reader
through a concrete example of our $\blexer$ calculating the correct
lexical information by means of bit-coded regular expressions.\\
Consider the regular expression $(aa)^* \cdot bc$ matching the string $aabc$
and assume we have just read the first character $a$:
\begin{center}
\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
\node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
{$\ONE \cdot a \cdot (aa)^* \cdot bc$
\nodepart{two} $[] \iff \Seq(\Stars\,[\Seq(\Char(a),\, ??), \ldots],\; ??)$};
\end{tikzpicture}
\end{center}
\noindent
We use the red area for (annotated) regular expressions and the blue
area for the (partial) bitcodes and (partial) values.
In the injection-based lexing algorithm, we ``neglect'' the red area
by putting all the characters we have consumed and the
intermediate regular expressions on the stack while
we go from left to right in the derivative phase.
The red area grows until the string is exhausted.
During the injection phase, the value in the blue area
is built up incrementally, while the red area shrinks.
Before we have recovered all characters and intermediate
derivative regular expressions from the stack,
it is unknown what values these characters and regular expressions correspond
to:
\begin{center}
\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
\node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={white!30,blue!20},]
{$(\ONE \cdot \ONE) \cdot (aa)^* \cdot bc $ corresponds to $???$
\nodepart{two} $b c$ corresponds to $\Seq(\ldots, \Seq(\Char(b), \Char(c)))$};
%\caption{term 1 \ref{term:1}'s matching configuration}
\end{tikzpicture}
\end{center}
\noindent
However, they are calculable,
as the characters and the shapes of the regular expressions
after taking derivatives with respect to those characters
are already known. Therefore, in our example
we know that the value starts with two $a$s,
making up one iteration of the Kleene star.
(We have put the injection-based lexer's partial
result in the right part of the split rectangle,
to contrast it with the partial value produced
in a forward manner.)
\begin{center}
\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
\node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
{$\stackrel{Bitcoded}{\longrightarrow} \Seq(\Stars\,[\Seq(\Char(a), \Char(a))], ???)$
\nodepart{two} $\Seq(\ldots, \Seq(\Char(b), \Char(c)))$ $\stackrel{Inj}{\longleftarrow}$};
%\caption{term 1 \ref{term:1}'s matching configuration}
\end{tikzpicture}
\end{center}
\noindent
If we do this kind of ``attachment'',
and each time augment the attached partially
constructed value when consuming a
character, we get the following sequence of states:
\begin{center}
\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
\node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},] (spPoint)
{$\Seq(\Stars\,[\Seq(\Char(a), \Char(a))], \ldots)$
\nodepart{two} Remaining: $b c$};
\end{tikzpicture}\\
$\downarrow$\\
\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
\node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
{$\Seq(\Stars\,[\Seq(\Char(a), \Char(a))], \Seq(\Char(b), \ldots))$
\nodepart{two} Remaining: $c$};
\end{tikzpicture}\\
$\downarrow$\\
\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
\node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
{$\Seq(\Stars\,[\Seq(\Char(a), \Char(a))], \Seq(\Char(b), \Char(c)))$
\nodepart{two} EOF};
\end{tikzpicture}
\end{center}
\noindent
In the end we can recover the value without a backward injection phase.
However, (partial) values are clumsy to keep attached to a regular expression, so
we instead encode them with the bitcodes defined earlier.
%-----------------------------------
% SUBSECTION 1
%-----------------------------------
\section{Specifications of Some Helper Functions}
The functions we introduce in this section give a more detailed glimpse into
the lexing process than is possible
using $\lexer$ or $\blexer$ alone.
The first function we shall look at is $\retrieve$.
\subsection{$\textit{Retrieve}$}
Our bit-coded lexer ``retrieves'' the bitcodes using $\bmkeps$
after it has taken all the derivatives:
\begin{center}
\begin{tabular}{lcl}
& & $\ldots$\\
& & $\;\;\textit{if}\; \textit{bnullable}(a)$\\
& & $\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,a)\,r$\\
& & $\ldots$
\end{tabular}
\end{center}
\noindent
Recall that $\bmkeps$ takes the leftmost nullable branch of an alternative
and completes a star's iterations by attaching a $Z$ at the end of the bitcodes
extracted. It ``retrieves'' a sequence by visiting both children and stitching
the two bitcodes together using concatenation. After the entire tree structure of the regular
expression has been traversed in this manner, we obtain a bitcode encoding the
lexing result.
We know that this ``retrieved'' bitcode leads to the correct value after decoding,
namely $v_0$ in the bird's eye view of the injection-based lexing diagram.
Now assume we keep every other data structure in diagram \ref{InjFigure},
and only replace all the plain regular expressions by their annotated counterparts,
computed during a $\blexer$ run.
Then we obtain a diagram of the annotated regular expression derivatives and
their corresponding values, even though the values are never calculated in $\blexer$.
We have that $a_n$ contains all the lexing result information.
\vspace{20mm}
\begin{center}%\label{graph:injLexer}
\begin{tikzcd}[
every matrix/.append style = {name=p},
remember picture, overlay,
]
a_0 \arrow[r, "\backslash c_0"] \arrow[d] & a_1 \arrow[r, "\backslash c_1"] \arrow[d] & a_2 \arrow[r, dashed] \arrow[d] & a_n \arrow[d] \\
v_0 & v_1 \arrow[l,"inj_{r_0} c_0"] & v_2 \arrow[l, "inj_{r_1} c_1"] & v_n \arrow[l, dashed]
\end{tikzcd}
\begin{tikzpicture}[
remember picture, overlay,
E/.style = {ellipse, draw=blue, dashed,
inner xsep=4mm,inner ysep=-4mm, rotate=90, fit=#1}
]
\node[E = (p-1-1) (p-2-1)] {};
\node[E = (p-1-4) (p-2-4)] {};
\end{tikzpicture}
\end{center}
\vspace{20mm}
\noindent
On the other hand, $v_0$ also encodes the correct lexing result, as we have proven for $\lexer$.
Encircled in the diagram are the two pairs $v_0, a_0$ and $v_n, a_n$, which both
encode the correct lexical result. For the leftmost pair
the information is condensed in the value $v_0$, whereas for the rightmost pair
the information is concentrated in $a_n$.
The intermediate pairs $v_i, a_i$ must therefore also encode the complete
lexing information in some way. Hence we need a unified approach for extracting this lexing result
from a value $v_i$ and its annotated regular expression $a_i$.
Call this extraction function $f$; it must satisfy the following requirements:
\begin{itemize}
\item
$f \; a_i\;v_i = f \; a_n \; v_n = \decode \; (\bmkeps \; a_n) \; (\erase \; a_0)$
\item
$f \; a_i\;v_i = f \; a_0 \; v_0 = v_0 = \decode \;(\code \; v_0) \; (\erase \; a_0)$
\end{itemize}
\noindent
If we factor out the common part $\decode \; \_ \; (\erase \; a_0)$,
the core of such a function $f$ is something that produces the bitcodes
$\code \; v_0$.
It is not obvious how to define such a function, but Sulzmann and Lu came up with
one satisfying all the above requirements, named \emph{retrieve}:
\begin{center}
\begin{tabular}{lcr}
$\retrieve \; (_{bs} \ONE) \; \Empty$ & $\dn$ & $\textit{bs}$\\
$\retrieve \; (_{bs} \mathbf{c} ) \; (\Char(c))$ & $\dn$ & $ \textit{bs}$\\
$\retrieve \; (_{bs} a_1 \cdot a_2) \; (\Seq(v_1, v_2))$ & $\dn$ & $\textit{bs} \,@\, (\retrieve \; a_1\; v_1) \,@\, (\retrieve \; a_2 \; v_2)$\\
$\retrieve \; (_{bs} \sum (a :: \textit{as})) \; (\Left(v))$ & $\dn$ & $\textit{bs} \,@\, (\retrieve \; a \; v)$\\
$\retrieve \; (_{bs} \sum (a :: \textit{as})) \; (\Right(v))$ & $\dn$ & $\textit{bs} \,@\, (\retrieve \; (_{[]}\sum \textit{as}) \; v)$\\
$\retrieve \; (_{bs} a^*) \; (\Stars(v :: vs)) $ & $\dn$ & $\textit{bs} \,@\, (\retrieve \; a \; v) \,@\, (\retrieve \; (_{[]} a^*) \; (\Stars(vs)))$\\
$\retrieve \; (_{bs} a^*) \; (\Stars([])) $ & $\dn$ & $\textit{bs} \,@\, [Z]$
\end{tabular}
\end{center}
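\noindent
In the Scala sketch, $\retrieve$ is the following partial function on pairs
of an annotated regular expression and a value:
\begin{verbatim}
// retrieve extracts bitcodes from an annotated regex, guided by a value;
// it is only defined when the value fits the (erased) regex.
def retrieve(a: ARexp, v: Val): Bits = (a, v) match
  case (AONE(bs), Empty)                => bs
  case (ACHAR(bs, _), Chr(_))           => bs
  case (ASEQ(bs, a1, a2), Sequ(v1, v2)) =>
    bs ++ retrieve(a1, v1) ++ retrieve(a2, v2)
  case (AALTS(bs, a1 :: _), LeftV(v1))  => bs ++ retrieve(a1, v1)
  case (AALTS(bs, _ :: as), RightV(v1)) => bs ++ retrieve(AALTS(Nil, as), v1)
  case (ASTAR(bs, a1), Stars(v1 :: vs)) =>
    bs ++ retrieve(a1, v1) ++ retrieve(ASTAR(Nil, a1), Stars(vs))
  case (ASTAR(bs, _), Stars(Nil))       => bs ++ List(Z)
\end{verbatim}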
\noindent
As promised, $\retrieve$ collects the right bit-codes from the
final derivative $a_n$:
\begin{lemma}
$\bnullable \; a \implies \bmkeps \; a = \retrieve \; a \; (\mkeps \; (\erase \; a))$
\end{lemma}
\begin{proof}
By a routine induction on $a$.
\end{proof}
The design of $\retrieve$ enables the extraction of bitcodes
not only from $\bnullable$ (annotated) regular expressions,
but also from those that are not $\bnullable$.
For example, given the freshly internalised regular expression
and the lexing result value, $\retrieve$ yields the bitcodes
that look as if we had en$\code$d the value:
\begin{lemma}
$\vdash v : r \implies \retrieve \; (r)^\uparrow \; v = \code \; v$
\end{lemma}
\begin{proof}
By induction on $r$.
\end{proof}
\noindent
The following property is more interesting, as
it provides a ``bridge'' between $a_0, v_0$ and $a_n, v_n$ in the
lexing diagram.
If we take the derivative of an annotated regular expression,
we can $\retrieve$ the same bitcodes as before the derivative took place,
provided that we use the corresponding value:
\begin{lemma}\label{retrieveStepwise}
$\vdash v : (r\backslash c) \implies \retrieve \; (r \backslash c) \; v= \retrieve \; r \; (\inj \; r\; c\; v)$
\end{lemma}
\begin{proof}
By induction on $r$, where $v$ is allowed to be arbitrary.
The induction follows the cases of the function $\erase$.
\end{proof}
\noindent
$\retrieve$ is connected to $\blexer$ in the following way:
\begin{lemma}\label{blexer_retrieve}
$\blexer \; r \; s = \decode \; (\retrieve \; ((\internalise \; r)\backslash s) \; (\mkeps \; (r \backslash s))) \; r$
\end{lemma}
\noindent
$\retrieve$ allows free navigation in diagram \ref{InjFigure} on the annotated regular expressions of $\blexer$.
For plain regular expressions something similar is required as well.
\subsection{$\flex$}
Ausaf and Urban cleverly defined an auxiliary function called $\flex$ for $\lexer$,
defined as
\begin{center}
\begin{tabular}{lcr}
$\flex \; r \; f \; [] \; v$       & $=$ & $f\; v$\\
$\flex \; r \; f \; (c :: s) \; v$ & $=$ & $\flex \; (r\backslash c) \; (\lambda v. \, f \; (\inj \; r\; c\; v))\; s \; v$
\end{tabular}
\end{center}
which accumulates the characters that need to be injected back,
and performs the injections in a stack-like manner (the last derivative taken is injected first).
$\flex$ is connected to the $\lexer$:
\begin{lemma}
$\flex \; r \; \textit{id}\; s \; (\mkeps \; (r\backslash s)) = \lexer \; r \; s$
\end{lemma}
\begin{proof}
By reverse induction on $s$.
\end{proof}
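\noindent
A Scala sketch of $\flex$, assuming the plain derivative \texttt{der} and
injection \texttt{inj} functions from Chapter~\ref{Inj} (their bodies are
placeholders here, not real definitions):
\begin{verbatim}
// flex accumulates the pending injections as a function f and applies
// them once the string is exhausted. der and inj are the plain
// derivative and injection from the earlier chapter (assumed here).
def der(c: Char, r: Rexp): Rexp = ???
def inj(r: Rexp, c: Char, v: Val): Val = ???

def flex(r: Rexp, f: Val => Val, s: List[Char], v: Val): Val = s match
  case Nil     => f(v)
  case c :: cs => flex(der(c, r), v1 => f(inj(r, c, v1)), cs, v)
\end{verbatim}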
$\flex$ provides us with a bridge between the intermediate steps of $\lexer$.
What is even better about $\flex$ is that it allows us to
operate directly on the value $\mkeps \; (r\backslash s)$,
which is pivotal in the definitions of $\lexer$ and $\blexer$, but not visible as an argument.
When the value created by $\mkeps$ becomes available, one can
prove some stepwise properties of lexing nicely:
\begin{lemma}\label{flexStepwise}
$\textit{flex} \; r \; f \; (s@[c]) \; v= \flex \; r \; f\; s \; (\inj \; (r\backslash s) \; c \; v) $
\end{lemma}
\begin{proof}
By induction on the shape of $r\backslash s$.
\end{proof}
\noindent
With $\flex$ and $\retrieve$ in place, we can now connect $\lexer$ and $\blexer$.
\subsection{Correctness Proof of Bit-coded Algorithm}
\begin{lemma}\label{flex_retrieve}
$\flex \; r \; \textit{id}\; s\; v = \decode \; (\retrieve \; ((\internalise \; r)\backslash s )\; v) \; r$
\end{lemma}
\begin{proof}
By reverse induction on the string $s$, where
$v$ is allowed to be arbitrary.
The crucial step is to rewrite
\[
\retrieve \; (r \backslash (s@[c])) \; (\mkeps \; (r \backslash (s@[c])))
\]
as
\[
\retrieve \; (r \backslash s) \; (\inj \; (r \backslash s) \; c\; (\mkeps \; (r \backslash (s@[c])))).
\]
This enables us to equate
\[
\retrieve \; (r \backslash (s@[c])) \; (\mkeps \; (r \backslash (s@[c])))
\]
with
\[
\flex \; r \; \textit{id} \; s \; (\inj \; (r\backslash s) \; c\; (\mkeps \; (r\backslash (s@[c])))),
\]
which in turn can be rewritten as
\[
\flex \; r \; \textit{id} \; (s@[c]) \; (\mkeps \; (r\backslash (s@[c]))).
\]
\end{proof}
With the above lemma we can now link $\flex$ and $\blexer$.
%----------------------------------------------------------------------------------------
% SECTION correctness proof
%----------------------------------------------------------------------------------------
\section{Correctness of Bit-coded Algorithm (Without Simplification)}
We now give the proof of the correctness of the algorithm with bitcodes.
\begin{lemma}\label{flex_blexer}
$\textit{flex} \; r \; \textit{id} \; s \; (\mkeps \; (r \backslash s)) = \blexer \; r \; s$
\end{lemma}
\begin{proof}
By Lemmas \ref{flex_retrieve} and \ref{blexer_retrieve}.
\end{proof}
Finally, the correctness of $\blexer$ follows, as it outputs the same result as $\lexer$:
\begin{theorem}
$\blexer\; r \; s = \lexer \; r \; s$
\end{theorem}
\begin{proof}
A straightforward corollary of Lemma \ref{flex_blexer}.
\end{proof}
\noindent
To piece things together and spell out the exact correctness
of the bitcoded lexer
in terms of producing POSIX values,
we use the fact from the previous chapter that
\[
\textit{if} \; (r, s) \rightarrow v \; \textit{then} \; \lexer \; r \; s = \Some(v)
\]
to obtain this corollary:
\begin{corollary}\label{blexerPOSIX}
If $(r, s) \rightarrow v$ then $\blexer \; r \; s = \Some(v)$.
\end{corollary}
Our main reason for preferring the bit-coded algorithm over
the injection-based one is that it allows
much more aggressive simplifications.
We will elaborate on this in the next chapter.