# HG changeset patch
# User Chengsong
# Date 1660773555 -3600
# Node ID e0f0a81f907be87ed77adfc7c597a616b1227d35
# Parent  35df9cdd36ca00a49af21cd3fbf3db5304c26e9a
halfway chap3

diff -r 35df9cdd36ca -r e0f0a81f907b ChengsongTanPhdThesis/Chapters/Bitcoded1.tex
--- a/ChengsongTanPhdThesis/Chapters/Bitcoded1.tex	Wed Aug 17 01:09:13 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Bitcoded1.tex	Wed Aug 17 22:59:15 2022 +0100
@@ -15,7 +15,7 @@
 in their algorithm such as de-duplication not working properly and redundant
 fixpoint construction. Their algorithm is improved and verified with the help of
 formal proofs.
-\section{Bit-coded Algorithm}
+\section{The Motivation Behind Using Bitcodes}
 We first do a recap of what was going on 
 in the lexer algorithm in Chapter \ref{Inj},
 \begin{center}
@@ -35,7 +35,7 @@
 and characters: $r_0$, $c_0$, $r_1$, $c_1$, etc.
 Each descent into deeper recursive calls in $\lexer$
 causes a new pair of $r_i, c_i$ to be pushed to the call stack.
-\begin{figure}
+\begin{figure}[H]
 \begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
 %\draw (-6,-6) grid (6,6);
 \node  [ circle ] (r) at (-6, 5) {$r$};
@@ -64,7 +64,7 @@
 
 
 
-\begin{figure}
+\begin{figure}[H]
 \begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
 %\draw (-6,-6) grid (6,6);
 \node  [ circle ] (r) at (-6, 5) {$r$};
@@ -101,7 +101,7 @@
 As the number of derivative steps increase,
 the stack would increase:
 
-\begin{figure}
+\begin{figure}[H]
 \begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
 %\draw (-6,-6) grid (6,6);
 \node  [ circle ] (r) at (-6, 5) {$r$};
@@ -145,11 +145,9 @@
 \end{tikzpicture}
 \caption{More Derivatives Taken}
 \end{figure}
-\noindent
-After all derivatives have been taken, the stack grows to a maximum size
-and the pair of regular expressions and characters $r_i, c_{i+1}$ 
-are then popped out and used in the injection phase:
+
 
+\begin{figure}[H]
 \begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
 %\draw (-6,-6) grid (6,6);
 \node  [radius = 0.5, circle, draw] (r) at (-6, 5) {$r$};
@@ -174,18 +172,8 @@
 
 \node at ($(ldots)!.4!(rn)$) {\ldots};
 
-\node [minimum size = 0.5, circle, draw] (vn) at (6, -5) {$v_n$};
-
-\node [] (ldots2) at (3.5, -5) {};
-
-\node  [minimum size = 0.5, circle, draw] (v2) at (2, -5) {$v_2$};
-
-\node at ($(ldots2)!.4!(v2)$) {\ldots};
 
 
-\node [circle, draw] (v1) at (-2, -5) {$v_1$};
-
-\node  [radius = 0.5, circle, draw] (v) at (-6, -5) {$v$};
 
 \node [minimum size = 6cm, rectangle, draw] (stack) at (0, 0) {\Huge Stack};
 
@@ -197,15 +185,6 @@
         edge [] node {} (r2);
 \path   (r2)
 	edge [] node {} (ldots);
-\path   (rn)
-	edge [] node {$\mkeps$} (vn);
-\path   (vn) 
-	edge [] node {} (ldots2);
-\path   (v2)
-	edge [] node {$\inj \; r_1 \; c_2\;v_2$} (v1);
-
-\path   (v1)
-	edge [] node {$\inj \; r \; c_1 \; v_1$} (v);
 \path   (r)
 	edge [dashed, bend right] node {} (stack);
 
@@ -228,133 +207,634 @@
 \path (rn)
 	edge [dashed, bend left] node {} (stack);
 \end{tikzpicture}
-%\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
-%%\draw (-6,-6) grid (6,6);
-%\node  [radius = 0.5, circle, draw] (r) at (-6, 5) {$r$};
+\caption{Before Injection Phase Starts}
+\end{figure}
+
+
+\noindent
+After all derivatives have been taken, the stack grows to a maximum size
+and the pair of regular expressions and characters $r_i, c_{i+1}$ 
+are then popped out and used in the injection phase.
+
+
+
+\begin{figure}[H]
+\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
+%\draw (-6,-6) grid (6,6);
+\node  [radius = 0.5, circle, ] (r) at (-6, 5) {$r$};
+
+%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
+\node  [circle, minimum size = 0.1, ] (c1) at (-4, 5.4) {$c_1$};
 %
-%%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
-%\node  [circle, minimum size = 0.1] (c1) at (-4, 5.4) {$c_1$};
-%%
-%%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
-%\node  [minimum size = 0.5, circle, draw] (r1) at (-2, 5) {$r_1$};
-%%
-%%\node (0, 6)  (c2) circle [radius = 0.3] {$c_2$};
-%\node [circle, minimum size = 0.1] (c2) at (0, 5.4) {$c_2$};
-%%
-%%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
-%\node [circle, draw] (r2) at (2, 5) {$r_2$};
-%%
-%%
-%\node [] (ldots) at (4.5, 5) {};
-%%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
+%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
+\node  [minimum size = 0.5, circle, ] (r1) at (-2, 5) {$r_1$};
 %
-%\node [minimum size = 0.5, circle, draw] (rn) at (6, 5) {$r_n$};
-%
-%\node at ($(ldots)!.4!(rn)$) {\ldots};
-%
-%\node [minimum size = 0.5, circle, draw] (vn) at (6, -5) {$v_n$};
+%\node (0, 6)  (c2) circle [radius = 0.3] {$c_2$};
+\node [circle, minimum size = 0.1, ] (c2) at (0, 5.4) {$c_2$};
 %
-%\node [] (ldots2) at (3.5, -5) {};
-%
-%\node  [minimum size = 0.5, circle, draw] (v2) at (2, -5) {$v_2$};
-%
-%\node at ($(ldots2)!.4!(v2)$) {\ldots};
+%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
+\node [circle, ] (r2) at (2, 5) {$r_2$};
 %
 %
-%\node [circle, draw] (v1) at (-2, -5) {$v_1$};
-%
-%\node  [radius = 0.5, circle, draw] (v) at (-6, -5) {$v$};
-%
-%\node [minimum size = 6cm, rectangle, draw] (stack) at (0, 0) {\Huge Stack};
+\node [] (ldots) at (4.5, 5) {};
+%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
+
+\node [minimum size = 0.5, circle, ] (rn) at (6, 5) {$r_n$};
+
+\node at ($(ldots)!.4!(rn)$) {\ldots};
+
+\node [minimum size = 0.5, circle, ] (vn) at (6, -5) {$v_n$};
+
+\node [] (ldots2) at (3.5, -5) {};
+
+\node  [minimum size = 0.5, circle, ] (v2) at (2, -5) {$v_2$};
+
+\node at ($(ldots2)!.4!(v2)$) {\ldots};
+
+
+\node [circle, ] (v1) at (-2, -5) {$v_1$};
+
+\node  [radius = 0.5, circle, ] (v) at (-6, -5) {$v$};
+
+\node [minimum size = 6cm, rectangle, draw] (stack) at (0, 0) {\Huge Stack};
+
+\path
+	(r)
+        edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
+\path
+	(r1)
+        edge [] node {} (r2);
+\path   (r2)
+	edge [] node {} (ldots);
+\path   (rn)
+	edge [] node {$\mkeps$} (vn);
+\path   (vn) 
+	edge [] node {} (ldots2);
+\path   (v2)
+	edge [] node {$\inj \; r_1 \; c_2\;v_2$} (v1);
+
+\path   (v1)
+	edge [] node {$\inj \; r \; c_1 \; v_1$} (v);
+
+\path (stack)
+	edge [dashed] node {} (-4.2, -5.2);
+\path (stack)
+	edge [dashed] node {} (-4, -5.2);
+\path (stack)
+	edge [dashed] node {} (-0.1, -5.2);
+\path (stack)
+	edge [dashed] node {} (0.2, -5.26);
+\path (stack)
+	edge [dashed] node {} (3.2, -5);
+\path (stack)
+	edge [dashed] node {} (2.7, -5);
+\path (stack)
+	edge [dashed] node {} (3.7, -5);
+\end{tikzpicture}
+\caption{Stored $r_i, c_{i+1}$ Used by $\inj$}
+\end{figure}
+\noindent
+Storing all $r_i, c_{i+1}$ pairs recursively
+allows the algorithm to work in an elegant way, at the expense of 
+storing quite a bit of verbose information.
+The stack seems to grow at least quadratically fast with respect
+to the input (not taking into account the size bloat of $r_i$),
+which can be inefficient and prone to stack overflow.
+\section{Bitcoded Algorithm}
+To address this,
+Sulzmann and Lu chose to  define a new datatype 
+called \emph{annotated regular expression},
+which condenses all the partial lexing information
+(that was originally stored in $r_i, c_{i+1}$ pairs)
+into bitcodes.
+Annotated regular expressions 
+are defined as the following 
+Isabelle datatype \footnote{ We use subscript notation to indicate
+	that the bitcodes are auxiliary information that do not
+interfere with the structure of the regular expressions }:
+\begin{center}
+\begin{tabular}{lcl}
+  $\textit{a}$ & $::=$  & $\ZERO$\\
+                  & $\mid$ & $_{bs}\ONE$\\
+                  & $\mid$ & $_{bs}{\bf c}$\\
+                  & $\mid$ & $_{bs}\sum\,as$\\
+                  & $\mid$ & $_{bs}a_1\cdot a_2$\\
+                  & $\mid$ & $_{bs}a^*$
+\end{tabular}    
+\end{center}  
+\noindent
+where $bs$ stands for bit-codes, $a$  for $\mathbf{a}$nnotated regular
+expressions and $as$ for lists of annotated regular expressions.
+The alternative constructor, written, $\sum$, has been generalised to 
+accept a list of annotated regular expressions rather than just two.
+Why is it generalised? This is because when we open up nested 
+alternatives, there could be more than two elements at the same level
+after de-duplication, which can no longer be stored in a binary
+constructor.
+Bits and bitcodes (lists of bits) are defined as:
+\begin{center}
+		$b ::=   S \mid  Z \qquad
+bs ::= [] \mid b::bs    
+$
+\end{center}
+\noindent
+We use $S$ and $Z$ rather than $1$ and $0$ is to avoid
+confusion with the regular expressions $\ZERO$ and $\ONE$.
+The idea is to use the attached bitcodes
+to indicate which choice was made at a certain point
+during lexing.
+For example,
+$(_{Z}a+_{S}b) \backslash a$ gives us
+$_{Z}\ONE + \ZERO$, this $Z$ bitcode will
+later be used to decode that a left branch was
+selected at the time when the part $a+b$ is being taken
+derivative of.
+\subsection{A Bird's Eye View of the Bit-coded Lexer}
+Before we give out the rest of the functions and definitions 
+related to our
+$\blexer$ (\emph{b}-itcoded lexer), we first provide a high-level
+view of the algorithm, so the reader does not get lost in
+the details.
+\begin{figure}[H]
+\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,thick] 
+%\draw (-6,-6) grid (6,6);
+
+	\node [circle, draw] (r0) at (-6, 2) {$r$};
+
+	\node  [radius = 0.5, circle, draw] (r) at (-6, 5) {$_{bs}a$};
+	\path (r0)
+		edge [] node {$\internalise$} (r);
+%\node (-4, 6) (c1) circle [radius = 0.3] {$c_1$};
+\node  [circle, minimum size = 0.1, draw] (c1) at (-4, 5.4) {$c_1$};
 %
-%\path
-%	(r)
-%        edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
-%\path
-%	(r1)
-%        edge [] node {} (r2);
-%\path   (r2)
-%	edge [] node {} (ldots);
-%\path   (rn)
-%	edge [] node {$\mkeps$} (vn);
-%\path   (vn) 
-%	edge [] node {} (ldots2);
-%\path   (v2)
-%	edge [] node {} (v1);
+%\node (-2, 5) (r1) circle [radius = 0.5] {$r_1$};
+\node  [minimum size = 1.0cm, circle, draw] (r1) at (-2, 5) {$_{bs_1}a_1$};
+%
+%\node (0, 6)  (c2) circle [radius = 0.3] {$c_2$};
+\node [circle, minimum size = 0.1, draw] (c2) at (0, 5.4) {$c_2$};
+%
+%\node (2, 5) (r2) circle [radius = 0.5] {$r_2$};
+\node [circle, draw, minimum size = 1.4cm] (r2) at (2, 5) {$_{bs_2}a_2$};
+%
+%
+\node [] (ldots) at (4.5, 5) {};
+%\node (6, 5) (rn) circle [radius = 0.5] {$r_n$};
+
+\node [minimum size = 2.2cm, circle, draw] (rn) at (6, 5) {$_{bs_n}a_n$};
+
+\node at ($(ldots)!.1!(rn)$) {\ldots};
+
+\node [minimum size = 0.5, circle, ] (v) at (6, 2) {$v$};
+
+%\node [] (v2) at (4, -5) {};
+%
+%\node [draw, cross out] (ldots2) at (5, -5) {};
 %
-%\path   (v1)
-%	edge [] node {} (v);
-%\path   (r)
-%	edge [] node {saved} (stack);
-%
-%\path   (r1)
-%	edge [] node {saved} (stack);
-%\end{tikzpicture}
-\noindent
-The information stored in characters and regular expressions
-make the algorithm work in an elegant way, at the expense of being
-storing quite a bit of verbose information.
+%\node at ($(ldots2)!.4!(v2)$) {\ldots};
+
+
+\node [align = center] (decode) at (6.6, 3.2) {$\bmkeps$\\$\decode$};
+
+\path (c1)
+	edge [dashed, bend left] node {} (r0);
+
+\path (c2)
+	edge [dashed, bend left] node {} (r0);
+
+\path (r1)
+	edge [dashed, bend right] node {} (r2);
+
+
+\path
+	(r)
+        edge [dashed, bend right] node[left] {} (r1);
+
+\path
+	(r)
+        edge [->, >=stealth',shorten >=1pt] node[left] {} (r1);
+
+\path
+	(r1)
+        edge [] node {} (r2);
+\path   (r2)
+	edge [] node {} (ldots);
+\path   (rn)
+	edge [] node {} (v);
+
+\path	(r0)
+	edge [dashed, bend right] node {} (decode);
+%\path	(v)
+	%edge [] node {} (ldots2);
 
 
 
-The lexer algorithm in Chapter \ref{Inj},
-stores information of previous lexing steps
-on a stack, in the form of regular expressions
-and characters: $r_0$, $c_0$, $r_1$, $c_1$, etc.
-The red part represents what we already know during the first
-derivative phase,
-and the blue part represents the unknown part of input.
-\begin{ceqn}
-\begin{equation}%\label{graph:injLexer}
-	\begin{tikzcd}[ampersand replacement=\&, execute at end picture={
-			\begin{scope}[on background layer]
-				\node[rectangle, fill={red!30},
-					pattern=north east lines, pattern color=red,
-					fit={(-3,-1) (-3, 1) (1, -1) 
-						(1, 1)}
-				     ] 
-				     {}; ,
-				\node[rectangle, fill={blue!20},
-					pattern=north east lines, pattern color=blue,
-					fit= {(1, -1) (1, 1) (3, -1) (3, 1)}
-					]
-					{};
-				\end{scope}}
-					]
-r_0 \arrow[r, "\backslash c_0"]  \arrow[d] \& r_1 \arrow[r, "\backslash c_1"] \arrow[d] \& r_2 \arrow[r, dashed] \arrow[d] \& r_n \arrow[d, "mkeps" description] \\
-v_0           \& v_1 \arrow[l,"inj_{r_0} c_0"]                \& v_2 \arrow[l, "inj_{r_1} c_1"]              \& v_n \arrow[l, dashed]         \\
-\end{tikzcd}
-\end{equation}
-\end{ceqn}
+\end{tikzpicture}
+\caption{Bird's Eye View of $\blexer$}
+\end{figure}
+\noindent
+The plain regular expressions
+are first ``lifted'' to an annotated regular expression,
+with the function called \emph{internalise}.
+Then the annotated regular expression $_{bs}a$ will
+go through successive derivatives with respect 
+to the input stream of characters $c_1, c_2$ etc.
+Each time a derivative is taken, the bitcodes
+are moved around, discarded or augmented together 
+with the regular expression they are attached to.
+After all input has been consumed, the 
+bitcodes are collected by $\bmkeps$,
+which traverses the nullable part of the regular expression
+and collect the bitcodes along the way.
+The collected bitcodes are then $\decode$d with the guidance
+of the input regular expression $r$.
+The most notable improvements of $\blexer$
+over $\lexer$ are 
+\begin{itemize}
+	\item
+
+		An absence of the second injection phase.
+	\item
+		One need not store each pair of the 
+		intermediate regular expressions $_{bs_i}a_i$ and  $c_{i+1}$. 
+		The previous annotated regular expression $_{bs_i}a_i$'s information is passed
+		on without loss to its successor $_{bs_{i+1}}a_{i+1}$,
+		and $c_{i+1}$'s already contained in the strings in $L\;r$ \footnote{
+		which can be easily recovered if we decode in the correct order}.
+	\item
+		The simplification works much better--one can maintain correctness
+		while applying quite strong simplifications, as we shall wee.
+\end{itemize}
+\noindent
+In the next section we will 
+give the operations needed in $\blexer$,
+with some remarks on the idea behind their definitions.
+\subsection{Operations in $\textit{Blexer}$}
+The first operation we define related to bit-coded regular expressions
+is how we move bits to the inside of regular expressions.
+Called $\fuse$, this operation attaches bit-codes 
+to the front of an annotated regular expression:
+\begin{center}
+\begin{tabular}{lcl}
+  $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\
+  $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ &
+     $_{bs @ bs'}\ONE$\\
+  $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ &
+     $_{bs@bs'}{\bf c}$\\
+  $\textit{fuse}\;bs\,_{bs'}\sum\textit{as}$ & $\dn$ &
+     $_{bs@bs'}\sum\textit{as}$\\
+  $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ &
+     $_{bs@bs'}a_1 \cdot a_2$\\
+  $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ &
+     $_{bs @ bs'}a^*$
+\end{tabular}    
+\end{center} 
+
+\noindent
+With \emph{fuse} we are able to define the $\internalise$ function
+that translates a ``standard'' regular expression into an
+annotated regular expression.
+This function will be applied before we start
+with the derivative phase of the algorithm.
+
+\begin{center}
+\begin{tabular}{lcl}
+  $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\
+  $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\
+  $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\
+  $(r_1 + r_2)^\uparrow$ & $\dn$ &
+  $_{[]}\sum[\textit{fuse}\,[Z]\,r_1^\uparrow,\,
+  \textit{fuse}\,[S]\,r_2^\uparrow]$\\
+  $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
+         $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\
+  $(r^*)^\uparrow$ & $\dn$ &
+         $_{[]}(r^\uparrow)^*$\\
+\end{tabular}    
+\end{center}    
+\noindent
+We use an up arrow with postfix notation
+to denote this operation
+for convenience. 
+The opposite of $\textit{internalise}$ is
+$\erase$, where all the bit-codes are removed,
+and the alternative operator $\sum$ for annotated
+regular expressions is transformed to the binary version 
+in plain regular expressions.
+\begin{center}
+	\begin{tabular}{lcl}
+		$\ZERO_\downarrow$ & $\dn$ & $\ZERO$\\
+		$( _{bs}\ONE )_\downarrow$ & $\dn$ & $\ONE$\\
+		$( _{bs}\mathbf{c} )_\downarrow$ & $\dn$ & $\mathbf{c}$\\
+		$( _{bs} a_1 \cdot a_2 )_\downarrow$ & $\dn$ & 
+		$ (a_1) _\downarrow \cdot  (a_2) _\downarrow$\\
+		$( _{bs} [])_\downarrow $ & $\dn$ & $\ZERO $\\
+		$( _{bs} [a]  )_\downarrow$ & $\dn$ & $a_\downarrow$\\
+		$_{bs} \sum [a_1, \; a_2]$ & $\dn$ & $ (a_1) _\downarrow + ( a_2 ) _\downarrow $\\
+		$(_{bs} \sum (a :: as))_\downarrow$ & $\dn$ & $ a_\downarrow + \; (_{[]} \sum as)_\downarrow$\\
+		$( _{bs} a^* )_\downarrow$ & $\dn$ & $(a_\downarrow)^*$
+	\end{tabular}
+\end{center}
+\noindent
+We also abbreviate the $\erase\; a$ operation
+as $a_\downarrow$, for conciseness.
+
+Testing whether an annotated regular expression
+contains the empty string in its lauguage requires
+a dedicated function $\bnullable$.
+$\bnullable$ simply calls $\erase$ before $\nullable$.
+\begin{definition}
+		$\bnullable \; a \dn  \nullable \; (a_\downarrow)$
+\end{definition}
+The function for collecting 
+bitcodes from a 
+(b)nullable regular expression is called $\bmkeps$.
+$\bmkeps$ is a variation of the $\textit{mkeps}$ function,
+which follows the path $\mkeps$ takes to traverse the
+$\nullable$ branches when visiting a regular expression,
+but gives back bitcodes instead of a value.
+\begin{center}
+\begin{tabular}{lcl}
+  $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\
+  $\textit{bmkeps}\,(_{bs}\sum a::\textit{as})$ & $\dn$ &
+     $\textit{if}\;\textit{bnullable}\,a$\\
+  & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
+  & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{[]}\sum \textit{as})$\\
+  $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ &
+     $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
+  $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ &
+     $bs \,@\, [Z]$
+\end{tabular}    
+\end{center}    
+\noindent
+$\bmkeps$ retrieves the value $v$'s
+information in the format
+of bitcodes, by travelling along the
+path of the regular expression that corresponds to a POSIX match,
+collecting all the bitcodes, and attaching $S$ to indicate the end of star
+iterations. \\
+The bitcodes extracted by $\bmkeps$ need to be 
+$\decode$d (with the guidance of a plain regular expression):
+%\begin{definition}[Bitdecoding of Values]\mbox{}
+\begin{center}
+\begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+  $\textit{decode}'\,bs\,(\ONE)$ & $\dn$ & $(\Empty, bs)$\\
+  $\textit{decode}'\,bs\,(c)$ & $\dn$ & $(\Char\,c, bs)$\\
+  $\textit{decode}'\,(Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
+     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
+       (\Left\,v, bs_1)$\\
+  $\textit{decode}'\,(S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
+     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
+       (\Right\,v, bs_1)$\\                           
+  $\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
+        $\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
+  & &   $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$\\
+  & &   \hspace{35mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
+  $\textit{decode}'\,(Z\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
+  $\textit{decode}'\,(S\!::\!bs)\,(r^*)$ & $\dn$ & 
+         $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
+  & &   $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$\\
+  & &   \hspace{35mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\bigskip\\
+  
+  $\textit{decode}\,bs\,r$ & $\dn$ &
+     $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
+  & & $\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;
+       \textit{else}\;\textit{None}$                       
+\end{tabular}    
+\end{center} 
 \noindent
-The red area expands as we move towards $r_n$, 
-indicating more is known about the lexing result.
-Despite knowing this partial lexing information during
-the forward derivative phase, we choose to store them
-all the way until $r_n$ is reached.
-Then we reconstruct the value character by character
-values at a later stage, using information in a Last-In-First-Out
-manner. Although the algorithm is elegant and natural,
-it can be inefficient and prone to stack overflow.\\
-It turns out we can store lexing
-information on the fly, while still using regular expression 
-derivatives.
-If we remove the individual 
-lexing steps, and use red and blue areas as before
-to indicate consumed (seen) input and constructed
-partial value (before recovering the rest of the stack),
-one could see that the seen part's lexical information
-is stored in the form of a regular expression.
+The function $\decode'$ returns a pair consisting of 
+a partially decoded value and some leftover bit list that cannot
+be decide yet.
+The function $\decode'$ succeeds if the left-over 
+bit-sequence is empty.
+$\decode$ is terminating as $\decode'$ is terminating.
+$\decode'$ is terminating 
+because at least one of $\decode'$'s parameters will go down in terms
+of size.\\
+The reverse operation of $\decode$ is $\code$.
+$\textit{code}$ encodes a value into a bitcode by converting
+$\Left$ into $Z$, $\Right$ into $S$, and marks the start of any non-empty
+star iteration by $S$. The border where a star iteration
+terminates is marked by $Z$. 
+This coding is lossy, as it throws away the information about
+characters, and does not encode the ``boundary'' between two
+sequence values. Moreover, with only the bitcode we cannot even tell
+whether the $S$s and $Z$s are for $\Left/\Right$ or $\Stars$.  
+\begin{center}
+\begin{tabular}{lcl}
+  $\textit{code}(\Empty)$ & $\dn$ & $[]$\\
+  $\textit{code}(\Char\,c)$ & $\dn$ & $[]$\\
+  $\textit{code}(\Left\,v)$ & $\dn$ & $Z :: code(v)$\\
+  $\textit{code}(\Right\,v)$ & $\dn$ & $S :: code(v)$\\
+  $\textit{code}(\Seq\,v_1\,v_2)$ & $\dn$ & $code(v_1) \,@\, code(v_2)$\\
+  $\textit{code}(\Stars\,[])$ & $\dn$ & $[Z]$\\
+  $\textit{code}(\Stars\,(v\!::\!vs))$ & $\dn$ & $S :: code(v) \;@\;
+                                                 code(\Stars\,vs)$
+\end{tabular}    
+\end{center} 
+\noindent
+Assume we have a value $v$ and regular expression $r$
+with $\vdash v:r$,
+then we have the property that $\decode$ and $\code$ are
+reverse operations of one another:
+\begin{lemma}
+\[If \vdash v : r \; then \;\decode \; (\code \; v) \; r = \textit{Some}(v) \]
+\end{lemma}
+\begin{proof}
+By proving a more general version of the lemma, on $\decode'$:
+\[\vdash v : r \implies \decode' \; ((\code \; v) @ ds) \; r = (v, ds) \]
+Then setting $ds$ to be $[]$ and unfolding $\decode$ definition,
+we obtain the property.
+\end{proof}
+\noindent
+Now we give out the central part of this lexing algorithm,
+the $\bder$ function (stands for \emph{b}itcoded-derivative):
+\begin{center}
+  \begin{tabular}{@{}lcl@{}}
+  $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\  
+  $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\  
+  $(_{bs}{\bf d})\,\backslash c$ & $\dn$ &
+        $\textit{if}\;c=d\; \;\textit{then}\;
+         _{bs}\ONE\;\textit{else}\;\ZERO$\\  
+  $(_{bs}\sum \;\textit{as})\,\backslash c$ & $\dn$ &
+  $_{bs}\sum\;(\textit{map} \; (\_\backslash c) \; as )$\\
+  $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
+     $\textit{if}\;\textit{bnullable}\,a_1$\\
+					       & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
+					       & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
+  & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\
+  $(_{bs}a^*)\,\backslash c$ & $\dn$ &
+      $_{bs}(\textit{fuse}\, [Z] \; r\,\backslash c)\cdot
+       (_{[]}r^*))$
+\end{tabular}    
+\end{center}    
+\noindent
+For most time we use the infix notation $(\_\backslash\_)$ 
+to mean $\bder$ for brevity when
+there is no danger of confusion with derivatives on plain regular expressions.
+For example, we write $( _{[]}r^* ) \backslash c$ instead of $\bder \;c \; _{[]}r^*$,
+as the bitcodes at the front of $r^*$ indicates that it is 
+a bit-coded regular expression, not a plain one.\\
+$\bder$ tells us how regular expressions can be recursively traversed,
+where the bitcodes are augmented and carried around 
+when a derivative is taken.
+We give the intuition behind some of the more involved cases in 
+$\bder$. \\
+For example,
+in the \emph{star} case,
+a derivative on $_{bs}a^*$ means 
+that one more star iteratoin needs to be taken.
+we need to unfold it into a sequence,
+and attach an additional bit $Z$ to the front of $r \backslash c$
+as a record to indicate one new star iteration is unfolded.
+
+\noindent
+\begin{center}
+  \begin{tabular}{@{}lcl@{}}
+  $(_{bs}a^*)\,\backslash c$ & $\dn$ &
+  $_{bs}(\underbrace{\textit{fuse}\, [Z] \; a\,\backslash c}_{\text{One more iteration}})\cdot
+       (_{[]}a^*))$
+\end{tabular}    
+\end{center}   
+
+\noindent
+This information will be recovered later by the $\decode$ function.
+The intuition is that the bit $Z$ will be decoded at the right location,
+because we accumulate bits from left to right (a rigorous proof will be given
+later).
+
+%\begin{tikzpicture}[ > = stealth, % arrow head style
+%        shorten > = 1pt, % don't touch arrow head to node
+%        semithick % line style
+%    ]
+%
+%    \tikzstyle{every state}=[
+%        draw = black,
+%        thin,
+%        fill = cyan!29,
+%        minimum size = 7mm
+%    ]
+%    \begin{scope}[node distance=1cm and 0cm, every node/.style=state]
+%		\node (k) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
+%        {$bs$
+%         \nodepart{two} $a^*$ };
+%	 \node (l) [below =of k, rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
+%        { $bs$ + [Z]
+%         \nodepart{two}  $(a\backslash c )\cdot a^*$ };
+%    \end{scope}
+%    \path[->] 
+%	      (k) edge (l);
+%\end{tikzpicture}
+%
+%Pictorially the process looks like below.
+%Like before, the red region denotes
+%previous lexing information (stored as bitcodes in $bs$).
+
+%\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
+%	\begin{scope}[node distance=1cm]   
+%		\node (a) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
+%        {$bs$
+%         \nodepart{two} $a^*$ };
+%	 \node (b) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
+%        { $bs$ + [Z]
+%         \nodepart{two}  $(a\backslash c )\cdot a^*$ };
+%%\caption{term 1 \ref{term:1}'s matching configuration}
+% 	\end{scope}
+%\end{tikzpicture}
+
+\noindent
+Another place the $\bder$ function differs
+from derivatives on plain regular expressions
+is the sequence case:
+\begin{center}
+  \begin{tabular}{@{}lcl@{}}
+
+  $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
+     $\textit{if}\;\textit{bnullable}\,a_1$\\
+					       & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
+					       & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
+  & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$
+\end{tabular}    
+\end{center}    
+\noindent
+The difference is that (when $a_1$ is $\bnullable$)
+we use $\bmkeps$ to store the lexing information
+in $a_1$ before collapsing 
+it (as it has been fully matched by string prior to $c$),
+and attach the collected bit-codes to the front of $a_2$
+before throwing away $a_1$. We assume that $\bmkeps$ 
+correctly extracts the bitcode for how $a_1$
+matches the string prior to $c$ (more on this later).
+The bitsequence $\textit{bs}$ which was initially 
+attached to the first element of the sequence
+$a_1 \cdot a_2$, has now been 
+elevated to the top level of the $\sum$ constructor. 
+This is because this piece of information will be needed 
+whichever way the sequence is matched,
+regardless of whether $c$ belongs to $a_1$ or $a_2$.
+
+In the injection-based lexer, $r_1$ is immediately thrown away in 
+subsequent derivatives on the right branch (when $r_1$ is $\nullable$),
+depite $r_1$ potentially storing information of previous matches:
+\begin{center}
+	$(r_1 \cdot r_2 )\backslash c = (r_1 \backslash c) \cdot r_2 + r_2 \backslash c$
+\end{center}
+\noindent
+this loss of information makes it necessary
+to store on stack all the regular expressions'
+``snapshots'' before they were
+taken derivative of.
+So that the related information will be available 
+once the child recursive 
+call finishes.\\
+The rest of the clauses of $\bder$ is rather similar to
+$\der$. \\
+Generalising the derivative operation with bitcodes to strings, we have 
+\begin{center}
+	\begin{tabular}{@{}lcl@{}}
+		$a\backslash_s [] $ & $\dn$ & $a$\\
+		$a\backslash (c :: s) $ & $\dn$ & $(a \backslash c) \backslash_s s$
+	\end{tabular}
+\end{center}
+\noindent
+As we did earlier, we omit the $s$ subscript at $\backslash_s$ when there is no danger
+of confusion.
+\subsection{Putting Things Together}
+Putting these operations altogether, we obtain a lexer with bit-coded regular expressions
+as its internal data structures, which we call $\blexer$:
+
+\begin{center}
+\begin{tabular}{lcl}
+  $\textit{blexer}\;r\,s$ & $\dn$ &
+      $\textit{let}\;a = (r^\uparrow)\backslash s\;\textit{in}$\\                
+  & & $\;\;\textit{if}\; \textit{bnullable}(a)$\\
+  & & $\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,a)\,r$\\
+  & & $\;\;\textit{else}\;\textit{None}$
+\end{tabular}
+\end{center}
+
+\noindent
+Ausaf and Urban formally proved the correctness of the $\blexer$, namely
+\begin{property}
+$\blexer \;r \; s = \lexer \; r \; s$.
+\end{property}
+This was claimed but not formalised in Sulzmann and Lu's work.
+We introduce the proof later, after we give all
+the needed auxiliary functions and definitions.
+But before this we shall first walk the reader 
+through a concrete example of our $\blexer$ calculating the right 
+lexical information through bit-coded regular expressions.\\
 Consider the regular expression $(aa)^* \cdot bc$ matching the string $aabc$
-and assume we have just read the two characters $aa$:
+and assume we have just read the first character $a$:
 \begin{center}
 \begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
     \node [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
-	    {Partial lexing info: $\ONE \cdot a \cdot (aa)^* \cdot bc$ etc.
-         \nodepart{two} $\Seq(\ldots, \Seq(\Char(b), \Char(c)))$};
+	    {$\ONE \cdot a \cdot (aa)^* \cdot bc$ 
+	    \nodepart{two} $[] \iff \Seq \; (\Stars \; [\Seq\; a \; ??, \;]), ??$};
 \end{tikzpicture}
 \end{center}
 \noindent
+We use the red area for (annotated) regular expressions and the blue 
+area the (partial) bitcodes and (partial) values.
 In the injection-based lexing algorithm, we ``neglect" the red area
 by putting all the characters we have consumed and
 intermediate regular expressions on the stack when 
@@ -430,401 +910,6 @@
 \end{center}
 
 \noindent
-Using $S$ and $Z$ rather than $1$ and $0$ is to avoid
-confusion with the regular expressions $\ZERO$ and $\ONE$.
-Bitcodes (or
-bit-lists) can be used to encode values (or potentially incomplete values) in a
-compact form. This can be straightforwardly seen in the following
-coding function from values to bitcodes: 
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{code}(\Empty)$ & $\dn$ & $[]$\\
-  $\textit{code}(\Char\,c)$ & $\dn$ & $[]$\\
-  $\textit{code}(\Left\,v)$ & $\dn$ & $Z :: code(v)$\\
-  $\textit{code}(\Right\,v)$ & $\dn$ & $S :: code(v)$\\
-  $\textit{code}(\Seq\,v_1\,v_2)$ & $\dn$ & $code(v_1) \,@\, code(v_2)$\\
-  $\textit{code}(\Stars\,[])$ & $\dn$ & $[Z]$\\
-  $\textit{code}(\Stars\,(v\!::\!vs))$ & $\dn$ & $S :: code(v) \;@\;
-                                                 code(\Stars\,vs)$
-\end{tabular}    
-\end{center} 
-\noindent
-Here $\textit{code}$ encodes a value into a bit-code by converting
-$\Left$ into $Z$, $\Right$ into $S$, and marks the start of any non-empty
-star iteration by $S$. The border where a local star terminates
-is marked by $Z$. 
-This coding is lossy, as it throws away the information about
-characters, and also does not encode the ``boundary'' between two
-sequence values. Moreover, with only the bitcode we cannot even tell
-whether the $S$s and $Z$s are for $\Left/\Right$ or $\Stars$. The
-reason for choosing this compact way of storing information is that the
-relatively small size of bits can be easily manipulated and ``moved
-around" in a regular expression. 
-
-Because of the lossiness, the process of decoding a bitlist requires additionally 
-a regular expression. The function $\decode$ is defined as:
-
-%\begin{definition}[Bitdecoding of Values]\mbox{}
-\begin{center}
-\begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
-  $\textit{decode}'\,bs\,(\ONE)$ & $\dn$ & $(\Empty, bs)$\\
-  $\textit{decode}'\,bs\,(c)$ & $\dn$ & $(\Char\,c, bs)$\\
-  $\textit{decode}'\,(Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
-     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
-       (\Left\,v, bs_1)$\\
-  $\textit{decode}'\,(S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
-     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
-       (\Right\,v, bs_1)$\\                           
-  $\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
-        $\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
-  & &   $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$\\
-  & &   \hspace{35mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
-  $\textit{decode}'\,(Z\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
-  $\textit{decode}'\,(S\!::\!bs)\,(r^*)$ & $\dn$ & 
-         $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
-  & &   $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$\\
-  & &   \hspace{35mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\bigskip\\
-  
-  $\textit{decode}\,bs\,r$ & $\dn$ &
-     $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
-  & & $\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;
-       \textit{else}\;\textit{None}$                       
-\end{tabular}    
-\end{center} 
-%\end{definition}
-
-\noindent
-The function $\decode'$ returns a pair consisting of 
-a partially decoded value and some leftover bit list that cannot
-be decide yet.
-The function $\decode'$ succeeds if the left-over 
-bit-sequence is empty.
-$\decode$ is terminating as $\decode'$ is terminating.
-$\decode'$ is terminating 
-because at least one of $\decode'$'s parameters will go down in terms
-of size.
-Assuming we have a value $v$ and regular expression $r$
-with $\vdash v:r$,
-then we have the property that $\decode$ and $\code$ are
-reverse operations of one another:
-\begin{lemma}
-\[If \vdash v : r \; then \;\decode \; (\code \; v) \; r = \textit{Some}(v) \]
-\end{lemma}
-\begin{proof}
-By proving a more general version of the lemma, on $\decode'$:
-\[\vdash v : r \implies \decode' \; ((\code \; v) @ ds) \; r = (v, ds) \]
-Then setting $ds$ to be $[]$ and unfolding $\decode$ definition,
-we obtain the property.
-\end{proof}
-With the $\code$ and $\decode$ functions in hand, we know how to 
-switch between bit-codes and values. 
-The next step is to integrate this information into regular expression.
-Attaching bits to the front of regular expressions is the solution Sulzamann and Lu
-gave for storing partial values in regular expressions. 
-Annotated regular expressions are therefore defined as the Isabelle
-datatype:
-
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{a}$ & $::=$  & $\ZERO$\\
-                  & $\mid$ & $_{bs}\ONE$\\
-                  & $\mid$ & $_{bs}{\bf c}$\\
-                  & $\mid$ & $_{bs}\sum\,as$\\
-                  & $\mid$ & $_{bs}a_1\cdot a_2$\\
-                  & $\mid$ & $_{bs}a^*$
-\end{tabular}    
-\end{center}  
-%(in \textit{ALTS})
-
-\noindent
-where $bs$ stands for bit-codes, $a$  for $\mathbf{a}$nnotated regular
-expressions and $as$ for lists of annotated regular expressions.
-The alternative constructor, written, $\sum$, has been generalised to 
-accept a list of annotated regular expressions rather than just two.
-Why is it generalised? This is because when we open up nested 
-alternatives, there could be more than two elements at the same level
-after de-duplication, which can no longer be stored in a binary
-constructor.
-
-The first operation we define related to bit-coded regular expressions
-is how we move bits to the inside of regular expressions.
-Called $\fuse$, this operation is attaches bit-codes 
-to the front of an annotated regular expression:
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\
-  $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ &
-     $_{bs @ bs'}\ONE$\\
-  $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ &
-     $_{bs@bs'}{\bf c}$\\
-  $\textit{fuse}\;bs\,_{bs'}\sum\textit{as}$ & $\dn$ &
-     $_{bs@bs'}\sum\textit{as}$\\
-  $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ &
-     $_{bs@bs'}a_1 \cdot a_2$\\
-  $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ &
-     $_{bs @ bs'}a^*$
-\end{tabular}    
-\end{center} 
-
-\noindent
-With \emph{fuse} we are able to define the $\internalise$ function
-that translates a ``standard'' regular expression into an
-annotated regular expression.
-This function will be applied before we start
-with the derivative phase of the algorithm.
-
-\begin{center}
-\begin{tabular}{lcl}
-  $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\
-  $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\
-  $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\
-  $(r_1 + r_2)^\uparrow$ & $\dn$ &
-  $_{[]}\sum[\textit{fuse}\,[Z]\,r_1^\uparrow,\,
-  \textit{fuse}\,[S]\,r_2^\uparrow]$\\
-  $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
-         $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\
-  $(r^*)^\uparrow$ & $\dn$ &
-         $_{[]}(r^\uparrow)^*$\\
-\end{tabular}    
-\end{center}    
-%\end{definition}
-
-\noindent
-We use an up arrow with postfix notation
-to denote this operation.
-for convenience. The $\textit{internalise} \; r$
-notation is more cumbersome.
-The opposite of $\textit{internalise}$ is
-$\erase$, where all the bit-codes are removed,
-and the alternative operator $\sum$ for annotated
-regular expressions is transformed to the binary alternatives
-for plain regular expressions.
-\begin{center}
-	\begin{tabular}{lcl}
-		$\ZERO_\downarrow$ & $\dn$ & $\ZERO$\\
-		$( _{bs}\ONE )_\downarrow$ & $\dn$ & $\ONE$\\
-		$( _{bs}\mathbf{c} )_\downarrow$ & $\dn$ & $\mathbf{c}$\\
-		$( _{bs} a_1 \cdot a_2 )_\downarrow$ & $\dn$ & 
-		$ (a_1) _\downarrow \cdot  (a_2) _\downarrow$\\
-		$( _{bs} [])_\downarrow $ & $\dn$ & $\ZERO $\\
-		$( _{bs} [a]  )_\downarrow$ & $\dn$ & $a_\downarrow$\\
-		$_{bs} \sum [a_1, \; a_2]$ & $\dn$ & $ (a_1) _\downarrow + ( a_2 ) _\downarrow $\\
-		$(_{bs} \sum (a :: as))_\downarrow$ & $\dn$ & $ a_\downarrow + \; (_{[]} \sum as)_\downarrow$\\
-		$( _{bs} a^* )_\downarrow$ & $\dn$ & $(a_\downarrow)^*$
-	\end{tabular}
-\end{center}
-\noindent
-We also abbreviate the $\erase\; a$ operation
-as $a_\downarrow$, for conciseness.
-For bit-coded regular expressions, as a different datatype, 
-testing whether they contain empty string in their lauguage requires
-a dedicated function $\bnullable$
-which simply calls $\erase$ first before testing whether it is $\nullable$.
-\begin{definition}
-		$\bnullable \; a \dn  \nullable \; (a_\downarrow)$
-\end{definition}
-The function for collecting the
-bitcodes at the end of the derivative 
-phase from a (b)nullable regular expression
-is a generalised version of the $\textit{mkeps}$ function
-for annotated regular expressions, called $\textit{bmkeps}$:
-
-
-%\begin{definition}[\textit{bmkeps}]\mbox{}
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\
-  $\textit{bmkeps}\,(_{bs}\sum a::\textit{as})$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a$\\
-  & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
-  & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{[]}\sum \textit{as})$\\
-  $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ &
-     $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
-  $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ &
-     $bs \,@\, [Z]$
-\end{tabular}    
-\end{center}    
-%\end{definition}
-
-\noindent
-$\bmkeps$ completes the value information by travelling along the
-path of the regular expression that corresponds to a POSIX value and
-collecting all the bitcodes, and attaching $S$ to indicate the end of star
-iterations. 
-
-Now we give out the central part of this lexing algorithm,
-the $\bder$ function (stands for \emph{b}itcoded-derivative).
-For most time we use the infix notation $(\_\backslash\_)$ 
-to mean $\bder$ for brevity when
-there is no danger of confusion with derivatives on plain regular expressions.
-For example, we write $( _{[]}r^* ) \backslash c$ instead of $\bder \;c \; _{[]}r^*$,
-as the bitcodes at the front of $r^*$ indicates that it is 
-a bit-coded regular expression, not a plain one.
-$\bder$ tells us how regular expressions can be recursively traversed,
-where the bitcodes are augmented and carried around 
-when a derivative is taken.
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-  $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\  
-  $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\  
-  $(_{bs}{\bf d})\,\backslash c$ & $\dn$ &
-        $\textit{if}\;c=d\; \;\textit{then}\;
-         _{bs}\ONE\;\textit{else}\;\ZERO$\\  
-  $(_{bs}\sum \;\textit{as})\,\backslash c$ & $\dn$ &
-  $_{bs}\sum\;(\textit{map} \; (\_\backslash c) \; as )$\\
-  $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a_1$\\
-					       & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
-					       & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
-  & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\
-  $(_{bs}a^*)\,\backslash c$ & $\dn$ &
-      $_{bs}(\textit{fuse}\, [Z] \; r\,\backslash c)\cdot
-       (_{[]}r^*))$
-\end{tabular}    
-\end{center}    
-\noindent
-We give the intuition behind some of the more involved cases in 
-$\bder$. For example,
-in the \emph{star} case,
-a derivative on $_{bs}a^*$ means 
-that one more star iteratoin needs to be taken.
-we need to unfold it into a sequence,
-and attach an additional bit $Z$ to the front of $r \backslash c$
-as a record to indicate one new star iteration is unfolded.
-
-\noindent
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-  $(_{bs}a^*)\,\backslash c$ & $\dn$ &
-  $_{bs}(\underbrace{\textit{fuse}\, [Z] \; a\,\backslash c}_{\text{One more iteration}})\cdot
-       (_{[]}a^*))$
-\end{tabular}    
-\end{center}   
-
-\noindent
-This information will be recovered later by the $\decode$ function.
-The intuition is that the bit $Z$ will be decoded at the right location,
-because we accumulate bits from left to right (a rigorous proof will be given
-later).
-
-\begin{tikzpicture}[ > = stealth, % arrow head style
-        shorten > = 1pt, % don't touch arrow head to node
-        semithick % line style
-    ]
-
-    \tikzstyle{every state}=[
-        draw = black,
-        thin,
-        fill = cyan!29,
-        minimum size = 7mm
-    ]
-    \begin{scope}[node distance=1cm and 0cm, every node/.style=state]
-		\node (k) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
-        {$bs$
-         \nodepart{two} $a^*$ };
-	 \node (l) [below =of k, rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
-        { $bs$ + [Z]
-         \nodepart{two}  $(a\backslash c )\cdot a^*$ };
-    \end{scope}
-    \path[->] 
-	      (k) edge (l);
-\end{tikzpicture}
-
-Pictorially the process looks like below.
-Like before, the red region denotes
-previous lexing information (stored as bitcodes in $bs$).
-
-\begin{tikzpicture}[every node/.append style={draw, rounded corners, inner sep=10pt}]
-	\begin{scope}[node distance=1cm]   
-		\node (a) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
-        {$bs$
-         \nodepart{two} $a^*$ };
-	 \node (b) [rectangle split, rectangle split horizontal, rectangle split parts=2, rectangle split part fill={red!30,blue!20},]
-        { $bs$ + [Z]
-         \nodepart{two}  $(a\backslash c )\cdot a^*$ };
-%\caption{term 1 \ref{term:1}'s matching configuration}
- 	\end{scope}
-\end{tikzpicture}
-
-\noindent
-Another place in the $\bder$ function where it differs
-from normal derivatives (on un-annotated regular expressions)
-is the sequence case:
-\begin{center}
-  \begin{tabular}{@{}lcl@{}}
-
-  $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
-     $\textit{if}\;\textit{bnullable}\,a_1$\\
-					       & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
-					       & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
-  & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$
-\end{tabular}    
-\end{center}    
-
-The difference is that (when $a_1$ is $\bnullable$)
-we use $\bmkeps$ to store the lexing information
-in $a_1$ before collapsing it (as it has been fully matched by string prior to $c$, 
-and attach the collected bit-codes to the front of $a_2$
-before throwing away $a_1$. We assume that $\bmkeps$ correctly extracts the bitcode for how $a_1$
-matches the string prior to $c$ (more on this later).
-The bitsequence $\textit{bs}$ which was initially attached to the first element of the sequence
-$a_1 \cdot a_2$, has now been elevated to the top level of teh $\sum$. 
-This is because this piece of information will be needed whichever way the sequence is matched,
-regardless of whether $c$ belongs to $a_1$ or $a_2$.
-
-In the injection-based lexing, $r_1$ is immediately thrown away in 
-subsequent derivatives on the right branch (when $r_1$ is $\nullable$),
-\begin{center}
-	$(r_1 \cdot r_2 )\backslash c = (r_1 \backslash c) \cdot r_2 + r_2 \backslash c$
-\end{center}
-\noindent
-as it knows $r_1$ is stored on stack and available once the recursive 
-call to later derivatives finish.
-Therefore, if the $\Right$ branch is taken in a $\POSIX$ match,
-we construct back the sequence value once step back by
-calling a on $\mkeps(r_1)$.
-\begin{center}
-	\begin{tabular}{lcr}
-		$\ldots r_1 \cdot r_2$ & $\rightarrow$ & $r_1\cdot r_2 + r_2 \backslash c \ldots $\\
-		$\ldots \Seq(v_1, v_2) (\Seq(\mkeps(r1), (\inj \; r_2 \; c\; v_{2c})))$ & $\leftarrow$ & $\Right(v_{2c})\ldots$
-	\end{tabular}
-\end{center}
-\noindent
-The rest of the clauses of $\bder$ is rather similar to
-$\der$, and is put together here as a wholesome definition
-for $\bder$:
-\noindent
-Generalising the derivative operation with bitcodes to strings, we have 
-\begin{center}
-	\begin{tabular}{@{}lcl@{}}
-		$a\backslash_s [] $ & $\dn$ & $a$\\
-		$a\backslash (c :: s) $ & $\dn$ & $(a \backslash c) \backslash_s s$
-	\end{tabular}
-\end{center}
-As we did earlier, we omit the $s$ subscript at $\backslash_s$ when there is no danger
-of confusion.
-Putting this all together, we obtain a lexer with bit-coded regular expressions
-as its internal data structures, which we call $\blexer$:
-
-\begin{center}
-\begin{tabular}{lcl}
-  $\textit{blexer}\;r\,s$ & $\dn$ &
-      $\textit{let}\;a = (r^\uparrow)\backslash s\;\textit{in}$\\                
-  & & $\;\;\textit{if}\; \textit{bnullable}(a)$\\
-  & & $\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,a)\,r$\\
-  & & $\;\;\textit{else}\;\textit{None}$
-\end{tabular}
-\end{center}
-
-\noindent
-Ausaf and Urban formally proved the correctness of the $\blexer$, namely
-\begin{conjecture}
-$\blexer \;r \; s = \lexer \; r \; s$.
-\end{conjecture}
-This was claimed but not formalised in Sulzmann and Lu's work.
-We introduce the proof later, after we give out all
-the needed auxiliary functions and definitions
-
 
 %-----------------------------------
 %	SUBSECTION 1
diff -r 35df9cdd36ca -r e0f0a81f907b ChengsongTanPhdThesis/main.tex
--- a/ChengsongTanPhdThesis/main.tex	Wed Aug 17 01:09:13 2022 +0100
+++ b/ChengsongTanPhdThesis/main.tex	Wed Aug 17 22:59:15 2022 +0100
@@ -43,6 +43,7 @@
 %\usepackage{fdsymbol} % Loads unicode-math
 
 \usepackage{verbatim}
+\usepackage{float}
 \usepackage{mathpazo} % Use the Palatino font by default
 \usepackage{hyperref}
 \usepackage{lipsum}
@@ -94,6 +95,7 @@
 \usepackage{tikz}
 \usetikzlibrary{automata, positioning, calc}
 \usetikzlibrary{arrows}
+\usetikzlibrary{shapes.misc}
 \usetikzlibrary{fit,
                 shapes.geometric,
 		patterns,