lexing: ChengsongTanPhdThesis/Chapters/ChapterBitcoded1.tex@28751de4b4ba


% Chapter Template

% Main chapter title
\chapter{Correctness of Bit-coded Algorithm without Simplification}

\label{ChapterBitcoded1} % Change X to a consecutive number; for referencing this chapter elsewhere, use \ref{ChapterX}
%Then we illustrate how the algorithm without bitcodes falls short for such aggressive 
%simplifications and therefore introduce our version of the bitcoded algorithm and 
%its correctness proof in 
%Chapter 3\ref{Chapter3}. 




%----------------------------------------------------------------------------------------
%	SECTION common identities
%----------------------------------------------------------------------------------------
\section{Common Identities In Simplification-Related Functions} 
Need to prove these before starting on the big correctness proof.

%-----------------------------------
%	SUBSECTION 
%-----------------------------------
\subsection{Idempotency of $\simp$}

\begin{equation}
	\simp \;r = \simp\; \simp \; r 
\end{equation}
This property means we do not have to repeatedly
apply simplification in each step, which justifies
our definition of $\blexersimp$.
It will also be useful in future proofs where properties such as 
closed forms are needed.
The proof is by structural induction on $r$.

%-----------------------------------
%	SUBSECTION 
%-----------------------------------
\subsection{Syntactic Equivalence Under $\simp$}
We prove that minor differences can be annhilated
by $\simp$.
For example,
\begin{center}
$\simp \;(\simpALTs\; (\map \;(\_\backslash \; x)\; (\distinct \; \mathit{rs}\; \phi))) = 
 \simp \;(\simpALTs \;(\distinct \;(\map \;(\_ \backslash\; x) \; \mathit{rs}) \; \phi))$
\end{center}






%----------------------------------------------------------------------------------------
%	SECTION  correctness proof
%----------------------------------------------------------------------------------------
\section{Correctness of Bit-coded Algorithm (Without Simplification)}
We now give the proof the correctness of the algorithm with bit-codes.

Ausaf and Urban cleverly defined an auxiliary function called $\flex$,
defined as
\[
\flex \; r \; f \; [] \; v \; = \; f\; v
\flex \; r \; f \; c :: s \; v =  \flex r \; \lambda v. \, f (\inj \; r\; c\; v)\; s \; v
\]
which accumulates the characters that needs to be injected back, 
and does the injection in a stack-like manner (last taken derivative first injected).
$\flex$ is connected to the $\lexer$:
\begin{lemma}
$\flex \; r \; \textit{id}\; s \; \mkeps (r\backslash s) = \lexer \; r \; s$
\end{lemma}
$\flex$ provides us a bridge between $\lexer$ and $\blexer$.
What is even better about $\flex$ is that it allows us to 
directly operate on the value $\mkeps (r\backslash v)$,
which is pivotal in the definition of  $\lexer $ and $\blexer$, but not visible as an argument.
When the value created by $\mkeps$ becomes available, one can 
prove some stepwise properties of lexing nicely:
\begin{lemma}\label{flexStepwise}
$\textit{flex} \; r \; f \; s@[c] \; v= \flex \; r \; f\; s \; (\inj \; (r\backslash s) \; c \; v) $
\end{lemma}

And for $\blexer$ we have a function with stepwise properties like $\flex$ as well,
called $\retrieve$\ref{retrieveDef}.
$\retrieve$ takes bit-codes from annotated regular expressions
guided by a value.
$\retrieve$ is connected to the $\blexer$ in the following way:
\begin{lemma}\label{blexer_retrieve}
$\blexer \; r \; s = \decode  \; (\retrieve \; (\internalise \; r) \; (\mkeps \; (r \backslash s) )) \; r$
\end{lemma}
If you take derivative of an annotated regular expression, 
you can $\retrieve$ the same bit-codes as before the derivative took place,
provided that you use the corresponding value:

\begin{lemma}\label{retrieveStepwise}
$\retrieve \; (r \backslash c)  \;  v= \retrieve \; r \; (\inj \; r\; c\; v)$
\end{lemma}
The other good thing about $\retrieve$ is that it can be connected to $\flex$:
%centralLemma1
\begin{lemma}\label{flex_retrieve}
$\flex \; r \; \textit{id}\; s\; v = \decode \; (\retrieve \; (r\backslash s )\; v) \; r$
\end{lemma}
\begin{proof}
By induction on $s$. The induction tactic is reverse induction on strings.
$v$ is allowed to be arbitrary.
The crucial point is to rewrite 
\[
\retrieve \; (r \backslash s@[c]) \; \mkeps (r \backslash s@[c]) 
\]
as
\[
\retrieve \; (r \backslash s) \; (\inj \; (r \backslash s) \; c\;  \mkeps (r \backslash s@[c]))
\].
This enables us to equate 
\[
\retrieve \; (r \backslash s@[c]) \; \mkeps (r \backslash s@[c]) 
\] 
with 
\[
\flex \; r \; \textit{id} \; s \; (\inj \; (r\backslash s) \; c\; (\mkeps (r\backslash s@[c])))
\],
which in turn can be rewritten as
\[
\flex \; r \; \textit{id} \; s@[c] \;  (\mkeps (r\backslash s@[c]))
\].
\end{proof}

With the above lemma we can now link $\flex$ and $\blexer$.

\begin{lemma}\label{flex_blexer}
$\textit{flex} \; r \; \textit{id} \; s \; \mkeps(r \backslash s)  = \blexer \; r \; s$
\end{lemma}
\begin{proof}
Using two of the above lemmas: \ref{flex_retrieve} and \ref{blexer_retrieve}.
\end{proof}
Finally
author	Chengsong
	Mon, 30 May 2022 14:41:09 +0100
changeset 528	28751de4b4ba
parent 527	2c907b118f78
child 530	823d9b19d21c
permissions	-rwxr-xr-x