--- a/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex Sun Jul 09 00:29:02 2023 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex Sun Jul 09 02:08:12 2023 +0100
@@ -623,7 +623,7 @@
by zooming in to the middle bit involving $r_i$, $r_{i+1}$, $v_i$ and $v_{i+1}$,
and adding the bottom row to show how bitcodes encoding the lexing information
can be extracted from every pair $(r_i, \; v_i)$:
-\begin{center}
+\begin{center}\label{graph:injZoom}
\begin{tikzpicture}[->, >=stealth', shorten >= 1pt, auto, thick]
%\node [rectangle ] (1) at (-7, 2) {$\ldots$};
%\node [rectangle, draw] (2) at (-4, 2) {$r_i = _{bs'}(_Za+_Saa)^*$};
@@ -647,8 +647,8 @@
\node [rectangle, draw] (7) at ( 3, -2) {$v_{i+1} = v$};
\node [rectangle] (8) at ( 8, -2) {$\ldots$};
\node [rectangle] (9) at (-8, -6) {$\ldots$};
- \node [rectangle, draw] (10) at (-5, -6) {$\textit{bits}_{i} = bs' @ ZZS$};
- \node [rectangle, draw] (11) at (3, -6) {$\textit{bits}_{i+1} = bs'@ ZZS$};
+ \node [rectangle, draw] (10) at (-5, -6) {$\textit{bits}_{i} $};
+ \node [rectangle, draw] (11) at (3, -6) {$\textit{bits}_{i+1}$};
\node [rectangle] (12) at (8, -6) {$\ldots$};
@@ -657,7 +657,7 @@
\path (6) edge [] node {} (5);
\path (9) edge [] node {} (10);
- \path (11) edge [] node {} (12);
+ \path (11) edge [<-] node {} (12);
\path (8) edge [] node {} (7);
\path (3) edge [] node {} (4);
@@ -676,7 +676,7 @@
\path (10) edge [dashed, <->] node {$=$} (11);
- \path (7) edge [] node {$\inj \; r_{i+1} \; a \; v_i$} (6);
+ \path (7) edge [] node {$\inj \; r_{i+1} \; c \; v_i$} (6);
% \node [rectangle, draw] (r) at (-6, -1) {$(aa)^*(b+c)$};
% \node [rectangle, draw] (a) at (-6, 4) {$(aa)^*(_{Z}b + _{S}c)$};
@@ -775,140 +775,186 @@
%From this chapter we start with the main contribution of this thesis, which
-The $\blexer$ proof relies on a lockstep POSIX
-correspondence between the lexical value and the
-regular expression in each derivative and injection.
-If we zoom into the diagram \ref{graph:inj} and look specifically at
-the pairs $v_i, r_i$ and $v_{i+1},\, r_{i+1}$, we get the diagram demonstrating
-the invariant that the same bitcodes can be extracted from the pairs:
-\tikzset{three sided/.style={
- draw=none,
- append after command={
- [-,shorten <= -0.5\pgflinewidth]
- ([shift={(-1.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north east)
- edge([shift={( 0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north west)
- ([shift={( 0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north west)
- edge([shift={( 0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south west)
- ([shift={( 0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south west)
- edge([shift={(-1.0\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south east)
- }
- }
-}
-
-\tikzset{three sided1/.style={
- draw=none,
- append after command={
- [-,shorten <= -0.5\pgflinewidth]
- ([shift={(1.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north west)
- edge([shift={(-0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north east)
- ([shift={(-0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north east)
- edge([shift={(-0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south east)
- ([shift={(-0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south east)
- edge([shift={(1.0\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south west)
- }
- }
-}
-
-\begin{center}
- \begin{tikzpicture}[->, >=stealth', shorten >= 1pt, auto, thick]
- %\node [rectangle ] (1) at (-7, 2) {$\ldots$};
- %\node [rectangle, draw] (2) at (-4, 2) {$r_i = _{bs'}(_Za+_Saa)^*$};
- %\node [rectangle, draw] (3) at (4, 2) {$r_{i+1} = _{bs'}(_Z(_Z\ONE + _S(\ONE \cdot a)))\cdot(_Za+_Saa)^*$};
- %\node [rectangle] (4) at (9, 2) {$\ldots$};
- %\node [rectangle] (5) at (-7, -2) {$\ldots$};
- %\node [rectangle, draw] (6) at (-4, -2) {$v_i = \Stars \; [\Left (a)]$};
- %\node [rectangle, draw] (7) at ( 4, -2) {$v_{i+1} = \Seq (\Alt (\Left \; \Empty)) \; \Stars \, []$};
- %\node [rectangle] (8) at ( 9, -2) {$\ldots$};
- %\node [rectangle] (9) at (-7, -6) {$\ldots$};
- %\node [rectangle, draw] (10) at (-4, -6) {$\textit{bits}_{i} = bs' @ ZZS$};
- %\node [rectangle, draw] (11) at (4, -6) {$\textit{bits}_{i+1} = bs'@ ZZS$};
- %\node [rectangle] (12) at (9, -6) {$\ldots$};
-
- \node [rectangle ] (1) at (-8, 2) {$\ldots$};
- \node [rectangle, draw] (2) at (-5, 2) {$r_i = _{bs'}(_Za+_Saa)^*$};
- \node [rectangle, draw] (3) at (3, 2) {$r_{i+1} = _{bs'}(_Z(_Z\ONE + _S(\ONE \cdot a)))\cdot(_Za+_Saa)^*$};
- \node [rectangle] (4) at (8, 2) {$\ldots$};
- \node [rectangle] (5) at (-8, -2) {$\ldots$};
- \node [rectangle, draw] (6) at (-5, -2) {$v_i = \Stars \; [\Left (a)]$};
- \node [rectangle, draw] (7) at ( 3, -2) {$v_{i+1} = \Seq (\Alt (\Left \; \Empty)) \; \Stars \, []$};
- \node [rectangle] (8) at ( 8, -2) {$\ldots$};
- \node [rectangle] (9) at (-8, -6) {$\ldots$};
- \node [rectangle, draw] (10) at (-5, -6) {$\textit{bits}_{i} = bs' @ ZZS$};
- \node [rectangle, draw] (11) at (3, -6) {$\textit{bits}_{i+1} = bs'@ ZZS$};
- \node [rectangle] (12) at (8, -6) {$\ldots$};
-
-
+The $\blexer$ proof relies on $r_i, \; v_i$ to match each other in lockstep
+for each derivative step $i$, however only $v_0$ is needed and intermediate
+$v_i$s are purely proof scaffolding.
+Moreover property \ref{eq:stepwise}
+is stronger than needed for POSIX lexing: the precondition
+$\vdash v_{i+1}:r_{i+1}$ is too general in the sense that it allows arbitrary
+values inhabiting in $r_i$ to retrieve bitcodes.
+%correspondence between the lexical value and the
+%regular expression in derivative and injection operations at the same step $i$.
+%If we revisit the diagram \ref{graph:injZoom} with an example
+Consider a concrete example where $a_i = (_{ZZ}x + _{ZS}y + _{S}x)$ and
+$a_{i+1} = (_{ZZ}\ONE + \ZERO + _{S}\ONE)$.
+What is required in the proof of $\blexer$ is that for the POSIX value
+$v_i = \Left \; (\Left \; Empty)$,
+the property
+\[
+ %\vdash \Left \; (\Left \; Empty) : (\ONE+\ZERO+\ONE) \implies
+ \retrieve \; (_{ZZ}\ONE + \ZERO + _{S}\ONE) \; (\Left \; (\Left \; \Empty) ) =
+ \retrieve \; (_{ZZ}x + _{ZS}y + _{S}x ) \; (\Left \; (\Left \; \Char\; x) )
+\]
+holds, and for $\blexersimp$ a property of similar shape to
+\[
+ \retrieve \; _{ZZ}\ONE \; \; Empty =
+ \retrieve \; _{ZZ}x \; (\Char\; x)
+\]
+needs to hold as well.
+However for the definitely non-POSIX value $v_i' = \Right \; \Empty$
+the precondition $\vdash \Right \; \Empty : x+y+x$ holds as well, and therefore
+the following equality
+\[
+ \retrieve \; (_{ZZ}\ONE + \ZERO + _{S}\ONE) \; (\Right \; \Empty) =
+ \retrieve \; (_{ZZ}x + _{ZS}y + _{S}x ) \; (\Right \; (\Char\; x))
+\]
+by lemma \ref{retrieveStepwise} holds for $\blexer$ as well.
+This cannot hold or be proven anymore with $\blexersimp$ as the corresponding
+sub-regular expressions and values have been eliminated during the
+de-duplication procedure of our smplification.
+We are stuck with a property that holds in $\blexer$ but does not have a counterpart
+in $\blexersimp$.
+This needs not hold for the purpose of POSIX lexing though--we know the rightmost
+subexpression $x$ is not POSIX by the left priority rule.
+The inductive invariant \ref{eq:stepwise} can be weakened by restricting the precondition
+$\vdash v_i:r_i$ to $\exists s_i. \; (s_i, r_i) \rightarrow v_i$.
+We tried this route but it did not work well since we need to
+use a similar technique as the rectification functions by Ausaf et al,
+and they can get very complicated with our simplifications.
+After some trial-and-error we found a property of the form
+\begin{property}
+ If a POSIX value can be extracted from $a \backslash s$, then
+ it can be extracted from $a \backslash_{bsimps} s$ as well.
+\end{property}
+\noindent
+most natural to work with, and we defined a binary relation to capture
+the connection between $a\backslash s$ and $a \backslash_{bsimps} s$.
+%and look specifically at
+%the pairs $v_i, r_i$ and $v_{i+1},\, r_{i+1}$, we get the diagram demonstrating
+%the invariant that the same bitcodes can be extracted from the pairs:
+%\tikzset{three sided/.style={
+% draw=none,
+% append after command={
+% [-,shorten <= -0.5\pgflinewidth]
+% ([shift={(-1.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north east)
+% edge([shift={( 0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north west)
+% ([shift={( 0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north west)
+% edge([shift={( 0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south west)
+% ([shift={( 0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south west)
+% edge([shift={(-1.0\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south east)
+% }
+% }
+%}
+%
+%\tikzset{three sided1/.style={
+% draw=none,
+% append after command={
+% [-,shorten <= -0.5\pgflinewidth]
+% ([shift={(1.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north west)
+% edge([shift={(-0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north east)
+% ([shift={(-0.5\pgflinewidth,-0.5\pgflinewidth)}]\tikzlastnode.north east)
+% edge([shift={(-0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south east)
+% ([shift={(-0.5\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south east)
+% edge([shift={(1.0\pgflinewidth,+0.5\pgflinewidth)}]\tikzlastnode.south west)
+% }
+% }
+%}
+%
+%\begin{center}
+% \begin{tikzpicture}[->, >=stealth', shorten >= 1pt, auto, thick]
+% %\node [rectangle ] (1) at (-7, 2) {$\ldots$};
+% %\node [rectangle, draw] (2) at (-4, 2) {$r_i = _{bs'}(_Za+_Saa)^*$};
+% %\node [rectangle, draw] (3) at (4, 2) {$r_{i+1} = _{bs'}(_Z(_Z\ONE + _S(\ONE \cdot a)))\cdot(_Za+_Saa)^*$};
+% %\node [rectangle] (4) at (9, 2) {$\ldots$};
+% %\node [rectangle] (5) at (-7, -2) {$\ldots$};
+% %\node [rectangle, draw] (6) at (-4, -2) {$v_i = \Stars \; [\Left (a)]$};
+% %\node [rectangle, draw] (7) at ( 4, -2) {$v_{i+1} = \Seq (\Alt (\Left \; \Empty)) \; \Stars \, []$};
+% %\node [rectangle] (8) at ( 9, -2) {$\ldots$};
+% %\node [rectangle] (9) at (-7, -6) {$\ldots$};
+% %\node [rectangle, draw] (10) at (-4, -6) {$\textit{bits}_{i} = bs' @ ZZS$};
+% %\node [rectangle, draw] (11) at (4, -6) {$\textit{bits}_{i+1} = bs'@ ZZS$};
+% %\node [rectangle] (12) at (9, -6) {$\ldots$};
+%
+% \node [rectangle ] (1) at (-8, 2) {$\ldots$};
+% \node [rectangle, draw] (2) at (-5, 2) {$r_i = _{bs'}(_Za+_Saa)^*$};
+% \node [rectangle, draw] (3) at (3, 2) {$r_{i+1} = _{bs'}(_Z(_Z\ONE + _S(\ONE \cdot a)))\cdot(_Za+_Saa)^*$};
+% \node [rectangle] (4) at (8, 2) {$\ldots$};
+% \node [rectangle] (5) at (-8, -2) {$\ldots$};
+% \node [rectangle, draw] (6) at (-5, -2) {$v_i = \Stars \; [\Left (a)]$};
+% \node [rectangle, draw] (7) at ( 3, -2) {$v_{i+1} = \Seq (\Alt (\Left \; \Empty)) \; \Stars \, []$};
+% \node [rectangle] (8) at ( 8, -2) {$\ldots$};
+% \node [rectangle] (9) at (-8, -6) {$\ldots$};
+% \node [rectangle, draw] (10) at (-5, -6) {$\textit{bits}_{i} = bs' @ ZZS$};
+% \node [rectangle, draw] (11) at (3, -6) {$\textit{bits}_{i+1} = bs'@ ZZS$};
+% \node [rectangle] (12) at (8, -6) {$\ldots$};
+%
+%
+%
+% \path (1) edge [] node {} (2);
+% \path (5) edge [] node {} (6);
+% \path (9) edge [] node {} (10);
+%
+% \path (11) edge [] node {} (12);
+% \path (7) edge [] node {} (8);
+% \path (3) edge [] node {} (4);
+%
+%
+% \path (6) edge [dashed,bend right = 30] node {$\retrieve \; r_i \; v_i$} (10);
+% \path (2) edge [dashed,bend left = 48] node {} (10);
+%
+% \path (7) edge [dashed,bend right = 30] node {$\retrieve \; r_{i+1} \; v_{i+1}$} (11);
+% \path (3) edge [dashed,bend left = 45] node {} (11);
+%
+% \path (2) edge [] node {$\backslash a$} (3);
+% \path (2) edge [dashed, <->] node {$\vdash v_i : r_i$} (6);
+% \path (3) edge [dashed, <->] node {$\vdash v_{i+1} : r_{i+1}$} (7);
+% %\path (6) edge [] node {$\vdash v_i : r_i$} (10);
+% %\path (7) edge [dashed, <->] node {$\vdash v_i : r_i$} (11);
+%
+% \path (10) edge [dashed, <->] node {$=$} (11);
+%
+% \path (7) edge [] node {$\inj \; r_{i+1} \; a \; v_i$} (6);
+%
+%% \node [rectangle, draw] (r) at (-6, -1) {$(aa)^*(b+c)$};
+%% \node [rectangle, draw] (a) at (-6, 4) {$(aa)^*(_{Z}b + _{S}c)$};
+%% \path (r)
+%% edge [] node {$\internalise$} (a);
+%% \node [rectangle, draw] (a1) at (-3, 1) {$(_{Z}(\ONE \cdot a) \cdot (aa)^*) (_{Z}b + _Sc)$};
+%% \path (a)
+%% edge [] node {$\backslash a$} (a1);
+%%
+%% \node [rectangle, draw, three sided] (a21) at (-2.5, 4) {$(_{Z}\ONE \cdot (aa)^*)$};
+%% \node [rectangle, draw, three sided1] (a22) at (-0.8, 4) {$(_{Z}b + _{S}c)$};
+%% \path (a1)
+%% edge [] node {$\backslash a$} (a21);
+%% \node [rectangle, draw] (a3) at (0.5, 2) {$_{ZS}(_{Z}\ONE + \ZERO)$};
+%% \path (a22)
+%% edge [] node {$\backslash b$} (a3);
+%% \path (a21)
+%% edge [dashed, bend right] node {} (a3);
+%% \node [rectangle, draw] (bs) at (2, 4) {$ZSZ$};
+%% \path (a3)
+%% edge [below] node {$\bmkeps$} (bs);
+%% \node [rectangle, draw] (v) at (3, 0) {$\Seq \; (\Stars\; [\Seq \; a \; a]) \; (\Left \; b)$};
+%% \path (bs)
+%% edge [] node {$\decode$} (v);
+%
+%
+% \end{tikzpicture}
+% %\caption{$\blexer$ with the regular expression $(aa)^*(b+c)$ and $aab$}
+%\end{center}
- \path (1) edge [] node {} (2);
- \path (5) edge [] node {} (6);
- \path (9) edge [] node {} (10);
-
- \path (11) edge [] node {} (12);
- \path (7) edge [] node {} (8);
- \path (3) edge [] node {} (4);
-
-
- \path (6) edge [dashed,bend right = 30] node {$\retrieve \; r_i \; v_i$} (10);
- \path (2) edge [dashed,bend left = 48] node {} (10);
-
- \path (7) edge [dashed,bend right = 30] node {$\retrieve \; r_{i+1} \; v_{i+1}$} (11);
- \path (3) edge [dashed,bend left = 45] node {} (11);
-
- \path (2) edge [] node {$\backslash a$} (3);
- \path (2) edge [dashed, <->] node {$\vdash v_i : r_i$} (6);
- \path (3) edge [dashed, <->] node {$\vdash v_{i+1} : r_{i+1}$} (7);
- %\path (6) edge [] node {$\vdash v_i : r_i$} (10);
- %\path (7) edge [dashed, <->] node {$\vdash v_i : r_i$} (11);
-
- \path (10) edge [dashed, <->] node {$=$} (11);
-
- \path (7) edge [] node {$\inj \; r_{i+1} \; a \; v_i$} (6);
-
-% \node [rectangle, draw] (r) at (-6, -1) {$(aa)^*(b+c)$};
-% \node [rectangle, draw] (a) at (-6, 4) {$(aa)^*(_{Z}b + _{S}c)$};
-% \path (r)
-% edge [] node {$\internalise$} (a);
-% \node [rectangle, draw] (a1) at (-3, 1) {$(_{Z}(\ONE \cdot a) \cdot (aa)^*) (_{Z}b + _Sc)$};
-% \path (a)
-% edge [] node {$\backslash a$} (a1);
+%When simplifications are added, the inhabitation relation no longer holds,
+%causing the above diagram to break.
+%
+%Ausaf addressed this with an augmented lexer he called $\textit{slexer}$.
+%
%
-% \node [rectangle, draw, three sided] (a21) at (-2.5, 4) {$(_{Z}\ONE \cdot (aa)^*)$};
-% \node [rectangle, draw, three sided1] (a22) at (-0.8, 4) {$(_{Z}b + _{S}c)$};
-% \path (a1)
-% edge [] node {$\backslash a$} (a21);
-% \node [rectangle, draw] (a3) at (0.5, 2) {$_{ZS}(_{Z}\ONE + \ZERO)$};
-% \path (a22)
-% edge [] node {$\backslash b$} (a3);
-% \path (a21)
-% edge [dashed, bend right] node {} (a3);
-% \node [rectangle, draw] (bs) at (2, 4) {$ZSZ$};
-% \path (a3)
-% edge [below] node {$\bmkeps$} (bs);
-% \node [rectangle, draw] (v) at (3, 0) {$\Seq \; (\Stars\; [\Seq \; a \; a]) \; (\Left \; b)$};
-% \path (bs)
-% edge [] node {$\decode$} (v);
-
-
- \end{tikzpicture}
- %\caption{$\blexer$ with the regular expression $(aa)^*(b+c)$ and $aab$}
-\end{center}
-
-When simplifications are added, the inhabitation relation no longer holds,
-causing the above diagram to break.
-
-Ausaf addressed this with an augmented lexer he called $\textit{slexer}$.
-
-
-
-we note that the invariant
-$\vdash v_{i+1}: r_{i+1} \implies \retrieve \; r_{i+1} \; v_{i+1} $ is too strong
-to maintain because the precondition $\vdash v_i : r_i$ is too weak.
-It does not require $v_i$ to be a POSIX value
-
-
-
-
-
+%
+%we note that the invariant
+%$\vdash v_{i+1}: r_{i+1} \implies \retrieve \; r_{i+1} \; v_{i+1} $ is too strong
+%to maintain because the precondition $\vdash v_i : r_i$ is too weak.
+%It does not require $v_i$ to be a POSIX value
%
%
%which is essential for getting an understanding this thesis
@@ -927,12 +973,10 @@
%$\blexer$, called $\blexersimp$, by establishing
%$\blexer \; r \; s= \blexersimp \; r \; s$ using a term rewriting system.
%
-
-
%----------------------------------------------------------------------------------------
% SECTION rewrite relation
%----------------------------------------------------------------------------------------
-We first introduce the rewriting relation \emph{rrewrite}
+In the next section we first introduce the rewriting relation \emph{rrewrite}
($\rrewrite$) between two regular expressions,
which stands for an atomic
simplification.