--- a/etnms/etnms.tex Wed Jan 22 22:50:38 2020 +0000
+++ b/etnms/etnms.tex Thu Jan 23 22:49:19 2020 +0000
@@ -1208,77 +1208,108 @@
the bits to be moved differently.
\subsubsection{A Failed Attempt To Remedy the Problem Above}
-A simple class of regular expressions and strings
-pairs can be deduced
-from the above example which
-trigger the difference between
+A simple class of regular expression and string
+pairs $(r, s)$ can be deduced from the above example
+which trigger the difference between
$\rup\backslash_{simp} \, s$
and $\simp(\rup\backslash s)$:
-\[
-D = (c_1c_2, \{r_1\cdot r_2 \mid \text{$\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE, r_1 \; not \; \nullable, c_2 \in L(r_2),
-\simp(r_2 \backslash c_2)$ is of shape alternative and $c_1c_2 \notin L(r_1)$}\})
-\]
-The culprit is the different order in which simplification is applied.
-For $\rup \backslash_{simp} s$,
+\begin{center}
+\begin{tabular}{lcl}
+$D =\{ (r_1 \cdot r_2,\; [c_1c_2]) \mid $ & $\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE,$\\
+ $r_1 \; not \; \nullable, c_2 \in L(r_2),$ & $\exists \textit{rs},\textit{bs}.\; r_2 \backslash c_2 = _{bs}{\oplus rs}$\\
+$\exists \textit{rs}_1. \; \simp(r_2 \backslash c_2) = _{bs}{\oplus \textit{rs}_1}$ & $and \;\simp(r_1 \backslash [c_1c_2]) = \ZERO\}$\\
+\end{tabular}
+\end{center}
+We take a pair $(r, \;s)$ from the set $D$.
+
+Now we compute ${\bf \rup \backslash_{simp} s}$, we get:
\begin{center}
\begin{tabular}{lcl}
$(r_1\cdot r_2)\backslash_{simp} \, [c_1c_2]$ & $= \simp\left[ \big(\simp\left[ \left( r_1\cdot r_2 \right) \backslash c_1\right] \big)\backslash c_2\right]$\\
& $= \simp\left[ \big(\simp \left[ \left(r_1 \backslash c_1\right) \cdot r_2 \right] \big) \backslash c_2 \right]$\\
- & $= \simp \left[ (\fuse \; \bmkeps(r_1\backslash c_1) \; \simp(r_2) ) \backslash c_2 \right]$,
+ & $= \simp \left[ (\fuse \; \bmkeps(r_1\backslash c_1) \; \simp(r_2) ) \backslash c_2 \right]$,\\
+ & $= \simp \left[ (\fuse \; \bmkeps(r_1\backslash c_1) \; r_2 ) \backslash c_2 \right]$,
\end{tabular}
\end{center}
\noindent
from the definition of $D$ we know $r_1 \backslash c_1$ is nullable, therefore
$\bmkeps(r_1\backslash c_1)$ returns a bitcode, we shall call it
- $bs$. Also, $\simp(r_2 \backslash c_2)$ is a certain alternative,
- we shall call it $_{bs_1}\oplus rs$, and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$
- simplifies to $\ZERO$,
- so the above term can be rewritten as
+ $\textit{bs}_2$.
+The above term can be rewritten as
+\begin{center}
+$ \simp \left[ \fuse \; \textit{bs}_2\; r_2 \backslash c_2 \right]$,
+\end{center}
+which is equal to
\begin{center}
-\begin{tabular}{lcl}
-$\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs++bs1}}\oplus rs] ) \; \textit{match} $ \\
- $\textit{case} \; [] \Rightarrow \ZERO$ \\
- $\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$ \\
- $\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$\\
-\end{tabular}
+$\simp \left[ \fuse \; \textit{bs}_2 \; _{bs}{\oplus rs} \right]$\\
+$=\simp \left[ \; _{bs_2++bs}{\oplus rs} \right]$\\
+$= \; _{bs_2++bs}{\oplus \textit{rs}_1} $
+\end{center}
+\noindent
+by using the properties from the set $D$ again
+and again(The reason why we set so many conditions
+that the pair $(r,s)$ need to satisfy is because we can
+rewrite them easily to construct the difference.)
+
+Now we compute ${\bf \simp(\rup \backslash s)}$:
+\begin{center}
+$\simp \big[(r_1\cdot r_2) \backslash [c_1c_2] \big]= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]$
\end{center}
-this, in turn, can be rewritten as $map (\fuse \textit{bs}++\textit{bs1}) rs$ based on
-the properties of the function $\simp$(more on this later).
-simplification is done along the way, disallowing the structure of nested alternatives,
-and this can actually be proven: simp(r) does not contain
-nested alternatives.
-\[
-\simp \left[(r_1\cdot r_2) \backslash [c_1c_2] \right]= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]
-\]
-For $\simp(\rup \backslash s)$,
+\noindent
+Again, using the properties above, we obtain
+the following chain of equalities:
\begin{center}
-\begin{tabular}{lcl}
-$\simp\big[(r_1\cdot r_2)\backslash \, [c_1c_2]\big]$ & $= \simp\left[ \big( \left( r_1\cdot r_2 \right) \backslash c_1 \big)\backslash c_2\right]$\\
- & $= \simp\left[ \left(r_1 \backslash c_1\right) \cdot r_2 \big) \backslash c_2 \right]$\\
- & $= \simp \left[ \oplus[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; \bmkeps(r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$,
+$\simp(\rup \backslash s)= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]= \simp\left[ \left(r_1 \backslash c_1\right) \cdot r_2 \big) \backslash c_2 \right]$\\
+$= \simp \left[ \oplus[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; (\bmkeps \;r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$,
+\end{center}
+\noindent
+as before, we call the bitcode returned by
+$\bmkeps(r_1\backslash c_1)$ as
+$\textit{bs}_2$.
+Also, $\simp(r_2 \backslash c_2)$ is
+$_{bs}\oplus \textit{rs}_1$,
+and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$
+simplifies to $\ZERO$,
+so the above term can be expanded as
+\begin{center}
+\begin{tabular}{l}
+$\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}_2++\textit{bs}}\oplus \textit{rs}_1] ) \; \textit{match} $ \\
+ $\textit{case} \; [] \Rightarrow \ZERO$ \\
+ $\textit{case} \; a :: [] \Rightarrow \textit{\fuse \; \textit{bs} a}$ \\
+ $\textit{case} \; as' \Rightarrow _{[]}\oplus as'$\\
\end{tabular}
\end{center}
\noindent
-as before, we call the bitcode returned by $\bmkeps(r_1\backslash c_1)$
- $bs$. Also, $\simp(r_2 \backslash c_2)$ is a certain alternative,
- we shall call it $_{bs_1}\oplus rs$, and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$
- simplifies to $\ZERO$,
- so the above term can be rewritten as
+Applying the definition of $\flatten$, we get
+\begin{center}
+$_{[]}\oplus (\textit{map} \; \fuse (\textit{bs}_2 ++ bs) rs_1)$
+\end{center}
+\noindent
+compared to the result
\begin{center}
-\begin{tabular}{lcl}
-$\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}++\textit{bs1}}\oplus rs] ) \; \textit{match} $ \\
- $\textit{case} \; [] \Rightarrow \ZERO$ \\
- $\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$ \\
- $\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$\\
-\end{tabular}
+$ \; _{bs_2++bs}{\oplus \textit{rs}_1} $
\end{center}
-
-this, in turn, can be rewritten as $map (\fuse\; \textit{bs}++\textit{bs1}) rs$ based on
-the properties of the function $\simp$(more on this later).
-
-simplification is done along the way, disallowing the structure of nested alternatives,
-and this can actually be proven: simp(r) does not contain
-nested alternatives.
+\noindent
+Note how these two regular expressions only
+differ in terms of the position of the bits
+$\textit{bs}_2++\textit{bs}$. They are the same otherwise.
+What caused this difference to happen?
+The culprit is the $\flatten$ function, which spills
+out the bitcodes in the inner alternatives when
+there exists an outer alternative.
+Note how the absence of simplification
+caused $\simp(\rup \backslash s)$ to
+generate the nested alternatives structure:
+\begin{center}
+$ \oplus[\ZERO \;, \; _{bs}\oplus \textit{rs} ]$
+\end{center}
+and this will always trigger the $\flatten$ to
+spill out the sequence $\textit{bs}$,
+whereas when
+simplification is done along the way,
+the structure of nested alternatives is never created(we can
+actually prove that simplification function never allows nested
+alternatives to happen, more on this later).
%CONSTRUCTION SITE HERE
that is to say, despite the bits are being moved around on the regular expression