--- a/ChengsongTanPhdThesis/Chapters/Finite.tex Wed Jun 29 12:38:05 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex Fri Jul 01 13:02:15 2022 +0100
@@ -768,7 +768,7 @@
We need more equalities like the above to enable a closed form,
but to proceed we need to introduce two rewrite relations,
to make things smoother.
-\subsubsection{The rewrite relation $\hrewrite$, $\frewrite$ and $\grewrite$}
+\subsubsection{The rewrite relation $\hrewrite$ , $\scfrewrites$ , $\frewrite$ and $\grewrite$}
Insired by the success we had in the correctness proof
in \ref{Bitcoded2}, where we invented
a term rewriting system to capture the similarity between terms,
@@ -777,7 +777,8 @@
similarities between terms that would be otherwise
hard to express.
-We use $\hrewrite$ for one-step atomic rewrite of regular expression simplification,
+We use $\hrewrite$ for one-step atomic rewrite of
+regular expression simplification,
$\frewrite$ for rewrite of list of regular expressions that
include all operations carried out in $\rflts$, and $\grewrite$ for
rewriting a list of regular expressions possible in both $\rflts$ and $\rdistinct{}{}$.
@@ -821,6 +822,18 @@
\end{mathpar}
\end{center}
+
+ List of rewrite rules for a list of regular expressions,
+ where each element can rewrite in many steps to the other (scf stands for
+ li\emph{s}t \emph{c}losed \emph{f}orm). This relation is similar to the
+ $\stackrel{s*}{\rightsquigarrow}$ for annotated regular expressions.
+
+\begin{center}
+\begin{mathpar}
+ \inferrule{}{[] \scfrewrites [] }
+ \inferrule{r \hrewrites r' \\ rs \scfrewrites rs'}{r :: rs \scfrewrites r' :: rs'}
+\end{mathpar}
+\end{center}
%frewrite
List of one-step rewrite rules for flattening
a list of regular expressions($\frewrite$):
@@ -838,11 +851,11 @@
a list of regular expressions ($\grewrite$):
\begin{center}
\begin{mathpar}
- \inferrule{}{\RZERO :: rs \frewrite rs \\}
+ \inferrule{}{\RZERO :: rs \grewrite rs \\}
- \inferrule{}{(\sum rs) :: rs_a \frewrite rs @ rs_a \\}
+ \inferrule{}{(\sum rs) :: rs_a \grewrite rs @ rs_a \\}
- \inferrule{rs_1 \frewrite rs_2}{r :: rs_1 \frewrite r :: rs_2}
+ \inferrule{rs_1 \grewrite rs_2}{r :: rs_1 \grewrite r :: rs_2}
\inferrule[dB]{}{rs_a @ [a] @ rs_b @[a] @ rs_c \grewrite rs_a @ [a] @ rsb @ rsc}
\end{mathpar}
@@ -851,78 +864,336 @@
\noindent
The reason why we take the trouble of defining
two separate list rewriting definitions $\frewrite$ and $\grewrite$
-is that sometimes $\grewrites$ is slightly too powerful
-for proving equivalence.
+is to separate the two stages of simplification: flattening and de-duplicating.
+Sometimes $\grewrites$ is slightly too powerful
+so we would rather use $\frewrites$ which makes certain rewriting steps
+more straightforward to prove.
For example, when proving the closed-form for the alternative regular expression,
one of the rewriting steps would be:
\begin{lemma}
- $\sum (\rDistinct \; (\map \; (\_ \backslash x) \; (\rflts \; rs)) \; \varnothing) \sequal
- \sum (\rDistinct \; (\rflts \; (\map \; (\_ \backslash x) \; rs)) \; \varnothing)
+ $\sum (\rDistinct \;\; (\map \; (\_ \backslash x) \; (\rflts \; rs)) \;\; \varnothing) \sequal
+ \sum (\rDistinct \;\; (\rflts \; (\map \; (\_ \backslash x) \; rs)) \;\; \varnothing)
$
\end{lemma}
\noindent
Proving this is by first showing
-\begin{lemma}
+\begin{lemma}\label{earlyLaterDerFrewrites}
$\map \; (\_ \backslash x) \; (\rflts \; rs) \frewrites
-\rflts \; (\map \; (\_ \backslash x) \; rs)$
+ \rflts \; (\map \; (\_ \backslash x) \; rs)$
\end{lemma}
\noindent
and then using lemma
\begin{lemma}\label{frewritesSimpeq}
If $rs_1 \frewrites rs_2 $, then $\sum (\rDistinct \; rs_1 \; \varnothing) \sequal
- \sum (\rDistinct \; rs_2 \; {})$.
+ \sum (\rDistinct \; rs_2 \; \varnothing)$.
\end{lemma}
-. But this trick will not work for $\grewrites$:
+\noindent
+is a piece of cake.
+But this trick will not work for $\grewrites$.
+For example, a rewriting step in proving
+closed forms is:
+\begin{center}
+$\rsimp{(\rsimpalts \; (\map \; (\_ \backslash x) \; (\rdistinct{(\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs}))))}{\varnothing})))}$\\
+$=$ \\
+$\rsimp{(\rsimpalts \; (\rdistinct{(\map \; (\_ \backslash x) \; (\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs})))) ) }{\varnothing}))} $
+\noindent
+\end{center}
+For this one would hope to have a rewriting relation between the two lists involved,
+similar to \ref{earlyLaterDerFrewrites}. However, it turns out that
\begin{center}
$\map \; (\_ \backslash x) \; (\rDistinct \; rs \; rset) \grewrites \rDistinct \; (\map \;
-(\_ \backslash x) \; rs) \; (\map \; (\_ \backslash x) \; rset)$
+(\_ \backslash x) \; rs) \; ( rset \backslash x)$
\end{center}
\noindent
-does \emph{not} hold.
-
-
+does $\mathbf{not}$ hold in general.
+For this rewriting step we will introduce some slightly more cumbersome
+proof technique in later sections.
+The point is that $\frewrite$
+allows us to prove equivalence in a straightforward two-step method that is
+not possible for $\grewrite$, thereby reducing the complexity of the entire proof.
-%this is for closed form for alts section, talk about that later------------------------------
-\begin{comment}
+\subsubsection{Terms That Can Be Rewritten Using $\hrewrites$, $\grewrites$, and $\frewrites$}
+We present in the below lemma a few pairs of terms that are rewritable via
+$\grewrites$:
+\begin{lemma}\label{gstarRdistinctGeneral}
+ \begin{itemize}
+ \item
+ $rs_1 @ rs \grewrites rs_1 @ (\rDistinct \; rs \; rs_1)$
+ \item
+ $rs \grewrites \rDistinct \; rs \; \varnothing$
+ \item
+ $rs_a @ (\rDistinct \; rs \; rs_a) \grewrites rs_a @ (\rDistinct \;
+ rs \; (\{\RZERO\} \cup rs_a))$
+ \item
+ $rs \;\; @ \;\; \rDistinct \; rs_a \; rset \grewrites rs @ \rDistinct \; rs_a \;
+ (rest \cup rs)$
+
+ \end{itemize}
+\end{lemma}
+\noindent
+If a pair of terms $rs_1, rs_2$ are rewritable via $\grewrites$ to each other,
+then they are equivalent under $\rsimp{}$:
+\begin{lemma}\label{grewritesSimpalts}
+ If $rs_1 \grewrites rs_2$, then
+ we have the following equivalence hold:
+ \begin{itemize}
+ \item
+ $\sum rs_1 \sequal \sum rs_2$
+ \item
+ $\rsimpalts \; rs_1 \sequal \rsimpalts \; rs_2$
+ \end{itemize}
+\end{lemma}
+\noindent
+Here are a few connecting lemmas showing that
+if a list of regular expressions can be rewritten using $\grewrites$ or $\frewrites $ or
+$\scfrewrites$,
+then an alternative constructor taking the list can also be rewritten using $\hrewrites$:
+\begin{lemma}
+ \begin{itemize}
+ \item
+ If $rs \grewrites rs'$ then $\sum rs \hrewrites \sum rs'$.
+ \item
+ If $rs \grewrites rs'$ then $\sum rs \hrewrites \rsimpalts \; rs'$
+ \item
+ If $rs_1 \scfrewrites rs_2$ then $\sum (rs @ rs_1) \hrewrites \sum (rs @ rs_2)$
+ \item
+ If $rs_1 \scfrewrites rs_2$ then $\sum rs_1 \hrewrites \sum rs_2$
+
+ \end{itemize}
+\end{lemma}
+\noindent
+Here comes the meat of the proof,
+which says that once two lists are rewritable to each other,
+then they are equivalent under $\rsimp{}$:
+\begin{lemma}
+ If $r_1 \hrewrites r_2$ then $r_1 \sequal r_2$.
+\end{lemma}
+
+\noindent
+And similar to \ref{Bitcoded2} one can preserve rewritability after taking derivative
+of two regular expressions on both sides:
+\begin{lemma}\label{interleave}
+ If $r \hrewrites r' $ then $\rder{c}{r} \hrewrites \rder{c}{r'}$
+\end{lemma}
+\noindent
+This allows proving more $\mathbf{rsimp}$-equivalent terms, involving $\backslash_r$ now.
+\begin{lemma}\label{insideSimpRemoval}
+ $\rsimp{\rder{c}{\rsimp{r}}} = \rsimp{\rder{c}{r}} $
+\end{lemma}
+\noindent
+\begin{proof}
+ By \ref{interleave} and \ref{rsimpIdem}.
+\end{proof}
+\noindent
+And this unlocks more equivalent terms:
+\begin{lemma}\label{Simpders}
+ As corollaries of \ref{insideSimpRemoval}, we have
+ \begin{itemize}
+ \item
+ If $s \neq []$ then $\rderssimp{r}{s} = \rsimp{(\rders \; r \; s)}$.
+ \item
+ $\rsimpalts \; (\map \; (\_ \backslash_r x) \;
+ (\rdistinct{rs}{\varnothing})) \sequal
+ \rsimpalts \; (\rDistinct \;
+ (\map \; (\_ \backslash_r x) rs) \;\varnothing )$
+ \end{itemize}
+ \end{lemma}
+\noindent
+
+Finally,
+together with
+\begin{lemma}\label{rderRsimpAltsCommute}
+ $\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
+\end{lemma}
+\noindent
+this leads to the first closed form--
+\begin{lemma}\label{altsClosedForm}
\begin{center}
-$\rsimp{(\rsimpalts \; (\map \; (\_ \backslash x) \; (\rdistinct{(\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs}))))}{\varnothing})))} =
-\rsimp{(\rsimpalts \; (\rdistinct{(\map \; (\_ \backslash x) \; (\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs})))) ) }{\varnothing}))} $
+ $\rderssimp{(\sum rs)}{s} \sequal
+ \sum \; (\map \; (\rderssimp{\_}{s}) \; rs)$
\end{center}
+\end{lemma}
\noindent
-This is not possible to prove
+\begin{proof}
+ By a reverse induction on the string $s$.
+ One rewriting step, as we mentioned earlier,
+ involves
+ \begin{center}
+ $\rsimpalts \; (\map \; (\_ \backslash x) \;
+ (\rdistinct{(\rflts \; (\map \; (\rsimp{} \; \circ \;
+ (\lambda r. \rderssimp{r}{xs}))))}{\varnothing}))
+ \sequal
+ \rsimpalts \; (\rdistinct{(\map \; (\_ \backslash x) \;
+ (\rflts \; (\map \; (\rsimp{} \; \circ \;
+ (\lambda r. \rderssimp{r}{xs})))) ) }{\varnothing}) $.
+ \end{center}
+ This can be proven by a combination of
+ \ref{grewritesSimpalts}, \ref{gstarRdistinctGeneral}, \ref{rderRsimpAltsCommute}, and
+ \ref{insideSimpRemoval}.
+\end{proof}
+\noindent
+This closed form has a variant which can be more convenient in later proofs:
+\begin{corollary}
+ If $s \neq []$ then
+ $\rderssimp \; (\sum \; rs) \; s =
+ \rsimp{(\sum \; (\map \; \rderssimp{\_}{s} \; rs))}$.
+\end{corollary}
+\noindent
+The harder closed forms are the sequence and star ones.
+Before we go on to obtain them, some preliminary definitions
+are needed to make proof statements concise.
-\end{comment}
-%this is for closed form for alts section, talk about that later------------------------------
-And we define an "grewrite" relation that works on lists:
+\section{"Closed Forms" of regular expressions' derivatives w.r.t strings}
+We note that the different ways in which regular expressions are
+nested do not matter under $\rsimp{}$:
+\begin{center}
+ To be proven:\\
+ $\rsimp{r} \stackrel{?}{=} \rsimp{r'}$ if $r = \sum [r_1, r_2, r_3, \ldots]$
+and $r' =(\ldots ((r_1 + r_2) + r_3) + \ldots)$
+\end{center}
+\noindent
+This intuition is also echoed by IndianPaper, where they gave
+a pencil-and-paper derivation of $(r_1 \cdot r_2)\backslash s$:
\begin{center}
-\begin{tabular}{lcl}
-$ \ZERO :: rs$ & $\rightarrow_g$ & $rs$
+ $((r_1 \cdot r_2) \backslash_r c_1) \stackrel{\backslash_r c_2}{=}
+ ((r_1 \backslash_r c_1) \cdot r_2 + (\delta\; (\rnullable \; r_1),\; r_2 \backslash_r c_1))
+ \stackrel{\backslash_r c_2}{=}
+ ((r_1 \backslash_r c_1c_2 \cdot r_2 + (\delta \; (\rnullable) \; r_1, r_2 \backslash_r c_1c_2)
+ + (\delta \; (\rnullable \; r_1 \backslash_r c)\; r_2 \backslash_r c_2)
+ $
+\end{center}
+\noindent
+The $\delta \; b \; r$ function above turns the entire term into $\RZERO$
+if the boolean condition $b$ evaluates to false, and outputs $r$ otherwise.
+The equality in their derivation steps should be interpretated
+as language equivalence. To pin down the intuition into rigorous terms,
+$\sflat{}$ is used to enable the transformation from
+a left-associative nested sequence of alternatives into
+a flattened list:
+$r = \sum [r_1, r_2, r_3, \ldots] \stackrel{\sflat{}}{\rightarrow}
+(\ldots ((r_1 + r_2) + r_3) + \ldots)$
+The definitions $\sflat{}$, $\sflataux{}$ are given below.
+
+ \begin{center}
+ \begin{tabular}{ccc}
+ $\sflataux{\AALTS{ }{r :: rs}}$ & $=$ & $\sflataux{r} @ rs$\\
+$\sflataux{\AALTS{ }{[]}}$ & $ = $ & $ []$\\
+$\sflataux r$ & $=$ & $ [r]$
\end{tabular}
\end{center}
-
+ \begin{center}
+ \begin{tabular}{ccc}
+ $\sflat{(\sum r :: rs)}$ & $=$ & $\sum (\sflataux{r} @ rs)$\\
+ $\sflat{\sum []}$ & $ = $ & $ \sum []$\\
+$\sflat r$ & $=$ & $ r$
+\end{tabular}
+\end{center}
+The intuition behind $\sflataux{\_}$ is to break up nested regexes
+of the $(\ldots((r_1 + r_2) + r_3) + \ldots )$(left-associated) shape
+into a more "balanced" list: $\AALTS{\_}{[r_1,\, r_2 ,\, r_3, \ldots]}$.
+It will return the singleton list $[r]$ otherwise.
+$\sflat{\_}$ works the same as $\sflataux{\_}$, except that it keeps
+the output type a regular expression, not a list.
+$\sflataux{\_}$ and $\sflat{\_}$ is only recursive in terms of the
+ first element of the list of children of
+an alternative regex ($\AALTS{ }{rs}$), and is purposefully built for dealing with the regular expression
+$r_1 \cdot r_2 \backslash s$.
+\subsection{The $\textit{vsuf}$ Function}
+Note that $((r_1 \backslash_r c_1c_2 \cdot r_2 + (\delta \; (\rnullable) \; r_1, r_2 \backslash_r c_1c_2)
+ + (\delta \; (\rnullable \; r_1 \backslash_r c)\; r_2 \backslash_r c_2)$ does not faithfully
+represent what would the intermediate derivatives would actually look like.
+For example, when $r_1$ and $r_1 \backslash_r c_1$ are nullable,
+the regular expression would not look like $(r_1 \backslash_r c_1c_2 + \RZERO ) + \RZERO$,
+but actually $r_1\backslash_r c_1c_2$, the redundant $\RZERO$s will not be created in the
+first place.
+In a closed-form one would want to take into account this
+and generate the list of string pairs $(s', s'')$ where $s'@s'' = s$ such that
+only $r_1 \backslash s'$ nullable.
+With $\sflat{\_}$ and $\sflataux{\_}$ we make
+precise what "closed forms" we have for the sequence derivatives and their simplifications,
+in other words, how can we construct $(r_1 \cdot r_2) \backslash s$
+and $\bderssimp{(r_1\cdot r_2)}{s}$,
+if we are only allowed to use a combination of $r_1 \backslash s'$ ($\bderssimp{r_1}{s'}$)
+and $r_2 \backslash s'$ ($\bderssimp{r_2}{s'}$), where $s'$ ranges over
+the substring of $s$?
+First let's look at a series of derivatives steps on a sequence
+regular expression, (assuming) that each time the first
+component of the sequence is always nullable):
+\begin{center}
-With these relations we prove
+$r_1 \cdot r_2 \quad \longrightarrow_{\backslash c} \quad r_1 \backslash c \cdot r_2 + r_2 \backslash c \quad \longrightarrow_{\backslash c'} \quad (r_1 \backslash cc' \cdot r_2 + r_2 \backslash c') + r_2 \backslash cc' \longrightarrow_{\backslash c''} \quad$\\
+$((r_1 \backslash cc'c'' \cdot r_2 + r_2 \backslash c'') + r_2 \backslash c'c'') + r_2 \backslash cc'c'' \longrightarrow_{\backslash c''} \quad
+ \ldots$
+
+\end{center}
+%TODO: cite indian paper
+Indianpaper have come up with a slightly more formal way of putting the above process:
+\begin{center}
+$r_1 \cdot r_2 \backslash (c_1 :: c_2 ::\ldots c_n) \myequiv r_1 \backslash (c_1 :: c_2:: \ldots c_n) +
+\delta(\nullable(r_1 \backslash (c_1 :: c_2 \ldots c_{n-1}) ), r_2 \backslash (c_n)) + \ldots
++ \delta (\nullable(r_1), r_2\backslash (c_1 :: c_2 ::\ldots c_n))$
+\end{center}
+where $\delta(b, r)$ will produce $r$
+if $b$ evaluates to true,
+and $\ZERO$ otherwise.
+
+ But the $\myequiv$ sign in the above equation means language equivalence instead of syntactical
+ equivalence. To make this intuition useful
+ for a formal proof, we need something
+stronger than language equivalence.
+With the help of $\sflat{\_}$ we can state the equation in Indianpaper
+more rigorously:
\begin{lemma}
-$rs \rightarrow rs' \implies \RALTS{rs} \rightarrow \RALTS{rs'}$
-\end{lemma}
-which enables us to prove "closed forms" equalities such as
-\begin{lemma}
-$\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\suffix \; s \; r_1 ))}$
+$\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}$
\end{lemma}
-But the most involved part of the above lemma is proving the following:
+The function $\vsuf{\_}{\_}$ is defined recursively on the structure of the string:
+
+\begin{center}
+\begin{tabular}{lcl}
+$\vsuf{[]}{\_} $ & $=$ & $[]$\\
+$\vsuf{c::cs}{r_1}$ & $ =$ & $ \textit{if} (\rnullable{r_1}) \textit{then} \; (\vsuf{cs}{(\rder{c}{r_1})}) @ [c :: cs]$\\
+ && $\textit{else} \; (\vsuf{cs}{(\rder{c}{r_1}) }) $
+\end{tabular}
+\end{center}
+It takes into account which prefixes $s'$ of $s$ would make $r \backslash s'$ nullable,
+and keep a list of suffixes $s''$ such that $s' @ s'' = s$. The list is sorted in
+the order $r_2\backslash s''$ appears in $(r_1\cdot r_2)\backslash s$.
+In essence, $\vsuf{\_}{\_}$ is doing a "virtual derivative" of $r_1 \cdot r_2$, but instead of producing
+the entire result of $(r_1 \cdot r_2) \backslash s$,
+it only stores all the $s''$ such that $r_2 \backslash s''$ are occurring terms in $(r_1\cdot r_2)\backslash s$.
+
+With this we can also add simplifications to both sides and get
\begin{lemma}
-$\bsimp{\RALTS{rs @ \RALTS{rs_1} @ rs'}} = \bsimp{\RALTS{rs @rs_1 @ rs'}} $
+$\rsimp{\sflat{(r_1 \cdot r_2) \backslash s} }= \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
\end{lemma}
-which is needed in proving things like
+Together with the idempotency property of $\rsimp{}$\ref{rsimpIdem},
+%TODO: state the idempotency property of rsimp
+it is possible to convert the above lemma to obtain a "closed form"
+for derivatives nested with simplification:
\begin{lemma}
-$r \rightarrow_f r' \implies \rsimp{r} \rightarrow \rsimp{r'}$
+$\rderssimp{(r_1 \cdot r_2)}{s} = \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
\end{lemma}
+And now the reason we have (1) in section 1 is clear:
+$\rsize{\rsimp{\RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map \;(r_2 \backslash \_) \; (\vsuf{s}{r_1}))}}}$,
+is equal to $\rsize{\rsimp{\RALTS{ ((r_1 \backslash s) \cdot r_2 :: (\map \; (r_2 \backslash \_) \; (\textit{Suffix}(r1, s))))}}}$
+ as $\vsuf{s}{r_1}$ is one way of expressing the list $\textit{Suffix}( r_1, s)$.
-Fortunately this is provable by induction on the list $rs$
+%----------------------------------------------------------------------------------------
+% SECTION 3
+%----------------------------------------------------------------------------------------
+
+\section{interaction between $\distinctWith$ and $\flts$}
+Note that it is not immediately obvious that
+\begin{center}
+$\llbracket \distinctWith (\flts \textit{rs}) = \phi \rrbracket \leq \llbracket \distinctWith \textit{rs} = \phi \rrbracket $.
+\end{center}
+The intuition is that if we remove duplicates from the $\textit{LHS}$, at least the same amount of
+duplicates will be removed from the list $\textit{rs}$ in the $\textit{RHS}$.
+
\subsection{A Closed Form for the Sequence Regular Expression}
\begin{quote}\it
@@ -1223,119 +1494,6 @@
% SECTION ??
%----------------------------------------------------------------------------------------
-\section{"Closed Forms" of regular expressions' derivatives w.r.t strings}
-To embark on getting the "closed forms" of regexes, we first
-define a few auxiliary definitions to make expressing them smoothly.
-
- \begin{center}
- \begin{tabular}{ccc}
- $\sflataux{\AALTS{ }{r :: rs}}$ & $=$ & $\sflataux{r} @ rs$\\
-$\sflataux{\AALTS{ }{[]}}$ & $ = $ & $ []$\\
-$\sflataux r$ & $=$ & $ [r]$
-\end{tabular}
-\end{center}
-The intuition behind $\sflataux{\_}$ is to break up nested regexes
-of the $(\ldots((r_1 + r_2) + r_3) + \ldots )$(left-associated) shape
-into a more "balanced" list: $\AALTS{\_}{[r_1,\, r_2 ,\, r_3, \ldots]}$.
-It will return the singleton list $[r]$ otherwise.
-
-$\sflat{\_}$ works the same as $\sflataux{\_}$, except that it keeps
-the output type a regular expression, not a list:
- \begin{center}
- \begin{tabular}{ccc}
- $\sflat{\AALTS{ }{r :: rs}}$ & $=$ & $\sflataux{r} @ rs$\\
-$\sflat{\AALTS{ }{[]}}$ & $ = $ & $ \AALTS{ }{[]}$\\
-$\sflat r$ & $=$ & $ [r]$
-\end{tabular}
-\end{center}
-$\sflataux{\_}$ and $\sflat{\_}$ is only recursive in terms of the
- first element of the list of children of
-an alternative regex ($\AALTS{ }{rs}$), and is purposefully built for dealing with the regular expression
-$r_1 \cdot r_2 \backslash s$.
-
-With $\sflat{\_}$ and $\sflataux{\_}$ we make
-precise what "closed forms" we have for the sequence derivatives and their simplifications,
-in other words, how can we construct $(r_1 \cdot r_2) \backslash s$
-and $\bderssimp{(r_1\cdot r_2)}{s}$,
-if we are only allowed to use a combination of $r_1 \backslash s'$ ($\bderssimp{r_1}{s'}$)
-and $r_2 \backslash s'$ ($\bderssimp{r_2}{s'}$), where $s'$ ranges over
-the substring of $s$?
-First let's look at a series of derivatives steps on a sequence
-regular expression, (assuming) that each time the first
-component of the sequence is always nullable):
-\begin{center}
-
-$r_1 \cdot r_2 \quad \longrightarrow_{\backslash c} \quad r_1 \backslash c \cdot r_2 + r_2 \backslash c \quad \longrightarrow_{\backslash c'} \quad (r_1 \backslash cc' \cdot r_2 + r_2 \backslash c') + r_2 \backslash cc' \longrightarrow_{\backslash c''} \quad$\\
-$((r_1 \backslash cc'c'' \cdot r_2 + r_2 \backslash c'') + r_2 \backslash c'c'') + r_2 \backslash cc'c'' \longrightarrow_{\backslash c''} \quad
- \ldots$
-
-\end{center}
-%TODO: cite indian paper
-Indianpaper have come up with a slightly more formal way of putting the above process:
-\begin{center}
-$r_1 \cdot r_2 \backslash (c_1 :: c_2 ::\ldots c_n) \myequiv r_1 \backslash (c_1 :: c_2:: \ldots c_n) +
-\delta(\nullable(r_1 \backslash (c_1 :: c_2 \ldots c_{n-1}) ), r_2 \backslash (c_n)) + \ldots
-+ \delta (\nullable(r_1), r_2\backslash (c_1 :: c_2 ::\ldots c_n))$
-\end{center}
-where $\delta(b, r)$ will produce $r$
-if $b$ evaluates to true,
-and $\ZERO$ otherwise.
-
- But the $\myequiv$ sign in the above equation means language equivalence instead of syntactical
- equivalence. To make this intuition useful
- for a formal proof, we need something
-stronger than language equivalence.
-With the help of $\sflat{\_}$ we can state the equation in Indianpaper
-more rigorously:
-\begin{lemma}
-$\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}$
-\end{lemma}
-
-The function $\vsuf{\_}{\_}$ is defined recursively on the structure of the string:
-
-\begin{center}
-\begin{tabular}{lcl}
-$\vsuf{[]}{\_} $ & $=$ & $[]$\\
-$\vsuf{c::cs}{r_1}$ & $ =$ & $ \textit{if} (\rnullable{r_1}) \textit{then} \; (\vsuf{cs}{(\rder{c}{r_1})}) @ [c :: cs]$\\
- && $\textit{else} \; (\vsuf{cs}{(\rder{c}{r_1}) }) $
-\end{tabular}
-\end{center}
-It takes into account which prefixes $s'$ of $s$ would make $r \backslash s'$ nullable,
-and keep a list of suffixes $s''$ such that $s' @ s'' = s$. The list is sorted in
-the order $r_2\backslash s''$ appears in $(r_1\cdot r_2)\backslash s$.
-In essence, $\vsuf{\_}{\_}$ is doing a "virtual derivative" of $r_1 \cdot r_2$, but instead of producing
-the entire result of $(r_1 \cdot r_2) \backslash s$,
-it only stores all the $s''$ such that $r_2 \backslash s''$ are occurring terms in $(r_1\cdot r_2)\backslash s$.
-
-With this we can also add simplifications to both sides and get
-\begin{lemma}
-$\rsimp{\sflat{(r_1 \cdot r_2) \backslash s} }= \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
-\end{lemma}
-Together with the idempotency property of $\rsimp{}$\ref{rsimpIdem},
-%TODO: state the idempotency property of rsimp
-it is possible to convert the above lemma to obtain a "closed form"
-for derivatives nested with simplification:
-\begin{lemma}
-$\rderssimp{(r_1 \cdot r_2)}{s} = \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
-\end{lemma}
-And now the reason we have (1) in section 1 is clear:
-$\rsize{\rsimp{\RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map \;(r_2 \backslash \_) \; (\vsuf{s}{r_1}))}}}$,
-is equal to $\rsize{\rsimp{\RALTS{ ((r_1 \backslash s) \cdot r_2 :: (\map \; (r_2 \backslash \_) \; (\textit{Suffix}(r1, s))))}}}$
- as $\vsuf{s}{r_1}$ is one way of expressing the list $\textit{Suffix}( r_1, s)$.
-
-%----------------------------------------------------------------------------------------
-% SECTION 3
-%----------------------------------------------------------------------------------------
-
-\section{interaction between $\distinctWith$ and $\flts$}
-Note that it is not immediately obvious that
-\begin{center}
-$\llbracket \distinctWith (\flts \textit{rs}) = \phi \rrbracket \leq \llbracket \distinctWith \textit{rs} = \phi \rrbracket $.
-\end{center}
-The intuition is that if we remove duplicates from the $\textit{LHS}$, at least the same amount of
-duplicates will be removed from the list $\textit{rs}$ in the $\textit{RHS}$.
-
-
%-----------------------------------
% SECTION syntactic equivalence under simp
%-----------------------------------