--- a/ChengsongTanPhdThesis/Chapters/Cubic.tex Sun Nov 06 23:06:10 2022 +0000
+++ b/ChengsongTanPhdThesis/Chapters/Cubic.tex Mon Nov 07 21:31:07 2022 +0000
@@ -42,10 +42,10 @@
include this new construct. With bounded repetitions
we are able to out-compete other verified lexers such as
Verbatim++ on regular expressions which involve a lot of
-repetitions. We also present the idempotency property proof
-of $\bsimp$, which leverages the idempotency proof of $\rsimp$.
-This reinforces our claim that the fixpoint construction
-originally required by Sulzmann and Lu can be removed in $\blexersimp$.
+repetitions. %We also present the idempotency property proof
+%of $\bsimp$, which leverages the idempotency proof of $\rsimp$.
+%This reinforces our claim that the fixpoint construction
+%originally required by Sulzmann and Lu can be removed in $\blexersimp$.
\\
Last but not least, we present our efforts and challenges we met
in further improving the algorithm by data
@@ -541,8 +541,674 @@
\subsubsection{Finiteness Proof Augmentation}
-The bounded repetitions can be handled
-using techniques similar to stars.
+The bounded repetitions are
+very similar to stars, and therefore the treatment
+is similar, with minor changes to handle some slight complications
+when the counter reaches 0.
+The exponential growth is similar:
+\begin{center}
+ \begin{tabular}{ll}
+ $r^{\{n\}} $ & $\longrightarrow_{\backslash c}$\\
+ $(r\backslash c) \cdot
+ r^{\{n - 1\}}*$ & $\longrightarrow_{\backslash c'}$\\
+ \\
+ $r \backslash cc' \cdot r^{\{n - 2\}}* +
+ r \backslash c' \cdot r^{\{n - 1\}}*$ &
+ $\longrightarrow_{\backslash c''}$\\
+ \\
+ $(r_1 \backslash cc'c'' \cdot r^{\{n-3\}}* +
+ r \backslash c''\cdot r^{\{n-1\}}) +
+ (r \backslash c'c'' \cdot r^{\{n-2\}}* +
+ r \backslash c'' \cdot r^{\{n-1\}}*)$ &
+ $\longrightarrow_{\backslash c'''}$ \\
+ \\
+ $\ldots$\\
+ \end{tabular}
+\end{center}
+Again, we assume that $r\backslash c$, $r \backslash cc'$ and so on
+are all nullable.
+The flattened list of terms for $r^{\{n\}} \backslash_{rs} s$
+\begin{center}
+ $[r_1 \backslash cc'c'' \cdot r^{\{n-3\}}*,\;
+ r \backslash c''\cdot r^{\{n-1\}}, \;
+ r \backslash c'c'' \cdot r^{\{n-2\}}*, \;
+ r \backslash c'' \cdot r^{\{n-1\}}*,\; \ldots ]$
+\end{center}
+that comes from
+\begin{center}
+ $(r_1 \backslash cc'c'' \cdot r^{\{n-3\}}* +
+ r \backslash c''\cdot r^{\{n-1\}}) +
+ (r \backslash c'c'' \cdot r^{\{n-2\}}* +
+ r \backslash c'' \cdot r^{\{n-1\}}*)+ \ldots$
+\end{center}
+are made of sequences with different tails, where the counters
+might differ.
+The observation for maintaining the bound is that
+these counters never exceed $n$, the original
+counter. With the number of counters staying finite,
+$\rDistinct$ will deduplicate and keep the list finite.
+We introduce this idea as a lemma once we describe all
+the necessary helper functions.
+
+Similar to the star case, we want
+\begin{center}
+ $\rderssimp{r^{\{n\}}}{s} = \rsimp{\sum rs}$.
+\end{center}
+where $rs$
+shall be in the form of
+$\map \; f \; Ss$, where $f$ is a function and
+$Ss$ a list of objects to act on.
+For star, the object's datatype is string.
+The list of strings $Ss$
+is generated using functions
+$\starupdate$ and $\starupdates$.
+The function that takes a string and returns a regular expression
+is the anonymous function $
+(\lambda s'. \; r\backslash s' \cdot r^{\{m\}})$.
+In the NTIMES setting,
+the $\starupdate$ and $\starupdates$ functions are replaced by
+$\textit{nupdate}$ and $\textit{nupdates}$:
+\begin{center}
+ \begin{tabular}{lcl}
+ $\nupdate \; c \; r \; [] $ & $\dn$ & $[]$\\
+ $\nupdate \; c \; r \;
+ (\Some \; (s, \; n + 1) \; :: \; Ss)$ & $\dn$ & %\\
+ $\textit{if} \;
+ (\rnullable \; (r \backslash_{rs} s))$ \\
+ & & $\;\;\textit{then}
+ \;\; \Some \; (s @ [c], n + 1) :: \Some \; ([c], n) :: (
+ \nupdate \; c \; r \; Ss)$ \\
+ & & $\textit{else} \;\; \Some \; (s @ [c], n+1) :: (
+ \nupdate \; c \; r \; Ss)$\\
+ $\nupdate \; c \; r \; (\textit{None} :: Ss)$ & $\dn$ &
+ $(\None :: \nupdate \; c \; r \; Ss)$\\
+ & & \\
+ %\end{tabular}
+%\end{center}
+%\begin{center}
+ %\begin{tabular}{lcl}
+ $\nupdates \; [] \; r \; Ss$ & $\dn$ & $Ss$\\
+ $\nupdates \; (c :: cs) \; r \; Ss$ & $\dn$ & $\nupdates \; cs \; r \; (
+ \nupdate \; c \; r \; Ss)$
+ \end{tabular}
+\end{center}
+\noindent
+which take into account when a subterm
+\begin{center}
+ $r \backslash_s s \cdot r^{\{n\}}$
+\end{center}
+counter $n$
+is 0, and therefore expands to
+\begin{center}
+$r \backslash_s (s@[c]) \cdot r^{\{n\}} \;+
+\; \ZERO$
+\end{center}
+after taking a derivative.
+The object now has type
+\begin{center}
+$\textit{option} \;(\textit{string}, \textit{nat})$
+\end{center}
+and therefore the function for converting such an option into
+a regular expression term is called $\opterm$:
+
+\begin{center}
+ \begin{tabular}{lcl}
+ $\opterm \; r \; SN$ & $\dn$ & $\textit{case} \; SN\; of$\\
+ & & $\;\;\Some \; (s, n) \Rightarrow
+ (r\backslash_{rs} s)\cdot r^{\{n\}}$\\
+ & & $\;\;\None \Rightarrow
+ \ZERO$\\
+ \end{tabular}
+\end{center}
+\noindent
+Put together, the list $\map \; f \; Ss$ is instantiated as
+\begin{center}
+ $\map \; (\opterm \; r) \; (\nupdates \; s \; r \;
+ [\Some \; ([c], n)])$.
+\end{center}
+For the closed form to be bounded, we would like
+simplification to be applied to each term in the list.
+Therefore we introduce some variants of $\opterm$,
+which help conveniently express the rewriting steps
+needed in the closed form proof.
+\begin{center}
+ \begin{tabular}{lcl}
+ $\optermOsimp \; r \; SN$ & $\dn$ & $\textit{case} \; SN\; of$\\
+ & & $\;\;\Some \; (s, n) \Rightarrow
+ \textit{rsimp} \; ((r\backslash_{rs} s)\cdot r^{\{n\}})$\\
+ & & $\;\;\None \Rightarrow
+ \ZERO$\\
+ \\
+ $\optermosimp \; r \; SN$ & $\dn$ & $\textit{case} \; SN\; of$\\
+ & & $\;\;\Some \; (s, n) \Rightarrow
+ (\textit{rsimp} \; (r\backslash_{rs} s))
+ \cdot r^{\{n\}}$\\
+ & & $\;\;\None \Rightarrow
+ \ZERO$\\
+ \\
+ $\optermsimp \; r \; SN$ & $\dn$ & $\textit{case} \; SN\; of$\\
+ & & $\;\;\Some \; (s, n) \Rightarrow
+ (r\backslash_{rsimps} s)\cdot r^{\{n\}}$\\
+ & & $\;\;\None \Rightarrow
+ \ZERO$\\
+ \end{tabular}
+\end{center}
+
+
+For a list of
+$\textit{option} \;(\textit{string}, \textit{nat})$ elements,
+we define the highest power for it recursively:
+\begin{center}
+ \begin{tabular}{lcl}
+ $\hpa \; [] \; n $ & $\dn$ & $n$\\
+ $\hpa \; (\None :: os) \; n $ & $\dn$ & $\hpa \; os \; n$\\
+ $\hpa \; (\Some \; (s, n) :: os) \; m$ & $\dn$ &
+ $\hpa \;os \; (\textit{max} \; n\; m)$\\
+ \\
+ $\hpower \; rs $ & $\dn$ & $\hpa \; rs \; 0$\\
+ \end{tabular}
+\end{center}
+
+Now the intuition that an NTIMES regular expression's power
+does not increase can be easily expressed as
+\begin{lemma}\label{nupdatesMono2}
+ $\hpower \; (\nupdates \;s \; r \; [\Some \; ([c], n)]) \leq n$
+\end{lemma}
+\begin{proof}
+ Note that the power is non-increasing after a $\nupdate$ application:
+ \begin{center}
+ $\hpa \;\; (\nupdate \; c \; r \; Ss)\;\; m \leq
+ \hpa\; \; Ss \; m$.
+ \end{center}
+ This is also the case for $\nupdates$:
+ \begin{center}
+ $\hpa \;\; (\nupdates \; s \; r \; Ss)\;\; m \leq
+ \hpa\; \; Ss \; m$.
+ \end{center}
+ Therefore we have that
+ \begin{center}
+ $\hpower \;\; (\nupdates \; s \; r \; Ss) \leq
+ \hpower \;\; Ss$
+ \end{center}
+ which leads to the lemma being proven.
+
+ \end{proof}
+
+
+We also define the inductive rules for
+the shape of derivatives of the NTIMES regular expressions:\\[-3em]
+\begin{center}
+ \begin{mathpar}
+ \inferrule{\mbox{}}{\cbn \;\ZERO}
+
+ \inferrule{\mbox{}}{\cbn \; \; r_a \cdot (r^{\{n\}})}
+
+ \inferrule{\cbn \; r_1 \;\; \; \cbn \; r_2}{\cbn \; r_1 + r_2}
+
+ \inferrule{\cbn \; r}{\cbn \; r + \ZERO}
+ \end{mathpar}
+\end{center}
+\noindent
+A derivative of NTIMES fits into the shape described by $\cbn$:
+\begin{lemma}\label{ntimesDersCbn}
+ $\cbn \; ((r' \cdot r^{\{n\}}) \backslash_{rs} s)$ holds.
+\end{lemma}
+\begin{proof}
+ By a reverse induction on $s$.
+ For the inductive case, note that if $\cbn \; r$ holds,
+ then $\cbn \; (r\backslash_r c)$ holds.
+\end{proof}
+In addition, for $\cbn$-shaped regular expressioins, one can flatten
+them:
+\begin{lemma}\label{ntimesHfauPushin}
+ If $\cbn \; r$ holds, then $\hflataux{r \backslash_r c} =
+ \textit{concat} \; (\map \; \hflataux{\map \; (\_\backslash_r c) \;
+ (\hflataux{r})})$
+\end{lemma}
+\begin{proof}
+ By an induction on the inductive cases of $\cbn$.
+\end{proof}
+
+
+
+This time we do not need to define the flattening functions for NTIMES only,
+because $\hflat{\_}$ and $\hflataux{\_}$ work on NTIMES already:
+\begin{lemma}\label{ntimesHfauInduct}
+$\hflataux{( (r\backslash_r c) \cdot r^{\{n\}}) \backslash_{rsimps} s} =
+ \map \; (\opterm \; r) \; (\nupdates \; s \; r \; [\Some \; ([c], n)])$
+\end{lemma}
+\begin{proof}
+ By a reverse induction on $s$.
+ The lemma \ref{ntimesDersCbn} is used.
+\end{proof}
+\noindent
+We have a recursive property for NTIMES with $\nupdate$
+similar to that for STAR,
+and one for $\nupdates $ as well:
+\begin{lemma}\label{nupdateInduct1}
+ (i) $\textit{concat} \; (\map \; (\hflataux{\_} \circ (
+ \opterm \; r)) \; Ss) = \map \; (\opterm \; r) \; (\nupdate \;
+ c \; r \; Ss)$\\
+ (ii) $\textit{concat} \; (\map \; \hflataux{\_}\;
+ \map \; (\_\backslash_r x) \;
+ (\map \; (\opterm \; r) \; (\nupdates \; xs \; r \; Ss))) =
+ \map \; (\opterm \; r) \; (\nupdates \;(xs@[x]) \; r\;Ss)$ holds.
+\end{lemma}
+\begin{proof}
+ (i) is by an induction on $Ss$.
+ (ii) is by an induction on $xs$.
+\end{proof}
+
+The $\nString$ predicate is defined for conveniently
+expressing that there are no empty strings in the
+$\Some \;(s, n)$ elements generated by $\nupdate$:
+\begin{center}
+ \begin{tabular}{lcl}
+ $\nString \; \None$ & $\dn$ & $ \textit{true}$\\
+ $\nString \; (\Some \; ([], n))$ & $\dn$ & $ \textit{false}$\\
+ $\nString \; (\Some \; (c::s, n))$ & $\dn$ & $ \textit{true}$\\
+ \end{tabular}
+\end{center}
+\begin{lemma}\label{nupdatesNonempty}
+ If for all elements $opt$ in $\textit{set} \; Ss$,
+ $\nString \; opt$ holds, the we have that
+ for all elements $opt$ in $\textit{set} \; (\nupdates \; s \; r \; Ss)$,
+ $\nString \; opt$ holds.
+\end{lemma}
+\begin{proof}
+ By an induction on $s$, where $Ss$ is set to vary over all possible values.
+\end{proof}
+
+\noindent
+
+\begin{lemma}\label{ntimesClosedFormsSteps}
+ The following list of equalities or rewriting relations hold:\\
+ (i) $r^{\{n+1\}} \backslash_{rsimps} (c::s) =
+ \textit{rsimp} \; (\sum (\map \; (\opterm \;r \;\_) \; (\nupdates \;
+ s \; r \; [\Some \; ([c], n)])))$\\
+ (ii)
+ \begin{center}
+ $\sum (\map \; (\opterm \; r) \; (\nupdates \; s \; r \; [
+ \Some \; ([c], n)]))$ \\ $ \sequal$\\
+ $\sum (\map \; (\textit{rsimp} \circ (\opterm \; r))\; (\nupdates \;
+ s\;r \; [\Some \; ([c], n)]))$\\
+ \end{center}
+ (iii)
+ \begin{center}
+ $\sum \;(\map \; (\optermosimp \; r) \; (\nupdates \; s \; r\; [\Some \;
+ ([c], n)]))$\\
+ $\sequal$\\
+ $\sum \;(\map \; (\optermsimp r) \; (\nupdates \; s \; r \; [\Some \;
+ ([c], n)])) $\\
+ \end{center}
+ (iv)
+ \begin{center}
+ $\sum \;(\map \; (\optermosimp \; r) \; (\nupdates \; s \; r\; [\Some \;
+ ([c], n)])) $ \\ $\sequal$\\
+ $\sum \;(\map \; (\optermOsimp r) \; (\nupdates \; s \; r \; [\Some \;
+ ([c], n)])) $\\
+ \end{center}
+ (v)
+ \begin{center}
+ $\sum \;(\map \; (\optermOsimp r) \; (\nupdates \; s \; r \; [\Some \;
+ ([c], n)])) $ \\ $\sequal$\\
+ $\sum \; (\map \; (\textit{rsimp} \circ (\opterm \; r)) \;
+ (\nupdates \; s \; r \; [\Some \; ([c], n)]))$
+ \end{center}
+\end{lemma}
+\begin{proof}
+ Routine.
+ (iii) and (iv) make use of the fact that all the strings $s$
+ inside $\Some \; (s, m)$ which are elements of the list
+ $\nupdates \; s\;r\;[\Some\; ([c], n)]$ are non-empty,
+ which is from lemma \ref{nupdatesNonempty}.
+ Once the string in $o = \Some \; (s, n)$ is
+ nonempty, $\optermsimp \; r \;o$,
+ $\optermosimp \; r \; o$ and $\optermosimp \; \; o$ are guaranteed
+ to be equal.
+\end{proof}
+
+\begin{theorem}\label{ntimesClosedForm}
+ The derivative of $r^{\{n+1\}}$ can be described as an alternative
+ containing a list
+ of terms:\\
+ $r^{\{n+1\}} \backslash_{rsimps} (c::s) = \textit{rsimp} \; (
+ \sum (\map \; (\optermsimp \; r) \; (\nupdates \; s \; r \;
+ [\Some \; ([c], n)])))$
+\end{theorem}
+\begin{proof}
+ By the rewriting steps described in lemma \ref{ntimesClosedFormsSteps}.
+\end{proof}
+
+\begin{lemma}\label{nupdatesNLeqN}
+ For an element $o$ in $\textit{set} \; (\nupdates \; s \; r \;
+ [\Some \; ([c], n)])$, either $o = \None$, or $o = \Some
+ \; (s', m)$ for some string $s'$ and number $m \leq n$.
+\end{lemma}
+
+
+\begin{lemma}\label{ntimesClosedFormListElemShape}
+ For any element $r'$ in $\textit{set} \; (\map \; (\optermsimp \; r) \; (
+ \nupdates \; s \; r \; [\Some \; ([c], n)]))$,
+ we have that $r'$ is either $\ZERO$ or $r \backslash_{rsimps} s' \cdot
+ r^{\{m\}}$ for some string $s'$ and number $m \leq n$.
+\end{lemma}
+\begin{proof}
+ Using lemma \ref{nupdatesNLeqN}.
+\end{proof}
+
+\begin{theorem}\label{ntimesClosedFormBounded}
+ Assuming that for any string $s$, $\llbracket r \backslash_{rsimps} s
+ \rrbracket_r \leq N$ holds, then we have that\\
+ $\llbracket r^{\{n+1\}} \backslash_{rsimps} s \rrbracket_r \leq
+ \textit{max} \; (c_N+1)* (N + \llbracket r^{\{n\}} \rrbracket+1)$,
+ where $c_N = \textit{card} \; (\textit{sizeNregex} \; (
+ N + \llbracket r^{\{n\}} \rrbracket_r+1))$.
+\end{theorem}
+\begin{proof}
+We have that for all regular expressions $r'$ in $\textit{set} \; (\map \; (\optermsimp \; r) \; (
+ \nupdates \; s \; r \; [\Some \; ([c], n)]))$,
+ $r'$'s size is less than or equal to $N + \llbracket r^{\{n\}}
+ \rrbracket_r + 1$
+because $r'$ can only be either a $\ZERO$ or $r \backslash_{rsimps} s' \cdot
+r^{\{m\}}$ for some string $s'$ and number
+$m \leq n$ (lemma \ref{ntimesClosedFormListElemShape}).
+In addition, we know that the list
+$\map \; (\optermsimp \; r) \; (
+\nupdates \; s \; r \; [\Some \; ([c], n)])$'s size is at most
+$c_N = \textit{card} \;
+(\sizeNregex \; ((N + \llbracket r^{\{n\}} \rrbracket) + 1))$.
+This gives us $\llbracket \llbracket r \backslash_{rsimps} \;s \llbracket_r
+\leq N * c_N$.
+\end{proof}
+
+
+Assuming we have that
+\begin{center}
+ $\rderssimp{r^*}{s} = \rsimp{(\sum \map \; (\lambda s'. r\backslash s' \cdot r^*) \; Ss)}$.
+\end{center}
+holds.
+The idea of $\starupdate$ and $\starupdates$
+is to update $Ss$ when another
+derivative is taken on $\rderssimp{r^*}{s}$
+w.r.t a character $c$ and a string $s'$
+respectively.
+Both $\starupdate$ and $\starupdates$ take three arguments as input:
+the new character $c$ or string $s$ to take derivative with,
+the regular expression
+$r$ under the star $r^*$, and the
+list of strings $Ss$ for the derivative $r^* \backslash s$
+up until this point
+such that
+\begin{center}
+$(r^*) \backslash s = \sum_{s' \in sSet} (r\backslash s') \cdot r^*$
+\end{center}
+is satisfied.
+
+Functions $\starupdate$ and $\starupdates$ characterise what the
+star derivatives will look like once ``straightened out'' into lists.
+The helper functions for such operations will be similar to
+$\sflat{\_}$, $\sflataux{\_}$ and $\sflataux{\_}$, which we defined for sequence.
+We use similar symbols to denote them, with a $*$ subscript to mark the difference.
+\begin{center}
+ \begin{tabular}{lcl}
+ $\hflataux{r_1 + r_2}$ & $\dn$ & $\hflataux{r_1} @ \hflataux{r_2}$\\
+ $\hflataux{r}$ & $\dn$ & $[r]$
+ \end{tabular}
+\end{center}
+
+\begin{center}
+ \begin{tabular}{lcl}
+ $\hflat{r_1 + r_2}$ & $\dn$ & $\sum (\hflataux {r_1} @ \hflataux {r_2}) $\\
+ $\hflat{r}$ & $\dn$ & $r$
+ \end{tabular}
+\end{center}
+\noindent
+These definitions are tailor-made for dealing with alternatives that have
+originated from a star's derivatives.
+A typical star derivative always has the structure of a balanced binary tree:
+\begin{center}
+ $(r_1 \backslash cc'c'' \cdot r^* + r \backslash c'') +
+ (r \backslash c'c'' \cdot r^* + r \backslash c'' \cdot r^*) $
+\end{center}
+All of the nested structures of alternatives
+generated from derivatives are binary, and therefore
+$\hflat{\_}$ and $\hflataux{\_}$ only deal with binary alternatives.
+$\hflat{\_}$ ``untangles'' like the following:
+\[
+ \sum ((r_1 + r_2) + (r_3 + r_4)) + \ldots \;
+ \stackrel{\hflat{\_}}{\longrightarrow} \;
+ \RALTS{[r_1, r_2, \ldots, r_n]}
+\]
+Here is a lemma stating the recursive property of $\starupdate$ and $\starupdates$,
+with the helpers $\hflat{\_}$ and $\hflataux{\_}$\footnote{The function $\textit{concat}$ takes a list of lists
+ and merges each of the element lists to form a flattened list.}:
+\begin{lemma}\label{stupdateInduct1}
+ \mbox
+ For a list of strings $Ss$, the following hold.
+ \begin{itemize}
+ \item
+ If we do a derivative on the terms
+ $r\backslash_r s \cdot r^*$ (where $s$ is taken from the list $Ss$),
+ the result will be the same as if we apply $\starupdate$ to $Ss$.
+ \begin{center}
+ \begin{tabular}{c}
+ $\textit{concat} \; (\map \; (\hflataux{\_} \circ ( (\_\backslash_r x)
+ \circ (\lambda s.\;\; (r \backslash_r s) \cdot r^*)))\; Ss )\;
+ $\\
+ \\
+ $=$ \\
+ \\
+ $\map \; (\lambda s. (r \backslash_r s) \cdot (r^*)) \;
+ (\starupdate \; x \; r \; Ss)$
+ \end{tabular}
+ \end{center}
+ \item
+ $\starupdates$ is ``composable'' w.r.t a derivative.
+ It piggybacks the character $x$ to the tail of the string
+ $xs$.
+ \begin{center}
+ \begin{tabular}{c}
+ $\textit{concat} \; (\map \; \hflataux{\_} \;
+ (\map \; (\_\backslash_r x) \;
+ (\map \; (\lambda s.\;\; (r \backslash_r s) \cdot
+ (r^*) ) \; (\starupdates \; xs \; r \; Ss))))$\\
+ \\
+ $=$\\
+ \\
+ $\map \; (\lambda s.\;\; (r\backslash_r s) \cdot (r^*)) \;
+ (\starupdates \; (xs @ [x]) \; r \; Ss)$
+ \end{tabular}
+ \end{center}
+ \end{itemize}
+\end{lemma}
+
+\begin{proof}
+ Part 1 is by induction on $Ss$.
+ Part 2 is by induction on $xs$, where $Ss$ is left to take arbitrary values.
+\end{proof}
+
+
+Like $\textit{createdBySequence}$, we need
+a predicate for ``star-created'' regular expressions:
+\begin{center}
+ \begin{mathpar}
+ \inferrule{\mbox{}}{ \textit{createdByStar}\; \RSEQ{ra}{\RSTAR{rb}} }
+
+ \inferrule{ \textit{createdByStar} \; r_1\; \land \; \textit{createdByStar} \; r_2 }{\textit{createdByStar} \; (r_1 + r_2) }
+ \end{mathpar}
+\end{center}
+\noindent
+All regular expressions created by taking derivatives of
+$r_1 \cdot (r_2)^*$ satisfy the $\textit{createdByStar}$ predicate:
+\begin{lemma}\label{starDersCbs}
+ $\textit{createdByStar} \; ((r_1 \cdot r_2^*) \backslash_r s) $ holds.
+\end{lemma}
+\begin{proof}
+ By a reverse induction on $s$.
+\end{proof}
+If a regular expression conforms to the shape of a star's derivative,
+then we can push an application of $\hflataux{\_}$ inside a derivative of it:
+\begin{lemma}\label{hfauPushin}
+ If $\textit{createdByStar} \; r$ holds, then
+ \begin{center}
+ $\hflataux{r \backslash_r c} = \textit{concat} \; (
+ \map \; \hflataux{\_} (\map \; (\_\backslash_r c) \;(\hflataux{r})))$
+ \end{center}
+ holds.
+\end{lemma}
+\begin{proof}
+ By an induction on the inductivev cases of $\textit{createdByStar}$.
+\end{proof}
+%This is not entirely true for annotated regular expressions:
+%%TODO: bsimp bders \neq bderssimp
+%\begin{center}
+% $(1+ (c\cdot \ASEQ{bs}{c^*}{c} ))$
+%\end{center}
+%For bit-codes, the order in which simplification is applied
+%might cause a difference in the location they are placed.
+%If we want something like
+%\begin{center}
+% $\bderssimp{r}{s} \myequiv \bsimp{\bders{r}{s}}$
+%\end{center}
+%Some "canonicalization" procedure is required,
+%which either pushes all the common bitcodes to nodes
+%as senior as possible:
+%\begin{center}
+% $_{bs}(_{bs_1 @ bs'}r_1 + _{bs_1 @ bs''}r_2) \rightarrow _{bs @ bs_1}(_{bs'}r_1 + _{bs''}r_2) $
+%\end{center}
+%or does the reverse. However bitcodes are not of interest if we are talking about
+%the $\llbracket r \rrbracket$ size of a regex.
+%Therefore for the ease and simplicity of producing a
+%proof for a size bound, we are happy to restrict ourselves to
+%unannotated regular expressions, and obtain such equalities as
+%TODO: rsimp sflat
+% The simplification of a flattened out regular expression, provided it comes
+%from the derivative of a star, is the same as the one nested.
+
+
+
+Now we introduce an inductive property
+for $\starupdate$ and $\hflataux{\_}$.
+\begin{lemma}\label{starHfauInduct}
+ If we do derivatives of $r^*$
+ with a string that starts with $c$,
+ then flatten it out,
+ we obtain a list
+ of the shape $\sum_{s' \in sS} (r\backslash_r s') \cdot r^*$,
+ where $sS = \starupdates \; s \; r \; [[c]]$. Namely,
+ \begin{center}
+ $\hflataux{(\rders{( (\rder{c}{r_0})\cdot(r_0^*))}{s})} =
+ \map \; (\lambda s_1. (r_0 \backslash_r s_1) \cdot (r_0^*)) \;
+ (\starupdates \; s \; r_0 \; [[c]])$
+ \end{center}
+holds.
+\end{lemma}
+\begin{proof}
+ By an induction on $s$, the inductive cases
+ being $[]$ and $s@[c]$. The lemmas \ref{hfauPushin} and \ref{starDersCbs} are used.
+\end{proof}
+\noindent
+
+$\hflataux{\_}$ has a similar effect as $\textit{flatten}$:
+\begin{lemma}\label{hflatauxGrewrites}
+ $a :: rs \grewrites \hflataux{a} @ rs$
+\end{lemma}
+\begin{proof}
+ By induction on $a$. $rs$ is set to take arbitrary values.
+\end{proof}
+It is also not surprising that $\textit{rsimp}$ will cover
+the effect of $\hflataux{\_}$:
+\begin{lemma}\label{cbsHfauRsimpeq1}
+ $\rsimp{(r_1 + r_2)} = \rsimp{(\RALTS{\hflataux{r_1} @ \hflataux{r_2}})}$
+\end{lemma}
+
+\begin{proof}
+ By using the rewriting relation $\rightsquigarrow$
+\end{proof}
+And from this we obtain a proof that a star's derivative will be the same
+as if it had all its nested alternatives created during deriving being flattened out:
+For example,
+\begin{lemma}\label{hfauRsimpeq2}
+ $\createdByStar{r} \implies \rsimp{r} = \rsimp{\RALTS{\hflataux{r}}}$
+\end{lemma}
+\begin{proof}
+ By structural induction on $r$, where the induction rules
+ are these of $\createdByStar{\_}$.
+ Lemma \ref{cbsHfauRsimpeq1} is used in the inductive case.
+\end{proof}
+
+
+%Here is a corollary that states the lemma in
+%a more intuitive way:
+%\begin{corollary}
+% $\hflataux{r^* \backslash_r (c::xs)} = \map \; (\lambda s. (r \backslash_r s) \cdot
+% (r^*))\; (\starupdates \; c\; r\; [[c]])$
+%\end{corollary}
+%\noindent
+%Note that this is also agnostic of the simplification
+%function we defined, and is therefore of more general interest.
+
+Together with the rewriting relation
+\begin{lemma}\label{starClosedForm6Hrewrites}
+ We have the following set of rewriting relations or equalities:
+ \begin{itemize}
+ \item
+ $\textit{rsimp} \; (r^* \backslash_r (c::s))
+ \sequal
+ \sum \; ( ( \sum (\lambda s. (r\backslash_r s) \cdot r^*) \; (
+ \starupdates \; s \; r \; [ c::[]] ) ) )$
+ \item
+ $r \backslash_{rsimp} (c::s) = \textit{rsimp} \; ( (
+ \sum ( (\map \; (\lambda s_1. (r\backslash s_1) \; r^*) \;
+ (\starupdates \;s \; r \; [ c::[] ])))))$
+ \item
+ $\sum ( (\map \; (\lambda s. (r\backslash s) \; r^*) \; Ss))
+ \sequal
+ \sum ( (\map \; (\lambda s. \textit{rsimp} \; (r\backslash s) \;
+ r^*) \; Ss) )$
+ \item
+ $\map \; (\lambda s. (\rsimp{r \backslash_r s}) \cdot (r^*)) \; Ss
+ \scfrewrites
+ \map \; (\lambda s. (\rsimp{r \backslash_r s}) \cdot (r^*)) \; Ss$
+ \item
+ $( ( \sum ( ( \map \ (\lambda s. \;\;
+ (\textit{rsimp} \; (r \backslash_r s)) \cdot r^*) \; (\starupdates \;
+ s \; r \; [ c::[] ])))))$\\
+ $\sequal$\\
+ $( ( \sum ( ( \map \ (\lambda s. \;\;
+ ( r \backslash_{rsimp} s)) \cdot r^*) \; (\starupdates \;
+ s \; r \; [ c::[] ]))))$\\
+ \end{itemize}
+\end{lemma}
+\begin{proof}
+ Part 1 leads to part 2.
+ The rest of them are routine.
+\end{proof}
+\noindent
+Next the closed form for star regular expressions can be derived:
+\begin{theorem}\label{starClosedForm}
+ $\rderssimp{r^*}{c::s} =
+ \rsimp{
+ (\sum (\map \; (\lambda s. (\rderssimp{r}{s})\cdot r^*) \;
+ (\starupdates \; s\; r \; [[c]])
+ )
+ )
+ }
+ $
+\end{theorem}
+\begin{proof}
+ By an induction on $s$.
+ The lemmas \ref{rsimpIdem}, \ref{starHfauInduct}, \ref{starClosedForm6Hrewrites}
+ and \ref{hfauRsimpeq2}
+ are used.
+ In \ref{starClosedForm6Hrewrites}, the equalities are
+ used to link the LHS and RHS.
+\end{proof}
+
+
+
+
The closed form for them looks like:
%\begin{center}
% \begin{tabular}{llrclll}
@@ -598,6 +1264,9 @@
\end{center}
\noindent
+
+
+
%----------------------------------------------------------------------------------------
% SECTION 1
%----------------------------------------------------------------------------------------
@@ -663,3 +1332,11 @@
% \]
%
%
+%\begin{center}
+% \begin{tabular}{lcl}
+% $\ntset \; r \; (n+1) \; c::cs $ & $\dn$ & $\nupdates \;
+% cs \; r \; [\Some \; ([c], n)]$\\
+% $\ntset \; r\; 0 \; \_$ & $\dn$ & $\None$\\
+% $\ntset \; r \; \_ \; [] $ & $ \dn$ & $[]$\\
+% \end{tabular}
+%\end{center}
--- a/ChengsongTanPhdThesis/Chapters/Finite.tex Sun Nov 06 23:06:10 2022 +0000
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex Mon Nov 07 21:31:07 2022 +0000
@@ -370,6 +370,14 @@
\end{center}
\noindent
where we omit the definition of $\textit{rnullable}$.
+The generalisation from derivative w.r.t to character to
+derivative w.r.t string is given as
+\begin{center}
+ \begin{tabular}{lcl}
+ $r \backslash_{rs} []$ & $\dn$ & $r$\\
+ $r \backslash_{rs} c::s$ & $\dn$ & $(r\backslash_r c) \backslash_{rs} s$
+ \end{tabular}
+\end{center}
The function $\distinctBy$ for r-regular expressions does not need
a function checking equivalence because
@@ -778,7 +786,7 @@
\subsubsection{The Properties of $\textit{Rflts}$}
We give in this subsection some properties
-involving $\backslash_r$, $\backslash_{rsimp}$, $\textit{rflts}$ and
+involving $\backslash_r$, $\backslash_{rsimps}$, $\textit{rflts}$ and
$\textit{rsimp}_{ALTS} $, together with any non-trivial lemmas that lead to them.
These will be helpful in later closed form proofs, when
we want to transform derivative terms which have
@@ -1430,7 +1438,7 @@
As corollaries of \ref{insideSimpRemoval}, we have
\begin{itemize}
\item
- If $s \neq []$ then $\rderssimp{r}{s} = \rsimp{(\rders \; r \; s)}$.
+ If $s \neq []$ then $\rderssimp{r}{s} = \rsimp{( r \backslash_{rs} s)}$.
\item
$\rsimpalts \; (\map \; (\_ \backslash_r x) \;
(\rdistinct{rs}{\varnothing})) \sequal
@@ -1844,7 +1852,7 @@
$\starupdate \; c \; r \; [] $ & $\dn$ & $[]$\\
$\starupdate \; c \; r \; (s :: Ss)$ & $\dn$ & \\
& & $\textit{if} \;
- (\rnullable \; (\rders \; r \; s))$ \\
+ (\rnullable \; (r \backslash_{rs} s))$ \\
& & $\textit{then} \;\; (s @ [c]) :: [c] :: (
\starupdate \; c \; r \; Ss)$ \\
& & $\textit{else} \;\; (s @ [c]) :: (
@@ -2034,7 +2042,7 @@
of the shape $\sum_{s' \in sS} (r\backslash_r s') \cdot r^*$,
where $sS = \starupdates \; s \; r \; [[c]]$. Namely,
\begin{center}
- $\hflataux{(\rders{( (\rder{c}{r_0})\cdot(r_0^*))}{s})} =
+ $\hflataux{(( (\rder{c}{r_0})\cdot(r_0^*))\backslash_{rs} s)} =
\map \; (\lambda s_1. (r_0 \backslash_r s_1) \cdot (r_0^*)) \;
(\starupdates \; s \; r_0 \; [[c]])$
\end{center}
@@ -2095,7 +2103,7 @@
\sum \; ( ( \sum (\lambda s. (r\backslash_r s) \cdot r^*) \; (
\starupdates \; s \; r \; [ c::[]] ) ) )$
\item
- $r \backslash_{rsimp} (c::s) = \textit{rsimp} \; ( (
+ $r \backslash_{rsimps} (c::s) = \textit{rsimp} \; ( (
\sum ( (\map \; (\lambda s_1. (r\backslash s_1) \; r^*) \;
(\starupdates \;s \; r \; [ c::[] ])))))$
\item
@@ -2113,7 +2121,7 @@
s \; r \; [ c::[] ])))))$\\
$\sequal$\\
$( ( \sum ( ( \map \ (\lambda s. \;\;
- ( r \backslash_{rsimp} s)) \cdot r^*) \; (\starupdates \;
+ ( r \backslash_{rsimps} s)) \cdot r^*) \; (\starupdates \;
s \; r \; [ c::[] ]))))$\\
\end{itemize}
\end{lemma}
@@ -2483,10 +2491,10 @@
\begin{center}
\begin{tabular}{lcll}
& & $ \llbracket \rderssimp{r_1\cdot r_2 }{s} \rrbracket_r $\\
-& $ = $ & $\llbracket \rsimp{(\sum(r_1 \backslash_{rsimp} s \cdot r_2 \; \; :: \; \;
- \map \; (r_2\backslash_{rsimp} \_)\; (\vsuf{s}{r})))} \rrbracket_r $ & (1) \\
- & $\leq$ & $\llbracket \rdistinct{(r_1 \backslash_{rsimp} s \cdot r_2 \; \; :: \; \;
- \map \; (r_2\backslash_{rsimp} \_)\; (\vsuf{s}{r}))}{\varnothing} \rrbracket_r + 1$ & (2) \\
+& $ = $ & $\llbracket \rsimp{(\sum(r_1 \backslash_{rsimps} s \cdot r_2 \; \; :: \; \;
+ \map \; (r_2\backslash_{rsimps} \_)\; (\vsuf{s}{r})))} \rrbracket_r $ & (1) \\
+ & $\leq$ & $\llbracket \rdistinct{(r_1 \backslash_{rsimps} s \cdot r_2 \; \; :: \; \;
+ \map \; (r_2\backslash_{rsimps} \_)\; (\vsuf{s}{r}))}{\varnothing} \rrbracket_r + 1$ & (2) \\
& $\leq$ & $2 + N_1 + \rsize{r_2} + (N_2 * (card\;(\sizeNregex \; N_2)))$ & (3)\\
\end{tabular}
\end{center}
@@ -2514,12 +2522,12 @@
\begin{center}
\begin{tabular}{lcll}
& & $ \llbracket \rderssimp{r^* }{c::cs} \rrbracket_r $\\
-& $ = $ & $\llbracket \rsimp{(\sum (\map \; (\lambda s. (r \backslash_{rsimp} s) \cdot r^*) \; (\starupdates\;
+& $ = $ & $\llbracket \rsimp{(\sum (\map \; (\lambda s. (r \backslash_{rsimps} s) \cdot r^*) \; (\starupdates\;
cs \; r \; [[c]] )) )} \rrbracket_r $ & (5) \\
& $\leq$ & $\llbracket
\rdistinct{
(\map \;
- (\lambda s. (r \backslash_{rsimp} s) \cdot r^*) \;
+ (\lambda s. (r \backslash_{rsimps} s) \cdot r^*) \;
(\starupdates\; cs \; r \; [[c]] )
)}
{\varnothing} \rrbracket_r + 1$ & (6) \\
@@ -2547,8 +2555,8 @@
\begin{center}
\begin{tabular}{lcll}
& & $ \llbracket \rderssimp{\sum rs }{c::cs} \rrbracket_r $\\
-& $ = $ & $\llbracket \rsimp{(\sum (\map \; (\_ \backslash_{rsimp} s) \; rs) )} \rrbracket_r $ & (9) \\
-& $\leq$ & $\llbracket (\sum (\map \; (\_ \backslash_{rsimp} s) \; rs) ) \rrbracket_r $ & (10) \\
+& $ = $ & $\llbracket \rsimp{(\sum (\map \; (\_ \backslash_{rsimps} s) \; rs) )} \rrbracket_r $ & (9) \\
+& $\leq$ & $\llbracket (\sum (\map \; (\_ \backslash_{rsimps} s) \; rs) ) \rrbracket_r $ & (10) \\
& $\leq$ & $1 + N * (length \; rs) $ & (11)\\
\end{tabular}
\end{center}