--- a/ChengsongTanPhdThesis/Chapters/Finite.tex Mon Oct 03 13:32:25 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex Tue Oct 04 00:25:09 2022 +0100
@@ -524,7 +524,7 @@
In this section we introduce in detail
how the closed forms are obtained for regular expressions'
derivatives and how they are bounded.
-We start by proving some basic identities
+We start by proving some basic identities and inequalities
involving the simplification functions for r-regular expressions.
After that we use these identities to establish the
closed forms we need.
@@ -538,10 +538,16 @@
-\subsection{Some Basic Identities}
+\subsection{Some Basic Identities and Inequalities}
-
-\subsubsection{$\textit{rdistinct}$'s Deduplicates Successfully}
+In this subsection, we introduce lemmas
+that are repeatedly used in later proofs.
+Note that for the $\textit{rdistinct}$ function there
+will be a lot of conversion from lists to sets.
+We use the name $set$ to refere to the
+function that converts a list $rs$ to the set
+containing all the elements in $rs$.
+\subsubsection{$\textit{rdistinct}$'s Does the Job of De-duplication}
The $\textit{rdistinct}$ function, as its name suggests, will
remove duplicates in an r-regular expression list.
It will also correctly exclude any elements that
@@ -599,14 +605,14 @@
\noindent
On the other hand, if an element $r$ does not appear in the input list waiting to be deduplicated,
then expanding the accumulator to include that element will not cause the output list to change:
-\begin{lemma}
+\begin{lemma}\label{rdistinctOnDistinct}
The accumulator can be augmented to include elements not appearing in the input list,
and the output will not change.
\begin{itemize}
\item
- If $r \notin rs$, then $\rdistinct{rs}{acc} = \rdistinct{rs}{\{r\} \cup acc}$.
+ If $r \notin rs$, then $\rdistinct{rs}{acc} = \rdistinct{rs}{(\{r\} \cup acc)}$.
\item
- Particularly, when $acc = \varnothing$ and $rs$ de-duplicated, we have\\
+ Particularly, if $\;\;\textit{isDistinct} \; rs$, then we have\\
\[ \rdistinct{rs}{\varnothing} = rs \]
\end{itemize}
\end{lemma}
@@ -614,24 +620,9 @@
The first half is by induction on $rs$. The second half is a corollary of the first.
\end{proof}
\noindent
-The next property gives the condition for
-when $\rdistinct{\_}{\_}$ becomes an identical mapping
-for any prefix of an input list, in other words, when can
-we ``push out" the arguments of $\rdistinct{\_}{\_}$:
-\begin{lemma}\label{distinctRdistinctAppend}
- If $\textit{isDistinct} \; rs_1$, and $rs_1 \cap acc = \varnothing$,
- then
- \[\textit{rdistinct}\; (rs_1 @ rsa)\;\, acc
- = rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1))\]
-\end{lemma}
-\noindent
-In other words, it can be taken out and left untouched in the output.
-\begin{proof}
- By an induction on $rs_1$, where $rsa$ and $acc$ are allowed to be arbitrary.
-\end{proof}
-\noindent
-$\rdistinct{}{}$ removes any element in anywhere of a list, if it
-had appeared previously:
+The function $\textit{rdistinct}$ removes duplicates from anywhere in a list.
+Despite being seemingly obvious,
+the induction technique is not as straightforward.
\begin{lemma}\label{distinctRemovesMiddle}
The two properties hold if $r \in rs$:
\begin{itemize}
@@ -649,15 +640,15 @@
\noindent
\begin{proof}
By induction on $rs$. All other variables are allowed to be arbitrary.
- The second half of the lemma requires the first half.
- Note that for each half's two sub-propositions need to be proven concurrently,
+ The second part of the lemma requires the first.
+ Note that for each part, the two sub-propositions need to be proven concurrently,
so that the induction goes through.
\end{proof}
-
\noindent
-This allows us to prove ``Idempotency" of $\rdistinct{}{}$ of some kind:
+This allows us to prove a few more equivalence relations involving
+$\textit{rdistinct}$ (it will be useful later):
\begin{lemma}\label{rdistinctConcatGeneral}
- The following equalities involving multiple applications of $\rdistinct{}{}$ hold:
+ \mbox{}
\begin{itemize}
\item
$\rdistinct{(rs @ rs')}{\varnothing} = \rdistinct{((\rdistinct{rs}{\varnothing})@ rs')}{\varnothing}$
@@ -680,17 +671,46 @@
\begin{proof}
By \ref{rdistinctDoesTheJob} and \ref{distinctRemovesMiddle}.
\end{proof}
+\noindent
+$\textit{rdistinct}$ is composable w.r.t list concatenation:
+\begin{lemma}\label{distinctRdistinctAppend}
+ If $\;\; \textit{isDistinct} \; rs_1$,
+ and $(set \; rs_1) \cap acc = \varnothing$,
+ then applying $\textit{rdistinct}$ on $rs_1 @ rs_a$ does not
+ have an effect on $rs_1$:
+ \[\textit{rdistinct}\; (rs_1 @ rsa)\;\, acc
+ = rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1))\]
+\end{lemma}
+\begin{proof}
+ By an induction on
+ $rs_1$, where $rsa$ and $acc$ are allowed to be arbitrary.
+\end{proof}
+\noindent
+$\textit{rdistinct}$ needs to be applied only once, and
+applying it multiple times does not cause any difference:
+\begin{corollary}\label{distinctOnceEnough}
+ $\textit{rdistinct} \; (rs @ rsa) {} = \textit{rdistinct} \; (rdistinct \;
+ rs \{ \} @ (\textit{rdistinct} \; rs_a \; (set \; rs)))$
+\end{corollary}
+\begin{proof}
+ By lemma \ref{distinctRdistinctAppend}.
+\end{proof}
-\subsubsection{The Properties of $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS}$}
-We give in this subsection some properties of how $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS} $ interact with each other and with $@$, the concatenation operator.
+\subsubsection{The Properties of $\textit{Rflts}$}
+We give in this subsection some properties
+involving $\backslash_r$, $\backslash_{rsimp}$, $\textit{rflts}$ and
+$\textit{rsimp}_{ALTS} $, together with any non-trivial lemmas that lead to them.
These will be helpful in later closed form proofs, when
-we want to transform the ways in which multiple functions involving
-those are composed together
-in interleaving derivative and simplification steps.
+we want to transform derivative terms which have
+%the ways in which multiple functions involving
+%those are composed together
+interleaving derivatives and simplifications applied to them.
-When the function $\textit{Rflts}$
-is applied to the concatenation of two lists, the output can be calculated by first applying the
-functions on two lists separately, and then concatenating them together.
+\noindent
+%When the function $\textit{Rflts}$
+%is applied to the concatenation of two lists, the output can be calculated by first applying the
+%functions on two lists separately, and then concatenating them together.
+$\textit{Rflts}$ is composable in terms of concatenation:
\begin{lemma}\label{rfltsProps}
The function $\rflts$ has the below properties:\\
\begin{itemize}
@@ -720,6 +740,68 @@
and induction on $rs$, $rs'$, $rs$, $rs'$, $rs_a$ in the third, fourth, fifth, sixth and
last sub-lemma.
\end{proof}
+\noindent
+Now we introduce the property that the operations
+derivative and $\rsimpalts$
+commute, this will be used later in deriving the closed form for
+the alternative regular expression:
+\begin{lemma}\label{rderRsimpAltsCommute}
+ $\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
+\end{lemma}
+\noindent
+\subsubsection{$\textit{rsimp}$ Does Not Increment the Size}
+Although it seems evident, we need a series
+of non-trivial lemmas to establish this property.
+\begin{lemma}\label{rsimpMonoLemmas}
+ \mbox{}
+ \begin{itemize}
+ \item
+ \[
+ \llbracket \rsimpalts \; rs \rrbracket_r \leq
+ \llbracket \sum \; rs \rrbracket_r
+ \]
+ \item
+ \[
+ \llbracket \rsimpseq \; r_1 \; r_2 \rrbracket_r \leq
+ \llbracket r_1 \cdot r_2 \rrbracket_r
+ \]
+ \item
+ \[
+ \llbracket \rflts \; rs \rrbracket_r \leq
+ \llbracket rs \rrbracket_r
+ \]
+ \item
+ \[
+ \llbracket \rDistinct \; rs \; ss \rrbracket_r \leq
+ \llbracket rs \rrbracket_r
+ \]
+ \item
+ If all elements $a$ in the set $as$ satisfy the property
+ that $\llbracket \textit{rsimp} \; a \rrbracket_r \leq
+ \llbracket a \rrbracket_r$, then we have
+ \[
+ \llbracket \; \rsimpalts \; (\textit{rdistinct} \;
+ (\textit{rflts} \; (\textit{map}\;\textit{rsimp} as)) \{\})
+ \rrbracket \leq
+ \llbracket \; \sum \; (\rDistinct \; (\rflts \;(\map \;
+ \textit{rsimp} \; x))\; \{ \} ) \rrbracket_r
+ \]
+ \end{itemize}
+\end{lemma}
+\begin{proof}
+ Point 1, 3, 4 can be proven by an induction on $rs$.
+ Point 2 is by case analysis on $r_1$ and $r_2$.
+ The last part is a corollary of the previous ones.
+\end{proof}
+\noindent
+With the lemmas for each inductive case in place, we are ready to get
+the non-increasing property as a corollary:
+\begin{corollary}\label{rsimpMono}
+ $\llbracket \textit{rsimp} \; r \rrbracket_r$
+\end{corollary}
+\begin{proof}
+ By \ref{rsimpMonoLemmas}.
+\end{proof}
\subsubsection{The $RL$ Function: Language Interpretation of $\textit{Rrexp}$s}
Much like the definition of $L$ on plain regular expressions, one could also
@@ -901,7 +983,8 @@
If $\good \;r$ then $\rsimp{r} = r$.
\end{lemma}
\begin{proof}
- By an induction on the inductive cases of $\good$.
+ By an induction on the inductive cases of $\good$, using lemmas
+ \ref{goodaltsNonalt} and \ref{rdistinctOnDistinct}.
The lemma \ref{goodaltsNonalt} is used in the alternative
case where 2 or more elements are present in the list.
\end{proof}
@@ -928,8 +1011,8 @@
-We are also
-\subsection{$\rsimp$ is Idempotent}
+We are also ready to prove that $\textit{rsimp}$ is idempotent.
+\subsubsection{$\rsimp$ is Idempotent}
The idempotency of $\rsimp$ is very useful in
manipulating regular expression terms into desired
forms so that key steps allowing further rewriting to closed forms
@@ -975,7 +1058,7 @@
\end{itemize}
\end{lemma}
\begin{proof}
- By application of \ref{rsimpIdem} and \ref{good1}.
+ By application of lemmas \ref{rsimpIdem} and \ref{good1}.
\end{proof}
\noindent
@@ -1136,12 +1219,14 @@
\end{center}
\noindent
-The reason why we take the trouble of defining
-two separate list rewriting definitions $\frewrite$ and $\grewrite$
-is to separate the two stages of simplification: flattening and de-duplicating.
+We defined
+two separate list rewriting definitions $\frewrite$ and $\grewrite$.
+The rewriting steps that take place during
+flattening is characterised by $\frewrite$.
+$\grewrite$ characterises both flattening and de-duplicating.
Sometimes $\grewrites$ is slightly too powerful
-so we would rather use $\frewrites$ which makes certain rewriting steps
-more straightforward to prove.
+so we would rather use $\frewrites$ because we only
+need to prove about certain equivalence under the rewriting steps of $\frewrites$.
For example, when proving the closed-form for the alternative regular expression,
one of the rewriting steps would be:
\begin{lemma}
@@ -1273,15 +1358,12 @@
(\map \; (\_ \backslash_r x) rs) \;\varnothing )$
\end{itemize}
\end{lemma}
+\begin{proof}
+ Part 1 is by lemma \ref{insideSimpRemoval},
+ part 2 is by lemmas \ref{insideSimpRemoval} and \ref{distinctDer}.
+\end{proof}
\noindent
-
-Finally,
-together with
-\begin{lemma}\label{rderRsimpAltsCommute}
- $\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
-\end{lemma}
-\noindent
-this leads to the first closed form--
+This leads to the first closed form--
\begin{lemma}\label{altsClosedForm}
\begin{center}
$\rderssimp{(\sum rs)}{s} \sequal
@@ -2098,18 +2180,18 @@
%w;r;t the input characters number, where the size is usually cubic in terms of original size
%a*, aa*, aaa*, .....
%randomly generated regular expressions
-\begin{center}
+\begin{figure}{H}
\begin{tabular}{@{}c@{\hspace{0mm}}c@{\hspace{0mm}}c@{}}
\begin{tikzpicture}
\begin{axis}[
- xlabel={number of $a$'s},
+ xlabel={number of characters},
x label style={at={(1.05,-0.05)}},
ylabel={regex size},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
- ymax=1000,
- ytick={0,100,...,1000},
+ %ymax=1000,
+ %ytick={0,100,...,1000},
scaled ticks=false,
axis lines=left,
width=5cm,
@@ -2129,8 +2211,8 @@
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
- ymax=1000,
- ytick={0,100,...,1000},
+ %ymax=1000,
+ %ytick={0,100,...,1000},
scaled ticks=false,
axis lines=left,
width=5cm,
@@ -2150,8 +2232,8 @@
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
- ymax=1000,
- ytick={0,100,...,1000},
+ %ymax=1000,
+ %ytick={0,100,...,1000},
scaled ticks=false,
axis lines=left,
width=5cm,
@@ -2164,7 +2246,7 @@
\end{tikzpicture}\\
\multicolumn{3}{c}{Graphs: size change of 3 randomly generated regular expressions $w.r.t.$ input string length.}
\end{tabular}
-\end{center}
+\end{figure}
\noindent
Most of the regex's sizes seem to stay within a polynomial bound $w.r.t$ the
original size.
@@ -2179,14 +2261,6 @@
%-----------------------------------
% SECTION syntactic equivalence under simp
%-----------------------------------
-\section{Syntactic Equivalence Under $\simp$}
-We prove that minor differences can be annhilated
-by $\simp$.
-For example,
-\begin{center}
- $\simp \;(\simpALTs\; (\map \;(\_\backslash \; x)\; (\distinct \; \mathit{rs}\; \phi))) =
- \simp \;(\simpALTs \;(\distinct \;(\map \;(\_ \backslash\; x) \; \mathit{rs}) \; \phi))$
-\end{center}
%----------------------------------------------------------------------------------------