ChengsongTanPhdThesis/Chapters/Finite.tex
changeset 611 bc1df466150a
parent 610 d028c662a3df
child 613 b0f0d884a547
--- a/ChengsongTanPhdThesis/Chapters/Finite.tex	Mon Oct 03 13:32:25 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex	Tue Oct 04 00:25:09 2022 +0100
@@ -524,7 +524,7 @@
 In this section we introduce in detail
 how the closed forms are obtained for regular expressions'
 derivatives and how they are bounded.
-We start by proving some basic identities
+We start by proving some basic identities and inequalities
 involving the simplification functions for r-regular expressions.
 After that we use these identities to establish the
 closed forms we need.
@@ -538,10 +538,16 @@
 
 
 
-\subsection{Some Basic Identities}
+\subsection{Some Basic Identities and Inequalities}
 
-
-\subsubsection{$\textit{rdistinct}$'s Deduplicates Successfully}
+In this subsection, we introduce lemmas 
+that are repeatedly used in later proofs.
+Note that for the $\textit{rdistinct}$ function there
+will be a lot of conversion from lists to sets.
+We use the name $set$ to refere to the 
+function that converts a list $rs$ to the set
+containing all the elements in $rs$.
+\subsubsection{$\textit{rdistinct}$'s Does the Job of De-duplication}
 The $\textit{rdistinct}$ function, as its name suggests, will
 remove duplicates in an r-regular expression list.
 It will also correctly exclude any elements that 
@@ -599,14 +605,14 @@
 \noindent
 On the other hand, if an element $r$ does not appear in the input list waiting to be deduplicated,
 then expanding the accumulator to include that element will not cause the output list to change:
-\begin{lemma}
+\begin{lemma}\label{rdistinctOnDistinct}
 	The accumulator can be augmented to include elements not appearing in the input list,
 	and the output will not change.	
 	\begin{itemize}
 		\item
-			If $r \notin rs$, then $\rdistinct{rs}{acc} = \rdistinct{rs}{\{r\} \cup acc}$.
+			If $r \notin rs$, then $\rdistinct{rs}{acc} = \rdistinct{rs}{(\{r\} \cup acc)}$.
 		\item
-			Particularly, when $acc = \varnothing$ and $rs$ de-duplicated, we have\\
+			Particularly, if $\;\;\textit{isDistinct} \; rs$, then we have\\
 			\[ \rdistinct{rs}{\varnothing} = rs \]
 	\end{itemize}
 \end{lemma}
@@ -614,24 +620,9 @@
 	The first half is by induction on $rs$. The second half is a corollary of the first.
 \end{proof}
 \noindent
-The next property gives the condition for
-when $\rdistinct{\_}{\_}$ becomes an identical mapping
-for any prefix of an input list, in other words, when can 
-we ``push out" the arguments of $\rdistinct{\_}{\_}$:
-\begin{lemma}\label{distinctRdistinctAppend}
-	If $\textit{isDistinct} \; rs_1$, and $rs_1 \cap acc = \varnothing$,
-	then 
-	\[\textit{rdistinct}\;  (rs_1 @ rsa)\;\, acc
-	= rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1))\]
-\end{lemma}
-\noindent
-In other words, it can be taken out and left untouched in the output.
-\begin{proof}
-	By an induction on $rs_1$, where $rsa$ and $acc$ are allowed to be arbitrary.
-\end{proof}
-\noindent
-$\rdistinct{}{}$ removes any element in anywhere of a list, if it
-had appeared previously:
+The function $\textit{rdistinct}$ removes duplicates from anywhere in a list.
+Despite being seemingly obvious, 
+the induction technique is not as straightforward.
 \begin{lemma}\label{distinctRemovesMiddle}
 	The two properties hold if $r \in rs$:
 	\begin{itemize}
@@ -649,15 +640,15 @@
 \noindent
 \begin{proof}
 	By induction on $rs$. All other variables are allowed to be arbitrary.
-	The second half of the lemma requires the first half.
-	Note that for each half's two sub-propositions need to be proven concurrently,
+	The second part of the lemma requires the first.
+	Note that for each part, the two sub-propositions need to be proven concurrently,
 	so that the induction goes through.
 \end{proof}
-
 \noindent
-This allows us to prove ``Idempotency" of $\rdistinct{}{}$ of some kind:
+This allows us to prove a few more equivalence relations involving 
+$\textit{rdistinct}$ (it will be useful later):
 \begin{lemma}\label{rdistinctConcatGeneral}
-	The following equalities involving multiple applications  of $\rdistinct{}{}$ hold:
+	\mbox{}
 	\begin{itemize}
 		\item
 			$\rdistinct{(rs @ rs')}{\varnothing} = \rdistinct{((\rdistinct{rs}{\varnothing})@ rs')}{\varnothing}$
@@ -680,17 +671,46 @@
 \begin{proof}
 	By \ref{rdistinctDoesTheJob} and \ref{distinctRemovesMiddle}.
 \end{proof}
+\noindent
+$\textit{rdistinct}$ is composable w.r.t list concatenation:
+\begin{lemma}\label{distinctRdistinctAppend}
+			If $\;\; \textit{isDistinct} \; rs_1$, 
+			and $(set \; rs_1) \cap acc = \varnothing$,
+			then applying $\textit{rdistinct}$ on $rs_1 @ rs_a$ does not 
+			have an effect on $rs_1$:
+			\[\textit{rdistinct}\;  (rs_1 @ rsa)\;\, acc
+			= rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1))\]
+\end{lemma}
+\begin{proof}
+	By an induction on 
+	$rs_1$, where $rsa$ and $acc$ are allowed to be arbitrary.
+\end{proof}
+\noindent
+$\textit{rdistinct}$ needs to be applied only once, and 
+applying it multiple times does not cause any difference:
+\begin{corollary}\label{distinctOnceEnough}
+	$\textit{rdistinct} \; (rs @ rsa) {} = \textit{rdistinct} \; (rdistinct \; 
+	rs \{ \} @ (\textit{rdistinct} \; rs_a \; (set \; rs)))$
+\end{corollary}
+\begin{proof}
+	By lemma \ref{distinctRdistinctAppend}.
+\end{proof}
 
-\subsubsection{The Properties of $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS}$} 
-We give in this subsection some properties of how $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS} $ interact with each other and with $@$, the concatenation operator.
+\subsubsection{The Properties of $\textit{Rflts}$} 
+We give in this subsection some properties
+involving $\backslash_r$, $\backslash_{rsimp}$, $\textit{rflts}$ and 
+$\textit{rsimp}_{ALTS} $, together with any non-trivial lemmas that lead to them.
 These will be helpful in later closed form proofs, when
-we want to transform the ways in which multiple functions involving
-those are composed together
-in interleaving derivative and  simplification steps.
+we want to transform derivative terms which have
+%the ways in which multiple functions involving
+%those are composed together
+interleaving derivatives and  simplifications applied to them.
 
-When the function $\textit{Rflts}$ 
-is applied to the concatenation of two lists, the output can be calculated by first applying the
-functions on two lists separately, and then concatenating them together.
+\noindent
+%When the function $\textit{Rflts}$ 
+%is applied to the concatenation of two lists, the output can be calculated by first applying the
+%functions on two lists separately, and then concatenating them together.
+$\textit{Rflts}$ is composable in terms of concatenation:
 \begin{lemma}\label{rfltsProps}
 	The function $\rflts$ has the below properties:\\
 	\begin{itemize}
@@ -720,6 +740,68 @@
 	and induction on $rs$, $rs'$, $rs$, $rs'$, $rs_a$ in the third, fourth, fifth, sixth and 
 	last sub-lemma.
 \end{proof}
+\noindent
+Now we introduce the property that the operations 
+derivative and $\rsimpalts$
+commute, this will be used later in deriving the closed form for
+the alternative regular expression:
+\begin{lemma}\label{rderRsimpAltsCommute}
+	$\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
+\end{lemma}
+\noindent
+\subsubsection{$\textit{rsimp}$ Does Not Increment the Size}
+Although it seems evident, we need a series
+of non-trivial lemmas to establish this property.
+\begin{lemma}\label{rsimpMonoLemmas}
+	\mbox{}
+	\begin{itemize}
+		\item
+			\[
+				\llbracket \rsimpalts \; rs \rrbracket_r \leq
+				\llbracket \sum \; rs \rrbracket_r
+			\]
+		\item
+			\[
+				\llbracket \rsimpseq \; r_1 \;  r_2 \rrbracket_r \leq
+				\llbracket r_1 \cdot r_2 \rrbracket_r
+			\]
+		\item
+			\[
+				\llbracket \rflts \; rs \rrbracket_r  \leq
+				\llbracket rs \rrbracket_r 
+			\]
+		\item
+			\[
+				\llbracket \rDistinct \; rs \; ss \rrbracket_r  \leq
+				\llbracket rs \rrbracket_r 
+			\]
+		\item
+			If all elements $a$ in the set $as$ satisfy the property
+			that $\llbracket \textit{rsimp} \; a \rrbracket_r \leq
+			\llbracket a \rrbracket_r$, then we have 
+			\[
+				\llbracket \; \rsimpalts \; (\textit{rdistinct} \;
+				(\textit{rflts} \; (\textit{map}\;\textit{rsimp} as)) \{\})
+				\rrbracket \leq
+				\llbracket \; \sum \; (\rDistinct \; (\rflts \;(\map \;
+				\textit{rsimp} \; x))\; \{ \} ) \rrbracket_r 
+			\]
+	\end{itemize}
+\end{lemma}
+\begin{proof}
+	Point 1, 3, 4 can be proven by an induction on $rs$.
+	Point 2 is by case analysis on $r_1$ and $r_2$.
+	The last part is a corollary of the previous ones.
+\end{proof}
+\noindent
+With the lemmas for each inductive case in place, we are ready to get 
+the non-increasing property as a corollary:
+\begin{corollary}\label{rsimpMono}
+	$\llbracket \textit{rsimp} \; r \rrbracket_r$
+\end{corollary}
+\begin{proof}
+	By \ref{rsimpMonoLemmas}.
+\end{proof}
 
 \subsubsection{The $RL$ Function: Language Interpretation of $\textit{Rrexp}$s}
 Much like the definition of $L$ on plain regular expressions, one could also 
@@ -901,7 +983,8 @@
 	If $\good \;r$ then $\rsimp{r} = r$.
 \end{lemma}
 \begin{proof}
-	By an induction on the inductive cases of $\good$.
+	By an induction on the inductive cases of $\good$, using lemmas
+	\ref{goodaltsNonalt} and \ref{rdistinctOnDistinct}.
 	The lemma \ref{goodaltsNonalt} is used in the alternative
 	case where 2 or more elements are present in the list.
 \end{proof}
@@ -928,8 +1011,8 @@
 
 
 
-We are also 
-\subsection{$\rsimp$ is Idempotent}
+We are also ready to prove that $\textit{rsimp}$ is idempotent.
+\subsubsection{$\rsimp$ is Idempotent}
 The idempotency of $\rsimp$ is very useful in 
 manipulating regular expression terms into desired
 forms so that key steps allowing further rewriting to closed forms
@@ -975,7 +1058,7 @@
 	\end{itemize}
 \end{lemma}
 \begin{proof}
-	By application of \ref{rsimpIdem} and \ref{good1}.
+	By application of lemmas \ref{rsimpIdem} and \ref{good1}.
 \end{proof}
 
 \noindent
@@ -1136,12 +1219,14 @@
 \end{center}
 
 \noindent
-The reason why we take the trouble of defining 
-two separate list rewriting definitions $\frewrite$ and $\grewrite$
-is to separate the two stages of simplification: flattening and de-duplicating.
+We defined
+two separate list rewriting definitions $\frewrite$ and $\grewrite$.
+The rewriting steps that take place during
+flattening is characterised by $\frewrite$.
+$\grewrite$ characterises both flattening and de-duplicating.
 Sometimes $\grewrites$ is slightly too powerful
-so we would rather use $\frewrites$ which makes certain rewriting steps 
-more straightforward to prove.
+so we would rather use $\frewrites$ because we only
+need to prove about certain equivalence under the rewriting steps of $\frewrites$.
 For example, when proving the closed-form for the alternative regular expression,
 one of the rewriting steps would be:
 \begin{lemma}
@@ -1273,15 +1358,12 @@
 			(\map \; (\_ \backslash_r x) rs) \;\varnothing  )$
 	\end{itemize}
 \end{lemma}
+\begin{proof}
+	Part 1 is by lemma \ref{insideSimpRemoval},
+	part 2 is by lemmas \ref{insideSimpRemoval} and \ref{distinctDer}.
+\end{proof}
 \noindent
-
-Finally,
-together with 
-\begin{lemma}\label{rderRsimpAltsCommute}
-	$\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
-\end{lemma}
-\noindent
-this leads to the first closed form--
+This leads to the first closed form--
 \begin{lemma}\label{altsClosedForm}
 	\begin{center}
 		$\rderssimp{(\sum rs)}{s} \sequal
@@ -2098,18 +2180,18 @@
 %w;r;t the input characters number, where the size is usually cubic in terms of original size
 %a*, aa*, aaa*, .....
 %randomly generated regular expressions
-\begin{center}
+\begin{figure}{H}
 	\begin{tabular}{@{}c@{\hspace{0mm}}c@{\hspace{0mm}}c@{}}
 		\begin{tikzpicture}
 			\begin{axis}[
-				xlabel={number of $a$'s},
+				xlabel={number of characters},
 				x label style={at={(1.05,-0.05)}},
 				ylabel={regex size},
 				enlargelimits=false,
 				xtick={0,5,...,30},
 				xmax=33,
-				ymax=1000,
-				ytick={0,100,...,1000},
+				%ymax=1000,
+				%ytick={0,100,...,1000},
 				scaled ticks=false,
 				axis lines=left,
 				width=5cm,
@@ -2129,8 +2211,8 @@
 		  enlargelimits=false,
 		  xtick={0,5,...,30},
 		  xmax=33,
-		  ymax=1000,
-		  ytick={0,100,...,1000},
+		  %ymax=1000,
+		  %ytick={0,100,...,1000},
 		  scaled ticks=false,
 		  axis lines=left,
 		  width=5cm,
@@ -2150,8 +2232,8 @@
 		  enlargelimits=false,
 		  xtick={0,5,...,30},
 		  xmax=33,
-		  ymax=1000,
-		  ytick={0,100,...,1000},
+		  %ymax=1000,
+		  %ytick={0,100,...,1000},
 		  scaled ticks=false,
 		  axis lines=left,
 		  width=5cm,
@@ -2164,7 +2246,7 @@
   \end{tikzpicture}\\
   \multicolumn{3}{c}{Graphs: size change of 3 randomly generated regular expressions $w.r.t.$ input string length.}
 	\end{tabular}    
-\end{center}  
+\end{figure}  
 \noindent
 Most of the regex's sizes seem to stay within a polynomial bound $w.r.t$ the 
 original size.
@@ -2179,14 +2261,6 @@
 %-----------------------------------
 %	SECTION syntactic equivalence under simp
 %-----------------------------------
-\section{Syntactic Equivalence Under $\simp$}
-We prove that minor differences can be annhilated
-by $\simp$.
-For example,
-\begin{center}
-	$\simp \;(\simpALTs\; (\map \;(\_\backslash \; x)\; (\distinct \; \mathit{rs}\; \phi))) = 
-	\simp \;(\simpALTs \;(\distinct \;(\map \;(\_ \backslash\; x) \; \mathit{rs}) \; \phi))$
-\end{center}
 
 
 %----------------------------------------------------------------------------------------