ChengsongTanPhdThesis/Chapters/Finite.tex
changeset 614 d5e9bcb384ec
parent 613 b0f0d884a547
child 618 233cf2b97d1a
equal deleted inserted replaced
613:b0f0d884a547 614:d5e9bcb384ec
   783 the alternative regular expression:
   783 the alternative regular expression:
   784 \begin{lemma}\label{rderRsimpAltsCommute}
   784 \begin{lemma}\label{rderRsimpAltsCommute}
   785 	$\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
   785 	$\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
   786 \end{lemma}
   786 \end{lemma}
   787 \noindent
   787 \noindent
       
   788 
       
   789 \subsubsection{The $RL$ Function: Language Interpretation of $\textit{Rrexp}$s}
       
   790 Much like the definition of $L$ on plain regular expressions, one could also 
       
   791 define the language interpretation of $\rrexp$s.
       
   792 \begin{center}
       
   793 	\begin{tabular}{lcl}
       
   794 		$RL \; (\ZERO)$ & $\dn$ & $\phi$\\
       
   795 		$RL \; (\ONE)$ & $\dn$ & $\{[]\}$\\
       
   796 		$RL \; (c)$ & $\dn$ & $\{[c]\}$\\
       
   797 		$RL \; \sum rs$ & $\dn$ & $ \bigcup_{r \in rs} (RL \; r)$\\
       
   798 		$RL \; (r_1 \cdot r_2)$ & $\dn$ & $ RL \; (r_1) @ RL \; (r_2)$\\
       
   799 		$RL \; (r^*)$ & $\dn$ & $ (RL(r))^*$
       
   800 	\end{tabular}
       
   801 \end{center}
       
   802 \noindent
       
   803 The main use of $RL$ is to establish some connections between $\rsimp{}$ 
       
   804 and $\rnullable{}$:
       
   805 \begin{lemma}
       
   806 	The following properties hold:
       
   807 	\begin{itemize}
       
   808 		\item
       
   809 			If $\rnullable{r}$, then $\rsimp{r} \neq \RZERO$.
       
   810 		\item
       
   811 			$\rnullable{r \backslash s} \quad $ if and only if $\quad \rnullable{\rderssimp{r}{s}}$.
       
   812 	\end{itemize}
       
   813 \end{lemma}
       
   814 \begin{proof}
       
   815 	The first part is by induction on $r$. 
       
   816 	The second part is true because property 
       
   817 	\[ RL \; r = RL \; (\rsimp{r})\] holds.
       
   818 \end{proof}
       
   819 
       
   820 \subsubsection{Simplified $\textit{Rrexp}$s are Good}
       
   821 We formalise the notion of ``good" regular expressions,
       
   822 which means regular expressions that
       
   823 are not fully simplified. For alternative regular expressions that means they
       
   824 do not contain any nested alternatives like 
       
   825 \[ r_1 + (r_2 + r_3) \], un-removed $\RZERO$s like \[\RZERO + r\]
       
   826 or duplicate elements in a children regular expression list like \[ \sum [r, r, \ldots]\]:
       
   827 \begin{center}
       
   828 	\begin{tabular}{@{}lcl@{}}
       
   829 		$\good\; \RZERO$ & $\dn$ & $\textit{false}$\\
       
   830 		$\good\; \RONE$ & $\dn$ & $\textit{true}$\\
       
   831 		$\good\; \RCHAR{c}$ & $\dn$ & $\btrue$\\
       
   832 		$\good\; \RALTS{[]}$ & $\dn$ & $\bfalse$\\
       
   833 		$\good\; \RALTS{[r]}$ & $\dn$ & $\bfalse$\\
       
   834 		$\good\; \RALTS{r_1 :: r_2 :: rs}$ & $\dn$ & 
       
   835 		$\textit{isDistinct} \; (r_1 :: r_2 :: rs) \;$\\
       
   836 						   & & $\textit{and}\; (\forall r' \in (r_1 :: r_2 :: rs).\; \good \; r'\; \,  \textit{and}\; \, \textit{nonAlt}\; r')$\\
       
   837 		$\good \; \RSEQ{\RZERO}{r}$ & $\dn$ & $\bfalse$\\
       
   838 		$\good \; \RSEQ{\RONE}{r}$ & $\dn$ & $\bfalse$\\
       
   839 		$\good \; \RSEQ{r}{\RZERO}$ & $\dn$ & $\bfalse$\\
       
   840 		$\good \; \RSEQ{r_1}{r_2}$ & $\dn$ & $\good \; r_1 \;\, \textit{and} \;\, \good \; r_2$\\
       
   841 		$\good \; \RSTAR{r}$ & $\dn$ & $\btrue$\\
       
   842 	\end{tabular}
       
   843 \end{center}
       
   844 \noindent
       
   845 The predicate $\textit{nonAlt}$ evaluates to true when the regular expression is not an
       
   846 alternative, and false otherwise.
       
   847 The $\good$ property is preserved under $\rsimp_{ALTS}$, provided that
       
   848 its non-empty argument list of expressions are all good themsleves, and $\textit{nonAlt}$, 
       
   849 and unique:
       
   850 \begin{lemma}\label{rsimpaltsGood}
       
   851 	If $rs \neq []$ and forall $r \in rs. \textit{nonAlt} \; r$ and $\textit{isDistinct} \; rs$,
       
   852 	then $\good \; (\rsimpalts \; rs)$ if and only if forall $r \in rs. \; \good \; r$.
       
   853 \end{lemma}
       
   854 \noindent
       
   855 We also note that
       
   856 if a regular expression $r$ is good, then $\rflts$ on the singleton
       
   857 list $[r]$ will not break goodness:
       
   858 \begin{lemma}\label{flts2}
       
   859 	If $\good \; r$, then forall $r' \in \rflts \; [r]. \; \good \; r'$ and $\textit{nonAlt} \; r'$.
       
   860 \end{lemma}
       
   861 \begin{proof}
       
   862 	By an induction on $r$.
       
   863 \end{proof}
       
   864 \noindent
       
   865 The other observation we make about $\rsimp{r}$ is that it never
       
   866 comes with nested alternatives, which we describe as the $\nonnested$
       
   867 property:
       
   868 \begin{center}
       
   869 	\begin{tabular}{lcl}
       
   870 		$\nonnested \; \, \sum []$ & $\dn$ & $\btrue$\\
       
   871 		$\nonnested \; \, \sum ((\sum rs_1) :: rs_2)$ & $\dn$ & $\bfalse$\\
       
   872 		$\nonnested \; \, \sum (r :: rs)$ & $\dn$ & $\nonnested (\sum rs)$\\
       
   873 		$\nonnested \; \, r $ & $\dn$ & $\btrue$
       
   874 	\end{tabular}
       
   875 \end{center}
       
   876 \noindent
       
   877 The $\rflts$ function
       
   878 always opens up nested alternatives,
       
   879 which enables $\rsimp$ to be non-nested:
       
   880 
       
   881 \begin{lemma}\label{nonnestedRsimp}
       
   882 	$\nonnested \; (\rsimp{r})$
       
   883 \end{lemma}
       
   884 \begin{proof}
       
   885 	By an induction on $r$.
       
   886 \end{proof}
       
   887 \noindent
       
   888 With this we could prove that a regular expressions
       
   889 after simplification and flattening and de-duplication,
       
   890 will not contain any alternative regular expression directly:
       
   891 \begin{lemma}\label{nonaltFltsRd}
       
   892 	If $x \in \rdistinct{\rflts\; (\map \; \rsimp{} \; rs)}{\varnothing}$ 
       
   893 	then $\textit{nonAlt} \; x$.
       
   894 \end{lemma}
       
   895 \begin{proof}
       
   896 	By \ref{nonnestedRsimp}.
       
   897 \end{proof}
       
   898 \noindent
       
   899 The other thing we know is that once $\rsimp{}$ had finished
       
   900 processing an alternative regular expression, it will not
       
   901 contain any $\RZERO$s, this is because all the recursive 
       
   902 calls to the simplification on the children regular expressions
       
   903 make the children good, and $\rflts$ will not take out
       
   904 any $\RZERO$s out of a good regular expression list,
       
   905 and $\rdistinct{}$ will not mess with the result.
       
   906 \begin{lemma}\label{flts3Obv}
       
   907 	The following are true:
       
   908 	\begin{itemize}
       
   909 		\item
       
   910 			If for all $r \in rs. \, \good \; r $ or $r = \RZERO$,
       
   911 			then for all $r \in \rflts\; rs. \, \good \; r$.
       
   912 		\item
       
   913 			If $x \in \rdistinct{\rflts\; (\map \; rsimp{}\; rs)}{\varnothing}$
       
   914 			and for all $y$ such that $\llbracket y \rrbracket_r$ less than
       
   915 			$\llbracket rs \rrbracket_r + 1$, either
       
   916 			$\good \; (\rsimp{y})$ or $\rsimp{y} = \RZERO$,
       
   917 			then $\good \; x$.
       
   918 	\end{itemize}
       
   919 \end{lemma}
       
   920 \begin{proof}
       
   921 	The first part is by induction on $rs$, where the induction
       
   922 	rule is the inductive cases for $\rflts$.
       
   923 	The second part is a corollary from the first part.
       
   924 \end{proof}
       
   925 
       
   926 And this leads to good structural property of $\rsimp{}$,
       
   927 that after simplification, a regular expression is
       
   928 either good or $\RZERO$:
       
   929 \begin{lemma}\label{good1}
       
   930 	For any r-regular expression $r$, $\good \; \rsimp{r}$ or $\rsimp{r} = \RZERO$.
       
   931 \end{lemma}
       
   932 \begin{proof}
       
   933 	By an induction on $r$. The inductive measure is the size $\llbracket \rrbracket_r$.
       
   934 	Lemma \ref{rsimpSize} says that 
       
   935 	$\llbracket \rsimp{r}\rrbracket_r$ is smaller than or equal to
       
   936 	$\llbracket r \rrbracket_r$.
       
   937 	Therefore, in the $r_1 \cdot r_2$ and $\sum rs$ case,
       
   938 	Inductive hypothesis applies to the children regular expressions
       
   939 	$r_1$, $r_2$, etc. The lemma \ref{flts3Obv}'s precondition is satisfied
       
   940 	by that as well.
       
   941 	The lemmas \ref{nonnestedRsimp} and  \ref{nonaltFltsRd} are used
       
   942 	to ensure that goodness is preserved at the topmost level.
       
   943 \end{proof}
       
   944 We shall prove that any good regular expression is 
       
   945 a fixed-point for $\rsimp{}$.
       
   946 First we prove an auxiliary lemma:
       
   947 \begin{lemma}\label{goodaltsNonalt}
       
   948 	If $\good \; \sum rs$, then $\rflts\; rs = rs$.
       
   949 \end{lemma}
       
   950 \begin{proof}
       
   951 	By an induction on $\sum rs$. The inductive rules are the cases
       
   952 	for $\good$.
       
   953 \end{proof}
       
   954 \noindent
       
   955 Now we are ready to prove that good regular expressions are invariant
       
   956 of $\rsimp{}$ application:
       
   957 \begin{lemma}\label{test}
       
   958 	If $\good \;r$ then $\rsimp{r} = r$.
       
   959 \end{lemma}
       
   960 \begin{proof}
       
   961 	By an induction on the inductive cases of $\good$, using lemmas
       
   962 	\ref{goodaltsNonalt} and \ref{rdistinctOnDistinct}.
       
   963 	The lemma \ref{goodaltsNonalt} is used in the alternative
       
   964 	case where 2 or more elements are present in the list.
       
   965 \end{proof}
       
   966 \noindent
       
   967 Given below is a property involving $\rflts$, $\rdistinct{}{}$, $\rsimp{}$ and $\rsimp_{ALTS}$,
       
   968 which requires $\ref{good1}$ to go through smoothly.
       
   969 It says that an application of $\rsimp_{ALTS}$ can be "absorbed",
       
   970 if it its output is concatenated with a list and then applied to $\rflts$.
       
   971 \begin{lemma}\label{flattenRsimpalts}
       
   972 	$\rflts \; ( (\rsimp_{ALTS} \; 
       
   973 	(\rdistinct{(\rflts \; (\map \; \rsimp{}\; rs))}{\varnothing})) :: 
       
   974 	\map \; \rsimp{} \; rs' ) = 
       
   975 	\rflts \; ( (\rdistinct{(\rflts \; (\map \; \rsimp{}\; rs))}{\varnothing}) @ (
       
   976 	\map \; \rsimp{rs'}))$
       
   977 
       
   978 
       
   979 \end{lemma}
       
   980 \begin{proof}
       
   981 	By \ref{good1}.
       
   982 \end{proof}
       
   983 \noindent
       
   984 
       
   985 
       
   986 
       
   987 
       
   988 
       
   989 We are also ready to prove that $\textit{rsimp}$ is idempotent.
       
   990 \subsubsection{$\rsimp$ is Idempotent}
       
   991 The idempotency of $\rsimp$ is very useful in 
       
   992 manipulating regular expression terms into desired
       
   993 forms so that key steps allowing further rewriting to closed forms
       
   994 are possible.
       
   995 \begin{lemma}\label{rsimpIdem}
       
   996 	$\rsimp{r} = \rsimp{\rsimp{r}}$
       
   997 \end{lemma}
       
   998 
       
   999 \begin{proof}
       
  1000 	By \ref{test} and \ref{good1}.
       
  1001 \end{proof}
       
  1002 \noindent
       
  1003 This property means we do not have to repeatedly
       
  1004 apply simplification in each step, which justifies
       
  1005 our definition of $\blexersimp$.
       
  1006 
       
  1007 
       
  1008 On the other hand, we could repeat the same $\rsimp{}$ applications
       
  1009 on regular expressions as many times as we want, if we have at least
       
  1010 one simplification applied to it, and apply it wherever we would like to:
       
  1011 \begin{corollary}\label{headOneMoreSimp}
       
  1012 	The following properties hold, directly from \ref{rsimpIdem}:
       
  1013 
       
  1014 	\begin{itemize}
       
  1015 		\item
       
  1016 			$\map \; \rsimp{(r :: rs)} = \map \; \rsimp{} \; (\rsimp{r} :: rs)$
       
  1017 		\item
       
  1018 			$\rsimp{(\RALTS{rs})} = \rsimp{(\RALTS{\map \; \rsimp{} \; rs})}$
       
  1019 	\end{itemize}
       
  1020 \end{corollary}
       
  1021 \noindent
       
  1022 This will be useful in later closed form proof's rewriting steps.
       
  1023 Similarly, we point out the following useful facts below:
       
  1024 \begin{lemma}
       
  1025 	The following equalities hold if $r = \rsimp{r'}$ for some $r'$:
       
  1026 	\begin{itemize}
       
  1027 		\item
       
  1028 			If $r  = \sum rs$ then $\rsimpalts \; rs = \sum rs$.
       
  1029 		\item
       
  1030 			If $r = \sum rs$ then $\rdistinct{rs}{\varnothing} = rs$.
       
  1031 		\item
       
  1032 			$\rsimpalts \; (\rdistinct{\rflts \; [r]}{\varnothing}) = r$.
       
  1033 	\end{itemize}
       
  1034 \end{lemma}
       
  1035 \begin{proof}
       
  1036 	By application of lemmas \ref{rsimpIdem} and \ref{good1}.
       
  1037 \end{proof}
       
  1038 
       
  1039 \noindent
       
  1040 With the idempotency of $\rsimp{}$ and its corollaries, 
       
  1041 we can start proving some key equalities leading to the 
       
  1042 closed forms.
       
  1043 Now presented are a few equivalent terms under $\rsimp{}$.
       
  1044 We use $r_1 \sequal r_2 $ here to denote $\rsimp{r_1} = \rsimp{r_2}$.
       
  1045 \begin{lemma}
       
  1046 	\begin{itemize}
       
  1047 		The following equivalence hold:
       
  1048 	\item
       
  1049 		$\rsimpalts \; (\RZERO :: rs) \sequal \rsimpalts\; rs$
       
  1050 	\item
       
  1051 		$\rsimpalts \; rs \sequal \rsimpalts (\map \; \rsimp{} \; rs)$
       
  1052 	\item
       
  1053 		$\RALTS{\RALTS{rs}} \sequal \RALTS{rs}$
       
  1054 	\item
       
  1055 		$\sum ((\sum rs_a) :: rs_b) \sequal \sum rs_a @ rs_b$
       
  1056 	\item
       
  1057 		$\RALTS{rs} = \RALTS{\map \; \rsimp{} \; rs}$
       
  1058 \end{itemize}
       
  1059 \end{lemma}
       
  1060 \begin{proof}
       
  1061 	By induction on the lists involved.
       
  1062 \end{proof}
       
  1063 \noindent
       
  1064 The above allows us to prove
       
  1065 two similar equalities (which are a bit more involved).
       
  1066 It says that we could flatten out the elements
       
  1067 before simplification and still get the same result.
       
  1068 \begin{lemma}\label{simpFlatten3}
       
  1069 	One can flatten the inside $\sum$ of a $\sum$ if it is being 
       
  1070 	simplified. Concretely,
       
  1071 	\begin{itemize}
       
  1072 		\item
       
  1073 			If for all $r \in rs, rs', rs''$, we have $\good \; r $
       
  1074 			or $r = \RZERO$, then $\sum (rs' @ rs @ rs'') \sequal 
       
  1075 			\sum (rs' @ [\sum rs] @ rs'')$ holds. As a corollary,
       
  1076 		\item
       
  1077 			$\sum (rs' @ [\sum rs] @ rs'') \sequal \sum (rs' @ rs @ rs'')$
       
  1078 	\end{itemize}
       
  1079 \end{lemma}
       
  1080 \begin{proof}
       
  1081 	By rewriting steps involving the use of \ref{test} and \ref{rdistinctConcatGeneral}.
       
  1082 	The second sub-lemma is a corollary of the previous.
       
  1083 \end{proof}
       
  1084 %Rewriting steps not put in--too long and complicated-------------------------------
       
  1085 \begin{comment}
       
  1086 	\begin{center}
       
  1087 		$\rsimp{\sum (rs' @ rs @ rs'')}  \stackrel{def of bsimp}{=}$  \\
       
  1088 		$\rsimpalts \; (\rdistinct{\rflts \; ((\map \; \rsimp{}\; rs') @ (\map \; \rsimp{} \; rs ) @ (\map \; \rsimp{} \; rs''))}{\varnothing})$ \\
       
  1089 		$\stackrel{by \ref{test}}{=} 
       
  1090 		\rsimpalts \; (\rdistinct{(\rflts \; rs' @ \rflts \; rs @ \rflts \; rs'')}{
       
  1091 		\varnothing})$\\
       
  1092 		$\stackrel{by \ref{rdistinctConcatGeneral}}{=}
       
  1093 		\rsimpalts \; (\rdistinct{\rflts \; rs'}{\varnothing} @ \rdistinct{(
       
  1094 		\rflts\; rs @ \rflts \; rs'')}{\rflts \; rs'})$\\
       
  1095 
       
  1096 	\end{center}
       
  1097 \end{comment}
       
  1098 %Rewriting steps not put in--too long and complicated-------------------------------
       
  1099 \noindent
       
  1100 
       
  1101 
   788 We need more equalities like the above to enable a closed form,
  1102 We need more equalities like the above to enable a closed form,
   789 for which we need to introduce a few rewrite relations
  1103 for which we need to introduce a few rewrite relations
   790 to help
  1104 to help
   791 us obtain them.
  1105 us obtain them.
   792 
  1106 
  1627 \end{corollary}
  1941 \end{corollary}
  1628 \begin{proof}
  1942 \begin{proof}
  1629 	By \ref{rsimpMonoLemmas}.
  1943 	By \ref{rsimpMonoLemmas}.
  1630 \end{proof}
  1944 \end{proof}
  1631 
  1945 
  1632 \subsubsection{The $RL$ Function: Language Interpretation of $\textit{Rrexp}$s}
       
  1633 Much like the definition of $L$ on plain regular expressions, one could also 
       
  1634 define the language interpretation of $\rrexp$s.
       
  1635 \begin{center}
       
  1636 	\begin{tabular}{lcl}
       
  1637 		$RL \; (\ZERO)$ & $\dn$ & $\phi$\\
       
  1638 		$RL \; (\ONE)$ & $\dn$ & $\{[]\}$\\
       
  1639 		$RL \; (c)$ & $\dn$ & $\{[c]\}$\\
       
  1640 		$RL \; \sum rs$ & $\dn$ & $ \bigcup_{r \in rs} (RL \; r)$\\
       
  1641 		$RL \; (r_1 \cdot r_2)$ & $\dn$ & $ RL \; (r_1) @ RL \; (r_2)$\\
       
  1642 		$RL \; (r^*)$ & $\dn$ & $ (RL(r))^*$
       
  1643 	\end{tabular}
       
  1644 \end{center}
       
  1645 \noindent
       
  1646 The main use of $RL$ is to establish some connections between $\rsimp{}$ 
       
  1647 and $\rnullable{}$:
       
  1648 \begin{lemma}
       
  1649 	The following properties hold:
       
  1650 	\begin{itemize}
       
  1651 		\item
       
  1652 			If $\rnullable{r}$, then $\rsimp{r} \neq \RZERO$.
       
  1653 		\item
       
  1654 			$\rnullable{r \backslash s} \quad $ if and only if $\quad \rnullable{\rderssimp{r}{s}}$.
       
  1655 	\end{itemize}
       
  1656 \end{lemma}
       
  1657 \begin{proof}
       
  1658 	The first part is by induction on $r$. 
       
  1659 	The second part is true because property 
       
  1660 	\[ RL \; r = RL \; (\rsimp{r})\] holds.
       
  1661 \end{proof}
       
  1662 
       
  1663 \subsubsection{Simplified $\textit{Rrexp}$s are Good}
       
  1664 We formalise the notion of ``good" regular expressions,
       
  1665 which means regular expressions that
       
  1666 are not fully simplified. For alternative regular expressions that means they
       
  1667 do not contain any nested alternatives like 
       
  1668 \[ r_1 + (r_2 + r_3) \], un-removed $\RZERO$s like \[\RZERO + r\]
       
  1669 or duplicate elements in a children regular expression list like \[ \sum [r, r, \ldots]\]:
       
  1670 \begin{center}
       
  1671 	\begin{tabular}{@{}lcl@{}}
       
  1672 		$\good\; \RZERO$ & $\dn$ & $\textit{false}$\\
       
  1673 		$\good\; \RONE$ & $\dn$ & $\textit{true}$\\
       
  1674 		$\good\; \RCHAR{c}$ & $\dn$ & $\btrue$\\
       
  1675 		$\good\; \RALTS{[]}$ & $\dn$ & $\bfalse$\\
       
  1676 		$\good\; \RALTS{[r]}$ & $\dn$ & $\bfalse$\\
       
  1677 		$\good\; \RALTS{r_1 :: r_2 :: rs}$ & $\dn$ & 
       
  1678 		$\textit{isDistinct} \; (r_1 :: r_2 :: rs) \;$\\
       
  1679 						   & & $\textit{and}\; (\forall r' \in (r_1 :: r_2 :: rs).\; \good \; r'\; \,  \textit{and}\; \, \textit{nonAlt}\; r')$\\
       
  1680 		$\good \; \RSEQ{\RZERO}{r}$ & $\dn$ & $\bfalse$\\
       
  1681 		$\good \; \RSEQ{\RONE}{r}$ & $\dn$ & $\bfalse$\\
       
  1682 		$\good \; \RSEQ{r}{\RZERO}$ & $\dn$ & $\bfalse$\\
       
  1683 		$\good \; \RSEQ{r_1}{r_2}$ & $\dn$ & $\good \; r_1 \;\, \textit{and} \;\, \good \; r_2$\\
       
  1684 		$\good \; \RSTAR{r}$ & $\dn$ & $\btrue$\\
       
  1685 	\end{tabular}
       
  1686 \end{center}
       
  1687 \noindent
       
  1688 The predicate $\textit{nonAlt}$ evaluates to true when the regular expression is not an
       
  1689 alternative, and false otherwise.
       
  1690 The $\good$ property is preserved under $\rsimp_{ALTS}$, provided that
       
  1691 its non-empty argument list of expressions are all good themsleves, and $\textit{nonAlt}$, 
       
  1692 and unique:
       
  1693 \begin{lemma}\label{rsimpaltsGood}
       
  1694 	If $rs \neq []$ and forall $r \in rs. \textit{nonAlt} \; r$ and $\textit{isDistinct} \; rs$,
       
  1695 	then $\good \; (\rsimpalts \; rs)$ if and only if forall $r \in rs. \; \good \; r$.
       
  1696 \end{lemma}
       
  1697 \noindent
       
  1698 We also note that
       
  1699 if a regular expression $r$ is good, then $\rflts$ on the singleton
       
  1700 list $[r]$ will not break goodness:
       
  1701 \begin{lemma}\label{flts2}
       
  1702 	If $\good \; r$, then forall $r' \in \rflts \; [r]. \; \good \; r'$ and $\textit{nonAlt} \; r'$.
       
  1703 \end{lemma}
       
  1704 \begin{proof}
       
  1705 	By an induction on $r$.
       
  1706 \end{proof}
       
  1707 \noindent
       
  1708 The other observation we make about $\rsimp{r}$ is that it never
       
  1709 comes with nested alternatives, which we describe as the $\nonnested$
       
  1710 property:
       
  1711 \begin{center}
       
  1712 	\begin{tabular}{lcl}
       
  1713 		$\nonnested \; \, \sum []$ & $\dn$ & $\btrue$\\
       
  1714 		$\nonnested \; \, \sum ((\sum rs_1) :: rs_2)$ & $\dn$ & $\bfalse$\\
       
  1715 		$\nonnested \; \, \sum (r :: rs)$ & $\dn$ & $\nonnested (\sum rs)$\\
       
  1716 		$\nonnested \; \, r $ & $\dn$ & $\btrue$
       
  1717 	\end{tabular}
       
  1718 \end{center}
       
  1719 \noindent
       
  1720 The $\rflts$ function
       
  1721 always opens up nested alternatives,
       
  1722 which enables $\rsimp$ to be non-nested:
       
  1723 
       
  1724 \begin{lemma}\label{nonnestedRsimp}
       
  1725 	$\nonnested \; (\rsimp{r})$
       
  1726 \end{lemma}
       
  1727 \begin{proof}
       
  1728 	By an induction on $r$.
       
  1729 \end{proof}
       
  1730 \noindent
       
  1731 With this we could prove that a regular expressions
       
  1732 after simplification and flattening and de-duplication,
       
  1733 will not contain any alternative regular expression directly:
       
  1734 \begin{lemma}\label{nonaltFltsRd}
       
  1735 	If $x \in \rdistinct{\rflts\; (\map \; \rsimp{} \; rs)}{\varnothing}$ 
       
  1736 	then $\textit{nonAlt} \; x$.
       
  1737 \end{lemma}
       
  1738 \begin{proof}
       
  1739 	By \ref{nonnestedRsimp}.
       
  1740 \end{proof}
       
  1741 \noindent
       
  1742 The other thing we know is that once $\rsimp{}$ had finished
       
  1743 processing an alternative regular expression, it will not
       
  1744 contain any $\RZERO$s, this is because all the recursive 
       
  1745 calls to the simplification on the children regular expressions
       
  1746 make the children good, and $\rflts$ will not take out
       
  1747 any $\RZERO$s out of a good regular expression list,
       
  1748 and $\rdistinct{}$ will not mess with the result.
       
  1749 \begin{lemma}\label{flts3Obv}
       
  1750 	The following are true:
       
  1751 	\begin{itemize}
       
  1752 		\item
       
  1753 			If for all $r \in rs. \, \good \; r $ or $r = \RZERO$,
       
  1754 			then for all $r \in \rflts\; rs. \, \good \; r$.
       
  1755 		\item
       
  1756 			If $x \in \rdistinct{\rflts\; (\map \; rsimp{}\; rs)}{\varnothing}$
       
  1757 			and for all $y$ such that $\llbracket y \rrbracket_r$ less than
       
  1758 			$\llbracket rs \rrbracket_r + 1$, either
       
  1759 			$\good \; (\rsimp{y})$ or $\rsimp{y} = \RZERO$,
       
  1760 			then $\good \; x$.
       
  1761 	\end{itemize}
       
  1762 \end{lemma}
       
  1763 \begin{proof}
       
  1764 	The first part is by induction on $rs$, where the induction
       
  1765 	rule is the inductive cases for $\rflts$.
       
  1766 	The second part is a corollary from the first part.
       
  1767 \end{proof}
       
  1768 
       
  1769 And this leads to good structural property of $\rsimp{}$,
       
  1770 that after simplification, a regular expression is
       
  1771 either good or $\RZERO$:
       
  1772 \begin{lemma}\label{good1}
       
  1773 	For any r-regular expression $r$, $\good \; \rsimp{r}$ or $\rsimp{r} = \RZERO$.
       
  1774 \end{lemma}
       
  1775 \begin{proof}
       
  1776 	By an induction on $r$. The inductive measure is the size $\llbracket \rrbracket_r$.
       
  1777 	Lemma \ref{rsimpSize} says that 
       
  1778 	$\llbracket \rsimp{r}\rrbracket_r$ is smaller than or equal to
       
  1779 	$\llbracket r \rrbracket_r$.
       
  1780 	Therefore, in the $r_1 \cdot r_2$ and $\sum rs$ case,
       
  1781 	Inductive hypothesis applies to the children regular expressions
       
  1782 	$r_1$, $r_2$, etc. The lemma \ref{flts3Obv}'s precondition is satisfied
       
  1783 	by that as well.
       
  1784 	The lemmas \ref{nonnestedRsimp} and  \ref{nonaltFltsRd} are used
       
  1785 	to ensure that goodness is preserved at the topmost level.
       
  1786 \end{proof}
       
  1787 We shall prove that any good regular expression is 
       
  1788 a fixed-point for $\rsimp{}$.
       
  1789 First we prove an auxiliary lemma:
       
  1790 \begin{lemma}\label{goodaltsNonalt}
       
  1791 	If $\good \; \sum rs$, then $\rflts\; rs = rs$.
       
  1792 \end{lemma}
       
  1793 \begin{proof}
       
  1794 	By an induction on $\sum rs$. The inductive rules are the cases
       
  1795 	for $\good$.
       
  1796 \end{proof}
       
  1797 \noindent
       
  1798 Now we are ready to prove that good regular expressions are invariant
       
  1799 of $\rsimp{}$ application:
       
  1800 \begin{lemma}\label{test}
       
  1801 	If $\good \;r$ then $\rsimp{r} = r$.
       
  1802 \end{lemma}
       
  1803 \begin{proof}
       
  1804 	By an induction on the inductive cases of $\good$, using lemmas
       
  1805 	\ref{goodaltsNonalt} and \ref{rdistinctOnDistinct}.
       
  1806 	The lemma \ref{goodaltsNonalt} is used in the alternative
       
  1807 	case where 2 or more elements are present in the list.
       
  1808 \end{proof}
       
  1809 \noindent
       
  1810 Given below is a property involving $\rflts$, $\rdistinct{}{}$, $\rsimp{}$ and $\rsimp_{ALTS}$,
       
  1811 which requires $\ref{good1}$ to go through smoothly.
       
  1812 It says that an application of $\rsimp_{ALTS}$ can be "absorbed",
       
  1813 if it its output is concatenated with a list and then applied to $\rflts$.
       
  1814 \begin{lemma}\label{flattenRsimpalts}
       
  1815 	$\rflts \; ( (\rsimp_{ALTS} \; 
       
  1816 	(\rdistinct{(\rflts \; (\map \; \rsimp{}\; rs))}{\varnothing})) :: 
       
  1817 	\map \; \rsimp{} \; rs' ) = 
       
  1818 	\rflts \; ( (\rdistinct{(\rflts \; (\map \; \rsimp{}\; rs))}{\varnothing}) @ (
       
  1819 	\map \; \rsimp{rs'}))$
       
  1820 
       
  1821 
       
  1822 \end{lemma}
       
  1823 \begin{proof}
       
  1824 	By \ref{good1}.
       
  1825 \end{proof}
       
  1826 \noindent
       
  1827 
       
  1828 
       
  1829 
       
  1830 
       
  1831 
       
  1832 We are also ready to prove that $\textit{rsimp}$ is idempotent.
       
  1833 \subsubsection{$\rsimp$ is Idempotent}
       
  1834 The idempotency of $\rsimp$ is very useful in 
       
  1835 manipulating regular expression terms into desired
       
  1836 forms so that key steps allowing further rewriting to closed forms
       
  1837 are possible.
       
  1838 \begin{lemma}\label{rsimpIdem}
       
  1839 	$\rsimp{r} = \rsimp{\rsimp{r}}$
       
  1840 \end{lemma}
       
  1841 
       
  1842 \begin{proof}
       
  1843 	By \ref{test} and \ref{good1}.
       
  1844 \end{proof}
       
  1845 \noindent
       
  1846 This property means we do not have to repeatedly
       
  1847 apply simplification in each step, which justifies
       
  1848 our definition of $\blexersimp$.
       
  1849 
       
  1850 
       
  1851 On the other hand, we could repeat the same $\rsimp{}$ applications
       
  1852 on regular expressions as many times as we want, if we have at least
       
  1853 one simplification applied to it, and apply it wherever we would like to:
       
  1854 \begin{corollary}\label{headOneMoreSimp}
       
  1855 	The following properties hold, directly from \ref{rsimpIdem}:
       
  1856 
       
  1857 	\begin{itemize}
       
  1858 		\item
       
  1859 			$\map \; \rsimp{(r :: rs)} = \map \; \rsimp{} \; (\rsimp{r} :: rs)$
       
  1860 		\item
       
  1861 			$\rsimp{(\RALTS{rs})} = \rsimp{(\RALTS{\map \; \rsimp{} \; rs})}$
       
  1862 	\end{itemize}
       
  1863 \end{corollary}
       
  1864 \noindent
       
  1865 This will be useful in later closed form proof's rewriting steps.
       
  1866 Similarly, we point out the following useful facts below:
       
  1867 \begin{lemma}
       
  1868 	The following equalities hold if $r = \rsimp{r'}$ for some $r'$:
       
  1869 	\begin{itemize}
       
  1870 		\item
       
  1871 			If $r  = \sum rs$ then $\rsimpalts \; rs = \sum rs$.
       
  1872 		\item
       
  1873 			If $r = \sum rs$ then $\rdistinct{rs}{\varnothing} = rs$.
       
  1874 		\item
       
  1875 			$\rsimpalts \; (\rdistinct{\rflts \; [r]}{\varnothing}) = r$.
       
  1876 	\end{itemize}
       
  1877 \end{lemma}
       
  1878 \begin{proof}
       
  1879 	By application of lemmas \ref{rsimpIdem} and \ref{good1}.
       
  1880 \end{proof}
       
  1881 
       
  1882 \noindent
       
  1883 With the idempotency of $\rsimp{}$ and its corollaries, 
       
  1884 we can start proving some key equalities leading to the 
       
  1885 closed forms.
       
  1886 Now presented are a few equivalent terms under $\rsimp{}$.
       
  1887 We use $r_1 \sequal r_2 $ here to denote $\rsimp{r_1} = \rsimp{r_2}$.
       
  1888 \begin{lemma}
       
  1889 	\begin{itemize}
       
  1890 		The following equivalence hold:
       
  1891 	\item
       
  1892 		$\rsimpalts \; (\RZERO :: rs) \sequal \rsimpalts\; rs$
       
  1893 	\item
       
  1894 		$\rsimpalts \; rs \sequal \rsimpalts (\map \; \rsimp{} \; rs)$
       
  1895 	\item
       
  1896 		$\RALTS{\RALTS{rs}} \sequal \RALTS{rs}$
       
  1897 	\item
       
  1898 		$\sum ((\sum rs_a) :: rs_b) \sequal \sum rs_a @ rs_b$
       
  1899 	\item
       
  1900 		$\RALTS{rs} = \RALTS{\map \; \rsimp{} \; rs}$
       
  1901 \end{itemize}
       
  1902 \end{lemma}
       
  1903 \begin{proof}
       
  1904 	By induction on the lists involved.
       
  1905 \end{proof}
       
  1906 \noindent
       
  1907 The above allows us to prove
       
  1908 two similar equalities (which are a bit more involved).
       
  1909 It says that we could flatten out the elements
       
  1910 before simplification and still get the same result.
       
  1911 \begin{lemma}\label{simpFlatten3}
       
  1912 	One can flatten the inside $\sum$ of a $\sum$ if it is being 
       
  1913 	simplified. Concretely,
       
  1914 	\begin{itemize}
       
  1915 		\item
       
  1916 			If for all $r \in rs, rs', rs''$, we have $\good \; r $
       
  1917 			or $r = \RZERO$, then $\sum (rs' @ rs @ rs'') \sequal 
       
  1918 			\sum (rs' @ [\sum rs] @ rs'')$ holds. As a corollary,
       
  1919 		\item
       
  1920 			$\sum (rs' @ [\sum rs] @ rs'') \sequal \sum (rs' @ rs @ rs'')$
       
  1921 	\end{itemize}
       
  1922 \end{lemma}
       
  1923 \begin{proof}
       
  1924 	By rewriting steps involving the use of \ref{test} and \ref{rdistinctConcatGeneral}.
       
  1925 	The second sub-lemma is a corollary of the previous.
       
  1926 \end{proof}
       
  1927 %Rewriting steps not put in--too long and complicated-------------------------------
       
  1928 \begin{comment}
       
  1929 	\begin{center}
       
  1930 		$\rsimp{\sum (rs' @ rs @ rs'')}  \stackrel{def of bsimp}{=}$  \\
       
  1931 		$\rsimpalts \; (\rdistinct{\rflts \; ((\map \; \rsimp{}\; rs') @ (\map \; \rsimp{} \; rs ) @ (\map \; \rsimp{} \; rs''))}{\varnothing})$ \\
       
  1932 		$\stackrel{by \ref{test}}{=} 
       
  1933 		\rsimpalts \; (\rdistinct{(\rflts \; rs' @ \rflts \; rs @ \rflts \; rs'')}{
       
  1934 		\varnothing})$\\
       
  1935 		$\stackrel{by \ref{rdistinctConcatGeneral}}{=}
       
  1936 		\rsimpalts \; (\rdistinct{\rflts \; rs'}{\varnothing} @ \rdistinct{(
       
  1937 		\rflts\; rs @ \rflts \; rs'')}{\rflts \; rs'})$\\
       
  1938 
       
  1939 	\end{center}
       
  1940 \end{comment}
       
  1941 %Rewriting steps not put in--too long and complicated-------------------------------
       
  1942 \noindent
       
  1943 \subsection{Estimating the Closed Forms' sizes}
  1946 \subsection{Estimating the Closed Forms' sizes}
  1944 We now summarize the closed forms below:
  1947 We now summarize the closed forms below:
  1945 \begin{itemize}
  1948 \begin{itemize}
  1946 	\item
  1949 	\item
  1947 		$\rderssimp{(\sum rs)}{s} \sequal
  1950 		$\rderssimp{(\sum rs)}{s} \sequal