ChengsongTanPhdThesis/Chapters/Finite.tex
changeset 596 b306628a0eab
parent 595 fa92124d1fb7
child 601 ce4e5151a836
equal deleted inserted replaced
595:fa92124d1fb7 596:b306628a0eab
   352 It tests if all the elements of a list are unique.\\
   352 It tests if all the elements of a list are unique.\\
   353 With $\textit{rdistinct}$,
   353 With $\textit{rdistinct}$,
   354 and the flatten function for $\rrexp$s:
   354 and the flatten function for $\rrexp$s:
   355  \begin{center}
   355  \begin{center}
   356   \begin{tabular}{@{}lcl@{}}
   356   \begin{tabular}{@{}lcl@{}}
   357   $\textit{rflts} \; (_{bs}\sum \textit{as}) :: \textit{as'}$ & $\dn$ & $(\textit{map} \;
   357   $\textit{rflts} \; (\sum \textit{as}) :: \textit{as'}$ & $\dn$ & $as \; @ \; \textit{rflts} \; as' $ \\
   358      (\textit{fuse}\;bs)\; \textit{as}) \; @ \; \textit{rflts} \; as' $ \\
       
   359   $\textit{rflts} \; \ZERO :: as'$ & $\dn$ & $ \textit{rflts} \;  \textit{as'} $ \\
   358   $\textit{rflts} \; \ZERO :: as'$ & $\dn$ & $ \textit{rflts} \;  \textit{as'} $ \\
   360     $\textit{rflts} \; a :: as'$ & $\dn$ & $a :: \textit{rflts} \; \textit{as'}$ \quad(otherwise) 
   359     $\textit{rflts} \; a :: as'$ & $\dn$ & $a :: \textit{rflts} \; \textit{as'}$ \quad(otherwise) 
   361 \end{tabular}    
   360 \end{tabular}    
   362 \end{center}  
   361 \end{center}  
   363 \noindent
   362 \noindent
   364 
       
   365 one can chain together all the other modules
   363 one can chain together all the other modules
   366 of $\bsimp{\_}$ (removing the functionalities related to bit-sequences)
   364 such as $\rsimpalts$:
   367 and get $\textit{rsimp}$ and $\rderssimp{\_}{\_}$.
   365 \begin{center}
       
   366   \begin{tabular}{@{}lcl@{}}
       
   367 	  $\rsimpalts \;\; nil$ & $\dn$ & $\RZERO$\\
       
   368 	  $\rsimpalts \;\; r::nil$ & $\dn$ & $r$\\
       
   369 	  $\rsimpalts \;\; rs$ & $\dn$ & $\sum rs$\\
       
   370 \end{tabular}    
       
   371 \end{center}  
       
   372 \noindent
       
   373 and $\rsimpseq$:
       
   374 \begin{center}
       
   375   \begin{tabular}{@{}lcl@{}}
       
   376 	  $\rsimpseq \;\; \RZERO \; \_ $ &   $=$ &   $\RZERO$\\
       
   377 	  $\rsimpseq \;\; \_ \; \RZERO $ &   $=$ &   $\RZERO$\\
       
   378 	  $\rsimpseq \;\; \RONE \cdot r_2$ & $\dn$ & $r_2$\\
       
   379 	  $\rsimpseq \;\; r_1 r_2$ & $\dn$ & $r_1 \cdot r_2$\\
       
   380 \end{tabular}    
       
   381 \end{center}  
       
   382 and get $\textit{rsimp}$ and $\rderssimp{\_}{\_}$:
   368 \begin{center}
   383 \begin{center}
   369   \begin{tabular}{@{}lcl@{}}
   384   \begin{tabular}{@{}lcl@{}}
   370    
   385    
   371 	  $\textit{rsimp} \; (_{bs}a_1\cdot a_2)$ & $\dn$ & $ \textit{rsimp}_{ASEQ} \; bs \;(\textit{rsimp} \; a_1) \; (\textit{rsimp}  \; a_2)  $ \\
   386 	  $\textit{rsimp} \; (r_1\cdot r_2)$ & $\dn$ & $ \textit{rsimp}_{SEQ} \; bs \;(\textit{rsimp} \; r_1) \; (\textit{rsimp}  \; r_2)  $ \\
   372 	  $\textit{rsimp} \; (_{bs}\sum \textit{as})$ & $\dn$ & $\textit{rsimp}_{ALTS} \; \textit{bs} \; (\textit{rdistinct} \; ( \textit{rflts} ( \textit{map} \; rsimp \; as)) \; \rerases \; \varnothing) $ \\
   387 	  $\textit{rsimp} \; (_{bs}\sum \textit{rs})$ & $\dn$ & $\textit{rsimp}_{ALTS} \; \textit{bs} \; (\textit{rdistinct} \; ( \textit{rflts} ( \textit{map} \; rsimp \; rs)) \; \rerases \; \varnothing) $ \\
   373    $\textit{rsimp} \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$   
   388    $\textit{rsimp} \; r$ & $\dn$ & $\textit{r} \qquad \textit{otherwise}$   
   374 \end{tabular}    
   389 \end{tabular}    
   375 \end{center} 
   390 \end{center} 
   376 We omit these functions, as they are routine. Please refer to the formalisation
   391 \begin{center}
   377 (in file BasicIdentities.thy) for the exact definition.
   392 	\begin{tabular}{@{}lcl@{}}
       
   393 		$r\backslash_{rsimp} \, c$ & $\dn$ & $\rsimp \; (r\backslash_r \, c)$
       
   394 	\end{tabular}
       
   395 \end{center}
       
   396 
       
   397 \begin{center}
       
   398 	\begin{tabular}{@{}lcl@{}}
       
   399 $r \backslash_{rsimps} (c\!::\!s) $ & $\dn$ & $(r \backslash_{rsimp}\, c) \backslash_{rsimps}\, s$ \\
       
   400 $r \backslash_{rsimps} [\,] $ & $\dn$ & $r$
       
   401 	\end{tabular}
       
   402 \end{center}
       
   403 \noindent
   378 With $\rrexp$ the size caclulation of annotated regular expressions'
   404 With $\rrexp$ the size caclulation of annotated regular expressions'
   379 simplification and derivatives can be done by the size of their unlifted 
   405 simplification and derivatives can be done by the size of their unlifted 
   380 counterpart with the unlifted version of simplification and derivatives applied.
   406 counterpart with the unlifted version of simplification and derivatives applied.
   381 \begin{lemma}\label{sizeRelations}
   407 \begin{lemma}\label{sizeRelations}
   382 	The following equalities hold:
   408 	The following equalities hold:
   383 	\begin{itemize}
   409 	\begin{itemize}
   384 		\item
   410 		\item
   385 			$\asize{\bsimp{a}} = \rsize{\rsimp{\rerase{a}}}$
   411 			$\asize{\bsimp{a}} = \rsize{\rsimp{\rerase{a}}}$
   386 		\item
   412 		\item
   387 			$\asize{\bderssimp{r}{s}} =  \rsize{\rderssimp{\rerase{r}}{s}}$
   413 			$\asize{\bderssimp{a}{s}} =  \rsize{\rderssimp{\rerase{a}}{s}}$
   388 	\end{itemize}
   414 	\end{itemize}
   389 \end{lemma}
   415 \end{lemma}
   390 \noindent
   416 \noindent
       
   417 With lemma \ref{sizeRelations},
       
   418 a bound on $\rsize{\rderssimp{\rerase{a}}{s}}$
       
   419 \[
       
   420 	\llbracket \rderssimp{a}{s} \rrbracket_r \leq N_r
       
   421 \]
       
   422 \noindent
       
   423 would apply to $\asize{\bderssimp{a}{s}}$ as well.
       
   424 \[
       
   425 	\llbracket a \backslash_{bsimps} s \rrbracket \leq N_r.
       
   426 \]
   391 In the following content, we will focus on $\rrexp$'s size bound.
   427 In the following content, we will focus on $\rrexp$'s size bound.
   392 We will piece together this bound and show the same bound for annotated regular 
   428 Whatever bounds we proved for $ \rsize{\rderssimp{\rerase{r}}{s}}$  
   393 expressions in the end.
   429 will automatically apply to $\asize{\bderssimp{r}{s}}$.\\
   394 Unless stated otherwise in this chapter all $\textit{rexp}$s without
   430 Unless stated otherwise in this chapter all regular expressions without
   395 bitcodes are seen as $\rrexp$s.
   431 bitcodes are seen as $\rrexp$s.
   396 We also use $r_1 + r_2$ and $\RALTS{[r_1, r_2]}$ interchageably
   432 We also use $r_1 + r_2$ and $\RALTS{[r_1, r_2]}$ interchageably
   397 as the former suits people's intuitive way of stating a binary alternative
   433 as the former suits people's intuitive way of stating a binary alternative
   398 regular expression.
   434 regular expression.
   399 
   435 
   400 
   436 
   401 
   437 
   402 %-----------------------------------
   438 %-----------------------------------
   403 %	SECTION ?
   439 %	SUB SECTION ROADMAP RREXP BOUND
   404 %-----------------------------------
   440 %-----------------------------------
   405 \subsection{Finiteness Proof Using $\rrexp$s}
   441 
   406 Now that we have defined the $\rrexp$ datatype, and proven that its size changes
   442 %\subsection{Roadmap to a Bound for $\textit{Rrexp}$}
   407 w.r.t derivatives and simplifications mirrors precisely those of annotated regular expressions,
   443 
   408 we aim to bound the size of $r \backslash s$ for any $\rrexp$  $r$.
   444 %The way we obtain the bound for $\rrexp$s is by two steps:
   409 Once we have a bound like: 
   445 %\begin{itemize}
   410 \[
   446 %	\item
   411 	\llbracket r \backslash_{rsimp} s \rrbracket_r \leq N_r
   447 %		First, we rewrite $r\backslash s$ into something else that is easier
   412 \]
   448 %		to bound. This step is especially important for the inductive case 
   413 \noindent
   449 %		$r_1 \cdot r_2$ and $r^*$, where the derivative can grow and bloat in a wild way,
   414 we could easily extend that to 
   450 %		but after simplification they will always be equal or smaller to a form consisting of an alternative
   415 \[
   451 %		list of regular expressions $f \; (g\; (\sum rs))$ with some functions applied to it, where each element will be distinct after the function application.
   416 	\llbracket a \backslash_{bsimps} s \rrbracket \leq N_r.
   452 %	\item
   417 \]
   453 %		Then, for such a sum  list of regular expressions $f\; (g\; (\sum rs))$, we can control its size
   418 
   454 %		by estimation, since $\distinctBy$ and $\flts$ are well-behaved and working together would only 
   419 \subsection{Roadmap to a Bound for $\textit{Rrexp}$}
   455 %		reduce the size of a regular expression, not adding to it.
   420 
   456 %\end{itemize}
   421 The way we obtain the bound for $\rrexp$s is by two steps:
   457 %
   422 \begin{itemize}
   458 %\section{Step One: Closed Forms}
   423 	\item
   459 %We transform the function application $\rderssimp{r}{s}$
   424 		First, we rewrite $r\backslash s$ into something else that is easier
   460 %into an equivalent 
   425 		to bound. This step is especially important for the inductive case 
   461 %form $f\; (g \; (\sum rs))$.
   426 		$r_1 \cdot r_2$ and $r^*$, where the derivative can grow and bloat in a wild way,
   462 %The functions $f$ and $g$ can be anything from $\flts$, $\distinctBy$ and other helper functions from $\bsimp{\_}$.
   427 		but after simplification they will always be equal or smaller to a form consisting of an alternative
   463 %This way we get a different but equivalent way of expressing : $r\backslash s = f \; (g\; (\sum rs))$, we call the
   428 		list of regular expressions $f \; (g\; (\sum rs))$ with some functions applied to it, where each element will be distinct after the function application.
   464 %right hand side the "closed form" of $r\backslash s$.
   429 	\item
   465 %
   430 		Then, for such a sum  list of regular expressions $f\; (g\; (\sum rs))$, we can control its size
   466 %\begin{quote}\it
   431 		by estimation, since $\distinctBy$ and $\flts$ are well-behaved and working together would only 
   467 %	Claim: For regular expressions $r_1 \cdot r_2$, we claim that
   432 		reduce the size of a regular expression, not adding to it.
   468 %	\begin{center}
   433 \end{itemize}
   469 %		$ \rderssimp{r_1 \cdot r_2}{s} = 
   434 
   470 %		\rsimp{(\sum (r_1 \backslash s \cdot r_2 ) \; :: \;(\map \; \rderssimp{r2}{\_} \;(\vsuf{s}{r_1})))}$
   435 \section{Step One: Closed Forms}
   471 %	\end{center}
   436 We transform the function application $\rderssimp{r}{s}$
   472 %\end{quote}
   437 into an equivalent 
   473 %\noindent
   438 form $f\; (g \; (\sum rs))$.
   474 %We explain in detail how we reached those claims.
   439 The functions $f$ and $g$ can be anything from $\flts$, $\distinctBy$ and other helper functions from $\bsimp{\_}$.
       
   440 This way we get a different but equivalent way of expressing : $r\backslash s = f \; (g\; (\sum rs))$, we call the
       
   441 right hand side the "closed form" of $r\backslash s$.
       
   442 
       
   443 \begin{quote}\it
       
   444 	Claim: For regular expressions $r_1 \cdot r_2$, we claim that
       
   445 	\begin{center}
       
   446 		$ \rderssimp{r_1 \cdot r_2}{s} = 
       
   447 		\rsimp{(\sum (r_1 \backslash s \cdot r_2 ) \; :: \;(\map \; \rderssimp{r2}{\_} \;(\vsuf{s}{r_1})))}$
       
   448 	\end{center}
       
   449 \end{quote}
       
   450 \noindent
       
   451 We explain in detail how we reached those claims.
       
   452 \subsection{Basic Properties needed for Closed Forms}
   475 \subsection{Basic Properties needed for Closed Forms}
   453 
   476 The next steps involves getting a closed form for
       
   477 $\rderssimp{r}{s}$ and then obtaining
       
   478 an estimate for the closed form.
   454 \subsubsection{$\textit{rdistinct}$'s Deduplicates Successfully}
   479 \subsubsection{$\textit{rdistinct}$'s Deduplicates Successfully}
   455 The $\textit{rdistinct}$ function, as its name suggests, will
   480 The $\textit{rdistinct}$ function, as its name suggests, will
   456 remove duplicates in an \emph{r}$\textit{rexp}$ list, 
   481 remove duplicates in an \emph{r}$\textit{rexp}$ list, 
   457 according to the accumulator
   482 according to the accumulator
   458 and leave only one of each different element in a list:
   483 and leave only one of each different element in a list: