ChengsongTanPhdThesis/Chapters/ChapterFinite.tex
changeset 530 823d9b19d21c
parent 528 28751de4b4ba
equal deleted inserted replaced
529:96e93df60954 530:823d9b19d21c
     5 \label{ChapterFinite} 
     5 \label{ChapterFinite} 
     6 %  In Chapter 4 \ref{Chapter4} we give the second guarantee
     6 %  In Chapter 4 \ref{Chapter4} we give the second guarantee
     7 %of our bitcoded algorithm, that is a finite bound on the size of any 
     7 %of our bitcoded algorithm, that is a finite bound on the size of any 
     8 %regex's derivatives. 
     8 %regex's derivatives. 
     9 
     9 
    10 
    10 In this chapter we give a guarantee in terms of time complexity:
    11 %-----------------------------------
    11 given a regular expression $r$, for any string $s$ 
    12 %	SECTION ?
    12 our algorithm's internal data structure is finitely bounded.
    13 %-----------------------------------
    13 To obtain such a proof, we need to 
    14 \section{preparatory properties for getting a finiteness bound}
    14 \begin{itemize}
    15 Before we get to the proof that says the intermediate result of our lexer will
    15 \item
    16 remain finitely bounded, which is an important efficiency/liveness guarantee,
    16 Define an new datatype for regular expressions that makes it easy
    17 we shall first develop a few preparatory properties and definitions to 
    17 to reason about the size of an annotated regular expression.
    18 make the process of proving that a breeze.
    18 \item
    19 
    19 A set of equalities for this new datatype that enables one to
    20 We define rewriting relations for $\rrexp$s, which allows us to do the 
    20 rewrite $\bderssimp{r_1 \cdot r_2}{s}$ and $\bderssimp{r^*}{s}$ etc.
    21 same trick as we did for the correctness proof,
    21 by their children regexes $r_1$, $r_2$, and $r$.
    22 but this time we will have stronger equalities established.
    22 \item
    23 \subsection{"hrewrite" relation}
    23 Using those equalities to actually get those rewriting equations, which we call
    24 List of 1-step rewrite rules for regular expressions simplification without bitcodes:
    24 "closed forms".
    25 \begin{center}
    25 \item
    26 \begin{tabular}{lcl}
    26 Bound the closed forms, thereby bounding the original
    27 $r_1 \cdot \ZERO$ & $\rightarrow$ & $\ZERO$
    27 $\blexersimp$'s internal data structures.
    28 \end{tabular}
    28 \end{itemize}
    29 \end{center}
    29 
    30 
    30 \section{the $\mathbf{r}$-rexp datatype and the size functions}
    31 And we define an "grewrite" relation that works on lists:
    31 
    32 \begin{center}
    32 We have a size function for bitcoded regular expressions, written
    33 \begin{tabular}{lcl}
       
    34 $ \ZERO :: rs$ & $\rightarrow_g$ & $rs$
       
    35 \end{tabular}
       
    36 \end{center}
       
    37 
       
    38 
       
    39 
       
    40 With these relations we prove
       
    41 \begin{lemma}
       
    42 $rs \rightarrow rs'  \implies \RALTS{rs} \rightarrow \RALTS{rs'}$
       
    43 \end{lemma}
       
    44 which enables us to prove "closed forms" equalities such as
       
    45 \begin{lemma}
       
    46 $\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}$
       
    47 \end{lemma}
       
    48 
       
    49 But the most involved part of the above lemma is proving the following:
       
    50 \begin{lemma}
       
    51 $\bsimp{\RALTS{rs @ \RALTS{rs_1} @ rs'}} = \bsimp{\RALTS{rs @rs_1 @ rs'}} $ 
       
    52 \end{lemma}
       
    53 which is needed in proving things like 
       
    54 \begin{lemma}
       
    55 $r \rightarrow_f r'  \implies \rsimp{r} \rightarrow \rsimp{r'}$
       
    56 \end{lemma}
       
    57 
       
    58 Fortunately this is provable by induction on the list $rs$
       
    59 
       
    60 
       
    61 
       
    62 %-----------------------------------
       
    63 %	SECTION 2
       
    64 %-----------------------------------
       
    65 
       
    66 \section{Finiteness Property}
       
    67 In this section let us describe our argument for why the size of the simplified
       
    68 derivatives with the aggressive simplification function is finitely bounded.
       
    69  Suppose
       
    70 we have a size function for bitcoded regular expressions, written
       
    71 $\llbracket r\rrbracket$, which counts the number of nodes if we regard $r$ as a tree
    33 $\llbracket r\rrbracket$, which counts the number of nodes if we regard $r$ as a tree
    72 
    34 
    73 \begin{center}
    35 \begin{center}
    74 \begin{tabular}{ccc}
    36 \begin{tabular}{ccc}
    75 $\llbracket \ACHAR{bs}{c} \rrbracket $ & $\dn$ & $1$\\
    37 $\llbracket \ACHAR{bs}{c} \rrbracket $ & $\dn$ & $1$\\
   167  bitcodes are seen as $\rrexp$s.
   129  bitcodes are seen as $\rrexp$s.
   168  We also use $r_1 + r_2$ and $\RALTS{[r_1, r_2]}$ interchageably
   130  We also use $r_1 + r_2$ and $\RALTS{[r_1, r_2]}$ interchageably
   169  as the former suits people's intuitive way of stating a binary alternative
   131  as the former suits people's intuitive way of stating a binary alternative
   170  regular expression.
   132  regular expression.
   171 
   133 
       
   134 
       
   135 
       
   136 %-----------------------------------
       
   137 %	SECTION ?
       
   138 %-----------------------------------
       
   139 \section{preparatory properties for getting a finiteness bound}
       
   140 Before we get to the proof that says the intermediate result of our lexer will
       
   141 remain finitely bounded, which is an important efficiency/liveness guarantee,
       
   142 we shall first develop a few preparatory properties and definitions to 
       
   143 make the process of proving that a breeze.
       
   144 
       
   145 We define rewriting relations for $\rrexp$s, which allows us to do the 
       
   146 same trick as we did for the correctness proof,
       
   147 but this time we will have stronger equalities established.
       
   148 \subsection{"hrewrite" relation}
       
   149 List of 1-step rewrite rules for regular expressions simplification without bitcodes:
       
   150 \begin{figure}
       
   151 \caption{the "h-rewrite" rules}
       
   152 \[
       
   153 r_1 \cdot \ZERO \rightarrow_h \ZERO \]
       
   154 
       
   155 \[
       
   156 \ZERO \cdot r_2 \rightarrow \ZERO 
       
   157 \]
       
   158 \end{figure}
       
   159 And we define an "grewrite" relation that works on lists:
       
   160 \begin{center}
       
   161 \begin{tabular}{lcl}
       
   162 $ \ZERO :: rs$ & $\rightarrow_g$ & $rs$
       
   163 \end{tabular}
       
   164 \end{center}
       
   165 
       
   166 
       
   167 
       
   168 With these relations we prove
       
   169 \begin{lemma}
       
   170 $rs \rightarrow rs'  \implies \RALTS{rs} \rightarrow \RALTS{rs'}$
       
   171 \end{lemma}
       
   172 which enables us to prove "closed forms" equalities such as
       
   173 \begin{lemma}
       
   174 $\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\suffix \; s \; r_1 ))}$
       
   175 \end{lemma}
       
   176 
       
   177 But the most involved part of the above lemma is proving the following:
       
   178 \begin{lemma}
       
   179 $\bsimp{\RALTS{rs @ \RALTS{rs_1} @ rs'}} = \bsimp{\RALTS{rs @rs_1 @ rs'}} $ 
       
   180 \end{lemma}
       
   181 which is needed in proving things like 
       
   182 \begin{lemma}
       
   183 $r \rightarrow_f r'  \implies \rsimp{r} \rightarrow \rsimp{r'}$
       
   184 \end{lemma}
       
   185 
       
   186 Fortunately this is provable by induction on the list $rs$
       
   187 
       
   188 
       
   189 
       
   190 %-----------------------------------
       
   191 %	SECTION 2
       
   192 %-----------------------------------
   172 
   193 
   173 \begin{theorem}
   194 \begin{theorem}
   174 For any regex $r$, $\exists N_r. \forall s. \; \llbracket{\bderssimp{r}{s}}\rrbracket \leq N_r$
   195 For any regex $r$, $\exists N_r. \forall s. \; \llbracket{\bderssimp{r}{s}}\rrbracket \leq N_r$
   175 \end{theorem}
   196 \end{theorem}
   176 
   197