handouts/ho02.tex
changeset 399 5c1fbb39c93e
parent 394 2f9fe225ecc8
child 412 1cef3924f7a2
equal deleted inserted replaced
398:c8ce95067c1a 399:5c1fbb39c93e
     2 \usepackage{../style}
     2 \usepackage{../style}
     3 \usepackage{../langs}
     3 \usepackage{../langs}
     4 \usepackage{../graphics}
     4 \usepackage{../graphics}
     5 \usepackage{../data}
     5 \usepackage{../data}
     6 
     6 
     7 \pgfplotsset{compat=1.11}
     7 
     8 \begin{document}
     8 \begin{document}
       
     9 \fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016}
       
    10 
     9 
    11 
    10 \section*{Handout 2 (Regular Expression Matching)}
    12 \section*{Handout 2 (Regular Expression Matching)}
    11 
    13 
    12 This lecture is about implementing a more efficient regular
    14 This lecture is about implementing a more efficient regular
    13 expression matcher (the plots on the right)---more efficient
    15 expression matcher (the plots on the right)---more efficient
    14 than the matchers from regular expression libraries in Ruby
    16 than the matchers from regular expression libraries in Ruby
    15 and Python (the plots on the left). These plots show the
    17 and Python (the plots on the left). These plots show the
    16 running time for the evil regular expression
    18 running time for the evil regular expression
    17 $a^?{}^{\{n\}}\cdot a^{\{n\}}$ and strings composed of $n$ \pcode{a}s.
    19 $a^?{}^{\{n\}}\cdot a^{\{n\}}$ and strings composed of $n$
    18 We will use this regular expression and strings as running
    20 \pcode{a}s. We will use this regular expression and strings as
    19 example. To see the substantial differences in the two plots
    21 running example. To see the substantial differences in the two
    20 below, note the different scales of the $x$-axes. 
    22 plots below, note the different scales of the $x$-axes. 
    21 
    23 
    22 
    24 
    23 \begin{center}
    25 \begin{center}
    24 \begin{tabular}{@{}cc@{}}
    26 \begin{tabular}{@{}cc@{}}
    25 \begin{tikzpicture}
    27 \begin{tikzpicture}
    26 \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
    28 \begin{axis}[
       
    29     xlabel={strings of {\tt a}s},
       
    30     ylabel={time in secs},
    27     enlargelimits=false,
    31     enlargelimits=false,
    28     xtick={0,5,...,30},
    32     xtick={0,5,...,30},
    29     xmax=33,
    33     xmax=33,
    30     ymax=35,
    34     ymax=35,
    31     ytick={0,5,...,30},
    35     ytick={0,5,...,30},
    42   table {re-ruby.data};  
    46   table {re-ruby.data};  
    43 \end{axis}
    47 \end{axis}
    44 \end{tikzpicture}
    48 \end{tikzpicture}
    45 &
    49 &
    46 \begin{tikzpicture}
    50 \begin{tikzpicture}
    47   \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
    51   \begin{axis}[
       
    52     xlabel={strings of \texttt{a}s},
       
    53     ylabel={time in secs},
    48     enlargelimits=false,
    54     enlargelimits=false,
    49     xtick={0,3000,...,12000},
    55     xtick={0,3000,...,12000},
    50     xmax=12500,
    56     xmax=12500,
    51     ymax=35,
    57     ymax=35,
    52     ytick={0,5,...,30},
    58     ytick={0,5,...,30},
   117 \end{tabular}
   123 \end{tabular}
   118 \end{center}
   124 \end{center}
   119 
   125 
   120 \noindent I leave it to you to verify these equivalences and
   126 \noindent I leave it to you to verify these equivalences and
   121 non-equivalences. It is also interesting to look at some
   127 non-equivalences. It is also interesting to look at some
   122 corner cases involving $\epsilon$ and $\varnothing$:
   128 corner cases involving $\ONE$ and $\ZERO$:
   123 
   129 
   124 \begin{center}
   130 \begin{center}
   125 \begin{tabular}{rcl}
   131 \begin{tabular}{rcl}
   126 $a \cdot \varnothing$ & $\not\equiv$ & $a$\\
   132 $a \cdot \ZERO$ & $\not\equiv$ & $a$\\
   127 $a + \epsilon$ & $\not\equiv$ & $a$\\
   133 $a + \ONE$ & $\not\equiv$ & $a$\\
   128 $\epsilon$ & $\equiv$ & $\varnothing^*$\\
   134 $\ONE$ & $\equiv$ & $\ZERO^*$\\
   129 $\epsilon^*$ & $\equiv$ & $\epsilon$\\
   135 $\ONE^*$ & $\equiv$ & $\ONE$\\
   130 $\varnothing^*$ & $\not\equiv$ & $\varnothing$\\
   136 $\ZERO^*$ & $\not\equiv$ & $\ZERO$\\
   131 \end{tabular}
   137 \end{tabular}
   132 \end{center}
   138 \end{center}
   133 
   139 
   134 \noindent Again I leave it to you to make sure you agree
   140 \noindent Again I leave it to you to make sure you agree
   135 with these equivalences and non-equivalences. 
   141 with these equivalences and non-equivalences. 
   138 For our matching algorithm however the following seven
   144 For our matching algorithm however the following seven
   139 equivalences will play an important role:
   145 equivalences will play an important role:
   140 
   146 
   141 \begin{center}
   147 \begin{center}
   142 \begin{tabular}{rcl}
   148 \begin{tabular}{rcl}
   143 $r + \varnothing$  & $\equiv$ & $r$\\
   149 $r + \ZERO$  & $\equiv$ & $r$\\
   144 $\varnothing + r$  & $\equiv$ & $r$\\
   150 $\ZERO + r$  & $\equiv$ & $r$\\
   145 $r \cdot \epsilon$ & $\equiv$ & $r$\\
   151 $r \cdot \ONE$ & $\equiv$ & $r$\\
   146 $\epsilon \cdot r$     & $\equiv$ & $r$\\
   152 $\ONE \cdot r$     & $\equiv$ & $r$\\
   147 $r \cdot \varnothing$ & $\equiv$ & $\varnothing$\\
   153 $r \cdot \ZERO$ & $\equiv$ & $\ZERO$\\
   148 $\varnothing \cdot r$ & $\equiv$ & $\varnothing$\\
   154 $\ZERO \cdot r$ & $\equiv$ & $\ZERO$\\
   149 $r + r$ & $\equiv$ & $r$
   155 $r + r$ & $\equiv$ & $r$
   150 \end{tabular}
   156 \end{tabular}
   151 \end{center}
   157 \end{center}
   152 
   158 
   153 \noindent which always hold no matter what the regular
   159 \noindent which always hold no matter what the regular
   154 expression $r$ looks like. The first two are easy to verify
   160 expression $r$ looks like. The first two are easy to verify
   155 since $L(\varnothing)$ is the empty set. The next two are also
   161 since $L(\ZERO)$ is the empty set. The next two are also
   156 easy to verify since $L(\epsilon) = \{[]\}$ and appending the
   162 easy to verify since $L(\ONE) = \{[]\}$ and appending the
   157 empty string to every string of another set, leaves the set
   163 empty string to every string of another set, leaves the set
   158 unchanged. Be careful to fully comprehend the fifth and sixth
   164 unchanged. Be careful to fully comprehend the fifth and sixth
   159 equivalence: if you concatenate two sets of strings and one is
   165 equivalence: if you concatenate two sets of strings and one is
   160 the empty set, then the concatenation will also be the empty
   166 the empty set, then the concatenation will also be the empty
   161 set. To see this, check the definition of $\_ @ \_$. The
   167 set. To see this, check the definition of $\_ @ \_$. The
   165 equivalences and read them from left to right. In this way we
   171 equivalences and read them from left to right. In this way we
   166 can view them as \emph{simplification rules}. Consider for 
   172 can view them as \emph{simplification rules}. Consider for 
   167 example the regular expression 
   173 example the regular expression 
   168 
   174 
   169 \begin{equation}
   175 \begin{equation}
   170 (r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot (r_4 \cdot \varnothing)
   176 (r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)
   171 \label{big}
   177 \label{big}
   172 \end{equation}
   178 \end{equation}
   173 
   179 
   174 \noindent If we can find an equivalent regular expression that
   180 \noindent If we can find an equivalent regular expression that
   175 is simpler (smaller for example), then this might potentially
   181 is simpler (smaller for example), then this might potentially
   180 You can verify this by iteratively applying the simplification
   186 You can verify this by iteratively applying the simplification
   181 rules from above:
   187 rules from above:
   182 
   188 
   183 \begin{center}
   189 \begin{center}
   184 \begin{tabular}{ll}
   190 \begin{tabular}{ll}
   185  & $(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot 
   191  & $(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot 
   186 (\underline{r_4 \cdot \varnothing})$\smallskip\\
   192 (\underline{r_4 \cdot \ZERO})$\smallskip\\
   187 $\equiv$ & $(r_1 + \varnothing) \cdot \epsilon + \underline{((\epsilon + r_2) + r_3) \cdot 
   193 $\equiv$ & $(r_1 + \ZERO) \cdot \ONE + \underline{((\ONE + r_2) + r_3) \cdot 
   188 \varnothing}$\smallskip\\
   194 \ZERO}$\smallskip\\
   189 $\equiv$ & $\underline{(r_1 + \varnothing) \cdot \epsilon} + \varnothing$\smallskip\\
   195 $\equiv$ & $\underline{(r_1 + \ZERO) \cdot \ONE} + \ZERO$\smallskip\\
   190 $\equiv$ & $(\underline{r_1 + \varnothing}) + \varnothing$\smallskip\\
   196 $\equiv$ & $(\underline{r_1 + \ZERO}) + \ZERO$\smallskip\\
   191 $\equiv$ & $\underline{r_1 + \varnothing}$\smallskip\\
   197 $\equiv$ & $\underline{r_1 + \ZERO}$\smallskip\\
   192 $\equiv$ & $r_1$\
   198 $\equiv$ & $r_1$\
   193 \end{tabular}
   199 \end{tabular}
   194 \end{center}
   200 \end{center}
   195 
   201 
   196 \noindent In each step, I underlined where a simplification
   202 \noindent In each step, I underlined where a simplification
   197 rule is applied. Our matching algorithm in the next section
   203 rule is applied. Our matching algorithm in the next section
   198 will often generate such ``useless'' $\epsilon$s and
   204 will often generate such ``useless'' $\ONE$s and
   199 $\varnothing$s, therefore simplifying them away will make the
   205 $\ZERO$s, therefore simplifying them away will make the
   200 algorithm quite a bit faster.
   206 algorithm quite a bit faster.
   201 
   207 
   202 \subsection*{The Matching Algorithm}
   208 \subsection*{The Matching Algorithm}
   203 
   209 
   204 The algorithm we will define below consists of two parts. One
   210 The algorithm we will define below consists of two parts. One
   207 (this means it returns a boolean in Scala). This can be easily
   213 (this means it returns a boolean in Scala). This can be easily
   208 defined recursively as follows:
   214 defined recursively as follows:
   209 
   215 
   210 \begin{center}
   216 \begin{center}
   211 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
   217 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
   212 $nullable(\varnothing)$      & $\dn$ & $\textit{false}$\\
   218 $nullable(\ZERO)$      & $\dn$ & $\textit{false}$\\
   213 $nullable(\epsilon)$         & $\dn$ & $true$\\
   219 $nullable(\ONE)$         & $\dn$ & $true$\\
   214 $nullable(c)$                & $\dn$ & $\textit{false}$\\
   220 $nullable(c)$                & $\dn$ & $\textit{false}$\\
   215 $nullable(r_1 + r_2)$     & $\dn$ &  $nullable(r_1) \vee nullable(r_2)$\\ 
   221 $nullable(r_1 + r_2)$     & $\dn$ &  $nullable(r_1) \vee nullable(r_2)$\\ 
   216 $nullable(r_1 \cdot r_2)$ & $\dn$ &  $nullable(r_1) \wedge nullable(r_2)$\\
   222 $nullable(r_1 \cdot r_2)$ & $\dn$ &  $nullable(r_1) \wedge nullable(r_2)$\\
   217 $nullable(r^*)$              & $\dn$ & $true$ \\
   223 $nullable(r^*)$              & $\dn$ & $true$ \\
   218 \end{tabular}
   224 \end{tabular}
   241 expression look like that can match just $s$? The definition
   247 expression look like that can match just $s$? The definition
   242 of this function is as follows:
   248 of this function is as follows:
   243 
   249 
   244 \begin{center}
   250 \begin{center}
   245 \begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
   251 \begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
   246   $der\, c\, (\varnothing)$      & $\dn$ & $\varnothing$\\
   252   $der\, c\, (\ZERO)$      & $\dn$ & $\ZERO$\\
   247   $der\, c\, (\epsilon)$         & $\dn$ & $\varnothing$ \\
   253   $der\, c\, (\ONE)$         & $\dn$ & $\ZERO$ \\
   248   $der\, c\, (d)$                & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\
   254   $der\, c\, (d)$                & $\dn$ & if $c = d$ then $\ONE$ else $\ZERO$\\
   249   $der\, c\, (r_1 + r_2)$        & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\
   255   $der\, c\, (r_1 + r_2)$        & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\
   250   $der\, c\, (r_1 \cdot r_2)$  & $\dn$  & if $nullable (r_1)$\\
   256   $der\, c\, (r_1 \cdot r_2)$  & $\dn$  & if $nullable (r_1)$\\
   251   & & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ 
   257   & & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ 
   252   & & else $(der\, c\, r_1) \cdot r_2$\\
   258   & & else $(der\, c\, r_1) \cdot r_2$\\
   253   $der\, c\, (r^*)$          & $\dn$ & $(der\,c\,r) \cdot (r^*)$
   259   $der\, c\, (r^*)$          & $\dn$ & $(der\,c\,r) \cdot (r^*)$
   256 
   262 
   257 \noindent The first two clauses can be rationalised as
   263 \noindent The first two clauses can be rationalised as
   258 follows: recall that $der$ should calculate a regular
   264 follows: recall that $der$ should calculate a regular
   259 expression so that given the ``input'' regular expression can
   265 expression so that given the ``input'' regular expression can
   260 match a string of the form $c\!::\!s$, we want a regular
   266 match a string of the form $c\!::\!s$, we want a regular
   261 expression for $s$. Since neither $\varnothing$ nor $\epsilon$
   267 expression for $s$. Since neither $\ZERO$ nor $\ONE$
   262 can match a string of the form $c\!::\!s$, we return
   268 can match a string of the form $c\!::\!s$, we return
   263 $\varnothing$. In the third case we have to make a
   269 $\ZERO$. In the third case we have to make a
   264 case-distinction: In case the regular expression is $c$, then
   270 case-distinction: In case the regular expression is $c$, then
   265 clearly it can recognise a string of the form $c\!::\!s$, just
   271 clearly it can recognise a string of the form $c\!::\!s$, just
   266 that $s$ is the empty string. Therefore we return the
   272 that $s$ is the empty string. Therefore we return the
   267 $\epsilon$-regular expression. In the other case we again
   273 $\ONE$-regular expression. In the other case we again
   268 return $\varnothing$ since no string of the $c\!::\!s$ can be
   274 return $\ZERO$ since no string of the $c\!::\!s$ can be
   269 matched. Next come the recursive cases, which are a bit more
   275 matched. Next come the recursive cases, which are a bit more
   270 involved. Fortunately, the $+$-case is still relatively
   276 involved. Fortunately, the $+$-case is still relatively
   271 straightforward: all strings of the form $c\!::\!s$ are either
   277 straightforward: all strings of the form $c\!::\!s$ are either
   272 matched by the regular expression $r_1$ or $r_2$. So we just
   278 matched by the regular expression $r_1$ or $r_2$. So we just
   273 have to recursively call $der$ with these two regular
   279 have to recursively call $der$ with these two regular
   290 
   296 
   291 If this did not make sense, here is another way to rationalise
   297 If this did not make sense, here is another way to rationalise
   292 the definition of $der$ by considering the following operation
   298 the definition of $der$ by considering the following operation
   293 on sets:
   299 on sets:
   294 
   300 
   295 \[
   301 \begin{equation}\label{Der}
   296 Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
   302 Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
   297 \]
   303 \end{equation}
   298 
   304 
   299 \noindent This operation essentially transforms a set of
   305 \noindent This operation essentially transforms a set of
   300 strings $A$ by filtering out all strings that do not start
   306 strings $A$ by filtering out all strings that do not start
   301 with $c$ and then strips off the $c$ from all the remaining
   307 with $c$ and then strips off the $c$ from all the remaining
   302 strings. For example suppose $A = \{f\!oo, bar, f\!rak\}$ then
   308 strings. For example suppose $A = \{f\!oo, bar, f\!rak\}$ then
   404 for \pcode{matches} are shown in Figure~\ref{scala1}.
   410 for \pcode{matches} are shown in Figure~\ref{scala1}.
   405 
   411 
   406 \begin{figure}[p]
   412 \begin{figure}[p]
   407 \lstinputlisting{../progs/app5.scala}
   413 \lstinputlisting{../progs/app5.scala}
   408 \caption{Scala implementation of the nullable and 
   414 \caption{Scala implementation of the nullable and 
   409   derivatives functions. These functions are easy to
   415   derivative functions. These functions are easy to
   410   implement in functional languages, because pattern 
   416   implement in functional languages, because pattern 
   411   matching and recursion allow us to mimic the mathematical
   417   matching and recursion allow us to mimic the mathematical
   412   definitions very closely.\label{scala1}}
   418   definitions very closely.\label{scala1}}
   413 \end{figure}
   419 \end{figure}
   414 
   420 
   465 \end{center}
   471 \end{center}
   466 
   472 
   467 \noindent Our algorithm traverses such regular expressions at
   473 \noindent Our algorithm traverses such regular expressions at
   468 least once every time a derivative is calculated. So having
   474 least once every time a derivative is calculated. So having
   469 large regular expressions will cause problems. This problem
   475 large regular expressions will cause problems. This problem
   470 is aggravated by $a^?$ being represented as $a + \epsilon$.
   476 is aggravated by $a^?$ being represented as $a + \ONE$.
   471 
   477 
   472 We can however fix this by having an explicit constructor for
   478 We can however fix this by having an explicit constructor for
   473 $r^{\{n\}}$. In Scala we would introduce a constructor like
   479 $r^{\{n\}}$. In Scala we would introduce a constructor like
   474 
   480 
   475 \begin{center}
   481 \begin{center}
   521 size of regular expressions it needs to handle. This is of
   527 size of regular expressions it needs to handle. This is of
   522 course obvious because both $nullable$ and $der$ frequently
   528 course obvious because both $nullable$ and $der$ frequently
   523 need to traverse the whole regular expression. There seems,
   529 need to traverse the whole regular expression. There seems,
   524 however, one more issue for making the algorithm run faster.
   530 however, one more issue for making the algorithm run faster.
   525 The derivative function often produces ``useless''
   531 The derivative function often produces ``useless''
   526 $\varnothing$s and $\epsilon$s. To see this, consider $r = ((a
   532 $\ZERO$s and $\ONE$s. To see this, consider $r = ((a
   527 \cdot b) + b)^*$ and the following two derivatives
   533 \cdot b) + b)^*$ and the following two derivatives
   528 
   534 
   529 \begin{center}
   535 \begin{center}
   530 \begin{tabular}{l}
   536 \begin{tabular}{l}
   531 $der\,a\,r = ((\epsilon \cdot b) + \varnothing) \cdot r$\\
   537 $der\,a\,r = ((\ONE \cdot b) + \ZERO) \cdot r$\\
   532 $der\,b\,r = ((\varnothing \cdot b) + \epsilon)\cdot r$\\
   538 $der\,b\,r = ((\ZERO \cdot b) + \ONE)\cdot r$\\
   533 $der\,c\,r = ((\varnothing \cdot b) + \varnothing)\cdot r$
   539 $der\,c\,r = ((\ZERO \cdot b) + \ZERO)\cdot r$
   534 \end{tabular}
   540 \end{tabular}
   535 \end{center}
   541 \end{center}
   536 
   542 
   537 \noindent 
   543 \noindent 
   538 If we simplify them according to the simple rules from the
   544 If we simplify them according to the simple rules from the
   541 
   547 
   542 \begin{center}
   548 \begin{center}
   543 \begin{tabular}{l}
   549 \begin{tabular}{l}
   544 $der\,a\,r \equiv b \cdot r$\\
   550 $der\,a\,r \equiv b \cdot r$\\
   545 $der\,b\,r \equiv r$\\
   551 $der\,b\,r \equiv r$\\
   546 $der\,c\,r \equiv \varnothing$
   552 $der\,c\,r \equiv \ZERO$
   547 \end{tabular}
   553 \end{tabular}
   548 \end{center}
   554 \end{center}
   549 
   555 
   550 \noindent I leave it to you to contemplate whether such a
   556 \noindent I leave it to you to contemplate whether such a
   551 simplification can have any impact on the correctness of our
   557 simplification can have any impact on the correctness of our
   566 \caption{The simplification function and modified 
   572 \caption{The simplification function and modified 
   567 \texttt{ders}-function; this function now
   573 \texttt{ders}-function; this function now
   568 calls \texttt{der} first, but then simplifies
   574 calls \texttt{der} first, but then simplifies
   569 the resulting derivative regular expressions before
   575 the resulting derivative regular expressions before
   570 building the next derivative, see
   576 building the next derivative, see
   571 Line~28.\label{scala2}}
   577 Line~\ref{simpline}.\label{scala2}}
   572 \end{figure}
   578 \end{figure}
   573 
   579 
   574 \begin{center}
   580 \begin{center}
   575 \begin{tikzpicture}
   581 \begin{tikzpicture}
   576 \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
   582 \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
   603 mostly by some form of induction. Remember that regular
   609 mostly by some form of induction. Remember that regular
   604 expressions are defined as 
   610 expressions are defined as 
   605 
   611 
   606 \begin{center}
   612 \begin{center}
   607 \begin{tabular}{r@{\hspace{1mm}}r@{\hspace{1mm}}l@{\hspace{13mm}}l}
   613 \begin{tabular}{r@{\hspace{1mm}}r@{\hspace{1mm}}l@{\hspace{13mm}}l}
   608   $r$ & $::=$ &   $\varnothing$         & null\\
   614   $r$ & $::=$ &   $\ZERO$         & null language\\
   609         & $\mid$ & $\epsilon$           & empty string / \texttt{""} / []\\
   615         & $\mid$ & $\ONE$           & empty string / \texttt{""} / []\\
   610         & $\mid$ & $c$                  & single character\\
   616         & $\mid$ & $c$                  & single character\\
   611         & $\mid$ & $r_1 + r_2$          & alternative / choice\\
   617         & $\mid$ & $r_1 + r_2$          & alternative / choice\\
   612         & $\mid$ & $r_1 \cdot r_2$      & sequence\\
   618         & $\mid$ & $r_1 \cdot r_2$      & sequence\\
   613         & $\mid$ & $r^*$                & star (zero or more)\\
   619         & $\mid$ & $r^*$                & star (zero or more)\\
   614   \end{tabular}
   620   \end{tabular}
   617 \noindent If you want to show a property $P(r)$ for all 
   623 \noindent If you want to show a property $P(r)$ for all 
   618 regular expressions $r$, then you have to follow essentially 
   624 regular expressions $r$, then you have to follow essentially 
   619 the recipe:
   625 the recipe:
   620 
   626 
   621 \begin{itemize}
   627 \begin{itemize}
   622 \item $P$ has to hold for $\varnothing$, $\epsilon$ and $c$
   628 \item $P$ has to hold for $\ZERO$, $\ONE$ and $c$
   623  (these are the base cases).
   629  (these are the base cases).
   624 \item $P$ has to hold for $r_1 + r_2$ under the assumption 
   630 \item $P$ has to hold for $r_1 + r_2$ under the assumption 
   625    that $P$ already holds for $r_1$ and $r_2$.
   631    that $P$ already holds for $r_1$ and $r_2$.
   626 \item $P$ has to hold for $r_1 \cdot r_2$ under the 
   632 \item $P$ has to hold for $r_1 \cdot r_2$ under the 
   627   assumption that $P$ already holds for $r_1$ and $r_2$.
   633   assumption that $P$ already holds for $r_1$ and $r_2$.
   638 \label{nullableprop}
   644 \label{nullableprop}
   639 \end{equation}
   645 \end{equation}
   640 
   646 
   641 \noindent
   647 \noindent
   642 Let us say that this property is $P(r)$, then the first case
   648 Let us say that this property is $P(r)$, then the first case
   643 we need to check is whether $P(\varnothing)$ (see recipe 
   649 we need to check is whether $P(\ZERO)$ (see recipe 
   644 above). So we have to show that
   650 above). So we have to show that
   645 
   651 
   646 \[
   652 \[
   647 nullable(\varnothing) \;\;\text{if and only if}\;\; 
   653 nullable(\ZERO) \;\;\text{if and only if}\;\; 
   648 []\in L(\varnothing)
   654 []\in L(\ZERO)
   649 \]
   655 \]
   650 
   656 
   651 \noindent whereby $nullable(\varnothing)$ is by definition of
   657 \noindent whereby $nullable(\ZERO)$ is by definition of
   652 the function $nullable$ always $\textit{false}$. We also have
   658 the function $nullable$ always $\textit{false}$. We also have
   653 that $L(\varnothing)$ is by definition $\{\}$. It is
   659 that $L(\ZERO)$ is by definition $\{\}$. It is
   654 impossible that the empty string $[]$ is in the empty set.
   660 impossible that the empty string $[]$ is in the empty set.
   655 Therefore also the right-hand side is false. Consequently we
   661 Therefore also the right-hand side is false. Consequently we
   656 verified this case: both sides are false. We would still need
   662 verified this case: both sides are false. We would still need
   657 to do this for $P(\varepsilon)$ and $P(c)$. I leave this to
   663 to do this for $P(\ONE)$ and $P(c)$. I leave this to
   658 you to verify.
   664 you to verify.
   659 
   665 
   660 Next we need to check the inductive cases, for example
   666 Next we need to check the inductive cases, for example
   661 $P(r_1 + r_2)$, which is
   667 $P(r_1 + r_2)$, which is
   662 
   668 
   731 \begin{equation}
   737 \begin{equation}
   732 Ders\,s\,(L(r)) = L(ders\,s\,r)
   738 Ders\,s\,(L(r)) = L(ders\,s\,r)
   733 \label{dersprop}
   739 \label{dersprop}
   734 \end{equation}
   740 \end{equation}
   735 
   741 
   736 \noindent by induction on $s$. In this proof you can assume
   742 \noindent by induction on $s$. Recall $Der$ is defined for 
   737 the following property for $der$ and $Der$ has already been
   743 character---see \eqref{Der}; $Ders$ is similar, but for strings:
   738 proved, that is you can assume
   744 
       
   745 \[
       
   746 Ders\,s\,A\;\dn\;\{s'\,|\,s @ s' \in A\}
       
   747 \]
       
   748 
       
   749 \noindent In this proof you can assume the following property
       
   750 for $der$ and $Der$ has already been proved, that is you can
       
   751 assume
   739 
   752 
   740 \[
   753 \[
   741 L(der\,c\,r) = Der\,c\,(L(r))
   754 L(der\,c\,r) = Der\,c\,(L(r))
   742 \]
   755 \]
   743 
   756