handouts/ho02.tex
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Sun, 28 Sep 2014 18:07:58 +0100
changeset 261 24531cfaa36a
parent 259 e5f4b8ff23b8
child 262 ee4304bc6350
permissions -rw-r--r--
updated handouts
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
\documentclass{article}
251
5b5a68df6d16 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 217
diff changeset
     2
\usepackage{../style}
217
cd6066f1056a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 140
diff changeset
     3
\usepackage{../langs}
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
     4
\usepackage{../graphics}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
     5
\usepackage{../data}
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
\begin{document}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     8
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     9
\section*{Handout 2}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    10
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    11
This lecture is about implementing a more efficient regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    12
expression matcher (the plots on the right)---more efficient
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    13
than the matchers from regular expression libraries in Ruby and
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    14
Python (the plots on the left). These plots show the running 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    15
time for the evil regular expression $a?^{\{n\}}a^{\{n\}}$.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    16
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    17
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    18
\begin{tabular}{@{}cc@{}}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    19
\begin{tikzpicture}[y=.072cm, x=.12cm]
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    20
 	%axis
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    21
	\draw (0,0) -- coordinate (x axis mid) (30,0);
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    22
   \draw (0,0) -- coordinate (y axis mid) (0,30);
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    23
   %ticks
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    24
   \foreach \x in {0,5,...,30}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    25
     \draw (\x,1pt) -- (\x,-3pt) node[anchor=north] {\x};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    26
   \foreach \y in {0,5,...,30}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    27
     \draw (1pt,\y) -- (-3pt,\y) node[anchor=east] {\y}; 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    28
	%labels      
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    29
	\node[below=0.6cm] at (x axis mid) {number of \texttt{a}s};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    30
	\node[rotate=90,left=0.9cm] at (y axis mid) {time in secs};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    31
	%plots
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    32
	\draw[color=blue] plot[mark=*] 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    33
		file {re-python.data};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    34
	\draw[color=brown] plot[mark=triangle*] 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    35
		file {re-ruby.data};	 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    36
   %legend
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    37
	\begin{scope}[shift={(4,20)}] 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    38
	\draw[color=blue] (0,0) -- 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    39
		plot[mark=*] (0.25,0) -- (0.5,0) 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    40
		node[right]{\small Python};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    41
	\draw[yshift=-4mm, color=brown] (0,0) -- 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    42
		plot[mark=triangle*] (0.25,0) -- (0.5,0)
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    43
		node[right]{\small Ruby};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    44
	\end{scope}	
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    45
\end{tikzpicture} 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    46
&
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    47
\begin{tikzpicture}[y=.072cm, x=.0004cm]
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    48
 	%axis
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    49
	\draw (0,0) -- coordinate (x axis mid) (12000,0);
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    50
   \draw (0,0) -- coordinate (y axis mid) (0,30);
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    51
   %ticks
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    52
   \foreach \x in {0,3000,...,12000}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    53
     	\draw (\x,1pt) -- (\x,-3pt) node[anchor=north] {\x};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    54
   \foreach \y in {0,5,...,30}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    55
     	\draw (1pt,\y) -- (-3pt,\y) node[anchor=east] {\y}; 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    56
	%labels      
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    57
	\node[below=0.6cm] at (x axis mid) {number of \texttt{a}s};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    58
	\node[rotate=90,left=0.9cm] at (y axis mid) {time in secs};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    59
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    60
	%plots
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    61
   \draw[color=green] plot[mark=square*, mark options={fill=white} ] 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    62
		file {re2b.data};
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    63
	\draw[color=black] plot[mark=square*, mark options={fill=white} ] 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    64
		file {re3.data};	 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    65
\end{tikzpicture}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    66
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    67
\end{center}\medskip
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    68
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    69
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    70
\noindent Having specified in the previous lecture what
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    71
problem our regular expression matcher, which we will call
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    72
\pcode{matches}, is supposed to solve, namely for any given
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    73
regular expression $r$ and string $s$ answer \textit{true} if
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    74
and only if
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    75
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    76
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    77
s \in L(r)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    78
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    79
251
5b5a68df6d16 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 217
diff changeset
    80
\noindent we can look at an algorithm to solve this problem.
5b5a68df6d16 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 217
diff changeset
    81
Clearly we cannot use the function $L$ directly for this,
5b5a68df6d16 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 217
diff changeset
    82
because in general the set of strings $L$ returns is infinite
5b5a68df6d16 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 217
diff changeset
    83
(recall what $L(a^*)$ is). In such cases there is no way we
5b5a68df6d16 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 217
diff changeset
    84
can implement an exhaustive test for whether a string is
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    85
member of this set or not. In contrast our matching algorithm
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    86
will mainly operate on the regular expression $r$ and string
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    87
$s$, which are both finite. Before we come to the matching
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    88
algorithm, however, let us have a closer look at what it means
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    89
when two regular expressions are equivalent.
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
    90
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
    91
\subsection*{Regular Expression Equivalences}
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    92
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    93
We already defined in Handout 1 what it means for two regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    94
expressions to be equivalent, namely if their meaning is the
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    95
same language:
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
    96
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    97
\[
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    98
r_1 \equiv r_2 \;\dn\; L(r_1) = L(r_2)
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
    99
\]
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   100
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   101
\noindent
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   102
It is relatively easy to verify that some concrete equivalences
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   103
hold, for example
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   104
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   105
\begin{center}
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   106
\begin{tabular}{rcl}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   107
$(a + b)  + c$ & $\equiv$ & $a + (b + c)$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   108
$a + a$        & $\equiv$ & $a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   109
$a + b$        & $\equiv$ & $b + a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   110
$(a \cdot b) \cdot c$ & $\equiv$ & $a \cdot (b \cdot c)$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   111
$c \cdot (a + b)$ & $\equiv$ & $(c \cdot a) + (c \cdot b)$\\
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   112
\end{tabular}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   113
\end{center}
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   114
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   115
\noindent
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   116
but also easy to verify that the following regular expressions
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   117
are \emph{not} equivalent
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   118
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   119
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   120
\begin{tabular}{rcl}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   121
$a \cdot a$ & $\not\equiv$ & $a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   122
$a + (b \cdot c)$ & $\not\equiv$ & $(a + b) \cdot (a + c)$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   123
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   124
\end{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   125
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   126
\noindent I leave it to you to verify these equivalences and
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   127
non-equivalences. It is also interesting to look at some
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   128
corner cases involving $\epsilon$ and $\varnothing$:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   129
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   130
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   131
\begin{tabular}{rcl}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   132
$a \cdot \varnothing$ & $\not\equiv$ & $a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   133
$a + \epsilon$ & $\not\equiv$ & $a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   134
$\epsilon$ & $\equiv$ & $\varnothing^*$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   135
$\epsilon^*$ & $\equiv$ & $\epsilon$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   136
$\varnothing^*$ & $\not\equiv$ & $\varnothing$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   137
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   138
\end{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   139
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   140
\noindent Again I leave it to you to make sure you agree
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   141
with these equivalences and non-equivalences. 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   142
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   143
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   144
For our matching algorithm however the following six
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   145
equivalences will play an important role:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   146
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   147
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   148
\begin{tabular}{rcl}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   149
$r + \varnothing$  & $\equiv$ & $r$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   150
$\varnothing + r$  & $\equiv$ & $r$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   151
$r \cdot \epsilon$ & $\equiv$ & $r$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   152
$\epsilon \cdot r$     & $\equiv$ & $r$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   153
$r \cdot \varnothing$ & $\equiv$ & $\varnothing$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   154
$\varnothing \cdot r$ & $\equiv$ & $\varnothing$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   155
$r + r$ & $\equiv$ & $r$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   156
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   157
\end{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   158
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   159
\noindent which always hold no matter what the regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   160
expression $r$ looks like. The first are easy to verify since
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   161
$L(\varnothing)$ is the empty set. The next two are also easy 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   162
to verify since $L(\epsilon) = \{[]\}$ and appending the empty 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   163
string to every string of another set, leaves the set 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   164
unchanged. Be careful to fully comprehend the fifth and 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   165
sixth equivalence: if you concatenate two sets of strings
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   166
and one is the empty set, then the concatenation will also be
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   167
the empty set. Check the definition of \pcode{_ @ _}.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   168
The last equivalence is again trivial.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   169
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   170
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   171
What will be important later on is that we can orient these
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   172
equivalences and read them from left to right. In this way we
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   173
can view them as \emph{simplification rules}. Suppose for 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   174
example the regular expression 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   175
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   176
\begin{equation}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   177
(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot (r_4 \cdot \varnothing)
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   178
\label{big}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   179
\end{equation}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   180
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   181
\noindent If we can find an equivalent regular expression that
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   182
is simpler (smaller for example), then this might potentially 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   183
make our matching algorithm is faster. The reason is that 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   184
whether a string $s$ is in $L(r)$ or in $L(r')$ with $r\equiv r'$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   185
will always give the same answer. In the example above you 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   186
will see that the regular expression is equivalent to $r_1$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   187
if you iteratively apply the simplification rules from above:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   188
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   189
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   190
\begin{tabular}{ll}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   191
 & $(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   192
(\underline{r_4 \cdot \varnothing})$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   193
$\equiv$ & $(r_1 + \varnothing) \cdot \epsilon + \underline{((\epsilon + r_2) + r_3) \cdot 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   194
\varnothing}$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   195
$\equiv$ & $\underline{(r_1 + \varnothing) \cdot \epsilon} + \varnothing$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   196
$\equiv$ & $(\underline{r_1 + \varnothing}) + \varnothing$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   197
$\equiv$ & $\underline{r_1 + \varnothing}$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   198
$\equiv$ & $r_1$\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   199
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   200
\end{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   201
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   202
\noindent In each step I underlined where a simplification
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   203
rule is applied. Our matching algorithm in the next section
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   204
will often generate such ``useless'' $\epsilon$s and
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   205
$\varnothing$s, therefore simplifying them away will make the
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   206
algorithm quite a bit faster.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   207
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   208
\subsection*{The Matching Algorithm}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   209
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   210
The algorithm we will define below consists of two parts. One
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   211
is the function $nullable$ which takes a regular expression as
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   212
argument and decides whether it can match the empty string
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   213
(this means it returns a boolean in Scala). This can be easily
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   214
defined recursively as follows:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   215
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   216
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   217
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   218
$nullable(\varnothing)$      & $\dn$ & $\textit{false}$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   219
$nullable(\epsilon)$         & $\dn$ & $true$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   220
$nullable(c)$                & $\dn$ & $\textit{false}$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   221
$nullable(r_1 + r_2)$     & $\dn$ &  $nullable(r_1) \vee nullable(r_2)$\\ 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   222
$nullable(r_1 \cdot r_2)$ & $\dn$ &  $nullable(r_1) \wedge nullable(r_2)$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   223
$nullable(r^*)$              & $\dn$ & $true$ \\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   224
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   225
\end{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   226
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   227
\noindent The idea behind this function is that the following
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   228
property holds:
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   229
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   230
\[
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   231
nullable(r) \;\;\text{if and only if}\;\; []\in L(r)
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   232
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   233
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   234
\noindent Note on the left-hand side we have a function we can
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   235
implement; on the right we have its specification (which we
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   236
cannot implement in a programming language).
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
   237
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   238
The other function of our matching algorithm calculates a
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   239
\emph{derivative} of a regular expression. This is a function
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   240
which will take a regular expression, say $r$, and a
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   241
character, say $c$, as argument and return a new regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   242
expression. Be careful that the intuition behind this function
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   243
is not so easy to grasp on first reading. Essentially this
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   244
function solves the following problem: if $r$ can match a
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   245
string of the form $c\!::\!s$, what does the regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   246
expression look like that can match just $s$. The definition
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   247
of this function is as follows:
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   248
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   249
\begin{center}
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   250
\begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   251
  $der\, c\, (\varnothing)$      & $\dn$ & $\varnothing$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   252
  $der\, c\, (\epsilon)$         & $\dn$ & $\varnothing$ \\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   253
  $der\, c\, (d)$                & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   254
  $der\, c\, (r_1 + r_2)$        & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   255
  $der\, c\, (r_1 \cdot r_2)$  & $\dn$  & if $nullable (r_1)$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   256
  & & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   257
  & & else $(der\, c\, r_1) \cdot r_2$\\
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   258
  $der\, c\, (r^*)$          & $\dn$ & $(der\,c\,r) \cdot (r^*)$
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   259
  \end{tabular}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   260
\end{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   261
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   262
\noindent The first two clauses can be rationalised as
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   263
follows: recall that $der$ should calculate a regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   264
expression, if the ``input'' regular expression can match a
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   265
string of the form $c\!::\!s$. Since neither $\varnothing$ nor
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   266
$\epsilon$ can match such a string we return $\varnothing$. In
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   267
the third case we have to make a case-distinction: In case the
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   268
regular expression is $c$, then clearly it can recognise a
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   269
string of the form $c\!::\!s$, just that $s$ is the empty
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   270
string. Therefore we return the $\epsilon$-regular expression.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   271
In the other case we again return $\varnothing$ since no
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   272
string of the $c\!::\!s$ can be matched. Next come the
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   273
recursive cases. Fortunately, the $+$-case is still relatively
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   274
straightforward: all strings of the form $c\!::\!s$ are either
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   275
matched by the regular expression $r_1$ or $r_2$. So we just
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   276
have to recursively call $der$ with these two regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   277
expressions and compose the results again with $+$. Yes, makes
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   278
sense? The $\cdot$-case is more complicated: if $r_1\cdot r_2$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   279
matches a string of the form $c\!::\!s$, then the first part
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   280
must be matched by $r_1$. Consequently, it makes sense to
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   281
construct the regular expression for $s$ by calling $der$ with
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   282
$r_1$ and ``appending'' $r_2$. There is however one exception
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   283
to this simple rule: if $r_1$ can match the empty string, then
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   284
all of $c\!::\!s$ is matched by $r_2$. So in case $r_1$ is
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   285
nullable (that is can match the empty string) we have to allow
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   286
the choice $der\,c\,r_2$ for calculating the regular
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   287
expression that can match $s$. Therefore we have to 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   288
add the regular expression $der\,c\,r_2$.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   289
The $*$-case is again simple:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   290
if $r^*$ matches a string of the form $c\!::\!s$, then the
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   291
first part must be ``matched'' by a single copy of $r$.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   292
Therefore we call recursively $der\,c\,r$ and ``append'' $r^*$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   293
in order to match the rest of $s$.
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   294
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   295
If this did not make sense, here is another way to rationalise
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   296
the definition of $der$ by considering the following operation
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   297
on sets:
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   298
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   299
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   300
Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   301
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   302
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   303
\noindent
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   304
which essentially transforms a set of strings $A$ by filtering out all
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   305
strings that do not start with $c$ and then strips off the $c$ from
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   306
all the remaining strings. For example suppose $A = \{f\!oo, bar,
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   307
f\!rak\}$ then
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   308
\[
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   309
Der\,f\,A = \{oo, rak\}\quad,\quad
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   310
Der\,b\,A = \{ar\}  \quad \text{and} \quad
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   311
Der\,a\,A = \varnothing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   312
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   313
 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   314
\noindent
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   315
Note that in the last case $Der$ is empty, because no string in $A$
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   316
starts with $a$. With this operation we can state the following
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   317
property about $der$:
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   318
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   319
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   320
L(der\,c\,r) = Der\,c\,(L(r))
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   321
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   322
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   323
\noindent
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   324
This property clarifies what regular expression $der$ calculates,
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   325
namely take the set of strings that $r$ can match (that is $L(r)$),
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   326
filter out all strings not starting with $c$ and strip off the $c$
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   327
from the remaining strings---this is exactly the language that
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   328
$der\,c\,r$ can match.
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   329
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   330
If we want to find out whether the string $abc$ is matched by
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   331
the regular expression $r_1$ then we can iteratively apply $der$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   332
as follows
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   333
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   334
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   335
\begin{tabular}{rll}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   336
Input: $r_1$, $abc$\medskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   337
Step 1: & build derivative of $a$ and $r_1$ & $(r_2 = der\,a\,r_1)$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   338
Step 2: & build derivative of $b$ and $r_2$ & $(r_3 = der\,b\,r_2)$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   339
Step 3: & build derivative of $c$ and $r_3$ & $(r_4 = der\,b\,r_3)$\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   340
Step 4: & the string is exhausted; test & ($nullable(r_4)$)\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   341
        & whether $r_4$ can recognise the\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   342
        & empty string\smallskip\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   343
Output: & result of the test $\Rightarrow true \,\text{or}\, \textit{false}$\\        
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   344
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   345
\end{center}
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   346
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   347
\noindent Again the operation $Der$ might help to rationalise
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   348
this algorithm. We want to know whether $abc \in L(r_1)$. We
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   349
do not know yet. But lets assume it is. Then $Der\,a\,L(r_1)$
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   350
builds the set where all the strings not starting with $a$ are
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   351
filtered out. Of the remaining strings, the $a$ is stripped
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   352
off. Then we continue with filtering out all strings not
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   353
starting with $b$ and stripping off the $b$ from the remaining
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   354
strings, that means we build $Der\,b\,(Der\,a\,(L(r_1)))$.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   355
Finally we filter out all strings not starting with $c$ and
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   356
strip off $c$ from the remaining string. This is
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   357
$Der\,c\,(Der\,b\,(Der\,a\,(L(r))))$. Now if $abc$ was in the 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   358
original set ($L(r_1)$), then in $Der\,c\,(Der\,b\,(Der\,a\,(L(r))))$ 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   359
must be the empty string. If not then $abc$ was not in the 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   360
language we started with.
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   361
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   362
Our matching algorithm using $der$ and $nullable$ works
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   363
similarly, just using regular expression instead of sets. For
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   364
this we need to extend the notion of derivatives from
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   365
characters to strings. This can be done using the following
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   366
function, taking a string and regular expression as input and
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   367
a regular expression as output.
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   368
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   369
\begin{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   370
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   371
  $\textit{ders}\, []\, r$     & $\dn$ & $r$ & \\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   372
  $\textit{ders}\, (c\!::\!s)\, r$ & $\dn$ & $\textit{ders}\,s\,(der\,c\,r)$ & \\
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   373
  \end{tabular}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   374
\end{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   375
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   376
\noindent This function essentially iterates $der$ taking one
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   377
character at the time from the original string until it is
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   378
exhausted. Having $ders$ in place, we can finally define our
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   379
matching algorithm:
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   380
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   381
\[
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   382
matches\,s\,r = nullable(ders\,s\,r)
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   383
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   384
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   385
\noindent
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   386
We can claim that
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   387
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   388
\[
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   389
matches\,s\,r\quad\text{if and only if}\quad s\in L(r)
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   390
\]
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   391
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   392
\noindent holds, which means our algorithm satisfies the
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   393
specification. Of course we can claim many things\ldots
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   394
whether the claim holds any water is a different question,
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   395
which for example is the point of the Strand-2 Coursework.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   396
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   397
This algorithm was introduced by Janus Brzozowski in 1964. Its
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   398
main attractions are simplicity and being fast, as well as
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   399
being easily extendable for other regular expressions such as
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   400
$r^{\{n\}}$, $r^?$, $\sim{}r$ and so on (this is subject of
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   401
Strand-1 Coursework 1).
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   402
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   403
\subsection*{The Matching Algorithm in Scala}
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 251
diff changeset
   404
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   405
Another attraction of the algorithm is that it can be easily
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   406
implemented in a functional programming language, like Scala.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   407
Given the implementation of regular expressions in Scala given
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   408
in the first lecture and handout, the functions for
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   409
\pcode{matches} are shown in Figure~\ref{scala1}.
126
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   410
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   411
\begin{figure}[p]
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   412
\lstinputlisting{../progs/app5.scala}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   413
\caption{Scala implementation of the nullable and 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   414
  derivatives functions.\label{scala1}}
126
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   415
\end{figure}
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   416
261
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   417
For running the algorithm with our favourite example, the evil
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   418
regular expression $a?^{\{n\}}a^{\{n\}}$, we need to implement
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   419
the optional regular expression and the exactly $n$-times
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   420
regular expression. This can be done with the translations
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   421
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   422
\lstinputlisting[numbers=none]{../progs/app51.scala}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   423
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   424
\noindent Running the matcher with the example, we find it is
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   425
slightly worse then the matcher in Ruby and Python.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   426
Ooops\ldots\medskip
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   427
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   428
\noindent Analysing this failure a bit we notice that 
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   429
for $a^{\{n\}}$ we generate quite big regular expressions:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   430
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   431
\begin{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   432
\begin{tabular}{rl}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   433
1: & $a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   434
2: & $a\cdot a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   435
3: & $a\cdot a\cdot a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   436
& \ldots\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   437
13: & $a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a$\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   438
& \ldots\\
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   439
20:
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   440
\end{tabular}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   441
\end{center}
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   442
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   443
\noindent Our algorithm traverses such regular expressions at
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   444
least once every time a derivative is calculated. So having
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   445
large regular expressions, will cause problems. This problem
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   446
is aggravated with $a?$ being represented as $a + \epsilon$.
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   447
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   448
24531cfaa36a updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 259
diff changeset
   449
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   450
\end{document}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   451
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   452
%%% Local Variables: 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   453
%%% mode: latex
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   454
%%% TeX-master: t
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   455
%%% End: