handouts/ho02.tex
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Sat, 12 Oct 2013 10:12:38 +0100
changeset 140 1be892087df2
parent 133 09efdf5cf07c
child 217 cd6066f1056a
permissions -rw-r--r--
added
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
\documentclass{article}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
\usepackage{charter}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
\usepackage{hyperref}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
\usepackage{amssymb}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
\usepackage{amsmath}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
\usepackage[T1]{fontenc}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
\usepackage{listings}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     8
\usepackage{xcolor}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     9
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    10
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    11
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    12
\definecolor{javared}{rgb}{0.6,0,0} % for strings
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    13
\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    14
\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    15
\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    16
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    17
\lstdefinelanguage{scala}{
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    18
  morekeywords={abstract,case,catch,class,def,%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    19
    do,else,extends,false,final,finally,%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    20
    for,if,implicit,import,match,mixin,%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    21
    new,null,object,override,package,%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    22
    private,protected,requires,return,sealed,%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    23
    super,this,throw,trait,true,try,%
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    24
    type,val,var,while,with,yield},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    25
  otherkeywords={=>,<-,<\%,<:,>:,\#,@},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    26
  sensitive=true,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    27
  morecomment=[l]{//},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    28
  morecomment=[n]{/*}{*/},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    29
  morestring=[b]",
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    30
  morestring=[b]',
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    31
  morestring=[b]"""
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    32
}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    33
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    34
\lstset{language=Scala,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    35
	basicstyle=\ttfamily,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    36
	keywordstyle=\color{javapurple}\bfseries,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    37
	stringstyle=\color{javagreen},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    38
	commentstyle=\color{javagreen},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    39
	morecomment=[s][\color{javadocblue}]{/**}{*/},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    40
	numbers=left,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    41
	numberstyle=\tiny\color{black},
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    42
	stepnumber=1,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    43
	numbersep=10pt,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    44
	tabsize=2,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    45
	showspaces=false,
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    46
	showstringspaces=false}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    47
	
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    48
\begin{document}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    49
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    50
\section*{Handout 2}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    51
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    52
Having specified what problem our matching algorithm, $match$, is supposed to solve, namely
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    53
for a given regular expression $r$ and string $s$ answer $true$ if and only if
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    54
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    55
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    56
s \in L(r)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    57
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    58
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    59
\noindent
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    60
we can look at an algorithm to solve this problem.
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    61
Clearly we cannot use the function $L$ directly for this, because in general
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    62
the set of strings $L$ returns is infinite (recall what $L(a^*)$ is). In such cases there is no way we can implement
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    63
an exhaustive test for whether a string is member of this set or not.
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    64
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    65
The algorithm we will define below consists of two parts. One is the function $nullable$ which takes a
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    66
regular expression as argument and decides whether it can match the empty string (this means it returns a 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    67
boolean). This can be easily defined recursively as follows:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    68
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    69
\begin{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    70
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    71
$nullable(\varnothing)$      & $\dn$ & $f\!\/alse$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    72
$nullable(\epsilon)$           & $\dn$ &  $true$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    73
$nullable (c)$                    & $\dn$ &  $f\!alse$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    74
$nullable (r_1 + r_2)$       & $\dn$ &  $nullable(r_1) \vee nullable(r_2)$\\ 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    75
$nullable (r_1 \cdot r_2)$ & $\dn$ &  $nullable(r_1) \wedge nullable(r_2)$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    76
$nullable (r^*)$                & $\dn$ & $true$ \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    77
\end{tabular}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    78
\end{center}
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    79
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    80
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    81
The idea behind this function is that the following property holds:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    82
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    83
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    84
nullable(r) \;\;\text{if and only if}\;\; ""\in L(r)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    85
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    86
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    87
\noindent
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    88
Note on the left-hand side we have a function we can implement; on the right we have its specification. 
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    89
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    90
The other function of our matching algorithm calculates a \emph{derivative} of a regular expression. This is a function
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    91
which will take a regular expression, say $r$, and a character, say $c$, as argument and return 
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
    92
a new regular expression. Be careful that the intuition behind this function is not so easy to grasp on first
124
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 123
diff changeset
    93
reading. Essentially this function solves the following problem: if $r$ can match a string of the form
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
    94
$c\!::\!s$, what does the regular expression look like that can match just $s$. The definition of this
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
    95
function is as follows:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
    96
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
    97
\begin{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
    98
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
    99
  $der\, c\, (\varnothing)$      & $\dn$ & $\varnothing$ & \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   100
  $der\, c\, (\epsilon)$           & $\dn$ & $\varnothing$ & \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   101
  $der\, c\, (d)$                     & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$ & \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   102
  $der\, c\, (r_1 + r_2)$        & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$ & \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   103
  $der\, c\, (r_1 \cdot r_2)$  & $\dn$  & if $nullable (r_1)$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   104
  & & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   105
  & & else $(der\, c\, r_1) \cdot r_2$\\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   106
  $der\, c\, (r^*)$          & $\dn$ & $(der\,c\,r) \cdot (r^*)$ &
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   107
  \end{tabular}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   108
\end{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   109
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   110
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   111
The first two clauses can be rationalised as follows: recall that $der$ should calculate a regular
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   112
expression, if the ``input'' regular expression can match a string of the form $c\!::\!s$. Since neither
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   113
$\varnothing$ nor $\epsilon$ can match such a string we return $\varnothing$. In the third case
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   114
we have to make a case-distinction: In case the regular expression is $c$, then clearly it can recognise
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   115
a string of the form $c\!::\!s$, just that $s$ is the empty string. Therefore we return the $\epsilon$-regular 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   116
expression. In the other case we again return $\varnothing$ since no string of the $c\!::\!s$ can be matched.
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   117
The $+$-case is relatively straightforward: all strings of the form $c\!::\!s$ are either matched by the
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   118
regular expression $r_1$ or $r_2$. So we just have to recursively call $der$ with these two regular
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   119
expressions and compose the results again with $+$. The $\cdot$-case is more complicated:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   120
if $r_1\cdot r_2$ matches a string of the form $c\!::\!s$, then the first part must be matched by $r_1$.
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   121
Consequently, it makes sense to construct the regular expression for $s$ by calling $der$ with $r_1$ and
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   122
``appending'' $r_2$. There is however one exception to this simple rule: if $r_1$ can match the empty
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   123
string, then all of $c\!::\!s$ is matched by $r_2$. So in case $r_1$ is nullable (that is can match the
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   124
empty string) we have to allow the choice $der\,c\,r_2$ for calculating the regular expression that can match 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   125
$s$. The $*$-case is again simple: if $r^*$ matches a string of the form $c\!::\!s$, then the first part must be
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   126
``matched'' by a single copy of $r$. Therefore we call recursively $der\,c\,r$ and ``append'' $r^*$ in order to 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   127
match the rest of $s$.
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   128
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   129
Another way to rationalise the definition of $der$ is to consider the following operation on sets:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   130
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   131
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   132
Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   133
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   134
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   135
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   136
which essentially transforms a set of strings $A$ by filtering out all strings that do not start with $c$ and then
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   137
strips off the $c$ from all the remaining strings. For example suppose $A = \{"f\!oo", "bar", "f\!rak"\}$ then
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   138
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   139
Der\,f\,A = \{"oo", "rak"\}\quad,\quad
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   140
Der\,b\,A = \{"ar"\}  \quad \text{and} \quad
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   141
Der\,a\,A = \varnothing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   142
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   143
 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   144
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   145
Note that in the last case $Der$ is empty, because no string in $A$ starts with $a$. With this operation we can
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   146
state the following property about $der$:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   147
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   148
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   149
L(der\,c\,r) = Der\,c\,(L(r))
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   150
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   151
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   152
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   153
This property clarifies what regular expression $der$ calculates, namely take the set of strings
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   154
that $r$ can match (that is $L(r)$), filter out all strings not starting with $c$ and strip off the $c$ from the
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   155
remaining strings---this is exactly the language that $der\,c\,r$ can match.
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   156
140
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   157
If we want to find out whether the string $"abc"$ is matched by the regular expression $r$
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   158
then we can iteratively apply $Der$ as follows
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   159
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   160
\begin{enumerate}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   161
\item $Der\,a\,(L(r))$
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   162
\item $Der\,b\,(Der\,a\,(L(r)))$
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   163
\item $Der\,c\,(Der\,b\,(Der\,a\,(L(r))))$
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   164
\end{enumerate}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   165
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   166
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   167
In the last step we need to test whether the empty string is in the set. Our matching algorithm will work similarly, 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 133
diff changeset
   168
just using regular expression instead of sets. For this we need to lift the notion of derivatives from characters to strings. This can be
125
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   169
done using the following function, taking a string and regular expression as input and a regular expression 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   170
as output.
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   171
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   172
\begin{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   173
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   174
  $der\!s\, []\, r$     & $\dn$ & $r$ & \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   175
  $der\!s\, (c\!::\!s)\, r$ & $\dn$ & $der\!s\,s\,(der\,c\,r)$ & \\
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   176
  \end{tabular}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   177
\end{center}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   178
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   179
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   180
Having $ders$ in place, we can finally define our matching algorithm:
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   181
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   182
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   183
match\,s\,r = nullable(ders\,s\,r)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   184
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   185
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   186
\noindent
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   187
We claim that
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   188
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   189
\[
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   190
match\,s\,r\quad\text{if and only if}\quad s\in L(r)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   191
\]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   192
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 124
diff changeset
   193
\noindent
133
09efdf5cf07c updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 126
diff changeset
   194
holds, which means our algorithm satisfies the specification. This algorithm was introduced by
09efdf5cf07c updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 126
diff changeset
   195
Janus Brzozowski in 1964. Its main attractions are simplicity and being fast, as well as
09efdf5cf07c updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 126
diff changeset
   196
being easily extendable for other regular expressions such as $r^{\{n\}}$, $r^?$, $\sim{}r$ and so on.
126
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   197
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   198
\begin{figure}[p]
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   199
{\lstset{language=Scala}\texttt{\lstinputlisting{../progs/app5.scala}}}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   200
{\lstset{language=Scala}\texttt{\lstinputlisting{../progs/app6.scala}}}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   201
\caption{Scala implementation of the nullable and derivatives functions.}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 125
diff changeset
   202
\end{figure}
123
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   203
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   204
\end{document}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   205
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   206
%%% Local Variables: 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   207
%%% mode: latex
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   208
%%% TeX-master: t
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   209
%%% End: