author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Fri, 01 Nov 2013 13:16:43 +0000 | |
changeset 175 | 5801e8c0e528 |
parent 140 | 1be892087df2 |
child 217 | cd6066f1056a |
permissions | -rw-r--r-- |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
\documentclass{article} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
\usepackage{charter} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
\usepackage{hyperref} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
\usepackage{amssymb} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
\usepackage{amsmath} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
\usepackage[T1]{fontenc} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
\usepackage{listings} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
\usepackage{xcolor} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
10 |
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
\definecolor{javared}{rgb}{0.6,0,0} % for strings |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
14 |
\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
15 |
\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
16 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
\lstdefinelanguage{scala}{ |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
18 |
morekeywords={abstract,case,catch,class,def,% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
19 |
do,else,extends,false,final,finally,% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
for,if,implicit,import,match,mixin,% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
new,null,object,override,package,% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
22 |
private,protected,requires,return,sealed,% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
23 |
super,this,throw,trait,true,try,% |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
24 |
type,val,var,while,with,yield}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
otherkeywords={=>,<-,<\%,<:,>:,\#,@}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
sensitive=true, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
27 |
morecomment=[l]{//}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
morecomment=[n]{/*}{*/}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
morestring=[b]", |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
morestring=[b]', |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
morestring=[b]""" |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
\lstset{language=Scala, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
35 |
basicstyle=\ttfamily, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
36 |
keywordstyle=\color{javapurple}\bfseries, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
stringstyle=\color{javagreen}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
commentstyle=\color{javagreen}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
morecomment=[s][\color{javadocblue}]{/**}{*/}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
numbers=left, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
numberstyle=\tiny\color{black}, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
stepnumber=1, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
numbersep=10pt, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
tabsize=2, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
45 |
showspaces=false, |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
46 |
showstringspaces=false} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
47 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
\begin{document} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
\section*{Handout 2} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
Having specified what problem our matching algorithm, $match$, is supposed to solve, namely |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
53 |
for a given regular expression $r$ and string $s$ answer $true$ if and only if |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
54 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
55 |
\[ |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
56 |
s \in L(r) |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
57 |
\] |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
\noindent |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
60 |
we can look at an algorithm to solve this problem. |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
61 |
Clearly we cannot use the function $L$ directly for this, because in general |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
62 |
the set of strings $L$ returns is infinite (recall what $L(a^*)$ is). In such cases there is no way we can implement |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
63 |
an exhaustive test for whether a string is member of this set or not. |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
|
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
65 |
The algorithm we will define below consists of two parts. One is the function $nullable$ which takes a |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
66 |
regular expression as argument and decides whether it can match the empty string (this means it returns a |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
67 |
boolean). This can be easily defined recursively as follows: |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
68 |
|
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
69 |
\begin{center} |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
70 |
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
71 |
$nullable(\varnothing)$ & $\dn$ & $f\!\/alse$\\ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
72 |
$nullable(\epsilon)$ & $\dn$ & $true$\\ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
73 |
$nullable (c)$ & $\dn$ & $f\!alse$\\ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
74 |
$nullable (r_1 + r_2)$ & $\dn$ & $nullable(r_1) \vee nullable(r_2)$\\ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
75 |
$nullable (r_1 \cdot r_2)$ & $\dn$ & $nullable(r_1) \wedge nullable(r_2)$\\ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
76 |
$nullable (r^*)$ & $\dn$ & $true$ \\ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
77 |
\end{tabular} |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
78 |
\end{center} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
79 |
|
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
80 |
\noindent |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
81 |
The idea behind this function is that the following property holds: |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
82 |
|
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
83 |
\[ |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
84 |
nullable(r) \;\;\text{if and only if}\;\; ""\in L(r) |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
85 |
\] |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
86 |
|
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
87 |
\noindent |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
88 |
Note on the left-hand side we have a function we can implement; on the right we have its specification. |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
89 |
|
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
90 |
The other function of our matching algorithm calculates a \emph{derivative} of a regular expression. This is a function |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
91 |
which will take a regular expression, say $r$, and a character, say $c$, as argument and return |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
92 |
a new regular expression. Be careful that the intuition behind this function is not so easy to grasp on first |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
93 |
reading. Essentially this function solves the following problem: if $r$ can match a string of the form |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
94 |
$c\!::\!s$, what does the regular expression look like that can match just $s$. The definition of this |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
95 |
function is as follows: |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
96 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
97 |
\begin{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
98 |
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
99 |
$der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$ & \\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
100 |
$der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ & \\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
101 |
$der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$ & \\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
102 |
$der\, c\, (r_1 + r_2)$ & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$ & \\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
103 |
$der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable (r_1)$\\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
104 |
& & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
105 |
& & else $(der\, c\, r_1) \cdot r_2$\\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
106 |
$der\, c\, (r^*)$ & $\dn$ & $(der\,c\,r) \cdot (r^*)$ & |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
107 |
\end{tabular} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
108 |
\end{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
109 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
110 |
\noindent |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
111 |
The first two clauses can be rationalised as follows: recall that $der$ should calculate a regular |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
112 |
expression, if the ``input'' regular expression can match a string of the form $c\!::\!s$. Since neither |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
113 |
$\varnothing$ nor $\epsilon$ can match such a string we return $\varnothing$. In the third case |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
114 |
we have to make a case-distinction: In case the regular expression is $c$, then clearly it can recognise |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
115 |
a string of the form $c\!::\!s$, just that $s$ is the empty string. Therefore we return the $\epsilon$-regular |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
116 |
expression. In the other case we again return $\varnothing$ since no string of the $c\!::\!s$ can be matched. |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
117 |
The $+$-case is relatively straightforward: all strings of the form $c\!::\!s$ are either matched by the |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
118 |
regular expression $r_1$ or $r_2$. So we just have to recursively call $der$ with these two regular |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
119 |
expressions and compose the results again with $+$. The $\cdot$-case is more complicated: |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
120 |
if $r_1\cdot r_2$ matches a string of the form $c\!::\!s$, then the first part must be matched by $r_1$. |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
121 |
Consequently, it makes sense to construct the regular expression for $s$ by calling $der$ with $r_1$ and |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
122 |
``appending'' $r_2$. There is however one exception to this simple rule: if $r_1$ can match the empty |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
123 |
string, then all of $c\!::\!s$ is matched by $r_2$. So in case $r_1$ is nullable (that is can match the |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
124 |
empty string) we have to allow the choice $der\,c\,r_2$ for calculating the regular expression that can match |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
125 |
$s$. The $*$-case is again simple: if $r^*$ matches a string of the form $c\!::\!s$, then the first part must be |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
126 |
``matched'' by a single copy of $r$. Therefore we call recursively $der\,c\,r$ and ``append'' $r^*$ in order to |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
127 |
match the rest of $s$. |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
128 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
129 |
Another way to rationalise the definition of $der$ is to consider the following operation on sets: |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
130 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
131 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
132 |
Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
133 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
134 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
135 |
\noindent |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
136 |
which essentially transforms a set of strings $A$ by filtering out all strings that do not start with $c$ and then |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
137 |
strips off the $c$ from all the remaining strings. For example suppose $A = \{"f\!oo", "bar", "f\!rak"\}$ then |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
138 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
139 |
Der\,f\,A = \{"oo", "rak"\}\quad,\quad |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
140 |
Der\,b\,A = \{"ar"\} \quad \text{and} \quad |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
141 |
Der\,a\,A = \varnothing |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
142 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
143 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
144 |
\noindent |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
145 |
Note that in the last case $Der$ is empty, because no string in $A$ starts with $a$. With this operation we can |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
146 |
state the following property about $der$: |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
147 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
148 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
149 |
L(der\,c\,r) = Der\,c\,(L(r)) |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
150 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
151 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
152 |
\noindent |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
153 |
This property clarifies what regular expression $der$ calculates, namely take the set of strings |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
154 |
that $r$ can match (that is $L(r)$), filter out all strings not starting with $c$ and strip off the $c$ from the |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
155 |
remaining strings---this is exactly the language that $der\,c\,r$ can match. |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
156 |
|
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
157 |
If we want to find out whether the string $"abc"$ is matched by the regular expression $r$ |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
158 |
then we can iteratively apply $Der$ as follows |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
159 |
|
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
160 |
\begin{enumerate} |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
161 |
\item $Der\,a\,(L(r))$ |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
162 |
\item $Der\,b\,(Der\,a\,(L(r)))$ |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
163 |
\item $Der\,c\,(Der\,b\,(Der\,a\,(L(r))))$ |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
164 |
\end{enumerate} |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
165 |
|
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
166 |
\noindent |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
167 |
In the last step we need to test whether the empty string is in the set. Our matching algorithm will work similarly, |
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
168 |
just using regular expression instead of sets. For this we need to lift the notion of derivatives from characters to strings. This can be |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
169 |
done using the following function, taking a string and regular expression as input and a regular expression |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
170 |
as output. |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
171 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
172 |
\begin{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
173 |
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
174 |
$der\!s\, []\, r$ & $\dn$ & $r$ & \\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
175 |
$der\!s\, (c\!::\!s)\, r$ & $\dn$ & $der\!s\,s\,(der\,c\,r)$ & \\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
176 |
\end{tabular} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
177 |
\end{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
178 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
179 |
\noindent |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
180 |
Having $ders$ in place, we can finally define our matching algorithm: |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
181 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
182 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
183 |
match\,s\,r = nullable(ders\,s\,r) |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
184 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
185 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
186 |
\noindent |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
187 |
We claim that |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
188 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
189 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
190 |
match\,s\,r\quad\text{if and only if}\quad s\in L(r) |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
191 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
192 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
193 |
\noindent |
133
09efdf5cf07c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
126
diff
changeset
|
194 |
holds, which means our algorithm satisfies the specification. This algorithm was introduced by |
09efdf5cf07c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
126
diff
changeset
|
195 |
Janus Brzozowski in 1964. Its main attractions are simplicity and being fast, as well as |
09efdf5cf07c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
126
diff
changeset
|
196 |
being easily extendable for other regular expressions such as $r^{\{n\}}$, $r^?$, $\sim{}r$ and so on. |
126
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
197 |
|
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
198 |
\begin{figure}[p] |
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
199 |
{\lstset{language=Scala}\texttt{\lstinputlisting{../progs/app5.scala}}} |
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
200 |
{\lstset{language=Scala}\texttt{\lstinputlisting{../progs/app6.scala}}} |
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
201 |
\caption{Scala implementation of the nullable and derivatives functions.} |
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
202 |
\end{figure} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
203 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
204 |
\end{document} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
205 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
206 |
%%% Local Variables: |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
207 |
%%% mode: latex |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
208 |
%%% TeX-master: t |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
209 |
%%% End: |