author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Mon, 13 Oct 2014 06:26:30 +0100 | |
changeset 277 | 8eb3261294ba |
parent 272 | 1446bc47a294 |
child 291 | 201c2c6d8696 |
permissions | -rw-r--r-- |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
\documentclass{article} |
251
5b5a68df6d16
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
217
diff
changeset
|
2 |
\usepackage{../style} |
217
cd6066f1056a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
140
diff
changeset
|
3 |
\usepackage{../langs} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
4 |
\usepackage{../graphics} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
5 |
\usepackage{../data} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
|
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
7 |
\pgfplotsset{compat=1.11} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
\begin{document} |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
|
272
1446bc47a294
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
268
diff
changeset
|
10 |
\section*{Handout 2 (Regular Expression Matching)} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
12 |
This lecture is about implementing a more efficient regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
13 |
expression matcher (the plots on the right)---more efficient |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
14 |
than the matchers from regular expression libraries in Ruby and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
15 |
Python (the plots on the left). These plots show the running |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
16 |
time for the evil regular expression $a?^{\{n\}}a^{\{n\}}$. |
263
92e6985018ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
262
diff
changeset
|
17 |
Note the different scales of the $x$-axes. |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
18 |
|
263
92e6985018ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
262
diff
changeset
|
19 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
20 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
21 |
\begin{tabular}{@{}cc@{}} |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
22 |
\begin{tikzpicture} |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
23 |
\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs}, |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
24 |
enlargelimits=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
25 |
xtick={0,5,...,30}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
26 |
xmax=30, |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
27 |
ymax=35, |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
28 |
ytick={0,5,...,30}, |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
29 |
scaled ticks=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
30 |
axis lines=left, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
31 |
width=5cm, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
32 |
height=5cm, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
33 |
legend entries={Python,Ruby}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
34 |
legend pos=north west, |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
35 |
legend cell align=left] |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
36 |
\addplot[blue,mark=*, mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
37 |
table {re-python.data}; |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
38 |
\addplot[brown,mark=pentagon*, mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
39 |
table {re-ruby.data}; |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
40 |
\end{axis} |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
41 |
\end{tikzpicture} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
42 |
& |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
43 |
\begin{tikzpicture} |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
44 |
\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs}, |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
45 |
enlargelimits=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
46 |
xtick={0,3000,...,12000}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
47 |
xmax=12000, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
48 |
ymax=35, ytick={0,5,...,30}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
49 |
scaled ticks=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
50 |
axis lines=left, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
51 |
width=6.5cm, |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
52 |
height=5cm] |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
53 |
\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data}; |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
54 |
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
55 |
\end{axis} |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
56 |
\end{tikzpicture} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
57 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
58 |
\end{center}\medskip |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
59 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
60 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
61 |
\noindent Having specified in the previous lecture what |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
62 |
problem our regular expression matcher, which we will call |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
63 |
\pcode{matches}, is supposed to solve, namely for any given |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
64 |
regular expression $r$ and string $s$ answer \textit{true} if |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
65 |
and only if |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
66 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
\[ |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
s \in L(r) |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
\] |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
|
251
5b5a68df6d16
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
217
diff
changeset
|
71 |
\noindent we can look at an algorithm to solve this problem. |
5b5a68df6d16
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
217
diff
changeset
|
72 |
Clearly we cannot use the function $L$ directly for this, |
5b5a68df6d16
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
217
diff
changeset
|
73 |
because in general the set of strings $L$ returns is infinite |
5b5a68df6d16
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
217
diff
changeset
|
74 |
(recall what $L(a^*)$ is). In such cases there is no way we |
5b5a68df6d16
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
217
diff
changeset
|
75 |
can implement an exhaustive test for whether a string is |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
76 |
a member of this set or not. In contrast our matching algorithm |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
77 |
will mainly operate on the regular expression $r$ and string |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
78 |
$s$, which are both finite. Before we come to the matching |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
79 |
algorithm, however, let us have a closer look at what it means |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
80 |
when two regular expressions are equivalent. |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
81 |
|
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
82 |
\subsection*{Regular Expression Equivalences} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
83 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
84 |
We already defined in Handout 1 what it means for two regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
85 |
expressions to be equivalent, namely if their meaning is the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
86 |
same language: |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
87 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
88 |
\[ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
89 |
r_1 \equiv r_2 \;\dn\; L(r_1) = L(r_2) |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
90 |
\] |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
91 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
92 |
\noindent |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
93 |
It is relatively easy to verify that some concrete equivalences |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
94 |
hold, for example |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
95 |
|
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
96 |
\begin{center} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
97 |
\begin{tabular}{rcl} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
98 |
$(a + b) + c$ & $\equiv$ & $a + (b + c)$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
99 |
$a + a$ & $\equiv$ & $a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
100 |
$a + b$ & $\equiv$ & $b + a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
101 |
$(a \cdot b) \cdot c$ & $\equiv$ & $a \cdot (b \cdot c)$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
102 |
$c \cdot (a + b)$ & $\equiv$ & $(c \cdot a) + (c \cdot b)$\\ |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
103 |
\end{tabular} |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
104 |
\end{center} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
105 |
|
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
106 |
\noindent |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
107 |
but also easy to verify that the following regular expressions |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
108 |
are \emph{not} equivalent |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
109 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
110 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
111 |
\begin{tabular}{rcl} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
112 |
$a \cdot a$ & $\not\equiv$ & $a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
113 |
$a + (b \cdot c)$ & $\not\equiv$ & $(a + b) \cdot (a + c)$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
114 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
115 |
\end{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
116 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
117 |
\noindent I leave it to you to verify these equivalences and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
118 |
non-equivalences. It is also interesting to look at some |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
119 |
corner cases involving $\epsilon$ and $\varnothing$: |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
120 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
121 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
122 |
\begin{tabular}{rcl} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
123 |
$a \cdot \varnothing$ & $\not\equiv$ & $a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
124 |
$a + \epsilon$ & $\not\equiv$ & $a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
125 |
$\epsilon$ & $\equiv$ & $\varnothing^*$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
126 |
$\epsilon^*$ & $\equiv$ & $\epsilon$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
127 |
$\varnothing^*$ & $\not\equiv$ & $\varnothing$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
128 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
129 |
\end{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
130 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
131 |
\noindent Again I leave it to you to make sure you agree |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
132 |
with these equivalences and non-equivalences. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
133 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
134 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
135 |
For our matching algorithm, however, the following seven |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
136 |
equivalences will play an important role: |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
137 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
138 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
139 |
\begin{tabular}{rcl} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
140 |
$r + \varnothing$ & $\equiv$ & $r$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
141 |
$\varnothing + r$ & $\equiv$ & $r$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
142 |
$r \cdot \epsilon$ & $\equiv$ & $r$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
143 |
$\epsilon \cdot r$ & $\equiv$ & $r$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
144 |
$r \cdot \varnothing$ & $\equiv$ & $\varnothing$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
145 |
$\varnothing \cdot r$ & $\equiv$ & $\varnothing$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
146 |
$r + r$ & $\equiv$ & $r$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
147 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
148 |
\end{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
149 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
150 |
\noindent which always hold no matter what the regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
151 |
expression $r$ looks like. The first two are easy to verify since |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
152 |
$L(\varnothing)$ is the empty set. The next two are also easy |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
153 |
to verify since $L(\epsilon) = \{[]\}$ and appending the empty |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
154 |
string to every string of another set leaves the set |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
155 |
unchanged. Be careful to fully comprehend the fifth and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
156 |
sixth equivalence: if you concatenate two sets of strings |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
157 |
and one is the empty set, then the concatenation will also be |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
158 |
the empty set. Check the definition of \pcode{_ @ _}. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
159 |
The last equivalence is again trivial. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
160 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
161 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
162 |
What will be important later on is that we can orient these |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
163 |
equivalences and read them from left to right. In this way we |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
164 |
can view them as \emph{simplification rules}. Consider for |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
165 |
example the regular expression |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
166 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
167 |
\begin{equation} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
168 |
(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot (r_4 \cdot \varnothing) |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
169 |
\label{big} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
170 |
\end{equation} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
171 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
172 |
\noindent If we can find an equivalent regular expression that |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
173 |
is simpler (smaller for example), then this might potentially |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
174 |
make our matching algorithm faster. The reason is that |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
175 |
whether a string $s$ is in $L(r)$ or in $L(r')$ with $r\equiv r'$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
176 |
will always give the same answer. In the example above you |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
177 |
will see that the regular expression is equivalent to $r_1$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
178 |
if you iteratively apply the simplification rules from above: |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
179 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
180 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
181 |
\begin{tabular}{ll} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
182 |
& $(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
183 |
(\underline{r_4 \cdot \varnothing})$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
184 |
$\equiv$ & $(r_1 + \varnothing) \cdot \epsilon + \underline{((\epsilon + r_2) + r_3) \cdot |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
185 |
\varnothing}$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
186 |
$\equiv$ & $\underline{(r_1 + \varnothing) \cdot \epsilon} + \varnothing$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
187 |
$\equiv$ & $(\underline{r_1 + \varnothing}) + \varnothing$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
188 |
$\equiv$ & $\underline{r_1 + \varnothing}$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
189 |
$\equiv$ & $r_1$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
190 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
191 |
\end{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
192 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
193 |
\noindent In each step I underlined where a simplification |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
194 |
rule is applied. Our matching algorithm in the next section |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
195 |
will often generate such ``useless'' $\epsilon$s and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
196 |
$\varnothing$s; therefore simplifying them away will make the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
197 |
algorithm quite a bit faster. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
198 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
199 |
\subsection*{The Matching Algorithm} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
200 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
201 |
The algorithm we will define below consists of two parts. One |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
202 |
is the function $nullable$ which takes a regular expression as |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
203 |
argument and decides whether it can match the empty string |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
204 |
(this means it returns a boolean in Scala). This can be easily |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
205 |
defined recursively as follows: |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
206 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
207 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
208 |
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
209 |
$nullable(\varnothing)$ & $\dn$ & $\textit{false}$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
210 |
$nullable(\epsilon)$ & $\dn$ & $true$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
211 |
$nullable(c)$ & $\dn$ & $\textit{false}$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
212 |
$nullable(r_1 + r_2)$ & $\dn$ & $nullable(r_1) \vee nullable(r_2)$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
213 |
$nullable(r_1 \cdot r_2)$ & $\dn$ & $nullable(r_1) \wedge nullable(r_2)$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
214 |
$nullable(r^*)$ & $\dn$ & $true$ \\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
215 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
216 |
\end{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
217 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
218 |
\noindent The idea behind this function is that the following |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
219 |
property holds: |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
220 |
|
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
221 |
\[ |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
222 |
nullable(r) \;\;\text{if and only if}\;\; []\in L(r) |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
223 |
\] |
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
224 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
225 |
\noindent Note on the left-hand side we have a function we can |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
226 |
implement; on the right we have its specification (which we |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
227 |
cannot implement in a programming language). |
124
dd8b5a3dac0a
adde
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
123
diff
changeset
|
228 |
|
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
229 |
The other function of our matching algorithm calculates a |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
230 |
\emph{derivative} of a regular expression. This is a function |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
231 |
which will take a regular expression, say $r$, and a |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
232 |
character, say $c$, as argument and return a new regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
233 |
expression. Be careful that the intuition behind this function |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
234 |
is not so easy to grasp on first reading. Essentially this |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
235 |
function solves the following problem: if $r$ can match a |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
236 |
string of the form $c\!::\!s$, what does the regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
237 |
expression look like that can match just $s$? The definition |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
238 |
of this function is as follows: |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
239 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
240 |
\begin{center} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
241 |
\begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
242 |
$der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
243 |
$der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
244 |
$der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
245 |
$der\, c\, (r_1 + r_2)$ & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\ |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
246 |
$der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable (r_1)$\\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
247 |
& & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
248 |
& & else $(der\, c\, r_1) \cdot r_2$\\ |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
249 |
$der\, c\, (r^*)$ & $\dn$ & $(der\,c\,r) \cdot (r^*)$ |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
250 |
\end{tabular} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
251 |
\end{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
252 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
253 |
\noindent The first two clauses can be rationalised as |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
254 |
follows: recall that $der$ should calculate a regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
255 |
expression, if the ``input'' regular expression can match a |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
256 |
string of the form $c\!::\!s$. Since neither $\varnothing$ nor |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
257 |
$\epsilon$ can match such a string we return $\varnothing$. In |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
258 |
the third case we have to make a case-distinction: In case the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
259 |
regular expression is $c$, then clearly it can recognise a |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
260 |
string of the form $c\!::\!s$, just that $s$ is the empty |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
261 |
string. Therefore we return the $\epsilon$-regular expression. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
262 |
In the other case we again return $\varnothing$ since no |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
263 |
string of the form $c\!::\!s$ can be matched. Next come the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
264 |
recursive cases. Fortunately, the $+$-case is still relatively |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
265 |
straightforward: all strings of the form $c\!::\!s$ are either |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
266 |
matched by the regular expression $r_1$ or $r_2$. So we just |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
267 |
have to recursively call $der$ with these two regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
268 |
expressions and compose the results again with $+$. Yes, makes |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
269 |
sense? The $\cdot$-case is more complicated: if $r_1\cdot r_2$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
270 |
matches a string of the form $c\!::\!s$, then the first part |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
271 |
must be matched by $r_1$. Consequently, it makes sense to |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
272 |
construct the regular expression for $s$ by calling $der$ with |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
273 |
$r_1$ and ``appending'' $r_2$. There is however one exception |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
274 |
to this simple rule: if $r_1$ can match the empty string, then |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
275 |
all of $c\!::\!s$ is matched by $r_2$. So in case $r_1$ is |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
276 |
nullable (that is can match the empty string) we have to allow |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
277 |
the choice $der\,c\,r_2$ for calculating the regular |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
278 |
expression that can match $s$. Therefore we have to |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
279 |
add the regular expression $der\,c\,r_2$. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
280 |
The $*$-case is again simple: |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
281 |
if $r^*$ matches a string of the form $c\!::\!s$, then the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
282 |
first part must be ``matched'' by a single copy of $r$. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
283 |
Therefore we call recursively $der\,c\,r$ and ``append'' $r^*$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
284 |
in order to match the rest of $s$. |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
285 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
286 |
If this did not make sense, here is another way to rationalise |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
287 |
the definition of $der$ by considering the following operation |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
288 |
on sets: |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
289 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
290 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
291 |
Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
292 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
293 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
294 |
\noindent |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
295 |
which essentially transforms a set of strings $A$ by filtering out all |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
296 |
strings that do not start with $c$ and then strips off the $c$ from |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
297 |
all the remaining strings. For example suppose $A = \{f\!oo, bar, |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
298 |
f\!rak\}$ then |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
299 |
\[ |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
300 |
Der\,f\,A = \{oo, rak\}\quad,\quad |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
301 |
Der\,b\,A = \{ar\} \quad \text{and} \quad |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
302 |
Der\,a\,A = \varnothing |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
303 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
304 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
305 |
\noindent |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
306 |
Note that in the last case $Der$ is empty, because no string in $A$ |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
307 |
starts with $a$. With this operation we can state the following |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
308 |
property about $der$: |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
309 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
310 |
\[ |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
311 |
L(der\,c\,r) = Der\,c\,(L(r)) |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
312 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
313 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
314 |
\noindent |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
315 |
This property clarifies what regular expression $der$ calculates, |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
316 |
namely take the set of strings that $r$ can match (that is $L(r)$), |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
317 |
filter out all strings not starting with $c$ and strip off the $c$ |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
318 |
from the remaining strings---this is exactly the language that |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
319 |
$der\,c\,r$ can match. |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
320 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
321 |
If we want to find out whether the string $abc$ is matched by |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
322 |
the regular expression $r_1$ then we can iteratively apply $der$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
323 |
as follows |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
324 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
325 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
326 |
\begin{tabular}{rll} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
327 |
Input: & $r_1$, $abc$\medskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
328 |
Step 1: & build derivative of $a$ and $r_1$ & $(r_2 = der\,a\,r_1)$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
329 |
Step 2: & build derivative of $b$ and $r_2$ & $(r_3 = der\,b\,r_2)$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
330 |
Step 3: & build derivative of $c$ and $r_3$ & $(r_4 = der\,c\,r_3)$\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
331 |
Step 4: & the string is exhausted; test & ($nullable(r_4)$)\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
332 |
& whether $r_4$ can recognise the\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
333 |
& empty string\smallskip\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
334 |
Output: & result of the test $\Rightarrow true \,\text{or}\, \textit{false}$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
335 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
336 |
\end{center} |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
337 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
338 |
\noindent Again the operation $Der$ might help to rationalise |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
339 |
this algorithm. We want to know whether $abc \in L(r_1)$. We |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
340 |
do not know yet. But let's assume it is. Then $Der\,a\,(L(r_1))$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
341 |
builds the set where all the strings not starting with $a$ are |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
342 |
filtered out. Of the remaining strings, the $a$ is stripped |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
343 |
off. Then we continue with filtering out all strings not |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
344 |
starting with $b$ and stripping off the $b$ from the remaining |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
345 |
strings, that means we build $Der\,b\,(Der\,a\,(L(r_1)))$. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
346 |
Finally we filter out all strings not starting with $c$ and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
347 |
strip off $c$ from the remaining strings. This is |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
348 |
$Der\,c\,(Der\,b\,(Der\,a\,(L(r_1))))$. Now if $abc$ was in the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
349 |
original set ($L(r_1)$), then in $Der\,c\,(Der\,b\,(Der\,a\,(L(r_1))))$ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
350 |
must be the empty string. If not then $abc$ was not in the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
351 |
language we started with. |
140
1be892087df2
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
133
diff
changeset
|
352 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
353 |
Our matching algorithm using $der$ and $nullable$ works |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
354 |
similarly, just using regular expressions instead of sets. For |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
355 |
this we need to extend the notion of derivatives from |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
356 |
characters to strings. This can be done using the following |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
357 |
function, taking a string and regular expression as input and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
358 |
a regular expression as output. |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
359 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
360 |
\begin{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
361 |
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
362 |
$\textit{ders}\, []\, r$ & $\dn$ & $r$ & \\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
363 |
$\textit{ders}\, (c\!::\!s)\, r$ & $\dn$ & $\textit{ders}\,s\,(der\,c\,r)$ & \\ |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
364 |
\end{tabular} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
365 |
\end{center} |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
366 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
367 |
\noindent This function essentially iterates $der$ taking one |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
368 |
character at a time from the original string until it is |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
369 |
exhausted. Having $ders$ in place, we can finally define our |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
370 |
matching algorithm: |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
371 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
372 |
\[ |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
373 |
matches\,s\,r = nullable(ders\,s\,r) |
125
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
374 |
\] |
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
375 |
|
39c75cf4e079
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
124
diff
changeset
|
376 |
\noindent |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
377 |
We can claim that |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
378 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
379 |
\[ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
380 |
matches\,s\,r\quad\text{if and only if}\quad s\in L(r) |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
381 |
\] |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
382 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
383 |
\noindent holds, which means our algorithm satisfies the |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
384 |
specification. Of course we can claim many things\ldots |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
385 |
whether the claim holds any water is a different question, |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
386 |
which for example is the point of the Strand-2 Coursework. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
387 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
388 |
This algorithm was introduced by Janusz Brzozowski in 1964. Its |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
389 |
main attractions are simplicity and being fast, as well as |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
390 |
being easily extendable for other regular expressions such as |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
391 |
$r^{\{n\}}$, $r^?$, $\sim{}r$ and so on (this is the subject of |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
392 |
Strand-1 Coursework 1). |
258
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
393 |
|
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
394 |
\subsection*{The Matching Algorithm in Scala} |
1e4da6d2490c
updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
251
diff
changeset
|
395 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
396 |
Another attraction of the algorithm is that it can be easily |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
397 |
implemented in a functional programming language, like Scala. |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
398 |
Given the implementation of regular expressions in Scala shown |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
399 |
in the first lecture and handout, the functions for |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
400 |
\pcode{matches} are shown in Figure~\ref{scala1}. |
126
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
401 |
|
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
402 |
\begin{figure}[p] |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
403 |
\lstinputlisting{../progs/app5.scala} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
404 |
\caption{Scala implementation of the nullable and |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
405 |
derivatives functions.\label{scala1}} |
126
7c7185cb4f2b
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
125
diff
changeset
|
406 |
\end{figure} |
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
407 |
|
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
408 |
For running the algorithm with our favourite example, the evil |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
409 |
regular expression $a?^{\{n\}}a^{\{n\}}$, we need to implement |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
410 |
the optional regular expression and the exactly $n$-times |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
411 |
regular expression. This can be done with the translations |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
412 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
413 |
\lstinputlisting[numbers=none]{../progs/app51.scala} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
414 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
415 |
\noindent Running the matcher with the example, we find it is |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
416 |
slightly worse than the matcher in Ruby and Python. |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
417 |
Ooops\ldots |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
418 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
419 |
\begin{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
420 |
\begin{tikzpicture} \begin{axis}[ xlabel={\pcode{a}s}, ylabel={time in secs}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
421 |
enlargelimits=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
422 |
xtick={0,5,...,30}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
423 |
xmax=30, ytick={0,5,...,30}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
424 |
scaled ticks=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
425 |
axis lines=left, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
426 |
width=6cm, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
427 |
height=5cm, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
428 |
legend entries={Python,Ruby,Scala V1}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
429 |
legend pos=outer north east, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
430 |
legend cell align=left ] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
431 |
\addplot[blue,mark=*, mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
432 |
table {re-python.data}; |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
433 |
\addplot[brown,mark=pentagon*, mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
434 |
table {re-ruby.data}; |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
435 |
\addplot[red,mark=triangle*,mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
436 |
table {re1.data}; \end{axis} \end{tikzpicture} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
437 |
\end{center} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
438 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
439 |
\noindent Analysing this failure a bit we notice that |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
440 |
for $a^{\{n\}}$ we generate quite big regular expressions: |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
441 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
442 |
\begin{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
443 |
\begin{tabular}{rl} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
444 |
1: & $a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
445 |
2: & $a\cdot a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
446 |
3: & $a\cdot a\cdot a$\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
447 |
& \ldots\\ |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
448 |
13: & $a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a$\\ |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
449 |
& \ldots |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
450 |
\end{tabular} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
451 |
\end{center} |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
452 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
453 |
\noindent Our algorithm traverses such regular expressions at |
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
454 |
least once every time a derivative is calculated. So having |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
455 |
large regular expressions will cause problems. This problem |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
456 |
is aggravated by $a?$ being represented as $a + \epsilon$. |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
457 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
458 |
We can fix this by having an explicit constructor for |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
459 |
$r^{\{n\}}$. In Scala we would introduce a constructor like |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
460 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
461 |
\begin{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
462 |
\code{case class NTIMES(r: Rexp, n: Int) extends Rexp} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
463 |
\end{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
464 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
465 |
\noindent With this we have a constant ``size'' regular |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
466 |
expression for our running example no matter how large $n$ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
467 |
is. This means we have to also add cases for $nullable$ and |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
468 |
$der$. Does the change have any effect? |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
469 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
470 |
\begin{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
471 |
\begin{tikzpicture} \begin{axis}[ xlabel={\pcode{a}s}, ylabel={time in secs}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
472 |
enlargelimits=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
473 |
xtick={0,100,...,1000}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
474 |
xmax=1000, ytick={0,5,...,30}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
475 |
scaled ticks=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
476 |
axis lines=left, |
263
92e6985018ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
262
diff
changeset
|
477 |
width=9.5cm, |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
478 |
height=5cm, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
479 |
legend entries={Python,Ruby,Scala V1,Scala V2}, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
480 |
legend pos=outer north east, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
481 |
legend cell align=left ] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
482 |
\addplot[blue,mark=*, mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
483 |
table {re-python.data}; |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
484 |
\addplot[brown,mark=pentagon*, mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
485 |
table {re-ruby.data}; |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
486 |
\addplot[red,mark=triangle*,mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
487 |
table {re1.data}; \addplot[green,mark=square*,mark options={fill=white}] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
488 |
table {re2b.data}; \end{axis} \end{tikzpicture} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
489 |
\end{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
490 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
491 |
\noindent Now we are talking business! The modified matcher |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
492 |
can within 30 seconds handle regular expressions up to |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
493 |
$n = 950$ before a StackOverflow is raised. |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
494 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
495 |
The moral is that our algorithm is rather sensitive to the |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
496 |
size of regular expressions it needs to handle. This is of |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
497 |
course obvious because both $nullable$ and $der$ need to |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
498 |
traverse the whole regular expression. There seems to be one |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
499 |
more way of making the algorithm run faster. The derivative |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
500 |
function often produces ``useless'' $\varnothing$s and |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
501 |
$\epsilon$s. To see this, consider $r = ((a \cdot b) + b)^*$ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
502 |
and the following three derivatives |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
503 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
504 |
\begin{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
505 |
\begin{tabular}{l} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
506 |
$der\,a\,r = ((\epsilon \cdot b) + \varnothing) \cdot r$\\ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
507 |
$der\,b\,r = ((\varnothing \cdot b) + \epsilon)\cdot r$\\ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
508 |
$der\,c\,r = ((\varnothing \cdot b) + \varnothing)\cdot r$ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
509 |
\end{tabular} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
510 |
\end{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
511 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
512 |
\noindent |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
513 |
If we simplify them according to the simple rules from the |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
514 |
beginning, we can replace the right-hand sides by the |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
515 |
smaller equivalent regular expressions |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
516 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
517 |
\begin{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
518 |
\begin{tabular}{l} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
519 |
$der\,a\,r \equiv b \cdot r$\\ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
520 |
$der\,b\,r \equiv r$\\ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
521 |
$der\,c\,r \equiv \varnothing$ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
522 |
\end{tabular} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
523 |
\end{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
524 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
525 |
\noindent I leave it to you to contemplate whether such a |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
526 |
simplification can have any impact on the correctness of our |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
527 |
algorithm (will it change any answers?). Figure~\ref{scala2} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
528 |
gives a simplification function that recursively traverses a |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
529 |
regular expression and simplifies it according to the rules |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
530 |
given at the beginning. There are only rules for $+$, $\cdot$ |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
531 |
and $n$-times (the latter because we added it in the second |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
532 |
version of our matcher). There is no rule for a star, because |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
533 |
empirical data and also a little thought showed that |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
534 |
simplifying under a star is a waste of computation time. The |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
535 |
simplification function will be called after every derivation. |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
536 |
This additional step removes all the ``junk'' the derivative |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
537 |
function introduced. Does this improve the speed? You bet!! |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
538 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
539 |
\begin{figure}[p] |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
540 |
\lstinputlisting{../progs/app6.scala} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
541 |
\caption{The simplification function and modified |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
542 |
\texttt{ders}-function.\label{scala2}} |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
543 |
\end{figure} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
544 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
545 |
\begin{center} |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
546 |
\begin{tikzpicture} |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
547 |
\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs}, |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
548 |
enlargelimits=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
549 |
xtick={0,2000,...,12000}, |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
550 |
xmax=12000, |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
551 |
ytick={0,5,...,30}, |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
552 |
scaled ticks=false, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
553 |
axis lines=left, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
554 |
width=9cm, |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
555 |
height=4cm, |
268
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
556 |
legend entries={Scala V2,Scala V3}] |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
557 |
\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data}; |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
558 |
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
559 |
\end{axis} |
18bef085a7ca
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
263
diff
changeset
|
560 |
\end{tikzpicture} |
262
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
561 |
\end{center} |
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
562 |
|
ee4304bc6350
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
261
diff
changeset
|
563 |
\end{document} |
261
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
564 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
565 |
|
24531cfaa36a
updated handouts
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
259
diff
changeset
|
566 |
|
123
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
567 |
|
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
568 |
%%% Local Variables: |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
569 |
%%% mode: latex |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
570 |
%%% TeX-master: t |
a75f9c9d8f94
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
571 |
%%% End: |