15
|
1 |
\documentclass[dvipsnames,14pt,t,xelatex]{beamer}
|
|
2 |
\usepackage{./slides}
|
|
3 |
\usepackage{./graph}
|
|
4 |
\usepackage{./langs}
|
|
5 |
\usepackage{./data}
|
16
|
6 |
\usepackage{changepage}
|
15
|
7 |
\hfuzz=220pt
|
|
8 |
|
|
9 |
\lstset{language=Scala,
|
|
10 |
style=mystyle,
|
|
11 |
numbersep=0pt,
|
|
12 |
numbers=none,
|
|
13 |
xleftmargin=0mm}
|
|
14 |
|
|
15 |
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
|
|
16 |
|
|
17 |
% beamer stuff
|
|
18 |
\renewcommand{\slidecaption}{CFL 01, King's College London}
|
|
19 |
|
|
20 |
|
|
21 |
\begin{filecontents}{re-python2.data}
|
|
22 |
1 0.033
|
|
23 |
5 0.036
|
|
24 |
10 0.034
|
|
25 |
15 0.036
|
|
26 |
18 0.059
|
|
27 |
19 0.084
|
|
28 |
20 0.141
|
|
29 |
21 0.248
|
|
30 |
22 0.485
|
|
31 |
23 0.878
|
|
32 |
24 1.71
|
|
33 |
25 3.40
|
|
34 |
26 7.08
|
|
35 |
27 14.12
|
|
36 |
28 26.69
|
|
37 |
\end{filecontents}
|
|
38 |
|
|
39 |
\begin{filecontents}{re-java.data}
|
|
40 |
5 0.00298
|
|
41 |
10 0.00418
|
|
42 |
15 0.00996
|
|
43 |
16 0.01710
|
|
44 |
17 0.03492
|
|
45 |
18 0.03303
|
|
46 |
19 0.05084
|
|
47 |
20 0.10177
|
|
48 |
21 0.19960
|
|
49 |
22 0.41159
|
|
50 |
23 0.82234
|
|
51 |
24 1.70251
|
|
52 |
25 3.36112
|
|
53 |
26 6.63998
|
|
54 |
27 13.35120
|
|
55 |
28 29.81185
|
|
56 |
\end{filecontents}
|
|
57 |
|
|
58 |
\begin{filecontents}{re-java9.data}
|
|
59 |
1000 0.01410
|
|
60 |
2000 0.04882
|
|
61 |
3000 0.10609
|
|
62 |
4000 0.17456
|
|
63 |
5000 0.27530
|
|
64 |
6000 0.41116
|
|
65 |
7000 0.53741
|
|
66 |
8000 0.70261
|
|
67 |
9000 0.93981
|
|
68 |
10000 0.97419
|
|
69 |
11000 1.28697
|
|
70 |
12000 1.51387
|
|
71 |
14000 2.07079
|
|
72 |
16000 2.69846
|
|
73 |
20000 4.41823
|
|
74 |
24000 6.46077
|
|
75 |
26000 7.64373
|
|
76 |
30000 9.99446
|
|
77 |
34000 12.966885
|
|
78 |
38000 16.281621
|
|
79 |
42000 19.180228
|
|
80 |
46000 21.984721
|
|
81 |
50000 26.950203
|
|
82 |
60000 43.0327746
|
|
83 |
\end{filecontents}
|
|
84 |
|
16
|
85 |
\begin{filecontents}{re-usize.data}
|
|
86 |
1 16
|
|
87 |
2 33
|
|
88 |
3 63
|
|
89 |
4 108
|
|
90 |
5 181
|
|
91 |
6 297
|
|
92 |
7 484
|
|
93 |
8 785
|
|
94 |
9 1271
|
|
95 |
10 2056
|
|
96 |
11 3325
|
|
97 |
12 5377
|
|
98 |
13 8696
|
|
99 |
14 14065
|
|
100 |
15 22751
|
|
101 |
16 36804
|
|
102 |
17 59541
|
|
103 |
18 96329
|
|
104 |
19 155852
|
|
105 |
20 252161
|
|
106 |
21 407991
|
|
107 |
22 660128
|
|
108 |
23 1068093
|
|
109 |
24 1728193
|
|
110 |
25 2796256
|
|
111 |
26 4524417
|
|
112 |
27 7320639
|
|
113 |
\end{filecontents}
|
15
|
114 |
|
16
|
115 |
\begin{filecontents}{re-ssize.data}
|
|
116 |
1 12
|
|
117 |
2 19
|
|
118 |
3 19
|
|
119 |
4 19
|
|
120 |
5 19
|
|
121 |
6 19
|
|
122 |
7 19
|
|
123 |
8 19
|
|
124 |
9 19
|
|
125 |
10 19
|
|
126 |
11 19
|
|
127 |
12 19
|
|
128 |
13 19
|
|
129 |
14 19
|
|
130 |
15 19
|
|
131 |
16 19
|
|
132 |
17 19
|
|
133 |
18 19
|
|
134 |
19 19
|
|
135 |
20 19
|
|
136 |
21 19
|
|
137 |
22 19
|
|
138 |
23 19
|
|
139 |
24 19
|
|
140 |
25 19
|
|
141 |
26 19
|
|
142 |
27 19
|
|
143 |
28 19
|
|
144 |
29 19
|
|
145 |
30 19
|
|
146 |
31 19
|
|
147 |
32 19
|
|
148 |
33 19
|
|
149 |
34 19
|
|
150 |
35 19
|
|
151 |
36 19
|
|
152 |
37 19
|
|
153 |
38 19
|
|
154 |
39 19
|
|
155 |
40 19
|
|
156 |
\end{filecontents}
|
15
|
157 |
|
|
158 |
\begin{document}
|
|
159 |
|
|
160 |
|
|
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
|
|
166 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
167 |
\begin{frame}[t]
|
|
168 |
\frametitle{%
|
|
169 |
\begin{tabular}{@ {}c@ {}}
|
16
|
170 |
\\[-1mm]
|
|
171 |
\LARGE Fast Regular Expression \\[-1mm]
|
|
172 |
\LARGE Matching Using Derivatives\\[-3mm]
|
15
|
173 |
\end{tabular}}
|
|
174 |
|
|
175 |
\begin{center}
|
|
176 |
%\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
|
|
177 |
%\includegraphics[scale=0.31]{pics/ante2.jpg}\\
|
|
178 |
%\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)}
|
|
179 |
\end{center}
|
|
180 |
|
|
181 |
\normalsize
|
|
182 |
\begin{center}
|
|
183 |
\begin{tabular}{ll}
|
16
|
184 |
%Email: & christian.urban at kcl.ac.uk\\
|
|
185 |
%Office: & N\liningnums{7.07} (North Wing, Bush House)\\
|
|
186 |
%Slides: & KEATS
|
|
187 |
Student: & Chengsong Tan\\
|
|
188 |
Supervisor: & Christian Urban \\
|
|
189 |
Date: & 2019/5/10
|
15
|
190 |
\end{tabular}
|
|
191 |
\end{center}
|
|
192 |
|
|
193 |
\end{frame}
|
|
194 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
195 |
|
|
196 |
\begin{frame}[t]
|
16
|
197 |
\frametitle{Regular Expressions}
|
|
198 |
|
|
199 |
Their inductive definition:\\
|
|
200 |
\hspace{10pt}
|
|
201 |
\vspace{100pt}
|
|
202 |
|
|
203 |
\begin{textblock}{6}(1.5,3.5)
|
|
204 |
\begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
|
|
205 |
\bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\
|
|
206 |
& \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} / $[]$\\
|
|
207 |
& \bl{$\mid$} & \bl{$c$} & character\\
|
|
208 |
& \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\
|
|
209 |
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
|
|
210 |
& \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\
|
|
211 |
\end{tabular}
|
|
212 |
\end{textblock}
|
|
213 |
|
|
214 |
\begin{itemize}
|
|
215 |
\item { Formalized by Stephen Coles Kleene in 1950s}
|
|
216 |
\item {\bf What could be possibly interesting?}
|
|
217 |
\end{itemize}
|
|
218 |
\end{frame}
|
|
219 |
|
|
220 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
221 |
\iffalse
|
|
222 |
\begin{frame}[t]
|
|
223 |
\frametitle{Regular Expressions}
|
15
|
224 |
|
|
225 |
John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\
|
|
226 |
|
|
227 |
\begin{bubble}[10.5cm]
|
|
228 |
\bf ``\ldots{}It’s effectively a perpetual
|
|
229 |
employment act for solid compiler hackers.''
|
|
230 |
\end{bubble}
|
|
231 |
|
|
232 |
\onslide<1->{
|
|
233 |
\only<2>{
|
|
234 |
\begin{itemize}
|
|
235 |
\item {\bf Hardware is getting weirder
|
|
236 |
rather than getting clocked faster}
|
|
237 |
|
|
238 |
\begin{itemize}
|
|
239 |
\item Almost all processors are
|
|
240 |
multicores nowadays and it looks like there is increasing asymmetry in
|
|
241 |
resources across cores. Processors come with vector units, crypto
|
|
242 |
accelerators etc. We have DSPs, GPUs,
|
|
243 |
ARM big.little, and Xeon Phi. This is only scratching the
|
|
244 |
surface.
|
|
245 |
\end{itemize}
|
|
246 |
\end{itemize}}
|
|
247 |
\only<3>{
|
|
248 |
\begin{itemize}
|
|
249 |
\item {\bf We’re getting tired of low-level languages and
|
|
250 |
their associated security disasters}
|
|
251 |
|
|
252 |
\begin{itemize}
|
|
253 |
\item
|
|
254 |
We want to write new code, to
|
|
255 |
whatever extent possible, in safer, higher-level
|
|
256 |
languages. Compilers are caught right in the middle of these
|
|
257 |
opposing trends: one of their main jobs is to help bridge the large
|
|
258 |
and growing gap between increasingly high-level languages and
|
|
259 |
increasingly wacky platforms.
|
|
260 |
\end{itemize}
|
|
261 |
\end{itemize}}}
|
|
262 |
|
|
263 |
\end{frame}
|
16
|
264 |
\fi
|
15
|
265 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
266 |
|
|
267 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
16
|
268 |
\iffalse
|
15
|
269 |
\begin{frame}[c]
|
|
270 |
\frametitle{Why Bother?}
|
|
271 |
|
|
272 |
\begin{columns}[t]
|
|
273 |
\begin{column}{.5\textwidth}
|
|
274 |
Ruby, Python, Java 8\medskip\\
|
|
275 |
\begin{tikzpicture}\footnotesize
|
|
276 |
\begin{axis}[
|
|
277 |
xlabel={$n$},
|
|
278 |
x label style={at={(1.05,0.0)}},
|
|
279 |
ylabel={time in secs},
|
|
280 |
enlargelimits=false,
|
|
281 |
xtick={0,5,...,30},
|
|
282 |
xmax=33,
|
|
283 |
ymax=35,
|
|
284 |
ytick={0,5,...,30},
|
|
285 |
scaled ticks=false,
|
|
286 |
axis lines=left,
|
|
287 |
width=5.5cm,
|
|
288 |
height=4cm,
|
|
289 |
legend entries={Python,Ruby},
|
|
290 |
legend pos=north west,
|
|
291 |
legend cell align=left]
|
|
292 |
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
|
|
293 |
\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
|
|
294 |
\end{axis}
|
|
295 |
\end{tikzpicture}
|
|
296 |
\begin{tikzpicture}\footnotesize
|
|
297 |
\begin{axis}[
|
|
298 |
xlabel={$n$},
|
|
299 |
x label style={at={(1.05,0.0)}},
|
|
300 |
ylabel={time in secs},
|
|
301 |
enlargelimits=false,
|
|
302 |
xtick={0,5,...,30},
|
|
303 |
xmax=33,
|
|
304 |
ymax=35,
|
|
305 |
ytick={0,5,...,30},
|
|
306 |
scaled ticks=false,
|
|
307 |
axis lines=left,
|
|
308 |
width=5.5cm,
|
|
309 |
height=4cm,
|
|
310 |
legend entries={Python, Java 8},
|
|
311 |
legend pos=north west,
|
|
312 |
legend cell align=left]
|
|
313 |
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
|
|
314 |
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
|
|
315 |
\end{axis}
|
|
316 |
\end{tikzpicture}
|
|
317 |
|
|
318 |
\end{column}
|
|
319 |
\begin{column}{.5\textwidth}
|
|
320 |
Us (after next lecture)\medskip\\
|
|
321 |
\begin{tikzpicture}\footnotesize
|
|
322 |
\begin{axis}[
|
|
323 |
xlabel={$n$},
|
|
324 |
x label style={at={(1.07,0.0)}},
|
|
325 |
ylabel={time in secs},
|
|
326 |
enlargelimits=false,
|
|
327 |
xtick={0,5000,...,10000},
|
|
328 |
xmax=11000,
|
|
329 |
ymax=35,
|
|
330 |
ytick={0,5,...,30},
|
|
331 |
scaled ticks=false,
|
|
332 |
axis lines=left,
|
|
333 |
width=5.5cm,
|
|
334 |
height=4cm]
|
|
335 |
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
|
|
336 |
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
|
|
337 |
\end{axis}
|
|
338 |
\end{tikzpicture}
|
|
339 |
\begin{tikzpicture}\footnotesize
|
|
340 |
\begin{axis}[
|
|
341 |
xlabel={$n$},
|
|
342 |
x label style={at={(1.07,0.0)}},
|
|
343 |
ylabel={time in secs},
|
|
344 |
enlargelimits=false,
|
|
345 |
ymax=35,
|
|
346 |
ytick={0,5,...,30},
|
|
347 |
scaled ticks=false,
|
|
348 |
axis lines=left,
|
|
349 |
width=5.5cm,
|
|
350 |
height=4cm]
|
|
351 |
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
|
|
352 |
\end{axis}
|
|
353 |
\end{tikzpicture}
|
|
354 |
\end{column}
|
|
355 |
\end{columns}\bigskip
|
|
356 |
|
|
357 |
\small\centering
|
|
358 |
matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b}
|
|
359 |
against $\underbrace{\texttt{a}...\texttt{a}}_n$
|
|
360 |
\end{frame}
|
16
|
361 |
\fi
|
15
|
362 |
|
16
|
363 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
364 |
|
|
365 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
366 |
|
|
367 |
\begin{frame}[c]
|
|
368 |
\frametitle{Catastrophic Backtracking}
|
|
369 |
|
|
370 |
\begin{columns}
|
15
|
371 |
\begin{column}{.5\textwidth}
|
16
|
372 |
|
15
|
373 |
\begin{tikzpicture}
|
|
374 |
\begin{axis}[
|
|
375 |
xlabel={$n$},
|
16
|
376 |
x label style={at={(1.07,0.0)}},
|
15
|
377 |
ylabel={time in secs},
|
16
|
378 |
%y label style={at={(0.06,0.5)}},
|
15
|
379 |
enlargelimits=false,
|
|
380 |
xtick={0,5,...,30},
|
|
381 |
xmax=33,
|
|
382 |
ymax=45,
|
|
383 |
ytick={0,10,...,40},
|
|
384 |
scaled ticks=false,
|
|
385 |
axis lines=left,
|
16
|
386 |
width=5cm,
|
|
387 |
height=4cm,
|
15
|
388 |
legend entries={Python, Java 8},
|
|
389 |
legend pos=north west]
|
|
390 |
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
|
|
391 |
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
|
|
392 |
\end{axis}
|
|
393 |
\end{tikzpicture}
|
16
|
394 |
|
15
|
395 |
\begin{tikzpicture}
|
|
396 |
\begin{axis}[
|
|
397 |
xlabel={$n$},
|
16
|
398 |
x label style={at={(1.07,0.0)}},
|
15
|
399 |
ylabel={time in secs},
|
16
|
400 |
%y label style={at={(0.06,0.5)}},
|
15
|
401 |
%enlargelimits=false,
|
|
402 |
%xtick={0,5000,...,30000},
|
|
403 |
xmax=65000,
|
|
404 |
ymax=45,
|
|
405 |
ytick={0,10,...,40},
|
|
406 |
scaled ticks=false,
|
|
407 |
axis lines=left,
|
16
|
408 |
width=5cm,
|
|
409 |
height=4cm,
|
15
|
410 |
legend entries={Java 9},
|
|
411 |
legend pos=north west]
|
|
412 |
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
|
|
413 |
\end{axis}
|
|
414 |
\end{tikzpicture}
|
|
415 |
\end{column}
|
16
|
416 |
\end{columns}\bigskip
|
|
417 |
\center
|
|
418 |
matching \texttt{(a*)*b}
|
|
419 |
against $\underbrace{\texttt{a}...\texttt{a}}_n$
|
15
|
420 |
\end{frame}
|
|
421 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
422 |
|
|
423 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
16
|
424 |
\begin{frame}[fragile]
|
|
425 |
\frametitle{Impact}
|
|
426 |
|
|
427 |
\begin{itemize}
|
|
428 |
\item Network Traffic Analysis
|
|
429 |
\begin{itemize}
|
|
430 |
\item Snort: > 5 million downloads
|
|
431 |
\item Bro: > 10000 downloads
|
|
432 |
\item thousands of regexes
|
|
433 |
\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
|
|
434 |
|
|
435 |
\end{itemize}
|
|
436 |
|
|
437 |
\begin{verbatim}
|
|
438 |
Jan 2 00:53:19 talisker sshd: Received disconnect from 110.53.183.227: [preauth]
|
|
439 |
Jan 2 00:58:31 talisker sshd: Received disconnect from 110.53.183.252: [preauth]
|
|
440 |
Jan 2 01:01:28 talisker sshd: Received disconnect from 221.194.47.236: [preauth]
|
|
441 |
Jan 2 01:03:59 talisker sshd: Received disconnect from 110.53.183.228: [preauth]
|
|
442 |
Jan 2 01:06:53 talisker sshd: Received disconnect from 221.194.47.245: [preauth]
|
|
443 |
...
|
|
444 |
\end{verbatim}
|
|
445 |
|
|
446 |
|
|
447 |
\item Compilers: lexical analysis
|
|
448 |
\end{itemize}
|
|
449 |
|
|
450 |
\end{frame}
|
|
451 |
|
|
452 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
453 |
|
|
454 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
15
|
455 |
\begin{frame}[c]
|
|
456 |
\frametitle{Evil Regular Expressions}
|
|
457 |
|
|
458 |
\begin{itemize}
|
16
|
459 |
%\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
|
15
|
460 |
\item Evil regular expressions\medskip
|
|
461 |
\begin{itemize}
|
|
462 |
\item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}
|
|
463 |
\item \bl{$(a^*)^*\cdot b$}
|
|
464 |
\item \bl{$([a$\,-\,$z]^+)^*$}
|
|
465 |
\item \bl{$(a + a \cdot a)^*$}
|
|
466 |
\item \bl{$(a + a^?)^*$}
|
16
|
467 |
\item \bl{$\^(.*?,)\{11\}P$}
|
|
468 |
\item \bl{$<html>.*?<head>.*?<title>.*?</title>.*?</head>.*?<body[^>]*>.*?</body>.*?</html>$}
|
|
469 |
\end{itemize}
|
|
470 |
|
|
471 |
\item Pearl Compatible Regular Expression(PCRE) \bl{$(r) \backslash 1$}
|
15
|
472 |
\end{itemize}
|
|
473 |
|
16
|
474 |
\end{frame}
|
|
475 |
|
|
476 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
477 |
|
|
478 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
479 |
|
|
480 |
\begin{frame}[c]
|
|
481 |
\frametitle{A Concrete Example}
|
|
482 |
\begin{adjustwidth*}{}{-1.2cm}
|
|
483 |
\resizebox{13cm}{4cm}{
|
|
484 |
|
|
485 |
\begin{tikzpicture}[shorten >=1pt,node distance=2cm,on grid,auto]
|
|
486 |
\node[state,initial] (q_0) {$0$};
|
|
487 |
\node[state] (q_1) [right = of q_0] {$1$};
|
|
488 |
\node[state] (q_2) [right=of q_1] {$2$};
|
|
489 |
\node (q_dots1) [right = of q_2] {$\cdots$};
|
|
490 |
\node [state] (q_n) [right = of q_dots1] {$n$};
|
|
491 |
\node [state] (q_n1) [right = of q_n] {$n+1$};
|
|
492 |
\node[state] (q_n2) [right=of q_n1] {$n+2$};
|
|
493 |
\node[state,accepting] (q_n3) [right=of q_n2] {$n+3$};
|
|
494 |
|
|
495 |
\path[->]
|
|
496 |
(q_0) edge [loop above] node {$*$} (q_0)
|
|
497 |
(q_0) edge node {$a$} (q_1)
|
|
498 |
(q_1) edge node {$*$} (q_2)
|
|
499 |
(q_2) edge node {$*$} (q_dots1)
|
|
500 |
(q_dots1) edge node{$*$} (q_n)
|
|
501 |
(q_n) edge node{$*$} (q_n1)
|
|
502 |
(q_n1) edge node{$*$} (q_n2)
|
|
503 |
(q_n2) edge node{$*$} (q_n3);
|
|
504 |
|
|
505 |
\draw [decorate,decoration={brace,amplitude=10pt,mirror,raise=10pt},yshift=0pt]
|
|
506 |
(q_2.south west) -- (q_n1.south east) node [black,midway,yshift = -2cm] {\footnotesize n states};
|
|
507 |
%\draw [decorate,decoration={brace,amplitude=10pt,mirror,raise=4pt},yshift=0pt]
|
|
508 |
%(3.5,0.65) -- (3.5,6.5) node [black,midway,xshift=0.8cm] {\footnotesize
|
|
509 |
%$P_2$};
|
|
510 |
\end{tikzpicture}
|
|
511 |
}
|
|
512 |
\end{adjustwidth*}
|
|
513 |
|
|
514 |
\begin{itemize}
|
|
515 |
\item
|
|
516 |
NFA accepting Regex $.*a.{n}bc$
|
|
517 |
\item
|
|
518 |
Matching the string aaaaaaaaaaaaaaa.....abc
|
|
519 |
\end{itemize}
|
|
520 |
\end{frame}
|
|
521 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
522 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
523 |
\begin{frame}[c]
|
|
524 |
\frametitle{Brzozowski Derivative}
|
|
525 |
|
|
526 |
\begin{itemize}
|
|
527 |
\item Brzozowski invented in his 1964 Phd thesis
|
|
528 |
\item Intuition: Chopping off the first character
|
|
529 |
|
|
530 |
\begin{center}
|
|
531 |
\bl{$A \backslash c \dn \{ s \;|\; c\!::\!s \in A\}$ }
|
|
532 |
\end{center}
|
|
533 |
\item
|
|
534 |
For \bl{$A = \{\textit{foo}, \textit{bar}, \textit{frak}\}$} then
|
|
535 |
|
|
536 |
\begin{center}
|
|
537 |
\bl{\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l}
|
|
538 |
$A \backslash f $ & $=$ & $\{\textit{oo}, \textit{rak}\}$\\
|
|
539 |
$A \backslash b $ & $=$ & $\{\textit{ar}\}$\\
|
|
540 |
$A \backslash a $ & $=$ & $\{\}$\pause
|
|
541 |
\end{tabular}}
|
|
542 |
\end{center}
|
|
543 |
\item
|
|
544 |
\begin{tabular}{rp{4cm}}
|
|
545 |
\bl {$r \,matches \,s=c_1c_2...c_n$}
|
|
546 |
\bl{$\Leftrightarrow$} \bl{$c_1c_2...c_n \in L(r)$} \bl{$\Leftrightarrow$}\\
|
|
547 |
\bl{$c_2...c_n \in L(r) \backslash c_1$}\\
|
|
548 |
\bl{$\Leftrightarrow$} \bl{$\cdots$}\\
|
|
549 |
\bl{$\Leftrightarrow$} \bl{$[] \in ((L(r)\backslash c_1) \backslash c_2)\cdots\backslash c_n$}
|
|
550 |
\end{tabular}
|
|
551 |
|
15
|
552 |
\end{itemize}
|
|
553 |
|
|
554 |
\end{frame}
|
|
555 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
16
|
556 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
557 |
|
|
558 |
\begin{frame}[c]
|
|
559 |
\frametitle{Previous Example}
|
|
560 |
|
|
561 |
\begin{center}
|
|
562 |
\begin{tikzpicture}
|
|
563 |
\begin{axis}[
|
|
564 |
xlabel={$n$},
|
|
565 |
x label style={at={(1.09,-0.15)}},
|
|
566 |
ylabel={time in secs},
|
|
567 |
scaled x ticks=false,
|
|
568 |
enlargelimits=false,
|
|
569 |
xtick distance=10000,
|
|
570 |
xmax=44000,
|
|
571 |
ytick={0,10,...,30},
|
|
572 |
ymax=35,
|
|
573 |
axis lines=left,
|
|
574 |
width=7cm,
|
|
575 |
height=3.5cm,
|
|
576 |
legend entries={Java \liningnums{9}+},
|
|
577 |
legend pos=north west,
|
|
578 |
legend cell align=left]
|
|
579 |
\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
|
|
580 |
\end{axis}
|
|
581 |
\end{tikzpicture}
|
|
582 |
\end{center}
|
|
583 |
|
|
584 |
\begin{center}
|
|
585 |
\begin{tikzpicture}
|
|
586 |
\begin{axis}[
|
|
587 |
xlabel={$n$},
|
|
588 |
x label style={at={(1.09,0.0)}},
|
|
589 |
ylabel={time in secs},
|
|
590 |
scaled x ticks=false,
|
|
591 |
xtick distance=2000000,
|
|
592 |
enlargelimits=false,
|
|
593 |
xmax=6400000,
|
|
594 |
ytick={0,10,...,30},
|
|
595 |
ymax=35,
|
|
596 |
axis lines=left,
|
|
597 |
width=7cm,
|
|
598 |
height=3.5cm,
|
|
599 |
legend entries={Simple Scala},
|
|
600 |
legend pos=north west,
|
|
601 |
legend cell align=left]
|
|
602 |
%\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data};
|
|
603 |
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
|
|
604 |
\end{axis}
|
|
605 |
\end{tikzpicture}
|
|
606 |
\end{center}
|
|
607 |
|
|
608 |
Regex: \bl{$(a^*)^* \cdot b$} \space\space\space\space\space\space
|
|
609 |
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
|
|
610 |
|
|
611 |
\end{frame}
|
|
612 |
|
15
|
613 |
|
|
614 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
16
|
615 |
\begin{frame}[t]
|
|
616 |
\frametitle{Optimization}
|
15
|
617 |
|
|
618 |
\begin{center}
|
16
|
619 |
\begin{tikzpicture}
|
|
620 |
\begin{semilogyaxis}[
|
|
621 |
xlabel={$n$},
|
|
622 |
x label style={at={(1.05,0.0)}},
|
|
623 |
ylabel={regex size},
|
|
624 |
enlargelimits=false,
|
|
625 |
xtick={1,4,...,30},
|
|
626 |
xmax=33,
|
|
627 |
%ytick={0, 10,...,100},
|
|
628 |
scaled ticks=false,
|
|
629 |
axis lines=left,
|
|
630 |
width=9cm,
|
|
631 |
height=4.5cm,
|
|
632 |
legend entries={Simple Scala},
|
|
633 |
legend pos= south east,
|
|
634 |
legend cell align=left
|
|
635 |
]
|
|
636 |
\addplot[black,mark=square*, mark options={fill=white}] table {re-usize.data};
|
|
637 |
%\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ssize.data};
|
|
638 |
%\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};
|
|
639 |
%\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
|
|
640 |
\end{semilogyaxis}
|
|
641 |
\end{tikzpicture}
|
|
642 |
\end{center}
|
|
643 |
Regex: \bl{$(a+aa)^* \cdot b$} \space\space\space\space\space\space
|
|
644 |
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}\\
|
|
645 |
\begin{itemize}
|
|
646 |
\item
|
|
647 |
Solution: Simplification%(Sulzmann \& Lu)
|
|
648 |
\begin{itemize}
|
|
649 |
\item $r+(r+s) = r+r+s = r+s$
|
|
650 |
\item $1 \cdot r= r$
|
|
651 |
\end{itemize}
|
|
652 |
\end{itemize}
|
|
653 |
\end{frame}
|
|
654 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
655 |
\begin{frame}[t]
|
|
656 |
\frametitle{Optimization}
|
15
|
657 |
|
16
|
658 |
Regex: \bl{$(a+aa)^* \cdot b$} \space\space\space\space\space\space
|
|
659 |
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
|
15
|
660 |
|
16
|
661 |
\begin{center}
|
|
662 |
\begin{tikzpicture}
|
|
663 |
\begin{axis}[
|
|
664 |
xlabel={$n$},
|
|
665 |
x label style={at={(1.05,0.0)}},
|
|
666 |
ylabel={regex size},
|
|
667 |
enlargelimits=false,
|
|
668 |
xtick={1,4,...,30},
|
|
669 |
xmax=33,
|
|
670 |
%ytick={0, 10,...,100},
|
|
671 |
scaled ticks=false,
|
|
672 |
axis lines=left,
|
|
673 |
width=9cm,
|
|
674 |
height=4.5cm,
|
|
675 |
legend entries={Simplification Applied},
|
|
676 |
legend pos= south east,
|
|
677 |
legend cell align=left
|
|
678 |
]
|
|
679 |
%\addplot[black,mark=square*, mark options={fill=white}] table {re-usize.data};
|
|
680 |
\addplot[black,mark=triangle*, mark options={fill=white}] table {re-ssize.data};
|
|
681 |
%\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};
|
|
682 |
%\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
|
15
|
683 |
\end{axis}
|
16
|
684 |
\end{tikzpicture}
|
|
685 |
\end{center}
|
15
|
686 |
\begin{itemize}
|
16
|
687 |
\item
|
|
688 |
Lexer? Not just matcher
|
|
689 |
\begin{itemize}
|
|
690 |
\item
|
|
691 |
Bit-Coded Regular Expression Parsing (Lasse Nielsen, Fritz Henglein 2011)
|
|
692 |
\item
|
|
693 |
Bit-Coded Regular Expression Parsing with Simplification (Sulzmann and Lu 2014)
|
15
|
694 |
\end{itemize}
|
|
695 |
|
16
|
696 |
\end{itemize}
|
|
697 |
\end{frame}
|
15
|
698 |
|
|
699 |
|
|
700 |
|
|
701 |
|
|
702 |
|
|
703 |
|
|
704 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
705 |
|
|
706 |
\begin{frame}[t]
|
16
|
707 |
\frametitle{Ongoing Work}
|
15
|
708 |
|
|
709 |
|
16
|
710 |
\begin{tikzpicture}
|
|
711 |
\begin{semilogyaxis}[
|
|
712 |
xlabel={$n$},
|
|
713 |
x label style={at={(1.05,0.0)}},
|
|
714 |
ylabel={regex size},
|
|
715 |
enlargelimits=false,
|
|
716 |
xtick={1,4,...,30},
|
|
717 |
xmax=33,
|
|
718 |
%ytick={0, 10,...,100},
|
|
719 |
scaled ticks=false,
|
|
720 |
axis lines=left,
|
|
721 |
width=9cm,
|
|
722 |
height=4.5cm,
|
|
723 |
legend entries={Naive, Simp},
|
|
724 |
legend pos= north west,
|
|
725 |
legend cell align=left
|
|
726 |
]
|
|
727 |
\addplot[black,mark=square*, mark options={fill=white}] table {re-usize.data};
|
|
728 |
\addplot[black,mark=triangle*, mark options={fill=white}] table {re-ssize.data};
|
|
729 |
%\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};
|
|
730 |
%\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
|
|
731 |
\end{semilogyaxis}
|
|
732 |
\end{tikzpicture}
|
15
|
733 |
|
|
734 |
\begin{itemize}
|
16
|
735 |
\item
|
|
736 |
Correctness of Sulzmann \& Lu's algorithm
|
15
|
737 |
|
16
|
738 |
\item
|
|
739 |
Size Bound $O(n^2t^2)$ of Dervatives (1995 Antimirov "Partial Derivative")
|
|
740 |
\item
|
|
741 |
More radical simplifications.
|
|
742 |
\item
|
|
743 |
Extend the algorithm to back-references.
|
|
744 |
\end{itemize}
|
15
|
745 |
\end{frame}
|
|
746 |
|
|
747 |
|
|
748 |
|
|
749 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
750 |
|
|
751 |
|
|
752 |
|
|
753 |
|
|
754 |
|
|
755 |
|
|
756 |
|
|
757 |
|
|
758 |
|
|
759 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
760 |
\begin{frame}[c]
|
|
761 |
\frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}
|
|
762 |
|
|
763 |
\mbox{}
|
|
764 |
\end{frame}
|
|
765 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
766 |
\end{document}
|
|
767 |
|
|
768 |
%%% Local Variables:
|
|
769 |
%%% mode: latex
|
|
770 |
%%% TeX-master: t
|
|
771 |
%%% End:
|
|
772 |
|