|
1 % !TEX program = xelatex |
|
2 \documentclass[dvipsnames,14pt,t,xelatex,xcolor={table}]{beamer} |
|
3 \usepackage{../slides} |
|
4 \usepackage{../graphicss} |
|
5 \usepackage{../langs} |
|
6 \usepackage{../data} |
|
7 \usetikzlibrary{cd} |
|
8 |
|
9 |
|
10 \usepackage{tcolorbox} |
|
11 \newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black} |
|
12 \newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1} |
|
13 \newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1} |
|
14 |
|
15 |
|
16 |
|
17 \hfuzz=220pt |
|
18 |
|
19 \lstset{language=Scala, |
|
20 style=mystyle, |
|
21 numbersep=0pt, |
|
22 numbers=none, |
|
23 xleftmargin=0mm} |
|
24 |
|
25 \pgfplotsset{compat=1.17} |
|
26 |
|
27 \newcommand{\bl}[1]{\textcolor{blue}{#1}} |
|
28 |
|
29 % beamer stuff |
|
30 \renewcommand{\slidecaption}{SSY, King's College London} |
|
31 |
|
32 |
|
33 |
|
34 \begin{document} |
|
35 |
|
36 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
37 \begin{frame}[t] |
|
38 \frametitle{% |
|
39 \begin{tabular}{@ {}c@ {}} |
|
40 \\[8mm] |
|
41 \LARGE Derivative-Based\\ |
|
42 \LARGE Regular Expression Matching\\[5mm] |
|
43 \normalsize\textcolor{gray}{Christian Urban, SSY Group} |
|
44 \end{tabular}} |
|
45 |
|
46 \end{frame} |
|
47 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
48 |
|
49 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
50 \begin{frame}[c] |
|
51 \frametitle{A Bit of Background} |
|
52 |
|
53 \begin{textblock}{10}(2,2.5) |
|
54 \includegraphics[scale=0.4]{../pics/phd-1.jpg} |
|
55 \end{textblock} |
|
56 |
|
57 \begin{textblock}{11}(1.7,10) |
|
58 \begin{bubble}[9.6cm] |
|
59 PhD thesis on a particular term-rewriting system:\medskip\\ |
|
60 Proved that all rewriting sequences are strongly normalising (terminate). |
|
61 \end{bubble} |
|
62 \end{textblock} |
|
63 |
|
64 \only<2->{% |
|
65 \begin{textblock}{3}(10,2.5) |
|
66 \begin{bubble}[3cm] |
|
67 \begin{tabular}{rcl} |
|
68 \bl{$t$} & \bl{::=} & \bl{$x$}\\ |
|
69 & \bl{|} & \bl{$\lambda x.\,t$}\\ |
|
70 & \bl{|} & \bl{$t_1\;t_2$}\\ |
|
71 & \bl{|} & \bl{...} |
|
72 \end{tabular} |
|
73 \end{bubble} |
|
74 \end{textblock}} |
|
75 |
|
76 \end{frame} |
|
77 |
|
78 |
|
79 |
|
80 |
|
81 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
82 |
|
83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
84 \begin{frame}[t] |
|
85 \frametitle{Quiz 1} |
|
86 \begin{bubble}[10cm]\it |
|
87 There are many, many regular expression libraries.\bigskip\\ |
|
88 |
|
89 \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the |
|
90 difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?} |
|
91 \end{bubble} |
|
92 |
|
93 \begin{textblock}{12}(2,9) |
|
94 \begin{tikzpicture} |
|
95 \draw(0,0) node[rotate=20]{linear}; |
|
96 \draw(3,0) node[rotate=-20]{\bl{$O(n \log n)$}}; |
|
97 \draw(3,-2) node[rotate=10]{quadratic}; |
|
98 \draw(6,0) node[rotate=20]{P}; |
|
99 \draw(7,0) node[rotate=-20]{NP}; |
|
100 \draw(7,-1) node[rotate=0]{\bl{$O(2^n)$}}; |
|
101 \end{tikzpicture} |
|
102 \end{textblock} |
|
103 |
|
104 \end{frame} |
|
105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
106 |
|
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
108 \begin{frame}[t] |
|
109 \frametitle{Quiz 2} |
|
110 |
|
111 \begin{textblock}{12}(2,2.5) |
|
112 A regular expression for (some) email addresses: |
|
113 \begin{center} |
|
114 ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\_\!\!\_\,.-]^+}}_{\textrm{name}}$)\bl{$\,@\,$} |
|
115 ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\,-]^+}}_{\textrm{domain}}$) \bl{$\,.\,$} |
|
116 ($\underbrace{\bl{[a\mbox{-}z\,.]^{\{2..5\}}}}_{\textrm{top-level domain}}$) |
|
117 \end{center} |
|
118 \end{textblock} |
|
119 |
|
120 \only<2>{ |
|
121 \begin{textblock}{10}(4,8) |
|
122 bounded regular expressions:\medskip\\ |
|
123 \begin{tabular}{ll} |
|
124 \bl{$r^{\{n\}}$} & exactly n-times \bl{$r$}\\ |
|
125 \bl{$r^{\{n..m\}}$} & between n and m-times\\ |
|
126 \bl{$r^{\{n..\}}$} & from n-times\\ |
|
127 \bl{$r^{\{..m\}}$} & upto m-times\\ |
|
128 \end{tabular}\smallskip\\ |
|
129 \footnotesize\hfill ${}^*$ in some applications the counters can be in the millions |
|
130 \end{textblock}} |
|
131 |
|
132 \end{frame} |
|
133 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
134 |
|
135 |
|
136 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
137 \begin{frame}[t] |
|
138 \frametitle{Quiz 2} |
|
139 |
|
140 |
|
141 \begin{textblock}{12}(2,2.5) |
|
142 \onslide<1->{Thompson construction: By recursion we are given two NFAs for $r_1$ and $r_2$.\medskip\\ |
|
143 \onslide<2->{For $r_1\cdot r_2$:}\bigskip} |
|
144 \end{textblock} |
|
145 |
|
146 \begin{textblock}{12}(2,6) |
|
147 \onslide<1>{ |
|
148 \begin{tikzpicture}[node distance=3mm, |
|
149 >=stealth',very thick, |
|
150 every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}] |
|
151 \node[state, initial] (Q_0) {$\mbox{}$}; |
|
152 \node[state, initial] (Q_01) [below=1mm of Q_0] {$\mbox{}$}; |
|
153 \node[state, initial] (Q_02) [above=1mm of Q_0] {$\mbox{}$}; |
|
154 \node (R_1) [right=of Q_0] {$\ldots$}; |
|
155 \node[state, accepting] (T_1) [right=of R_1] {$\mbox{}$}; |
|
156 \node[state, accepting] (T_2) [above=of T_1] {$\mbox{}$}; |
|
157 \node[state, accepting] (T_3) [below=of T_1] {$\mbox{}$}; |
|
158 |
|
159 \node (A_0) [right=2.5cm of T_1] {$\mbox{}$}; |
|
160 \node[state, initial] (A_01) [above=1mm of A_0] {$\mbox{}$}; |
|
161 \node[state, initial] (A_02) [below=1mm of A_0] {$\mbox{}$}; |
|
162 |
|
163 \node (b_1) [right=of A_0] {$\ldots$}; |
|
164 \node[state, accepting] (c_1) [right=of b_1] {$\mbox{}$}; |
|
165 \node[state, accepting] (c_2) [above=of c_1] {$\mbox{}$}; |
|
166 \node[state, accepting] (c_3) [below=of c_1] {$\mbox{}$}; |
|
167 \begin{pgfonlayer}{background} |
|
168 \node (1) [rounded corners, inner sep=1mm, thick, |
|
169 draw=black!60, fill=black!20, fit= (Q_0) (R_1) (T_1) (T_2) (T_3)] {}; |
|
170 \node (2) [rounded corners, inner sep=1mm, thick, |
|
171 draw=black!60, fill=black!20, fit= (A_0) (b_1) (c_1) (c_2) (c_3)] {}; |
|
172 \node [yshift=2mm] at (1.north) {$r_1$}; |
|
173 \node [yshift=2mm] at (2.north) {$r_2$}; |
|
174 \end{pgfonlayer} |
|
175 \end{tikzpicture}} |
|
176 \end{textblock} |
|
177 |
|
178 \begin{textblock}{12}(2,6) |
|
179 \onslide<2>{ |
|
180 \begin{tikzpicture}[node distance=3mm, |
|
181 >=stealth',very thick, |
|
182 every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},] |
|
183 \node[state, initial] (Q_0) {$\mbox{}$}; |
|
184 \node[state, initial] (Q_01) [below=1mm of Q_0] {$\mbox{}$}; |
|
185 \node[state, initial] (Q_02) [above=1mm of Q_0] {$\mbox{}$}; |
|
186 \node (r_1) [right=of Q_0] {$\ldots$}; |
|
187 \node[state] (t_1) [right=of r_1] {$\mbox{}$}; |
|
188 \node[state] (t_2) [above=of t_1] {$\mbox{}$}; |
|
189 \node[state] (t_3) [below=of t_1] {$\mbox{}$}; |
|
190 |
|
191 \node (A_0) [right=2.5cm of t_1] {$\mbox{}$}; |
|
192 \node[state] (A_01) [above=1mm of A_0] {$\mbox{}$}; |
|
193 \node[state] (A_02) [below=1mm of A_0] {$\mbox{}$}; |
|
194 |
|
195 \node (b_1) [right=of A_0] {$\ldots$}; |
|
196 \node[state, accepting] (c_1) [right=of b_1] {$\mbox{}$}; |
|
197 \node[state, accepting] (c_2) [above=of c_1] {$\mbox{}$}; |
|
198 \node[state, accepting] (c_3) [below=of c_1] {$\mbox{}$}; |
|
199 \path[->] (t_1) edge (A_01); |
|
200 \path[->] (t_2) edge node [above] {$\varepsilon$\footnotesize{}s} (A_01); |
|
201 \path[->] (t_3) edge (A_01); |
|
202 \path[->] (t_1) edge (A_02); |
|
203 \path[->] (t_2) edge (A_02); |
|
204 \path[->] (t_3) edge node [below] {$\varepsilon$\footnotesize{}s} (A_02); |
|
205 |
|
206 \begin{pgfonlayer}{background} |
|
207 \node (3) [rounded corners, inner sep=1mm, thick, |
|
208 draw=black!60, fill=black!20, fit= (Q_0) (c_1) (c_2) (c_3)] {}; |
|
209 \node [yshift=2mm] at (3.north) {$r_1\cdot r_2$}; |
|
210 \end{pgfonlayer} |
|
211 \end{tikzpicture}} |
|
212 \end{textblock} |
|
213 |
|
214 |
|
215 \end{frame} |
|
216 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
217 |
|
218 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
219 \begin{frame}[t] |
|
220 \frametitle{Quiz 2} |
|
221 |
|
222 \begin{textblock}{12}(2,2.5) |
|
223 \onslide<1->{Thompson construction: By recursion we have a NFA for $r$.\medskip\\ |
|
224 \onslide<2->{For $r^*$:\bigskip}} |
|
225 \end{textblock} |
|
226 |
|
227 \begin{textblock}{12}(4,6) |
|
228 \onslide<1>{ |
|
229 \begin{tikzpicture}[node distance=3mm, |
|
230 >=stealth',very thick, |
|
231 every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}, |
|
232 baseline=(current bounding box.north)] |
|
233 \node (2) {$\mbox{}$}; |
|
234 \node[state, initial] (4) [above=1mm of 2] {$\mbox{}$}; |
|
235 \node[state, initial] (5) [below=1mm of 2] {$\mbox{}$}; |
|
236 \node (a) [right=of 2] {$\ldots$}; |
|
237 \node[state, accepting] (a1) [right=of a] {$\mbox{}$}; |
|
238 \node[state, accepting] (a2) [above=of a1] {$\mbox{}$}; |
|
239 \node[state, accepting] (a3) [below=of a1] {$\mbox{}$}; |
|
240 \begin{pgfonlayer}{background} |
|
241 \node (1) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (2) (a1) (a2) (a3)] {}; |
|
242 \node [yshift=3mm] at (1.north) {$r$}; |
|
243 \end{pgfonlayer} |
|
244 \end{tikzpicture}} |
|
245 \end{textblock} |
|
246 |
|
247 \begin{textblock}{12}(2,6) |
|
248 \onslide<2->{ |
|
249 \begin{tikzpicture}[node distance=3mm, |
|
250 >=stealth',very thick, |
|
251 every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}, |
|
252 baseline=(current bounding box.north)] |
|
253 \node at (0,0) [state, initial,accepting] (1) {$\mbox{}$}; |
|
254 \node (2) [right=16mm of 1] {$\mbox{}$}; |
|
255 \node[state] (4) [above=1mm of 2] {$\mbox{}$}; |
|
256 \node[state] (5) [below=1mm of 2] {$\mbox{}$}; |
|
257 \node (a) [right=of 2] {$\ldots$}; |
|
258 \node[state] (a1) [right=of a] {$\mbox{}$}; |
|
259 \node[state] (a2) [above=of a1] {$\mbox{}$}; |
|
260 \node[state] (a3) [below=of a1] {$\mbox{}$}; |
|
261 \path[->] (1) edge node [below] {$\varepsilon$} (4); |
|
262 \path[->] (1) edge node [below] {$\varepsilon$} (5); |
|
263 \path[->] (a1) edge [bend left=45] node [below] {$\varepsilon$} (1); |
|
264 \path[->] (a2) edge [bend right] node [below] {$\varepsilon$} (1); |
|
265 \path[->] (a3) edge [bend left=45] node [below] {$\varepsilon$} (1); |
|
266 \begin{pgfonlayer}{background} |
|
267 \node (2) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3)] {}; |
|
268 \node [yshift=3mm] at (2.north) {$r^*$}; |
|
269 \end{pgfonlayer} |
|
270 \end{tikzpicture}} |
|
271 \end{textblock} |
|
272 |
|
273 \begin{textblock}{12}(2,12) |
|
274 \onslide<3->{ |
|
275 \begin{bubble}[9.5cm]\it |
|
276 Quiz:\smallskip\\ |
|
277 \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?} |
|
278 \end{bubble}} |
|
279 \end{textblock} |
|
280 |
|
281 \end{frame} |
|
282 |
|
283 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
284 |
|
285 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
286 \begin{frame}[t] |
|
287 \frametitle{Quiz 3} |
|
288 |
|
289 Hierarchy of Languages: |
|
290 |
|
291 \begin{center} |
|
292 \begin{tikzpicture} |
|
293 [rect/.style={draw=black!50, |
|
294 top color=white, |
|
295 bottom color=black!20, |
|
296 rectangle, |
|
297 very thick, |
|
298 rounded corners}, scale=1.2] |
|
299 |
|
300 \draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages}; |
|
301 \draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages}; |
|
302 \draw (0,-0.85) node [rect, text depth=17mm] {\;\;context sensitive languages\;\;}; |
|
303 \draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages}; |
|
304 \draw (0,-1.4) node [rect] {regular languages}; |
|
305 \end{tikzpicture} |
|
306 \end{center} |
|
307 |
|
308 \begin{textblock}{12}(2,11.5) |
|
309 \onslide<2->{ |
|
310 \begin{bubble}[9.5cm]\it |
|
311 Quiz:\smallskip\\ |
|
312 |
|
313 \textbf{Can we use standard parsing algorithms for matching / lexing\,?} |
|
314 Say CYK, LL, LR(k), PEG, \ldots |
|
315 \end{bubble}} |
|
316 \end{textblock} |
|
317 |
|
318 \end{frame} |
|
319 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
320 |
|
321 \defverbatim{\foo}{\footnotesize% |
|
322 \begin{verbatim} |
|
323 (?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false| |
|
324 null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s |
|
325 | -|~|!|{}|\|\||\+)*.*(?:.*=.*))) |
|
326 \end{verbatim} |
|
327 } |
|
328 |
|
329 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
330 \begin{frame}[t,fragile] |
|
331 \frametitle{Quiz 4} |
|
332 |
|
333 \begin{textblock}{12}(2,2.5) |
|
334 \begin{bubble}[9.5cm]\it |
|
335 Quiz:\smallskip\\ |
|
336 Do regular expressions have any security relevance\,? |
|
337 \end{bubble} |
|
338 \end{textblock} |
|
339 |
|
340 \only<2->{ |
|
341 \begin{textblock}{2}(1,6) |
|
342 \includegraphics[scale=0.8]{../pics/zeek.png} |
|
343 \includegraphics[scale=0.17]{../pics/snort.png} |
|
344 \end{textblock}} |
|
345 |
|
346 \only<3->{ |
|
347 \begin{textblock}{7}(7,5.6) |
|
348 \includegraphics[scale=0.14]{../pics/cloudflare.png}\\ |
|
349 \footnotesize |
|
350 It serves more web traffic than Twitter, Amazon, Apple, |
|
351 Instagram, Bing \& Wikipedia combined.\medskip\\ |
|
352 Web Application Firewall filters out |
|
353 SQL injection attacks, XSS attacks etc |
|
354 \end{textblock}} |
|
355 |
|
356 \only<3->{ |
|
357 \begin{textblock}{13}(4,12.3) |
|
358 \footnotesize |
|
359 \hspace{1.5cm}a global outage on 2 July 2019 (first one for six years) |
|
360 \color{blue}\foo\color{black} |
|
361 \end{textblock} |
|
362 } |
|
363 |
|
364 |
|
365 \end{frame} |
|
366 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
367 |
|
368 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
369 \begin{frame}[t] |
|
370 \frametitle{Quiz 3} |
|
371 |
|
372 \begin{textblock}{12}(2,2) |
|
373 \begin{bubble}[9.5cm]\it |
|
374 Quiz:\smallskip\\ |
|
375 |
|
376 \textbf{Can we use standard parsing algorithms for matching / lexing\,?} |
|
377 Say CYK, LL, LR(k), PEG, \ldots |
|
378 \end{bubble} |
|
379 \end{textblock} |
|
380 |
|
381 |
|
382 \begin{textblock}{12}(1,7) |
|
383 \textbf{POSIX lexing} |
|
384 |
|
385 \begin{tabular}{@ {}ll} |
|
386 Assume you have: & \bl{$r_{key} \dn \texttt{if} + \texttt{then} + \texttt{while} \ldots$}\\ |
|
387 & \bl{$r_{id\;} \,\dn \texttt{[a-z]} \cdot \texttt{[a-z0-9\_]}^*$}\medskip\\ |
|
388 |
|
389 Lexing a string with & \bl{$(r_{key} + r_{id})^*$}\medskip\\ |
|
390 |
|
391 What should be & \\ |
|
392 the result for: & \bl{\texttt{"iffoo"}} \;\;\; \bl{\texttt{"if"}}\\ |
|
393 \end{tabular} |
|
394 \end{textblock} |
|
395 |
|
396 \only<2->{ |
|
397 \begin{textblock}{13.5}(1.5,4) |
|
398 \begin{mybox3}{POSIX rules} |
|
399 \begin{itemize} |
|
400 \item \textbf{The Longest Match Rule:} The longest initial |
|
401 substring matched by any regular expression is taken as next token. |
|
402 \item \textbf{Priority Rule:} For a particular longest initial |
|
403 substring, the first (leftmost) regular expression that can match |
|
404 determines the token. |
|
405 \item \ldots |
|
406 \end{itemize} |
|
407 \begin{center} |
|
408 \bl{$(a + ab) \cdot (bc + c)$} \;\;and\;\; \bl{\texttt{"}$abc$\texttt{"}} |
|
409 \end{center} |
|
410 \end{mybox3} |
|
411 \end{textblock}} |
|
412 |
|
413 \end{frame} |
|
414 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
415 |
|
416 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
417 \begin{frame}[t] |
|
418 \frametitle{Quiz 1} |
|
419 \begin{bubble}[10cm]\it |
|
420 There are many, many regular expression libraries.\bigskip\\ |
|
421 |
|
422 \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the |
|
423 difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?} |
|
424 \end{bubble} |
|
425 |
|
426 \begin{textblock}{12}(1,9) |
|
427 \begin{tabular}{p{4cm}|p{4cm}|p{2cm}} |
|
428 atrociously slow (s't) & pretty lazy (s't) & \textcolor{gray}{OTBT}\medskip\\ |
|
429 Python, Ruby, Swift, Dart, JavaScript & Rust, Go, RE2 & \textcolor{gray}{Snort, .Net7}\\ |
|
430 \end{tabular} |
|
431 \end{textblock} |
|
432 |
|
433 \end{frame} |
|
434 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
435 |
|
436 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
437 \begin{frame}[t] |
|
438 |
|
439 \mbox{}\\[10mm] |
|
440 |
|
441 \begin{columns}[t,onlytextwidth] |
|
442 \begin{column}{.2\textwidth} |
|
443 \raisebox{-10mm}{ |
|
444 \begin{tikzpicture} |
|
445 \begin{axis}[ |
|
446 xlabel={\bl{$n$} \pcode{a}s}, |
|
447 %%x label style={at={(1.05,0.0)}}, |
|
448 ylabel={time in secs}, |
|
449 enlargelimits=false, |
|
450 xtick={0,10,...,30}, |
|
451 xmax=35, |
|
452 ymax=35, |
|
453 ytick={0,5,...,30}, |
|
454 scaled ticks=false, |
|
455 axis lines=left, |
|
456 width=5cm, |
|
457 height=5cm, |
|
458 legend entries={Python,JavaScript,Swift,Dart}, |
|
459 legend style={font=\small,at={(0.5,-0.39)},anchor=north}, |
|
460 legend cell align=left] |
|
461 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; |
|
462 \addplot[red,mark=*, mark options={fill=white}] table {re-js.data}; |
|
463 \addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data}; |
|
464 \addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data}; |
|
465 \end{axis} |
|
466 \end{tikzpicture}} |
|
467 \end{column} |
|
468 |
|
469 \begin{column}{.2\textwidth} |
|
470 \begin{tikzpicture} |
|
471 \begin{axis}[ |
|
472 xlabel={\bl{$n$} \pcode{a}s}, |
|
473 %ylabel={time in secs}, |
|
474 enlargelimits=false, |
|
475 xtick={0,10,...,30}, |
|
476 xmax=35, |
|
477 ymax=35, |
|
478 ytick={0,5,...,30}, |
|
479 scaled ticks=false, |
|
480 axis lines=left, |
|
481 width=5cm, |
|
482 height=5cm, |
|
483 legend entries={Python,Ruby}, |
|
484 legend style={font=\small,at={(0.5,-0.39)},anchor=north}, |
|
485 legend cell align=left] |
|
486 \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data}; |
|
487 \addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data}; |
|
488 \end{axis} |
|
489 \end{tikzpicture} |
|
490 \end{column} |
|
491 \end{columns} |
|
492 |
|
493 \begin{textblock}{4}(9,1.7) |
|
494 \textbf{\bl{\texttt{(a?)\boldmath$^{\{n\}}\cdot$(a)\boldmath$^{\{n\}}$}}} |
|
495 \end{textblock} |
|
496 |
|
497 \begin{textblock}{4}(4,1.7) |
|
498 \textbf{\bl{\texttt{(a*)*$\cdot$b}}} |
|
499 \end{textblock} |
|
500 |
|
501 \begin{textblock}{3.4}(0.3,10) |
|
502 \small{}\textbf{matching with strings} |
|
503 \textbf{\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}} |
|
504 \end{textblock} |
|
505 |
|
506 \end{frame} |
|
507 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
508 |
|
509 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
510 \begin{frame}[t] |
|
511 \frametitle{Quiz 1} |
|
512 \begin{bubble}[10cm]\it |
|
513 \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the |
|
514 difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?} |
|
515 \end{bubble} |
|
516 |
|
517 \begin{textblock}{12}(1.7,7) |
|
518 For \textbf{Perl-style} regular expression matchers (say Python, JavaScript, etc), the |
|
519 answer is: |
|
520 \only<2->{\begin{center} |
|
521 \fontspec{Hoefler Text Black}\textcolor{ProcessBlue}{\huge{}NP} |
|
522 \end{center}} |
|
523 \end{textblock} |
|
524 |
|
525 \begin{textblock}{12}(1.7,12) |
|
526 \only<3->{Backreferences: \bl{(\ldots)$\,\ldots{}\backslash{}n$}} |
|
527 \end{textblock} |
|
528 \end{frame} |
|
529 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
530 |
|
531 |
|
532 |
|
533 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
534 \begin{frame} |
|
535 \frametitle{Quiz 2} |
|
536 |
|
537 \begin{textblock}{12}(2,2) |
|
538 \onslide<1->{ |
|
539 \begin{bubble}[9.5cm]\it |
|
540 Quiz:\smallskip\\ |
|
541 \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?} |
|
542 \end{bubble}} |
|
543 \end{textblock} |
|
544 |
|
545 \only<2->{ |
|
546 \begin{textblock}{12}(1,6) |
|
547 \begin{tabular}{ll} |
|
548 Google's RE2 lib & $\Rightarrow$ \bl{$a^{\{1001\}}$} is too big\\ |
|
549 \onslide<3->{Rust Regex lib} & \onslide<5->{$\Rightarrow$ \bl{$a^{\{1000\}\{100\}\{5\}}$} too big}\\ |
|
550 & \onslide<6->{$\Rightarrow$ \bl{$a^{\{0\}\{4294967295\}}$} ?} |
|
551 \end{tabular} |
|
552 \end{textblock}} |
|
553 |
|
554 \only<4>{ |
|
555 \begin{textblock}{13.5}(1.5,4) |
|
556 \begin{mybox3}{From Rust's Regex Description}\it |
|
557 ``\ldots [the] syntax is similar to Perl-style regular expressions, but lacks a few features like look |
|
558 around and backreferences. In exchange, all searches execute in linear time with respect |
|
559 to the size of the regular expression and search text. \ldots'' |
|
560 \end{mybox3} |
|
561 \end{textblock}} |
|
562 |
|
563 |
|
564 \end{frame} |
|
565 |
|
566 |
|
567 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
568 \begin{frame}[t] |
|
569 \frametitle{Regular Expressions} |
|
570 |
|
571 \begin{textblock}{6}(2,5) |
|
572 \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} |
|
573 \bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\ |
|
574 & \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} \\ |
|
575 & \bl{$\mid$} & \bl{$c, d, \ldots$} & characters\\ |
|
576 & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ |
|
577 & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ |
|
578 & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\ |
|
579 \end{tabular} |
|
580 \end{textblock} |
|
581 |
|
582 \end{frame} |
|
583 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
584 |
|
585 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
586 \begin{frame}[c] |
|
587 \frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}} |
|
588 |
|
589 \large |
|
590 If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular |
|
591 expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip |
|
592 |
|
593 \small |
|
594 \bl{$\der\,c\,r$} gives the answer, Brzozowski 1964\medskip\\ |
|
595 (lost in the sands of time, re-appeared in 2009) |
|
596 \end{frame} |
|
597 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
598 |
|
599 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
600 \begin{frame}[c] |
|
601 \frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}} |
|
602 |
|
603 \begin{center} |
|
604 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} |
|
605 \bl{$\der\, c\, (\ZERO)$} & \bl{$\dn$} & \bl{$\ZERO$} & \\ |
|
606 \bl{$\der\, c\, (\ONE)$} & \bl{$\dn$} & \bl{$\ZERO$} & \\ |
|
607 \bl{$\der\, c\, (d)$} & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\ |
|
608 \bl{$\der\, c\, (r_1 + r_2)$} & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\ |
|
609 \bl{$\der\, c\, (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{if $nullable (r_1)$}\\ |
|
610 & & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\ |
|
611 & & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\ |
|
612 \bl{$\der\, c\, (r^*)$} & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\medskip\bigskip\\ |
|
613 \end{tabular} |
|
614 \end{center} |
|
615 |
|
616 \end{frame} |
|
617 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
618 |
|
619 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
620 \begin{frame}[t] |
|
621 \frametitle{\begin{tabular}{l}Brzozowski matcher\end{tabular}} |
|
622 |
|
623 Does \bl{$r_1$} match \bl{"$abc$"}? |
|
624 \begin{center} |
|
625 \begin{tabular}{@{}rl@{}l@{}} |
|
626 Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\ |
|
627 Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\ |
|
628 Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\ |
|
629 Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\ |
|
630 & test whether \bl{$r_4$} can recognise\\ |
|
631 & the empty string\medskip\\ |
|
632 Output: & result of the test\\ |
|
633 & $\Rightarrow \bl{\textit{true}} \,\text{or}\, |
|
634 \bl{\textit{false}}$\\ |
|
635 \end{tabular} |
|
636 \end{center} |
|
637 |
|
638 |
|
639 \end{frame} |
|
640 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
641 |
|
642 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
643 \begin{frame}[c] |
|
644 \frametitle{\mbox{Nullable}} |
|
645 |
|
646 |
|
647 \ldots{}whether a regular expression can match the empty string: |
|
648 \begin{center} |
|
649 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} |
|
650 \bl{$nullable(\ZERO)$} & \bl{$\dn$} & \bl{\textit{false}}\\ |
|
651 \bl{$nullable(\ONE)$} & \bl{$\dn$} & \bl{\textit{true}}\\ |
|
652 \bl{$nullable (c)$} & \bl{$\dn$} & \bl{\textit{false}}\\ |
|
653 \bl{$nullable (r_1 + r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\ |
|
654 \bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\ |
|
655 \bl{$nullable (r^*)$} & \bl{$\dn$} & \bl{\textit{true}}\\ |
|
656 \end{tabular} |
|
657 \end{center} |
|
658 |
|
659 \end{frame} |
|
660 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
661 |
|
662 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
663 \begin{frame}[t] |
|
664 \frametitle{\begin{tabular}{l}Extensions\end{tabular}} |
|
665 |
|
666 \begin{center} |
|
667 \begin{tabular}{@ {\hspace{-4mm}}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} |
|
668 \bl{$\der\, c\, (r^{\{n\}})$} & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\ |
|
669 \bl{$\der\, c\, (r^{\{..n\}})$} & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\ |
|
670 \bl{$\der\, c\, (r^{\{n..\}})$} & \bl{$\dn$} & \bl{$if\;n=0\;then\;(der\,c\,r)\cdot r^*$} \\ |
|
671 & & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\ |
|
672 \bl{$\der\, c\, (r^{\{n..m\}})$} & \bl{$\dn$} & \bl{$if\;n=0 \wedge m=0\;then\;\ZERO\;else$} & \\ |
|
673 & & \bl{$if \;n=0 \wedge 0<m\; then\;(der\,c\,r)\cdot r^{\{..m-1\}}$} & \\ |
|
674 & & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1..m-1\}}$} & \\ |
|
675 \bl{$\der\, c\, (\neg{}r)$} & \bl{$\dn$} & \bl{$\neg(der\,c\,r)$}\\ |
|
676 \end{tabular} |
|
677 \end{center} |
|
678 |
|
679 also works for lookarounds, various anchors, etc, but \textbf{not} for backreferences |
|
680 |
|
681 \begin{textblock}{3}(11,13) |
|
682 \onslide<2>{ |
|
683 \begin{bubble}[3cm] |
|
684 \bl{$a^{\{0\}\{4294967295\}}$} |
|
685 \end{bubble}} |
|
686 \end{textblock} |
|
687 |
|
688 \end{frame} |
|
689 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
690 |
|
691 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
692 \begin{frame}[t] |
|
693 \frametitle{\mbox{Problems with Ders}} |
|
694 |
|
695 \textcolor{blue}{ |
|
696 \small |
|
697 \def\ll{\stackrel{der\,a}{\longrightarrow}} |
|
698 \begin{center} |
|
699 \begin{tabular}{@{\hspace{-5mm}}rll} |
|
700 $(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\ |
|
701 & $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\ |
|
702 & $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\ |
|
703 & & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\ |
|
704 & $\ll$ & \ldots\\ \hspace{15mm} |
|
705 \end{tabular} |
|
706 \end{center}} |
|
707 |
|
708 \begin{textblock}{13.5}(1.5,12) |
|
709 (regular expressions of sizes 98, 169, 283, 468, 767, \ldots) |
|
710 \end{textblock} |
|
711 |
|
712 \end{frame} |
|
713 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
714 |
|
715 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
716 \begin{frame}[c] |
|
717 \frametitle{\mbox{Conclusion}} |
|
718 |
|
719 \begin{itemize} |
|
720 \item The beauty is that this only involves functional programs that can be conveniently reasoned about in theorem provers |
|
721 \item POSIX lexing can be done via an extension by Sulzmann and Lu, 2014 (our current work)\medskip\pause |
|
722 \item How surprising that one can still do new work on regular expressions. |
|
723 \end{itemize} |
|
724 |
|
725 \end{frame} |
|
726 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
727 |
|
728 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
729 |
|
730 \begin{frame}<1-10> |
|
731 \end{frame} |
|
732 |
|
733 |
|
734 |
|
735 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
736 \end{document} |
|
737 |
|
738 %%% Local Variables: |
|
739 %%% mode: latex |
|
740 %%% TeX-master: t |
|
741 %%% End: |
|
742 |