diff -r e32802acf952 -r eef6a56c185a slides/ssy.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/ssy.tex Wed Jul 05 16:12:40 2023 +0100 @@ -0,0 +1,742 @@ +% !TEX program = xelatex +\documentclass[dvipsnames,14pt,t,xelatex,xcolor={table}]{beamer} +\usepackage{../slides} +\usepackage{../graphicss} +\usepackage{../langs} +\usepackage{../data} +\usetikzlibrary{cd} + + +\usepackage{tcolorbox} +\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black} +\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1} +\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1} + + + +\hfuzz=220pt + +\lstset{language=Scala, + style=mystyle, + numbersep=0pt, + numbers=none, + xleftmargin=0mm} + +\pgfplotsset{compat=1.17} + +\newcommand{\bl}[1]{\textcolor{blue}{#1}} + +% beamer stuff +\renewcommand{\slidecaption}{SSY, King's College London} + + + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[8mm] + \LARGE Derivative-Based\\ + \LARGE Regular Expression Matching\\[5mm] + \normalsize\textcolor{gray}{Christian Urban, SSY Group} + \end{tabular}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{A Bit of Background} + +\begin{textblock}{10}(2,2.5) +\includegraphics[scale=0.4]{../pics/phd-1.jpg} +\end{textblock} + +\begin{textblock}{11}(1.7,10) +\begin{bubble}[9.6cm] + PhD thesis on a particular term-rewriting system:\medskip\\ + Proved that all rewriting sequences are strongly normalising (terminate). +\end{bubble} +\end{textblock} + +\only<2->{% +\begin{textblock}{3}(10,2.5) +\begin{bubble}[3cm] +\begin{tabular}{rcl} + \bl{$t$} & \bl{::=} & \bl{$x$}\\ + & \bl{|} & \bl{$\lambda x.\,t$}\\ + & \bl{|} & \bl{$t_1\;t_2$}\\ + & \bl{|} & \bl{...} +\end{tabular} +\end{bubble} +\end{textblock}} + +\end{frame} + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Quiz 1} +\begin{bubble}[10cm]\it + There are many, many regular expression libraries.\bigskip\\ + + \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the + difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?} +\end{bubble} + +\begin{textblock}{12}(2,9) +\begin{tikzpicture} + \draw(0,0) node[rotate=20]{linear}; + \draw(3,0) node[rotate=-20]{\bl{$O(n \log n)$}}; + \draw(3,-2) node[rotate=10]{quadratic}; + \draw(6,0) node[rotate=20]{P}; + \draw(7,0) node[rotate=-20]{NP}; + \draw(7,-1) node[rotate=0]{\bl{$O(2^n)$}}; +\end{tikzpicture} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Quiz 2} + +\begin{textblock}{12}(2,2.5) +A regular expression for (some) email addresses: +\begin{center} + ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\_\!\!\_\,.-]^+}}_{\textrm{name}}$)\bl{$\,@\,$} + ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\,-]^+}}_{\textrm{domain}}$) \bl{$\,.\,$} + ($\underbrace{\bl{[a\mbox{-}z\,.]^{\{2..5\}}}}_{\textrm{top-level domain}}$) +\end{center} +\end{textblock} + +\only<2>{ +\begin{textblock}{10}(4,8) +bounded regular expressions:\medskip\\ +\begin{tabular}{ll} + \bl{$r^{\{n\}}$} & exactly n-times \bl{$r$}\\ + \bl{$r^{\{n..m\}}$} & between n and m-times\\ + \bl{$r^{\{n..\}}$} & from n-times\\ + \bl{$r^{\{..m\}}$} & upto m-times\\ +\end{tabular}\smallskip\\ +\footnotesize\hfill ${}^*$ in some applications the counters can be in the millions +\end{textblock}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Quiz 2} + + +\begin{textblock}{12}(2,2.5) + \onslide<1->{Thompson construction: By recursion we are given two NFAs for $r_1$ and $r_2$.\medskip\\ + \onslide<2->{For $r_1\cdot r_2$:}\bigskip} +\end{textblock} + +\begin{textblock}{12}(2,6) +\onslide<1>{ +\begin{tikzpicture}[node distance=3mm, + >=stealth',very thick, + every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}] +\node[state, initial] (Q_0) {$\mbox{}$}; +\node[state, initial] (Q_01) [below=1mm of Q_0] {$\mbox{}$}; +\node[state, initial] (Q_02) [above=1mm of Q_0] {$\mbox{}$}; +\node (R_1) [right=of Q_0] {$\ldots$}; +\node[state, accepting] (T_1) [right=of R_1] {$\mbox{}$}; +\node[state, accepting] (T_2) [above=of T_1] {$\mbox{}$}; +\node[state, accepting] (T_3) [below=of T_1] {$\mbox{}$}; + +\node (A_0) [right=2.5cm of T_1] {$\mbox{}$}; +\node[state, initial] (A_01) [above=1mm of A_0] {$\mbox{}$}; +\node[state, initial] (A_02) [below=1mm of A_0] {$\mbox{}$}; + +\node (b_1) [right=of A_0] {$\ldots$}; +\node[state, accepting] (c_1) [right=of b_1] {$\mbox{}$}; +\node[state, accepting] (c_2) [above=of c_1] {$\mbox{}$}; +\node[state, accepting] (c_3) [below=of c_1] {$\mbox{}$}; +\begin{pgfonlayer}{background} + \node (1) [rounded corners, inner sep=1mm, thick, + draw=black!60, fill=black!20, fit= (Q_0) (R_1) (T_1) (T_2) (T_3)] {}; + \node (2) [rounded corners, inner sep=1mm, thick, + draw=black!60, fill=black!20, fit= (A_0) (b_1) (c_1) (c_2) (c_3)] {}; +\node [yshift=2mm] at (1.north) {$r_1$}; +\node [yshift=2mm] at (2.north) {$r_2$}; +\end{pgfonlayer} +\end{tikzpicture}} +\end{textblock} + +\begin{textblock}{12}(2,6) +\onslide<2>{ +\begin{tikzpicture}[node distance=3mm, + >=stealth',very thick, + every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},] +\node[state, initial] (Q_0) {$\mbox{}$}; +\node[state, initial] (Q_01) [below=1mm of Q_0] {$\mbox{}$}; +\node[state, initial] (Q_02) [above=1mm of Q_0] {$\mbox{}$}; +\node (r_1) [right=of Q_0] {$\ldots$}; +\node[state] (t_1) [right=of r_1] {$\mbox{}$}; +\node[state] (t_2) [above=of t_1] {$\mbox{}$}; +\node[state] (t_3) [below=of t_1] {$\mbox{}$}; + +\node (A_0) [right=2.5cm of t_1] {$\mbox{}$}; +\node[state] (A_01) [above=1mm of A_0] {$\mbox{}$}; +\node[state] (A_02) [below=1mm of A_0] {$\mbox{}$}; + +\node (b_1) [right=of A_0] {$\ldots$}; +\node[state, accepting] (c_1) [right=of b_1] {$\mbox{}$}; +\node[state, accepting] (c_2) [above=of c_1] {$\mbox{}$}; +\node[state, accepting] (c_3) [below=of c_1] {$\mbox{}$}; +\path[->] (t_1) edge (A_01); +\path[->] (t_2) edge node [above] {$\varepsilon$\footnotesize{}s} (A_01); +\path[->] (t_3) edge (A_01); +\path[->] (t_1) edge (A_02); +\path[->] (t_2) edge (A_02); +\path[->] (t_3) edge node [below] {$\varepsilon$\footnotesize{}s} (A_02); + +\begin{pgfonlayer}{background} + \node (3) [rounded corners, inner sep=1mm, thick, + draw=black!60, fill=black!20, fit= (Q_0) (c_1) (c_2) (c_3)] {}; +\node [yshift=2mm] at (3.north) {$r_1\cdot r_2$}; +\end{pgfonlayer} +\end{tikzpicture}} +\end{textblock} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Quiz 2} + +\begin{textblock}{12}(2,2.5) + \onslide<1->{Thompson construction: By recursion we have a NFA for $r$.\medskip\\ + \onslide<2->{For $r^*$:\bigskip}} +\end{textblock} + +\begin{textblock}{12}(4,6) +\onslide<1>{ +\begin{tikzpicture}[node distance=3mm, + >=stealth',very thick, + every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}, + baseline=(current bounding box.north)] +\node (2) {$\mbox{}$}; +\node[state, initial] (4) [above=1mm of 2] {$\mbox{}$}; +\node[state, initial] (5) [below=1mm of 2] {$\mbox{}$}; +\node (a) [right=of 2] {$\ldots$}; +\node[state, accepting] (a1) [right=of a] {$\mbox{}$}; +\node[state, accepting] (a2) [above=of a1] {$\mbox{}$}; +\node[state, accepting] (a3) [below=of a1] {$\mbox{}$}; +\begin{pgfonlayer}{background} +\node (1) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (2) (a1) (a2) (a3)] {}; +\node [yshift=3mm] at (1.north) {$r$}; +\end{pgfonlayer} +\end{tikzpicture}} +\end{textblock} + +\begin{textblock}{12}(2,6) +\onslide<2->{ +\begin{tikzpicture}[node distance=3mm, + >=stealth',very thick, + every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}, + baseline=(current bounding box.north)] +\node at (0,0) [state, initial,accepting] (1) {$\mbox{}$}; +\node (2) [right=16mm of 1] {$\mbox{}$}; +\node[state] (4) [above=1mm of 2] {$\mbox{}$}; +\node[state] (5) [below=1mm of 2] {$\mbox{}$}; +\node (a) [right=of 2] {$\ldots$}; +\node[state] (a1) [right=of a] {$\mbox{}$}; +\node[state] (a2) [above=of a1] {$\mbox{}$}; +\node[state] (a3) [below=of a1] {$\mbox{}$}; +\path[->] (1) edge node [below] {$\varepsilon$} (4); +\path[->] (1) edge node [below] {$\varepsilon$} (5); +\path[->] (a1) edge [bend left=45] node [below] {$\varepsilon$} (1); +\path[->] (a2) edge [bend right] node [below] {$\varepsilon$} (1); +\path[->] (a3) edge [bend left=45] node [below] {$\varepsilon$} (1); +\begin{pgfonlayer}{background} +\node (2) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3)] {}; +\node [yshift=3mm] at (2.north) {$r^*$}; +\end{pgfonlayer} +\end{tikzpicture}} +\end{textblock} + +\begin{textblock}{12}(2,12) +\onslide<3->{ + \begin{bubble}[9.5cm]\it + Quiz:\smallskip\\ + \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?} +\end{bubble}} +\end{textblock} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] + \frametitle{Quiz 3} + +Hierarchy of Languages: + +\begin{center} +\begin{tikzpicture} +[rect/.style={draw=black!50, + top color=white, + bottom color=black!20, + rectangle, + very thick, + rounded corners}, scale=1.2] + +\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages}; +\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages}; +\draw (0,-0.85) node [rect, text depth=17mm] {\;\;context sensitive languages\;\;}; +\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages}; +\draw (0,-1.4) node [rect] {regular languages}; +\end{tikzpicture} +\end{center} + +\begin{textblock}{12}(2,11.5) +\onslide<2->{ + \begin{bubble}[9.5cm]\it + Quiz:\smallskip\\ + + \textbf{Can we use standard parsing algorithms for matching / lexing\,?} + Say CYK, LL, LR(k), PEG, \ldots +\end{bubble}} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\defverbatim{\foo}{\footnotesize% +\begin{verbatim} +(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false| +null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s +| -|~|!|{}|\|\||\+)*.*(?:.*=.*))) +\end{verbatim} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t,fragile] +\frametitle{Quiz 4} + +\begin{textblock}{12}(2,2.5) +\begin{bubble}[9.5cm]\it + Quiz:\smallskip\\ + Do regular expressions have any security relevance\,? +\end{bubble} +\end{textblock} + +\only<2->{ +\begin{textblock}{2}(1,6) +\includegraphics[scale=0.8]{../pics/zeek.png} +\includegraphics[scale=0.17]{../pics/snort.png} +\end{textblock}} + +\only<3->{ +\begin{textblock}{7}(7,5.6) + \includegraphics[scale=0.14]{../pics/cloudflare.png}\\ + \footnotesize + It serves more web traffic than Twitter, Amazon, Apple, + Instagram, Bing \& Wikipedia combined.\medskip\\ + Web Application Firewall filters out + SQL injection attacks, XSS attacks etc +\end{textblock}} + +\only<3->{ + \begin{textblock}{13}(4,12.3) + \footnotesize + \hspace{1.5cm}a global outage on 2 July 2019 (first one for six years) + \color{blue}\foo\color{black} + \end{textblock} +} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] + \frametitle{Quiz 3} + +\begin{textblock}{12}(2,2) + \begin{bubble}[9.5cm]\it + Quiz:\smallskip\\ + + \textbf{Can we use standard parsing algorithms for matching / lexing\,?} + Say CYK, LL, LR(k), PEG, \ldots +\end{bubble} +\end{textblock} + + +\begin{textblock}{12}(1,7) +\textbf{POSIX lexing} + +\begin{tabular}{@ {}ll} + Assume you have: & \bl{$r_{key} \dn \texttt{if} + \texttt{then} + \texttt{while} \ldots$}\\ + & \bl{$r_{id\;} \,\dn \texttt{[a-z]} \cdot \texttt{[a-z0-9\_]}^*$}\medskip\\ + + Lexing a string with & \bl{$(r_{key} + r_{id})^*$}\medskip\\ + + What should be & \\ + the result for: & \bl{\texttt{"iffoo"}} \;\;\; \bl{\texttt{"if"}}\\ +\end{tabular} +\end{textblock} + +\only<2->{ +\begin{textblock}{13.5}(1.5,4) + \begin{mybox3}{POSIX rules} + \begin{itemize} + \item \textbf{The Longest Match Rule:} The longest initial + substring matched by any regular expression is taken as next token. + \item \textbf{Priority Rule:} For a particular longest initial + substring, the first (leftmost) regular expression that can match + determines the token. + \item \ldots + \end{itemize} + \begin{center} + \bl{$(a + ab) \cdot (bc + c)$} \;\;and\;\; \bl{\texttt{"}$abc$\texttt{"}} + \end{center} + \end{mybox3} +\end{textblock}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Quiz 1} +\begin{bubble}[10cm]\it + There are many, many regular expression libraries.\bigskip\\ + + \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the + difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?} +\end{bubble} + +\begin{textblock}{12}(1,9) + \begin{tabular}{p{4cm}|p{4cm}|p{2cm}} + atrociously slow (s't) & pretty lazy (s't) & \textcolor{gray}{OTBT}\medskip\\ + Python, Ruby, Swift, Dart, JavaScript & Rust, Go, RE2 & \textcolor{gray}{Snort, .Net7}\\ + \end{tabular} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] + +\mbox{}\\[10mm] + +\begin{columns}[t,onlytextwidth] +\begin{column}{.2\textwidth} +\raisebox{-10mm}{ +\begin{tikzpicture} + \begin{axis}[ + xlabel={\bl{$n$} \pcode{a}s}, + %%x label style={at={(1.05,0.0)}}, + ylabel={time in secs}, + enlargelimits=false, + xtick={0,10,...,30}, + xmax=35, + ymax=35, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=5cm, + height=5cm, + legend entries={Python,JavaScript,Swift,Dart}, + legend style={font=\small,at={(0.5,-0.39)},anchor=north}, + legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; +\addplot[red,mark=*, mark options={fill=white}] table {re-js.data}; +\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data}; +\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data}; +\end{axis} +\end{tikzpicture}} +\end{column} + +\begin{column}{.2\textwidth} +\begin{tikzpicture} + \begin{axis}[ + xlabel={\bl{$n$} \pcode{a}s}, + %ylabel={time in secs}, + enlargelimits=false, + xtick={0,10,...,30}, + xmax=35, + ymax=35, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=5cm, + height=5cm, + legend entries={Python,Ruby}, + legend style={font=\small,at={(0.5,-0.39)},anchor=north}, + legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data}; +\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data}; +\end{axis} +\end{tikzpicture} +\end{column} +\end{columns} + +\begin{textblock}{4}(9,1.7) +\textbf{\bl{\texttt{(a?)\boldmath$^{\{n\}}\cdot$(a)\boldmath$^{\{n\}}$}}} +\end{textblock} + +\begin{textblock}{4}(4,1.7) +\textbf{\bl{\texttt{(a*)*$\cdot$b}}} +\end{textblock} + +\begin{textblock}{3.4}(0.3,10) +\small{}\textbf{matching with strings} +\textbf{\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Quiz 1} +\begin{bubble}[10cm]\it + \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the + difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?} +\end{bubble} + +\begin{textblock}{12}(1.7,7) +For \textbf{Perl-style} regular expression matchers (say Python, JavaScript, etc), the +answer is: +\only<2->{\begin{center} +\fontspec{Hoefler Text Black}\textcolor{ProcessBlue}{\huge{}NP} +\end{center}} +\end{textblock} + +\begin{textblock}{12}(1.7,12) +\only<3->{Backreferences: \bl{(\ldots)$\,\ldots{}\backslash{}n$}} +\end{textblock} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame} +\frametitle{Quiz 2} + +\begin{textblock}{12}(2,2) +\onslide<1->{ + \begin{bubble}[9.5cm]\it + Quiz:\smallskip\\ + \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?} +\end{bubble}} +\end{textblock} + +\only<2->{ +\begin{textblock}{12}(1,6) +\begin{tabular}{ll} + Google's RE2 lib & $\Rightarrow$ \bl{$a^{\{1001\}}$} is too big\\ + \onslide<3->{Rust Regex lib} & \onslide<5->{$\Rightarrow$ \bl{$a^{\{1000\}\{100\}\{5\}}$} too big}\\ + & \onslide<6->{$\Rightarrow$ \bl{$a^{\{0\}\{4294967295\}}$} ?} +\end{tabular} +\end{textblock}} + +\only<4>{ +\begin{textblock}{13.5}(1.5,4) + \begin{mybox3}{From Rust's Regex Description}\it + ``\ldots [the] syntax is similar to Perl-style regular expressions, but lacks a few features like look + around and backreferences. In exchange, all searches execute in linear time with respect + to the size of the regular expression and search text. \ldots'' + \end{mybox3} +\end{textblock}} + + +\end{frame} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Regular Expressions} + +\begin{textblock}{6}(2,5) + \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} + \bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\ + & \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} \\ + & \bl{$\mid$} & \bl{$c, d, \ldots$} & characters\\ + & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ + & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\ + \end{tabular} + \end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}} + +\large +If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular +expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip + +\small +\bl{$\der\,c\,r$} gives the answer, Brzozowski 1964\medskip\\ +(lost in the sands of time, re-appeared in 2009) +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} + \bl{$\der\, c\, (\ZERO)$} & \bl{$\dn$} & \bl{$\ZERO$} & \\ + \bl{$\der\, c\, (\ONE)$} & \bl{$\dn$} & \bl{$\ZERO$} & \\ + \bl{$\der\, c\, (d)$} & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\ + \bl{$\der\, c\, (r_1 + r_2)$} & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\ + \bl{$\der\, c\, (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{if $nullable (r_1)$}\\ + & & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\ + & & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\ + \bl{$\der\, c\, (r^*)$} & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\medskip\bigskip\\ + \end{tabular} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{\begin{tabular}{l}Brzozowski matcher\end{tabular}} + +Does \bl{$r_1$} match \bl{"$abc$"}? +\begin{center} +\begin{tabular}{@{}rl@{}l@{}} +Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\ +Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\ +Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\ +Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\ + & test whether \bl{$r_4$} can recognise\\ + & the empty string\medskip\\ +Output: & result of the test\\ + & $\Rightarrow \bl{\textit{true}} \,\text{or}\, + \bl{\textit{false}}$\\ +\end{tabular} +\end{center} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{\mbox{Nullable}} + + +\ldots{}whether a regular expression can match the empty string: +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} +\bl{$nullable(\ZERO)$} & \bl{$\dn$} & \bl{\textit{false}}\\ +\bl{$nullable(\ONE)$} & \bl{$\dn$} & \bl{\textit{true}}\\ +\bl{$nullable (c)$} & \bl{$\dn$} & \bl{\textit{false}}\\ +\bl{$nullable (r_1 + r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\ +\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\ +\bl{$nullable (r^*)$} & \bl{$\dn$} & \bl{\textit{true}}\\ +\end{tabular} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{\begin{tabular}{l}Extensions\end{tabular}} + +\begin{center} +\begin{tabular}{@ {\hspace{-4mm}}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} + \bl{$\der\, c\, (r^{\{n\}})$} & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\ + \bl{$\der\, c\, (r^{\{..n\}})$} & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\ + \bl{$\der\, c\, (r^{\{n..\}})$} & \bl{$\dn$} & \bl{$if\;n=0\;then\;(der\,c\,r)\cdot r^*$} \\ + & & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\ + \bl{$\der\, c\, (r^{\{n..m\}})$} & \bl{$\dn$} & \bl{$if\;n=0 \wedge m=0\;then\;\ZERO\;else$} & \\ + & & \bl{$if \;n=0 \wedge 0<m\; then\;(der\,c\,r)\cdot r^{\{..m-1\}}$} & \\ + & & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1..m-1\}}$} & \\ + \bl{$\der\, c\, (\neg{}r)$} & \bl{$\dn$} & \bl{$\neg(der\,c\,r)$}\\ + \end{tabular} +\end{center} + +also works for lookarounds, various anchors, etc, but \textbf{not} for backreferences + +\begin{textblock}{3}(11,13) +\onslide<2>{ + \begin{bubble}[3cm] + \bl{$a^{\{0\}\{4294967295\}}$} +\end{bubble}} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{\mbox{Problems with Ders}} + +\textcolor{blue}{ +\small +\def\ll{\stackrel{der\,a}{\longrightarrow}} +\begin{center} +\begin{tabular}{@{\hspace{-5mm}}rll} +$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\ +& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\ +& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\ +& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\ +& $\ll$ & \ldots\\ \hspace{15mm} +\end{tabular} +\end{center}} + +\begin{textblock}{13.5}(1.5,12) +(regular expressions of sizes 98, 169, 283, 468, 767, \ldots) +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{\mbox{Conclusion}} + +\begin{itemize} +\item The beauty is that this only involves functional programs that can be conveniently reasoned about in theorem provers +\item POSIX lexing can be done via an extension by Sulzmann and Lu, 2014 (our current work)\medskip\pause +\item How surprising that one can still do new work on regular expressions. +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame}<1-10> +\end{frame} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: +