slides/ssy.tex
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 18 Oct 2024 05:45:14 +0100
changeset 969 0dfa2923a7c6
parent 960 c7009356ddd8
permissions -rw-r--r--
updated

% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t,xelatex,xcolor={table}]{beamer}
\usepackage{../slides}
\usepackage{../graphicss}
\usepackage{../langs}
\usepackage{../data}
\usetikzlibrary{cd}


\usepackage{tcolorbox}
\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}


\setbeamersize{text margin left=2mm} % <- like this
\hfuzz=220pt

\lstset{language=Scala,
        style=mystyle,
        numbersep=0pt,
        numbers=none,
        xleftmargin=0mm}

\pgfplotsset{compat=1.17}

\newcommand{\bl}[1]{\textcolor{blue}{#1}}

% beamer stuff
\renewcommand{\slidecaption}{Christian Urban, SSY, King's College London}



\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\begin{tabular}{@ {\hspace{8mm}}c@ {}}Fast Regular Expression Matching\end{tabular}}
\mbox{}\\[1mm]

\begin{columns}[t,onlytextwidth]
\begin{column}{.2\textwidth}
\raisebox{-10mm}{
\begin{tikzpicture}
  \begin{axis}[
    xlabel={size of strings},
    x label style={at={(0.45,-0.16)}},
    ylabel={time in secs},
    enlargelimits=false,
    xtick={0,10,...,30},
    xmax=35,
    ymax=35,
    ytick={0,10,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5cm,
    height=4.5cm,
    legend entries={Python,JavaScript,Swift,Dart},
    legend style={font=\footnotesize,at={(0.45,-0.48)},anchor=north},
    legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
\end{axis}
\end{tikzpicture}}
\end{column}

\end{columns}

\begin{textblock}{4}(3.5,3.2)
{\bl{\texttt{(a*)*$\cdot$b}}}
\end{textblock}

\begin{textblock}{7.5}(8,3)
\begin{itemize}
\item use \textbf{Brzozowski derivatives} for regex matching rather\\ than NFAs/DFAs% and lexing
\item based on work by \textbf{Christian Urban} and \textbf{Roy Dyckhoff}\bigskip
\item applications in network security (traffic filtering)
\item formal verification of correctness and speed (\textbf{Isabelle} theorem prover)
\end{itemize}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
  \begin{tabular}{@ {}c@ {}}
    \\[8mm]
    \LARGE Derivative-Based\\
    \LARGE Regular Expression Matching\\[5mm]
    \normalsize\textcolor{gray}{Christian Urban, SSY Group}
  \end{tabular}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A Bit of Background}

\begin{textblock}{10}(2,2.5)
\includegraphics[scale=0.4]{../pics/phd-1.jpg}
\end{textblock}

\begin{textblock}{11}(1.7,10)
\begin{bubble}[9.6cm]
  PhD thesis on a particular term-rewriting system:\medskip\\
  Proved that all rewriting sequences are strongly normalising (terminate).
\end{bubble}
\end{textblock}

\only<2->{%
\begin{textblock}{3}(10,2.5)
\begin{bubble}[3cm]
\begin{tabular}{rcl}
  \bl{$t$} & \bl{::=} & \bl{$x$}\\
         & \bl{|} & \bl{$\lambda x.\,t$}\\
         & \bl{|} & \bl{$t_1\;t_2$}\\
         & \bl{|} & \bl{...}
\end{tabular}
\end{bubble}
\end{textblock}}

\end{frame}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Quiz 1}
\begin{bubble}[10cm]\it
  There are many, many regular expression libraries.\bigskip\\

  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
\end{bubble}

\begin{textblock}{12}(2,9)
\begin{tikzpicture}
  \draw(0,0)  node[rotate=20]{linear};
  \draw(3,0)  node[rotate=-20]{\bl{$O(n \log n)$}};
  \draw(3,-2) node[rotate=10]{quadratic};
  \draw(6,0) node[rotate=20]{P};
  \draw(7,0) node[rotate=-20]{NP};
  \draw(7,-1) node[rotate=0]{\bl{$O(2^n)$}};
\end{tikzpicture}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Quiz 2}

\begin{textblock}{12}(2,2.5)
A regular expression for (some) email addresses:
\begin{center}
  ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\_\!\!\_\,.-]^+}}_{\textrm{name}}$)\bl{$\,@\,$}
  ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\,-]^+}}_{\textrm{domain}}$) \bl{$\,.\,$}
  ($\underbrace{\bl{[a\mbox{-}z\,.]^{\{2..5\}}}}_{\textrm{top-level domain}}$)
\end{center}
\end{textblock}

\only<2>{
\begin{textblock}{10}(4,8)
bounded regular expressions:\medskip\\
\begin{tabular}{ll}
  \bl{$r^{\{n\}}$}     & exactly n-times \bl{$r$}\\
  \bl{$r^{\{n..m\}}$}  & between n and m-times\\
  \bl{$r^{\{n..\}}$}   & from n-times\\
  \bl{$r^{\{..m\}}$}   & upto m-times\\
\end{tabular}\smallskip\\
\footnotesize\hfill ${}^*$ in some applications the counters can be in the millions
\end{textblock}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Quiz 2}


\begin{textblock}{12}(2,2.5)
  \onslide<1->{Thompson construction: By recursion we are given two NFAs for $r_1$ and $r_2$.\medskip\\
    \onslide<2->{For $r_1\cdot r_2$:}\bigskip}
\end{textblock}

\begin{textblock}{12}(2,6)
\onslide<1>{
\begin{tikzpicture}[node distance=3mm,
    >=stealth',very thick,
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}]
\node[state, initial]  (Q_0)  {$\mbox{}$};
\node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};
\node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};
\node (R_1)  [right=of Q_0] {$\ldots$};
\node[state, accepting]  (T_1)  [right=of R_1] {$\mbox{}$};
\node[state, accepting]  (T_2)  [above=of T_1] {$\mbox{}$};
\node[state, accepting]  (T_3)  [below=of T_1] {$\mbox{}$};

\node (A_0)  [right=2.5cm of T_1] {$\mbox{}$};
\node[state, initial]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
\node[state, initial]  (A_02)  [below=1mm of A_0] {$\mbox{}$};

\node (b_1)  [right=of A_0] {$\ldots$};
\node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
\node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
\node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
\begin{pgfonlayer}{background}
  \node (1) [rounded corners, inner sep=1mm, thick,
    draw=black!60, fill=black!20, fit= (Q_0) (R_1) (T_1) (T_2) (T_3)] {};
  \node (2) [rounded corners, inner sep=1mm, thick,
    draw=black!60, fill=black!20, fit= (A_0) (b_1) (c_1) (c_2) (c_3)] {};
\node [yshift=2mm] at (1.north) {$r_1$};
\node [yshift=2mm] at (2.north) {$r_2$};
\end{pgfonlayer}
\end{tikzpicture}}
\end{textblock}

\begin{textblock}{12}(2,6)
\onslide<2>{
\begin{tikzpicture}[node distance=3mm,
    >=stealth',very thick,
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
\node[state, initial]  (Q_0)  {$\mbox{}$};
\node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};
\node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};
\node (r_1)  [right=of Q_0] {$\ldots$};
\node[state]  (t_1)  [right=of r_1] {$\mbox{}$};
\node[state]  (t_2)  [above=of t_1] {$\mbox{}$};
\node[state]  (t_3)  [below=of t_1] {$\mbox{}$};

\node  (A_0)  [right=2.5cm of t_1] {$\mbox{}$};
\node[state]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
\node[state]  (A_02)  [below=1mm of A_0] {$\mbox{}$};

\node (b_1)  [right=of A_0] {$\ldots$};
\node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
\node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
\node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
\path[->] (t_1) edge (A_01);
\path[->] (t_2) edge node [above]  {$\varepsilon$\footnotesize{}s} (A_01);
\path[->] (t_3) edge (A_01);
\path[->] (t_1) edge (A_02);
\path[->] (t_2) edge (A_02);
\path[->] (t_3) edge node [below]  {$\varepsilon$\footnotesize{}s} (A_02);

\begin{pgfonlayer}{background}
  \node (3) [rounded corners, inner sep=1mm, thick,
    draw=black!60, fill=black!20, fit= (Q_0) (c_1) (c_2) (c_3)] {};
\node [yshift=2mm] at (3.north) {$r_1\cdot r_2$};
\end{pgfonlayer}
\end{tikzpicture}}
\end{textblock}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Quiz 2}

\begin{textblock}{12}(2,2.5)
  \onslide<1->{Thompson construction: By recursion we have a NFA for $r$.\medskip\\
  \onslide<2->{For $r^*$:\bigskip}}
\end{textblock}

\begin{textblock}{12}(4,6)
\onslide<1>{
\begin{tikzpicture}[node distance=3mm,
    >=stealth',very thick,
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
    baseline=(current bounding box.north)]
\node (2)  {$\mbox{}$};
\node[state, initial]  (4)  [above=1mm of 2] {$\mbox{}$};
\node[state, initial]  (5)  [below=1mm of 2] {$\mbox{}$};
\node (a)  [right=of 2] {$\ldots$};
\node[state, accepting]  (a1)  [right=of a] {$\mbox{}$};
\node[state, accepting]  (a2)  [above=of a1] {$\mbox{}$};
\node[state, accepting]  (a3)  [below=of a1] {$\mbox{}$};
\begin{pgfonlayer}{background}
\node (1) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (2) (a1) (a2) (a3)] {};
\node [yshift=3mm] at (1.north) {$r$};
\end{pgfonlayer}
\end{tikzpicture}}
\end{textblock}

\begin{textblock}{12}(2,6)
\onslide<2->{
\begin{tikzpicture}[node distance=3mm,
    >=stealth',very thick,
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
    baseline=(current bounding box.north)]
\node at (0,0) [state, initial,accepting]  (1)  {$\mbox{}$};
\node (2)  [right=16mm of 1] {$\mbox{}$};
\node[state]  (4)  [above=1mm of 2] {$\mbox{}$};
\node[state]  (5)  [below=1mm of 2] {$\mbox{}$};
\node (a)  [right=of 2] {$\ldots$};
\node[state]  (a1)  [right=of a] {$\mbox{}$};
\node[state]  (a2)  [above=of a1] {$\mbox{}$};
\node[state]  (a3)  [below=of a1] {$\mbox{}$};
\path[->] (1) edge node [below]  {$\varepsilon$} (4);
\path[->] (1) edge node [below]  {$\varepsilon$} (5);
\path[->] (a1) edge [bend left=45] node [below]  {$\varepsilon$} (1);
\path[->] (a2) edge [bend right] node [below]  {$\varepsilon$} (1);
\path[->] (a3) edge [bend left=45] node [below]  {$\varepsilon$} (1);
\begin{pgfonlayer}{background}
\node (2) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3)] {};
\node [yshift=3mm] at (2.north) {$r^*$};
\end{pgfonlayer}
\end{tikzpicture}}
\end{textblock}

\begin{textblock}{12}(2,12)
\onslide<3->{
  \begin{bubble}[9.5cm]\it
  Quiz:\smallskip\\
  \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
\end{bubble}}
\end{textblock}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
  \frametitle{Quiz 3}

Hierarchy of Languages:

\begin{center}
\begin{tikzpicture}
[rect/.style={draw=black!50,
              top color=white,
              bottom color=black!20,
              rectangle,
              very thick,
              rounded corners}, scale=1.2]

\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
\draw (0,-0.85) node [rect, text depth=17mm] {\;\;context sensitive languages\;\;};
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
\draw (0,-1.4) node [rect] {regular languages};
\end{tikzpicture}
\end{center}

\begin{textblock}{12}(2,11.5)
\onslide<2->{
  \begin{bubble}[9.5cm]\it
    Quiz:\smallskip\\

    \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
    Say CYK, LL, LR(k), PEG, \ldots
\end{bubble}}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\defverbatim{\foo}{\footnotesize%
\begin{verbatim}
(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
| -|~|!|{}|\|\||\+)*.*(?:.*=.*)))
\end{verbatim}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t,fragile]
\frametitle{Quiz 4}

\begin{textblock}{12}(2,2.5)
\begin{bubble}[9.5cm]\it
  Quiz:\smallskip\\
  Do regular expressions have any security relevance\,?
\end{bubble}
\end{textblock}

\only<2->{
\begin{textblock}{2}(1,6)
\includegraphics[scale=0.8]{../pics/zeek.png}
\includegraphics[scale=0.17]{../pics/snort.png}
\end{textblock}}

\only<3->{
\begin{textblock}{7}(7,5.6)
  \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
    \footnotesize
    It serves more web traffic than Twitter, Amazon, Apple,
    Instagram, Bing \& Wikipedia combined.\medskip\\
    Web Application Firewall filters out
    SQL injection attacks, XSS attacks etc
\end{textblock}}

\only<3->{
  \begin{textblock}{13}(4,12.3)
  \footnotesize
  \hspace{1.5cm}a global outage on 2 July 2019 (first one for six years)
  \color{blue}\foo\color{black}
   \end{textblock}
}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
  \frametitle{Quiz 3}

\begin{textblock}{12}(2,2)
  \begin{bubble}[9.5cm]\it
    Quiz:\smallskip\\

    \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
    Say CYK, LL, LR(k), PEG, \ldots
\end{bubble}
\end{textblock}


\begin{textblock}{12}(1,7)
\textbf{POSIX lexing}

\begin{tabular}{@ {}ll}
  Assume you have: & \bl{$r_{key} \dn \texttt{if} + \texttt{then} + \texttt{while} \ldots$}\\
                   & \bl{$r_{id\;} \,\dn \texttt{[a-z]} \cdot \texttt{[a-z0-9\_]}^*$}\medskip\\

  Lexing a string with  & \bl{$(r_{key} + r_{id})^*$}\medskip\\

  What should be & \\
  the result for:  & \bl{\texttt{"iffoo"}} \;\;\; \bl{\texttt{"if"}}\\
\end{tabular}
\end{textblock}

\only<2->{
\begin{textblock}{13.5}(1.5,4)
  \begin{mybox3}{POSIX rules}
    \begin{itemize}
    \item \textbf{The Longest Match Rule:} The longest initial
      substring matched by any regular expression is taken as next token.
    \item \textbf{Priority Rule:}  For a particular longest initial
      substring, the first (leftmost) regular expression that can match
      determines the token.
    \item \ldots
    \end{itemize}
    \begin{center}
    \bl{$(a + ab) \cdot (bc + c)$} \;\;and\;\; \bl{\texttt{"}$abc$\texttt{"}}
    \end{center}
  \end{mybox3}
\end{textblock}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Quiz 1}
\begin{bubble}[10cm]\it
  There are many, many regular expression libraries.\bigskip\\

  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
\end{bubble}

\begin{textblock}{12}(1,9)
  \begin{tabular}{p{4cm}|p{4cm}|p{2cm}}
    atrociously slow (s't) & pretty lazy (s't) & \textcolor{gray}{OTBT}\medskip\\
    Python, Ruby, Swift, Dart, JavaScript & Rust, Go, RE2 & \textcolor{gray}{Snort, .Net7}\\
  \end{tabular}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]

\mbox{}\\[10mm]

\begin{columns}[t,onlytextwidth]
\begin{column}{.2\textwidth}
\raisebox{-10mm}{
\begin{tikzpicture}
  \begin{axis}[
    xlabel={\bl{$n$} \pcode{a}s},
    %%x label style={at={(1.05,0.0)}},
    ylabel={time in secs},
    enlargelimits=false,
    xtick={0,10,...,30},
    xmax=35,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5cm,
    height=5cm,
    legend entries={Python,JavaScript,Swift,Dart},
    legend style={font=\small,at={(0.5,-0.39)},anchor=north},
    legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
\end{axis}
\end{tikzpicture}}
\end{column}

\begin{column}{.2\textwidth}
\begin{tikzpicture}
  \begin{axis}[
    xlabel={\bl{$n$} \pcode{a}s},
    %ylabel={time in secs},
    enlargelimits=false,
    xtick={0,10,...,30},
    xmax=35,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5cm,
    height=5cm,
    legend entries={Python,Ruby},
    legend style={font=\small,at={(0.5,-0.39)},anchor=north},
    legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};
\end{axis}
\end{tikzpicture}
\end{column}
\end{columns}

\begin{textblock}{4}(9,1.7)
\textbf{\bl{\texttt{(a?)\boldmath$^{\{n\}}\cdot$(a)\boldmath$^{\{n\}}$}}}
\end{textblock}

\begin{textblock}{4}(4,1.7)
\textbf{\bl{\texttt{(a*)*$\cdot$b}}}
\end{textblock}

\begin{textblock}{3.4}(0.3,10)
\small{}\textbf{matching with strings}
\textbf{\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Quiz 1}
\begin{bubble}[10cm]\it
  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
\end{bubble}

\begin{textblock}{12}(1.7,7)
For \textbf{Perl-style} regular expression matchers (say Python, JavaScript, etc), the
answer is:
\only<2->{\begin{center}
\fontspec{Hoefler Text Black}\textcolor{ProcessBlue}{\huge{}NP}
\end{center}}
\end{textblock}

\begin{textblock}{12}(1.7,12)
\only<3->{Backreferences: \bl{(\ldots)$\,\ldots{}\backslash{}n$}}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Quiz 2}

\begin{textblock}{12}(2,2)
\onslide<1->{
  \begin{bubble}[9.5cm]\it
  Quiz:\smallskip\\
  \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
\end{bubble}}
\end{textblock}

\only<2->{
\begin{textblock}{12}(1,6)
\begin{tabular}{ll}
  Google's RE2 lib & $\Rightarrow$ \bl{$a^{\{1001\}}$} is too big\\
  \onslide<3->{Rust Regex lib}    & \onslide<5->{$\Rightarrow$ \bl{$a^{\{1000\}\{100\}\{5\}}$} too big}\\
                    & \onslide<6->{$\Rightarrow$ \bl{$a^{\{0\}\{4294967295\}}$} ?}
\end{tabular}
\end{textblock}}

\only<4>{
\begin{textblock}{13.5}(1.5,4)
  \begin{mybox3}{From Rust's Regex Description}\it
    ``\ldots [the] syntax is similar to Perl-style regular expressions, but lacks a few features like look
    around and backreferences. In exchange, all searches execute in linear time with respect
    to the size of the regular expression and search text. \ldots''
  \end{mybox3}
\end{textblock}}


\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Regular Expressions}

\begin{textblock}{6}(2,5)
  \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
  \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
         & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} \\
         & \bl{$\mid$} & \bl{$c, d, \ldots$}                         & characters\\
         & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
         & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
         & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
  \end{tabular}
  \end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}

\large
If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular
expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip

\small
\bl{$\der\,c\,r$} gives the answer, Brzozowski 1964\medskip\\
(lost in the sands of time, re-appeared in 2009)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}

\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
  \bl{$\der\, c\, (\ZERO)$}      & \bl{$\dn$} & \bl{$\ZERO$} & \\
  \bl{$\der\, c\, (\ONE)$}           & \bl{$\dn$} & \bl{$\ZERO$} & \\
  \bl{$\der\, c\, (d)$}                     & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\
  \bl{$\der\, c\, (r_1 + r_2)$}        & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\
  \bl{$\der\, c\, (r_1 \cdot r_2)$}  & \bl{$\dn$}  & \bl{if $nullable (r_1)$}\\
  & & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\
  & & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\
  \bl{$\der\, c\, (r^*)$}  & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\medskip\bigskip\\
  \end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\begin{tabular}{l}Brzozowski matcher\end{tabular}}

Does \bl{$r_1$} match \bl{"$abc$"}?
\begin{center}
\begin{tabular}{@{}rl@{}l@{}}
Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\
Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\
Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\
Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\
        & test whether \bl{$r_4$} can recognise\\
        & the empty string\medskip\\
Output: & result of the test\\
        & $\Rightarrow \bl{\textit{true}} \,\text{or}\,
                       \bl{\textit{false}}$\\
\end{tabular}
\end{center}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\mbox{Nullable}}


\ldots{}whether a regular expression can match the empty string:
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
\bl{$nullable(\ZERO)$}    & \bl{$\dn$} & \bl{\textit{false}}\\
\bl{$nullable(\ONE)$}       & \bl{$\dn$} & \bl{\textit{true}}\\
\bl{$nullable (c)$}             & \bl{$\dn$} & \bl{\textit{false}}\\
\bl{$nullable (r_1 + r_2)$}     & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\
\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\
\bl{$nullable (r^*)$}           & \bl{$\dn$} & \bl{\textit{true}}\\
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\begin{tabular}{l}Extensions\end{tabular}}

\begin{center}
\begin{tabular}{@ {\hspace{-4mm}}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
  \bl{$\der\, c\, (r^{\{n\}})$}      & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
  \bl{$\der\, c\, (r^{\{..n\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
  \bl{$\der\, c\, (r^{\{n..\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;(der\,c\,r)\cdot r^*$} \\
                                    &            & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
  \bl{$\der\, c\, (r^{\{n..m\}})$}   & \bl{$\dn$} & \bl{$if\;n=0 \wedge m=0\;then\;\ZERO\;else$} & \\
                                     &           & \bl{$if \;n=0 \wedge 0<m\; then\;(der\,c\,r)\cdot r^{\{..m-1\}}$} & \\
                                     &           & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1..m-1\}}$} & \\
  \bl{$\der\, c\, (\neg{}r)$}            & \bl{$\dn$}  & \bl{$\neg(der\,c\,r)$}\\
  \end{tabular}
\end{center}

also works for lookarounds, various anchors, etc, but \textbf{not} for backreferences

\begin{textblock}{3}(11,13)
\onslide<2>{
  \begin{bubble}[3cm]
  \bl{$a^{\{0\}\{4294967295\}}$}
\end{bubble}}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\mbox{Problems with Ders}}

\textcolor{blue}{
\small
\def\ll{\stackrel{der\,a}{\longrightarrow}}
\begin{center}
\begin{tabular}{@{\hspace{-5mm}}rll}
$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
& $\ll$ & \ldots\\ \hspace{15mm}
\end{tabular}
\end{center}}

\begin{textblock}{13.5}(1.5,12)
(regular expressions of sizes 98, 169, 283, 468, 767, \ldots)
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\mbox{Conclusion}}

\begin{itemize}
\item The beauty is that this only involves functional programs that can be conveniently reasoned about in theorem provers
\item POSIX lexing can be done via an extension by Sulzmann and Lu, 2014 (our current work)\medskip\pause
\item How surprising that one can still do new work on regular expressions.
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}<1-10>
\end{frame}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: