--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/slides/ssy.tex	Wed Jul 05 16:12:40 2023 +0100
@@ -0,0 +1,742 @@
+% !TEX program = xelatex
+\documentclass[dvipsnames,14pt,t,xelatex,xcolor={table}]{beamer}
+\usepackage{../slides}
+\usepackage{../graphicss}
+\usepackage{../langs}
+\usepackage{../data}
+\usetikzlibrary{cd}
+
+
+\usepackage{tcolorbox}
+\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
+\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
+\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
+
+
+
+\hfuzz=220pt 
+
+\lstset{language=Scala,
+        style=mystyle,
+        numbersep=0pt,
+        numbers=none,
+        xleftmargin=0mm}
+
+\pgfplotsset{compat=1.17}      
+
+\newcommand{\bl}[1]{\textcolor{blue}{#1}}     
+ 
+% beamer stuff 
+\renewcommand{\slidecaption}{SSY, King's College London}
+
+
+
+\begin{document}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{%  
+  \begin{tabular}{@ {}c@ {}}
+    \\[8mm]
+    \LARGE Derivative-Based\\
+    \LARGE Regular Expression Matching\\[5mm]
+    \normalsize\textcolor{gray}{Christian Urban, SSY Group}
+  \end{tabular}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[c]
+\frametitle{A Bit of Background}
+
+\begin{textblock}{10}(2,2.5)
+\includegraphics[scale=0.4]{../pics/phd-1.jpg}
+\end{textblock}
+
+\begin{textblock}{11}(1.7,10)
+\begin{bubble}[9.6cm]
+  PhD thesis on a particular term-rewriting system:\medskip\\
+  Proved that all rewriting sequences are strongly normalising (terminate). 
+\end{bubble}
+\end{textblock}
+
+\only<2->{%
+\begin{textblock}{3}(10,2.5)
+\begin{bubble}[3cm]
+\begin{tabular}{rcl}
+  \bl{$t$} & \bl{::=} & \bl{$x$}\\
+         & \bl{|} & \bl{$\lambda x.\,t$}\\
+         & \bl{|} & \bl{$t_1\;t_2$}\\
+         & \bl{|} & \bl{...}
+\end{tabular}  
+\end{bubble}  
+\end{textblock}}
+
+\end{frame}
+
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+\frametitle{Quiz 1}
+\begin{bubble}[10cm]\it
+  There are many, many regular expression libraries.\bigskip\\
+  
+  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
+  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
+\end{bubble}
+
+\begin{textblock}{12}(2,9)
+\begin{tikzpicture}
+  \draw(0,0)  node[rotate=20]{linear};
+  \draw(3,0)  node[rotate=-20]{\bl{$O(n \log n)$}};
+  \draw(3,-2) node[rotate=10]{quadratic};
+  \draw(6,0) node[rotate=20]{P};
+  \draw(7,0) node[rotate=-20]{NP};
+  \draw(7,-1) node[rotate=0]{\bl{$O(2^n)$}};
+\end{tikzpicture}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+\frametitle{Quiz 2}
+
+\begin{textblock}{12}(2,2.5)
+A regular expression for (some) email addresses:
+\begin{center}
+  ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\_\!\!\_\,.-]^+}}_{\textrm{name}}$)\bl{$\,@\,$}
+  ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\,-]^+}}_{\textrm{domain}}$) \bl{$\,.\,$}
+  ($\underbrace{\bl{[a\mbox{-}z\,.]^{\{2..5\}}}}_{\textrm{top-level domain}}$)
+\end{center}  
+\end{textblock}
+
+\only<2>{
+\begin{textblock}{10}(4,8)
+bounded regular expressions:\medskip\\
+\begin{tabular}{ll}    
+  \bl{$r^{\{n\}}$}     & exactly n-times \bl{$r$}\\
+  \bl{$r^{\{n..m\}}$}  & between n and m-times\\
+  \bl{$r^{\{n..\}}$}   & from n-times\\
+  \bl{$r^{\{..m\}}$}   & upto m-times\\
+\end{tabular}\smallskip\\
+\footnotesize\hfill ${}^*$ in some applications the counters can be in the millions
+\end{textblock}}
+ 
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+\frametitle{Quiz 2}
+
+
+\begin{textblock}{12}(2,2.5)
+  \onslide<1->{Thompson construction: By recursion we are given two NFAs for $r_1$ and $r_2$.\medskip\\
+    \onslide<2->{For $r_1\cdot r_2$:}\bigskip}
+\end{textblock}
+
+\begin{textblock}{12}(2,6)
+\onslide<1>{
+\begin{tikzpicture}[node distance=3mm,
+    >=stealth',very thick,
+    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}]
+\node[state, initial]  (Q_0)  {$\mbox{}$};
+\node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};  
+\node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};  
+\node (R_1)  [right=of Q_0] {$\ldots$};
+\node[state, accepting]  (T_1)  [right=of R_1] {$\mbox{}$};
+\node[state, accepting]  (T_2)  [above=of T_1] {$\mbox{}$};
+\node[state, accepting]  (T_3)  [below=of T_1] {$\mbox{}$};
+
+\node (A_0)  [right=2.5cm of T_1] {$\mbox{}$};
+\node[state, initial]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
+\node[state, initial]  (A_02)  [below=1mm of A_0] {$\mbox{}$};
+
+\node (b_1)  [right=of A_0] {$\ldots$};
+\node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
+\node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
+\node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
+\begin{pgfonlayer}{background}
+  \node (1) [rounded corners, inner sep=1mm, thick,
+    draw=black!60, fill=black!20, fit= (Q_0) (R_1) (T_1) (T_2) (T_3)] {};
+  \node (2) [rounded corners, inner sep=1mm, thick,
+    draw=black!60, fill=black!20, fit= (A_0) (b_1) (c_1) (c_2) (c_3)] {};
+\node [yshift=2mm] at (1.north) {$r_1$};
+\node [yshift=2mm] at (2.north) {$r_2$};
+\end{pgfonlayer}
+\end{tikzpicture}}
+\end{textblock}
+
+\begin{textblock}{12}(2,6)
+\onslide<2>{
+\begin{tikzpicture}[node distance=3mm,
+    >=stealth',very thick,
+    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
+\node[state, initial]  (Q_0)  {$\mbox{}$};
+\node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};  
+\node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};  
+\node (r_1)  [right=of Q_0] {$\ldots$};
+\node[state]  (t_1)  [right=of r_1] {$\mbox{}$};
+\node[state]  (t_2)  [above=of t_1] {$\mbox{}$};
+\node[state]  (t_3)  [below=of t_1] {$\mbox{}$};
+
+\node  (A_0)  [right=2.5cm of t_1] {$\mbox{}$};
+\node[state]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
+\node[state]  (A_02)  [below=1mm of A_0] {$\mbox{}$};
+
+\node (b_1)  [right=of A_0] {$\ldots$};
+\node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
+\node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
+\node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
+\path[->] (t_1) edge (A_01);
+\path[->] (t_2) edge node [above]  {$\varepsilon$\footnotesize{}s} (A_01);
+\path[->] (t_3) edge (A_01);
+\path[->] (t_1) edge (A_02);
+\path[->] (t_2) edge (A_02);
+\path[->] (t_3) edge node [below]  {$\varepsilon$\footnotesize{}s} (A_02);
+ 
+\begin{pgfonlayer}{background}
+  \node (3) [rounded corners, inner sep=1mm, thick,
+    draw=black!60, fill=black!20, fit= (Q_0) (c_1) (c_2) (c_3)] {};
+\node [yshift=2mm] at (3.north) {$r_1\cdot r_2$};
+\end{pgfonlayer}
+\end{tikzpicture}}
+\end{textblock}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+\frametitle{Quiz 2}
+
+\begin{textblock}{12}(2,2.5)
+  \onslide<1->{Thompson construction: By recursion we have a NFA for $r$.\medskip\\
+  \onslide<2->{For $r^*$:\bigskip}}
+\end{textblock}
+
+\begin{textblock}{12}(4,6)
+\onslide<1>{  
+\begin{tikzpicture}[node distance=3mm,
+    >=stealth',very thick,
+    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
+    baseline=(current bounding box.north)]
+\node (2)  {$\mbox{}$};
+\node[state, initial]  (4)  [above=1mm of 2] {$\mbox{}$};
+\node[state, initial]  (5)  [below=1mm of 2] {$\mbox{}$};  
+\node (a)  [right=of 2] {$\ldots$};
+\node[state, accepting]  (a1)  [right=of a] {$\mbox{}$};
+\node[state, accepting]  (a2)  [above=of a1] {$\mbox{}$};
+\node[state, accepting]  (a3)  [below=of a1] {$\mbox{}$};
+\begin{pgfonlayer}{background}
+\node (1) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (2) (a1) (a2) (a3)] {};
+\node [yshift=3mm] at (1.north) {$r$};
+\end{pgfonlayer}
+\end{tikzpicture}}
+\end{textblock}
+
+\begin{textblock}{12}(2,6)
+\onslide<2->{  
+\begin{tikzpicture}[node distance=3mm,
+    >=stealth',very thick,
+    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
+    baseline=(current bounding box.north)]
+\node at (0,0) [state, initial,accepting]  (1)  {$\mbox{}$};
+\node (2)  [right=16mm of 1] {$\mbox{}$};
+\node[state]  (4)  [above=1mm of 2] {$\mbox{}$};
+\node[state]  (5)  [below=1mm of 2] {$\mbox{}$};  
+\node (a)  [right=of 2] {$\ldots$};
+\node[state]  (a1)  [right=of a] {$\mbox{}$};
+\node[state]  (a2)  [above=of a1] {$\mbox{}$};
+\node[state]  (a3)  [below=of a1] {$\mbox{}$};
+\path[->] (1) edge node [below]  {$\varepsilon$} (4);
+\path[->] (1) edge node [below]  {$\varepsilon$} (5);
+\path[->] (a1) edge [bend left=45] node [below]  {$\varepsilon$} (1);
+\path[->] (a2) edge [bend right] node [below]  {$\varepsilon$} (1);
+\path[->] (a3) edge [bend left=45] node [below]  {$\varepsilon$} (1);
+\begin{pgfonlayer}{background}
+\node (2) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3)] {};
+\node [yshift=3mm] at (2.north) {$r^*$};
+\end{pgfonlayer}
+\end{tikzpicture}}
+\end{textblock}
+
+\begin{textblock}{12}(2,12)
+\onslide<3->{ 
+  \begin{bubble}[9.5cm]\it
+  Quiz:\smallskip\\  
+  \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
+\end{bubble}}
+\end{textblock}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+  \frametitle{Quiz 3}
+
+Hierarchy of Languages:  
+
+\begin{center}
+\begin{tikzpicture}
+[rect/.style={draw=black!50, 
+              top color=white,
+              bottom color=black!20, 
+              rectangle, 
+              very thick, 
+              rounded corners}, scale=1.2]
+
+\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
+\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
+\draw (0,-0.85) node [rect, text depth=17mm] {\;\;context sensitive languages\;\;};
+\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
+\draw (0,-1.4) node [rect] {regular languages};
+\end{tikzpicture}
+\end{center}  
+
+\begin{textblock}{12}(2,11.5)
+\onslide<2->{ 
+  \begin{bubble}[9.5cm]\it
+    Quiz:\smallskip\\
+    
+    \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
+    Say CYK, LL, LR(k), PEG, \ldots
+\end{bubble}}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+\defverbatim{\foo}{\footnotesize%
+\begin{verbatim}
+(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
+null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
+| -|~|!|{}|\|\||\+)*.*(?:.*=.*)))  
+\end{verbatim}
+}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t,fragile]
+\frametitle{Quiz 4}
+
+\begin{textblock}{12}(2,2.5)
+\begin{bubble}[9.5cm]\it
+  Quiz:\smallskip\\    
+  Do regular expressions have any security relevance\,?
+\end{bubble}
+\end{textblock}
+
+\only<2->{
+\begin{textblock}{2}(1,6)
+\includegraphics[scale=0.8]{../pics/zeek.png}
+\includegraphics[scale=0.17]{../pics/snort.png}
+\end{textblock}}
+
+\only<3->{
+\begin{textblock}{7}(7,5.6)
+  \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
+    \footnotesize
+    It serves more web traffic than Twitter, Amazon, Apple,
+    Instagram, Bing \& Wikipedia combined.\medskip\\
+    Web Application Firewall filters out
+    SQL injection attacks, XSS attacks etc
+\end{textblock}}
+
+\only<3->{
+  \begin{textblock}{13}(4,12.3)
+  \footnotesize
+  \hspace{1.5cm}a global outage on 2 July 2019 (first one for six years)
+  \color{blue}\foo\color{black}
+   \end{textblock}
+}
+
+    
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+  \frametitle{Quiz 3}
+
+\begin{textblock}{12}(2,2) 
+  \begin{bubble}[9.5cm]\it
+    Quiz:\smallskip\\
+    
+    \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
+    Say CYK, LL, LR(k), PEG, \ldots
+\end{bubble}
+\end{textblock}
+
+
+\begin{textblock}{12}(1,7)
+\textbf{POSIX lexing}
+
+\begin{tabular}{@ {}ll}
+  Assume you have: & \bl{$r_{key} \dn \texttt{if} + \texttt{then} + \texttt{while} \ldots$}\\
+                   & \bl{$r_{id\;} \,\dn \texttt{[a-z]} \cdot \texttt{[a-z0-9\_]}^*$}\medskip\\
+
+  Lexing a string with  & \bl{$(r_{key} + r_{id})^*$}\medskip\\
+
+  What should be & \\
+  the result for:  & \bl{\texttt{"iffoo"}} \;\;\; \bl{\texttt{"if"}}\\
+\end{tabular}  
+\end{textblock}
+
+\only<2->{
+\begin{textblock}{13.5}(1.5,4) 
+  \begin{mybox3}{POSIX rules}
+    \begin{itemize}
+    \item \textbf{The Longest Match Rule:} The longest initial
+      substring matched by any regular expression is taken as next token.
+    \item \textbf{Priority Rule:}  For a particular longest initial
+      substring, the first (leftmost) regular expression that can match
+      determines the token.
+    \item \ldots  
+    \end{itemize}
+    \begin{center}
+    \bl{$(a + ab) \cdot (bc + c)$} \;\;and\;\; \bl{\texttt{"}$abc$\texttt{"}}
+    \end{center}  
+  \end{mybox3}
+\end{textblock}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+\frametitle{Quiz 1}
+\begin{bubble}[10cm]\it
+  There are many, many regular expression libraries.\bigskip\\
+  
+  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
+  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
+\end{bubble}
+
+\begin{textblock}{12}(1,9)
+  \begin{tabular}{p{4cm}|p{4cm}|p{2cm}}
+    atrociously slow (s't) & pretty lazy (s't) & \textcolor{gray}{OTBT}\medskip\\
+    Python, Ruby, Swift, Dart, JavaScript & Rust, Go, RE2 & \textcolor{gray}{Snort, .Net7}\\
+  \end{tabular}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+
+\mbox{}\\[10mm]  
+
+\begin{columns}[t,onlytextwidth]  
+\begin{column}{.2\textwidth}
+\raisebox{-10mm}{    
+\begin{tikzpicture}
+  \begin{axis}[
+    xlabel={\bl{$n$} \pcode{a}s},
+    %%x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,10,...,30},
+    xmax=35,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5cm,
+    height=5cm, 
+    legend entries={Python,JavaScript,Swift,Dart},
+    legend style={font=\small,at={(0.5,-0.39)},anchor=north},
+    legend cell align=left]
+\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
+\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
+\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
+\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
+\end{axis}
+\end{tikzpicture}}
+\end{column}
+
+\begin{column}{.2\textwidth}
+\begin{tikzpicture}
+  \begin{axis}[
+    xlabel={\bl{$n$} \pcode{a}s},
+    %ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,10,...,30},
+    xmax=35,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5cm,
+    height=5cm, 
+    legend entries={Python,Ruby},
+    legend style={font=\small,at={(0.5,-0.39)},anchor=north},
+    legend cell align=left]
+\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
+\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};  
+\end{axis}
+\end{tikzpicture}  
+\end{column}  
+\end{columns}
+
+\begin{textblock}{4}(9,1.7)
+\textbf{\bl{\texttt{(a?)\boldmath$^{\{n\}}\cdot$(a)\boldmath$^{\{n\}}$}}}
+\end{textblock}
+
+\begin{textblock}{4}(4,1.7)  
+\textbf{\bl{\texttt{(a*)*$\cdot$b}}}
+\end{textblock}
+
+\begin{textblock}{3.4}(0.3,10)
+\small{}\textbf{matching with strings}
+\textbf{\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}}  
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
+\begin{frame}[t]
+\frametitle{Quiz 1}
+\begin{bubble}[10cm]\it
+  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
+  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
+\end{bubble}
+
+\begin{textblock}{12}(1.7,7)
+For \textbf{Perl-style} regular expression matchers (say Python, JavaScript, etc), the
+answer is:
+\only<2->{\begin{center}
+\fontspec{Hoefler Text Black}\textcolor{ProcessBlue}{\huge{}NP}
+\end{center}}  
+\end{textblock}
+
+\begin{textblock}{12}(1.7,12)
+\only<3->{Backreferences: \bl{(\ldots)$\,\ldots{}\backslash{}n$}}
+\end{textblock}  
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+\begin{frame}
+\frametitle{Quiz 2}
+
+\begin{textblock}{12}(2,2)
+\onslide<1->{ 
+  \begin{bubble}[9.5cm]\it
+  Quiz:\smallskip\\  
+  \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
+\end{bubble}}
+\end{textblock}
+
+\only<2->{
+\begin{textblock}{12}(1,6)
+\begin{tabular}{ll}
+  Google's RE2 lib & $\Rightarrow$ \bl{$a^{\{1001\}}$} is too big\\
+  \onslide<3->{Rust Regex lib}    & \onslide<5->{$\Rightarrow$ \bl{$a^{\{1000\}\{100\}\{5\}}$} too big}\\
+                    & \onslide<6->{$\Rightarrow$ \bl{$a^{\{0\}\{4294967295\}}$} ?}
+\end{tabular}
+\end{textblock}}
+
+\only<4>{
+\begin{textblock}{13.5}(1.5,4) 
+  \begin{mybox3}{From Rust's Regex Description}\it 
+    ``\ldots [the] syntax is similar to Perl-style regular expressions, but lacks a few features like look
+    around and backreferences. In exchange, all searches execute in linear time with respect
+    to the size of the regular expression and search text. \ldots''
+  \end{mybox3}
+\end{textblock}}
+
+
+\end{frame}  
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{Regular Expressions}
+
+\begin{textblock}{6}(2,5)
+  \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
+  \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
+         & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} \\
+         & \bl{$\mid$} & \bl{$c, d, \ldots$}                         & characters\\
+         & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
+         & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
+         & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
+  \end{tabular}
+  \end{textblock}
+  
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}
+
+\large
+If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular 
+expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip
+
+\small
+\bl{$\der\,c\,r$} gives the answer, Brzozowski 1964\medskip\\
+(lost in the sands of time, re-appeared in 2009)
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
+  \bl{$\der\, c\, (\ZERO)$}      & \bl{$\dn$} & \bl{$\ZERO$} & \\
+  \bl{$\der\, c\, (\ONE)$}           & \bl{$\dn$} & \bl{$\ZERO$} & \\
+  \bl{$\der\, c\, (d)$}                     & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\
+  \bl{$\der\, c\, (r_1 + r_2)$}        & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\
+  \bl{$\der\, c\, (r_1 \cdot r_2)$}  & \bl{$\dn$}  & \bl{if $nullable (r_1)$}\\
+  & & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\ 
+  & & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\
+  \bl{$\der\, c\, (r^*)$}  & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\medskip\bigskip\\
+  \end{tabular}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{\begin{tabular}{l}Brzozowski matcher\end{tabular}}
+
+Does \bl{$r_1$} match \bl{"$abc$"}?
+\begin{center}
+\begin{tabular}{@{}rl@{}l@{}}
+Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\
+Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\
+Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\
+Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\
+        & test whether \bl{$r_4$} can recognise\\
+        & the empty string\medskip\\
+Output: & result of the test\\
+        & $\Rightarrow \bl{\textit{true}} \,\text{or}\, 
+                       \bl{\textit{false}}$\\        
+\end{tabular}
+\end{center}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{\mbox{Nullable}}
+
+
+\ldots{}whether a regular expression can match the empty string:
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+\bl{$nullable(\ZERO)$}    & \bl{$\dn$} & \bl{\textit{false}}\\
+\bl{$nullable(\ONE)$}       & \bl{$\dn$} & \bl{\textit{true}}\\
+\bl{$nullable (c)$}             & \bl{$\dn$} & \bl{\textit{false}}\\
+\bl{$nullable (r_1 + r_2)$}     & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\ 
+\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\
+\bl{$nullable (r^*)$}           & \bl{$\dn$} & \bl{\textit{true}}\\
+\end{tabular}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{\begin{tabular}{l}Extensions\end{tabular}}
+
+\begin{center}
+\begin{tabular}{@ {\hspace{-4mm}}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
+  \bl{$\der\, c\, (r^{\{n\}})$}      & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
+  \bl{$\der\, c\, (r^{\{..n\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
+  \bl{$\der\, c\, (r^{\{n..\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;(der\,c\,r)\cdot r^*$} \\
+                                    &            & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
+  \bl{$\der\, c\, (r^{\{n..m\}})$}   & \bl{$\dn$} & \bl{$if\;n=0 \wedge m=0\;then\;\ZERO\;else$} & \\
+                                     &           & \bl{$if \;n=0 \wedge 0<m\; then\;(der\,c\,r)\cdot r^{\{..m-1\}}$} & \\
+                                     &           & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1..m-1\}}$} & \\
+  \bl{$\der\, c\, (\neg{}r)$}            & \bl{$\dn$}  & \bl{$\neg(der\,c\,r)$}\\
+  \end{tabular}
+\end{center}
+
+also works for lookarounds, various anchors, etc, but \textbf{not} for backreferences
+
+\begin{textblock}{3}(11,13)
+\onslide<2>{ 
+  \begin{bubble}[3cm]
+  \bl{$a^{\{0\}\{4294967295\}}$}
+\end{bubble}}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{\mbox{Problems with Ders}}
+
+\textcolor{blue}{
+\small
+\def\ll{\stackrel{der\,a}{\longrightarrow}}
+\begin{center}
+\begin{tabular}{@{\hspace{-5mm}}rll}
+$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
+& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+& $\ll$ & \ldots\\ \hspace{15mm}
+\end{tabular}
+\end{center}}
+
+\begin{textblock}{13.5}(1.5,12) 
+(regular expressions of sizes 98, 169, 283, 468, 767, \ldots)
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{\mbox{Conclusion}}
+
+\begin{itemize}
+\item The beauty is that this only involves functional programs that can be conveniently reasoned about in theorem provers 
+\item POSIX lexing can be done via an extension by Sulzmann and Lu, 2014 (our current work)\medskip\pause
+\item How surprising that one can still do new work on regular expressions.  
+\end{itemize} 
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+\begin{frame}<1-10>
+\end{frame}  
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+\end{document}
+
+%%% Local Variables:  
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
+
changeset 913	eef6a56c185a
parent 906	2bf1516d730f
child 960	c7009356ddd8