\documentclass[dvipsnames,14pt,t]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\hfuzz=220pt
\lstset{language=Scala,
style=mystyle,
numbersep=0pt,
numbers=none,
xleftmargin=0mm}
\pgfplotsset{compat=1.12}
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
% beamer stuff
\renewcommand{\slidecaption}{CFL 02, King's College London}
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
\begin{tabular}{@ {}c@ {}}
\\[-3mm]
\LARGE Compilers and \\[-1mm]
\LARGE Formal Languages (2)\\[3mm]
\end{tabular}}
\normalsize
\begin{center}
\begin{tabular}{ll}
Email: & christian.urban at kcl.ac.uk\\
Office: & N\liningnums{7.07} (North Wing, Bush House)\\
Slides: & KEATS (also homework is there)
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\Large
Lets Implement an Efficient\\[-2mm]
Regular Expression Matcher}
\footnotesize
\begin{center}
{\normalsize Graphs: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$} and strings \bl{$\underbrace{\,a\ldots a\,}_{n}$}}\medskip\\
\begin{tabular}{@{}cc@{}}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=5cm,
height=4.5cm,
legend entries={Python,Ruby},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};
\end{axis}
\end{tikzpicture}
&
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.1,0.05)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5000,...,10000},
xmax=11500,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=5cm,
height=4.5cm,
legend entries={Scala V2,Scala V3},
legend pos=north east,
legend cell align=left]
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
\end{axis}
\end{tikzpicture}
\end{tabular}
\end{center}
\small
In the handouts is a similar graph for \bl{$(a^*)^* \cdot b$} and Java 8.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Evil Regular Expressions}
\begin{itemize}
\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\bigskip
\item Evil regular expressions\medskip
\begin{itemize}
\item \bl{$a^{?\{n\}} \cdot a^{\{n\}}$}
\item \bl{$(a^*)^*$}
\item \bl{$([a$\,-\,$z]^+)^*$}
\item \bl{$(a + a \cdot a)^*$}
\item \bl{$(a + a^?)^*$}
\end{itemize}
\item sometimes also called \alert{catastrophic backtracking}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Languages}
\begin{itemize}
\item A \alert{\bf Language} is a set of strings, for example\medskip
\begin{center}
\bl{$\{[], hello, \textit{foobar}, a, abc\}$}
\end{center}\bigskip
\item \alert{\bf Concatenation} of strings and languages
\begin{center}
\begin{tabular}{rcl}
\bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
\bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
\end{tabular}
\end{center}
\bigskip
\small
\item [] For example \bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$}
\[
\bl{A \,@\, B = \{fooa, foob, bara, barb\}}
\]
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Power Operation}
\begin{itemize}
\item The \alert{\textbf{\boldmath$n$th Power}} of a language:
\begin{center}
\begin{tabular}{lcl}
\bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
\bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
\end{tabular}
\end{center}\bigskip
\item[] For example
\begin{center}
\begin{tabular}{lcl@{\hspace{10mm}}l}
\bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
\bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
\bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Homework Question}
\begin{itemize}
\item Say \bl{$A = \{[a],[b],[c],[d]\}$}.\bigskip
\begin{center}
How many strings are in \bl{$A^4$}?
\end{center}\bigskip\medskip\pause
\begin{center}
\begin{tabular}{l}
What if \bl{$A = \{[a],[b],[c],[]\}$};\\
how many strings are then in \bl{$A^4$}?
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Star Operation}
\begin{itemize}
\item The \alert{\bf Kleene Star} of a \underline{language}:
\bigskip
\begin{center}
\begin{tabular}{c}
\bl{$A\star \dn \bigcup_{0\le n} A^n$}
\end{tabular}
\end{center}\bigskip
\item[] This expands to
\[
\bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots}
\]
or
\small
\[
\bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\;
A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots}
\]
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{
The Meaning of a\\[-2mm]
Regular Expression}
\begin{textblock}{15}(1,4)
\begin{tabular}{rcl}
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
\bl{$L(r^*)$} & \bl{$\dn$} & \bl{$(L(r))\star \quad\dn \bigcup_{0 \le n} L(r)^n$}\\
\end{tabular}
\end{textblock}
\begin{textblock}{9}(6,12)\small
\bl{$L$} is a function from regular expressions to
sets of strings (languages):\smallskip\\
\bl{$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Semantic Derivative\\[5mm]}
\begin{itemize}
\item The \alert{\bf Semantic Derivative} of a
\underline{language}\\ w.r.t.~to a character \bl{$c$}:
\bigskip
\begin{center}
\bl{$\Der\,c\,A \dn \{ s \;|\; c\!::\!s \in A\}$ }
\end{center}\bigskip
For \bl{$A = \{\textit{foo}, \textit{bar}, \textit{frak}\}$} then
\begin{center}
\bl{\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l}
$\Der\,f\,A$ & $=$ & $\{\textit{oo}, \textit{rak}\}$\\
$\Der\,b\,A$ & $=$ & $\{\textit{ar}\}$\\
$\Der\,a\,A$ & $=$ & $\{\}$\pause
\end{tabular}}
\end{center}
\small
We can extend this definition to strings
\[
\bl{\Ders\,s\,A = \{s'\;|\;s\,@\,s' \in A\}}
\]
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Specification\\ for Matching}
\begin{bubble}[10cm]
\large
A regular expression \bl{$r$} matches a string~\bl{$s$}
provided
\begin{center}
\bl{$s \in L(r)$}\\
\end{center}
\end{bubble}\bigskip\bigskip
\ldots and the point of the this lecture is
to decide this problem as fast as possible
(unlike Python, Ruby, Java etc)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Regular Expressions}
Their inductive definition:
\begin{textblock}{6}(2,7.5)
\begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
\bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\
& \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} / $[]$\\
& \bl{$\mid$} & \bl{$c$} & single character\\
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
& \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\
& \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\
\end{tabular}
\end{textblock}
\only<2->{\footnotesize
\begin{textblock}{9}(2,0.5)
\begin{bubble}[9.8cm]
\lstinputlisting{../progs/app01.scala}
\end{bubble}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{
When Are Two Regular\\[-1mm]
Expressions Equivalent?}
\large
\begin{center}
\bl{$r_1 \equiv r_2 \;\;\dn\;\; L(r_1) = L(r_2)$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Concrete Equivalences}
\begin{center}
\bl{\begin{tabular}{rcl}
$(a + b) + c$ & $\equiv$ & $a + (b + c)$\\
$a + a$ & $\equiv$ & $a$\\
$a + b$ & $\equiv$ & $b + a$\\
$(a \cdot b) \cdot c$ & $\equiv$ & $a \cdot (b \cdot c)$\\
$c \cdot (a + b)$ & $\equiv$ & $(c \cdot a) + (c \cdot b)$\bigskip\bigskip\\\pause
$a \cdot a$ & $\not\equiv$ & $a$\\
$a + (b \cdot c)$ & $\not\equiv$ & $(a + b) \cdot (a + c)$\\
\end{tabular}}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Corner Cases}
\begin{center}
\bl{\begin{tabular}{rcl}
$a \cdot \ZERO$ & $\not\equiv$ & $a$\\
$a + \ONE$ & $\not\equiv$ & $a$\\
$\ONE$ & $\equiv$ & $\ZERO^*$\\
$\ONE^*$ & $\equiv$ & $\ONE$\\
$\ZERO^*$ & $\not\equiv$ & $\ZERO$\\
\end{tabular}}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Simplification Rules}
\begin{center}
\bl{\begin{tabular}{rcl}
$r + \ZERO$ & $\equiv$ & $r$\\
$\ZERO + r$ & $\equiv$ & $r$\\
$r \cdot \ONE$ & $\equiv$ & $r$\\
$\ONE \cdot r$ & $\equiv$ & $r$\\
$r \cdot \ZERO$ & $\equiv$ & $\ZERO$\\
$\ZERO \cdot r$ & $\equiv$ & $\ZERO$\\
$r + r$ & $\equiv$ & $r$
\end{tabular}}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{A Matching Algorithm (1)}
\ldots{}whether a regular expression can match the empty string:
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
\bl{$nullable(\ZERO)$} & \bl{$\dn$} & \bl{\textit{false}}\\
\bl{$nullable(\ONE)$} & \bl{$\dn$} & \bl{\textit{true}}\\
\bl{$nullable (c)$} & \bl{$\dn$} & \bl{\textit{false}}\\
\bl{$nullable (r_1 + r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\
\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\
\bl{$nullable (r^*)$} & \bl{$\dn$} & \bl{\textit{true}}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Derivative of a Rexp}
\large
If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular
expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip
\small
\bl{$\der\,c\,r$} gives the answer, Brzozowski 1964
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Derivative of a Rexp}
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
\bl{$\der\, c\, (\ZERO)$} & \bl{$\dn$} & \bl{$\ZERO$} & \\
\bl{$\der\, c\, (\ONE)$} & \bl{$\dn$} & \bl{$\ZERO$} & \\
\bl{$\der\, c\, (d)$} & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\
\bl{$\der\, c\, (r_1 + r_2)$} & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\
\bl{$\der\, c\, (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{if $nullable (r_1)$}\\
& & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\
& & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\
\bl{$\der\, c\, (r^*)$} & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\bigskip\\\pause
\bl{$\ders\, []\, r$} & \bl{$\dn$} & \bl{$r$} & \\
\bl{$\ders\, (c\!::\!s)\, r$} & \bl{$\dn$} & \bl{$\ders\,s\,(\der\,c\,r)$} & \\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Examples}
Given \bl{$r \dn ((a \cdot b) + b)^*$} what is
\begin{center}
\begin{tabular}{l}
\bl{$\der\,a\,r =\;?$}\\
\bl{$\der\,b\,r =\;?$}\\
\bl{$\der\,c\,r =\;?$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\mbox{The Brzozowski Algorithm}}
\begin{center}
\begin{tabular}{l}
\bl{$\textit{matches}\,r\,s \dn \textit{nullable}(\ders\;s\;r)$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Brzozowski: An Example}
Does \bl{$r_1$} match \bl{$abc$}?
\begin{center}
\begin{tabular}{@{}rl@{}l@{}}
Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\
Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\
Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\
Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\
& test whether \bl{$r_4$} can recognise\\
& the empty string\medskip\\
Output: & result of the test\\
& $\Rightarrow \bl{\textit{true}} \,\text{or}\,
\bl{\textit{false}}$\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Idea of the Algorithm}
If we want to recognise the string \bl{$abc$} with regular
expression \bl{$r_1$} then\medskip
\begin{enumerate}
\item \bl{$\Der\,a\,(L(r_1))$}\pause
\item \bl{$\Der\,b\,(\Der\,a\,(L(r_1)))$}\pause
\item \bl{$\Der\,c\,(\Der\,b\,(\Der\,a\,(L(r_1))))$}\bigskip
\item finally we test whether the empty string is in this
set; same for \bl{$\Ders\,abc\,(L(r_1))$}.\medskip
\end{enumerate}
The matching algorithm works similarly, just over regular expressions instead of sets.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Oops\ldots \;\bl{$a^{?\{n\}} \cdot a^{\{n\}}$}}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=31,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=7cm,
height=7cm,
legend entries={Python,Ruby,Scala V1},
legend pos=outer north east,
legend cell align=left
]
\addplot[blue,mark=*, mark options={fill=white}]
table {re-python.data};
\addplot[brown,mark=pentagon*, mark options={fill=white}]
table {re-ruby.data};
\addplot[red,mark=triangle*,mark options={fill=white}]
table {re1.data};
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{A Problem\medskip}
We represented the ``n-times'' \bl{$a^{\{n\}}$} as a
sequence regular expression:
\begin{center}
\begin{tabular}{rl}
1: & \bl{$a$}\\
2: & \bl{$a\cdot a$}\\
3: & \bl{$a\cdot a\cdot a$}\\
& \ldots\\
13: & \bl{$a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a\cdot a$}\\
& \ldots\\
20:
\end{tabular}
\end{center}
This problem is aggravated with \bl{$a^?$} being represented
as \bl{$a + \ONE$}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Solving the Problem}
What happens if we extend our regular expressions with explicit
constructors
\begin{center}
\begin{tabular}{rcl}
\bl{$r$} & \bl{$::=$} & \bl{\ldots}\\
& \bl{$\mid$} & \bl{$r^{\{n\}}$}\\
& \bl{$\mid$} & \bl{$r^?$}
\end{tabular}
\end{center}
What is their meaning?\\
What are the cases for \bl{$nullable$} and \bl{$\der$}?
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Brzozowski: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$}}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,200,...,1100},
xmax=1200,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=8cm,
height=6.5cm,
legend entries={Python,Ruby,Scala V1,Scala V2},
legend pos=outer north east,
legend cell align=left
]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};
\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Examples}
Recall the example of \bl{$r \dn ((a \cdot b) + b)^*$} with
\begin{center}
\begin{tabular}{l}
\bl{$\der\,a\,r = ((\ONE \cdot b) + \ZERO) \cdot r$}\\
\bl{$\der\,b\,r = ((\ZERO \cdot b) + \ONE)\cdot r$}\\
\bl{$\der\,c\,r = ((\ZERO \cdot b) + \ZERO)\cdot r$}
\end{tabular}
\end{center}
What are these regular expressions equivalent to?
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Simplification Rules}
\begin{center}
\bl{\begin{tabular}{rcl}
$r + \ZERO$ & $\Rightarrow$ & $r$\\
$\ZERO + r$ & $\Rightarrow$ & $r$\\
$r \cdot \ONE$ & $\Rightarrow$ & $r$\\
$\ONE \cdot r$ & $\Rightarrow$ & $r$\\
$r \cdot \ZERO$ & $\Rightarrow$ & $\ZERO$\\
$\ZERO \cdot r$ & $\Rightarrow$ & $\ZERO$\\
$r + r$ & $\Rightarrow$ & $r$
\end{tabular}}
\end{center}
\footnotesize
\lstinputlisting[firstline=22,lastline=26]{../progs/app6.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\footnotesize
\lstinputlisting[firstline=1,lastline=21]{../progs/app6.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Brzozowski: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$}}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5000,...,10000},
xmax=11000,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=9cm,
height=7cm,
legend entries={Scala V2,Scala V3},
legend pos=north east
]
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{
Another Example\\[-1mm]
in Java \liningnums{8} and Python}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
ymax=35,
ytick={0,10,...,30},
scaled ticks=false,
axis lines=left,
width=9cm,
height=5cm,
legend entries={Java 8, Python},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
\end{axis}
\end{tikzpicture}
\end{center}
Regex: \bl{$(a^*)^* \cdot b$}
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Same Example in Java \liningnums{9}+}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.09,-0.15)}},
ylabel={time in secs},
scaled x ticks=false,
enlargelimits=false,
xtick distance=10000,
xmax=44000,
ytick={0,10,...,30},
ymax=35,
axis lines=left,
width=9cm,
height=5cm,
legend entries={Java \liningnums{9}+},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
\end{axis}
\end{tikzpicture}
\end{center}
Regex: \bl{$(a^*)^* \cdot b$}
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{and with Brzozowski}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.09,0.0)}},
ylabel={time in secs},
scaled x ticks=false,
xtick distance=2000000,
enlargelimits=false,
xmax=6400000,
ytick={0,10,...,30},
ymax=35,
axis lines=left,
width=9cm,
height=5cm,
legend entries={Scala V3},
legend pos=north west,
legend cell align=left]
%\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data};
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
\end{axis}
\end{tikzpicture}
\end{center}
Regex: \bl{$(a^*)^* \cdot b$}
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{What is good about this Alg.}
\begin{itemize}
\item extends to most regular expressions, for example
\bl{$\sim r$}
\item is easy to implement in a functional language
\item the algorithm is already quite old; there is still
work to be done to use it as a tokenizer (that is relatively new work)
\item we can prove its correctness\ldots
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Proofs about Rexps}
Remember their inductive definition:\\[5cm]
\begin{textblock}{6}(5,5)
\begin{tabular}{@ {}rrl}
\bl{$r$} & \bl{$::=$} & \bl{$\ZERO$}\\
& \bl{$\mid$} & \bl{$\ONE$} \\
& \bl{$\mid$} & \bl{$c$} \\
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
& \bl{$\mid$} & \bl{$r_1 + r_2$} \\
& \bl{$\mid$} & \bl{$r^*$} \\
\end{tabular}
\end{textblock}
If we want to prove something, say a property \bl{$P(r)$}, for all regular expressions \bl{$r$} then \ldots
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Proofs about Rexp (2)}
\begin{itemize}
\item \bl{$P$} holds for \bl{$\ZERO$}, \bl{$\ONE$} and \bl{c}\bigskip
\item \bl{$P$} holds for \bl{$r_1 + r_2$} under the assumption that \bl{$P$} already
holds for \bl{$r_1$} and \bl{$r_2$}.\bigskip
\item \bl{$P$} holds for \bl{$r_1 \cdot r_2$} under the assumption that \bl{$P$} already
holds for \bl{$r_1$} and \bl{$r_2$}.\bigskip
\item \bl{$P$} holds for \bl{$r^*$} under the assumption that \bl{$P$} already
holds for \bl{$r$}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Proofs about Rexp (3)}
Assume \bl{$P(r)$} is the property:
\begin{center}
\bl{$nullable(r)$} if and only if \bl{$[] \in L(r)$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Proofs about Rexp (4)}
\begin{center}
\bl{\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
$rev(\ZERO)$ & $\dn$ & $\ZERO$\\
$rev(\ONE)$ & $\dn$ & $\ONE$\\
$rev(c)$ & $\dn$ & $c$\\
$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\
$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\
$rev(r^*)$ & $\dn$ & $rev(r)^*$\\
\end{tabular}}
\end{center}
We can prove
\begin{center}
\bl{$L(rev(r)) = \{s^{-1} \;|\; s \in L(r)\}$}
\end{center}
by induction on \bl{$r$}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{c}
Correctness Proof\\[-1mm] for our Matcher\end{tabular}}
\begin{itemize}
\item We started from
\begin{center}
\begin{tabular}{rp{4cm}}
& \bl{$s \in L(r)$}\medskip\\
\bl{$\Leftrightarrow$} & \bl{$[] \in \Ders\,s\,(L(r))$}\pause
\end{tabular}
\end{center}
\item if we can show \bl{$\Ders\, s\,(L(r)) = L(\ders\,s\,r)$} we
have
\begin{center}
\begin{tabular}{rp{4cm}}
\bl{$\Leftrightarrow$} & \bl{$[] \in L(\ders\,s\,r)$}\medskip\\
\bl{$\Leftrightarrow$} & \bl{$nullable(\ders\,s\,r)$}\medskip\\
\bl{$\dn$} & \bl{$matches\,s\,r$}
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Proofs about Rexp (5)}
Let \bl{$Der\,c\,A$} be the set defined as
\begin{center}
\bl{$\Der\,c\,A \dn \{ s \;|\; c\!::\!s \in A\}$ }
\end{center}
We can prove
\begin{center}
\bl{$L(\der\,c\,r) = \Der\,c\,(L(r))$}
\end{center}
by induction on \bl{$r$}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Proofs about Strings}
If we want to prove something, say a property \bl{$P(s)$}, for all strings \bl{$s$} then \ldots\bigskip
\begin{itemize}
\item \bl{$P$} holds for the empty string, and\medskip
\item \bl{$P$} holds for the string \bl{$c\!::\!s$} under the assumption that \bl{$P$}
already holds for \bl{$s$}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Proofs about Strings (2)}
We can then prove
\begin{center}
\bl{$\Ders\,s\,(L(r)) = L(\ders\,s\,r)$}
\end{center}
We can finally prove
\begin{center}
\bl{$matches\,s\,r$} if and only if \bl{$s \in L(r)$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Epilogue}
\footnotesize
\begin{center}
\begin{tabular}{cc}
\begin{tikzpicture}
\begin{axis}[
title={\small{}Graph: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$}},
xlabel={$n$},
x label style={at={(1.04,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xmax=7100000,
ytick={0,5,...,30},
ymax=33,
%scaled ticks=false,
axis lines=left,
width=5cm,
height=5cm,
legend entries={Scala V3, Scala V4},
legend style={at={(0.1,-0.2)},anchor=north}]
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
\addplot[purple,mark=square*,mark options={fill=white}] table {re4.data};
\end{axis}
\end{tikzpicture}
&
\begin{tikzpicture}
\begin{axis}[
title={\small{}Graph: \bl{$(a^*)^* \cdot b$}},
xlabel={$n$},
x label style={at={(1.09,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xmax=8100000,
ytick={0,5,...,30},
ymax=33,
%scaled ticks=false,
axis lines=left,
width=5cm,
height=5cm,
legend entries={Scala V3, Scala V4},
legend style={at={(0.1,-0.2)},anchor=north}]
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
\addplot[purple,mark=square*,mark options={fill=white}] table {re4a.data};
\end{axis}
\end{tikzpicture}
\end{tabular}
\end{center}
\only<2->{\footnotesize
\begin{textblock}{9}(0.5,8)
\begin{bubble}[11.8cm]
\lstinputlisting[numbers=none]{../progs/app52.scala}
\end{bubble}
\end{textblock}}
\end{frame}
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: