% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\usepackage{tcolorbox}
\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
\hfuzz=220pt
\lstset{language=Scala,
style=mystyle,
numbersep=0pt,
numbers=none,
xleftmargin=0mm}
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
% beamer stuff
\renewcommand{\slidecaption}{CFL 01, King's College London}
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect1/main.pdf
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect2/main.pdf
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect3/main.pdf
\begin{document}
%\begin{frame}[t]
%
%\begin{mybox}
%A physical explanation the \emph{dynamic matrix}\\
%lots of text
%\end{mybox}
%\begin{mybox2}{Test}
%A physical explanation the \emph{dynamic matrix}\\
%lots of text
%\end{mybox2}
%\begin{mybox3}{Test}
%A physical explanation the \emph{dynamic matrix}\\
%lots of text
%\end{mybox3}
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
\begin{tabular}{@ {}c@ {}}
\\[-3mm]
\LARGE Compilers and \\[-1mm]
\LARGE Formal Languages\\[-3mm]
\end{tabular}}
\begin{center}
%\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
%\includegraphics[scale=0.31]{pics/ante2.jpg}\\
%\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)}
\end{center}
\normalsize
\begin{center}
\begin{tabular}{ll}
Email: & christian.urban at kcl.ac.uk\\
%Office Hours: & Thursdays 12 -- 14\\
%Location: & N7.07 (North Wing, Bush House)\\
Slides \& Progs: & KEATS\\
\end{tabular}
\end{center}
\begin{center}
\begin{tikzpicture}
\node[drop shadow,fill=white,inner sep=0pt]
{\footnotesize\rowcolors{1}{capri!10}{white}
\begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline
\cellcolor{blue!50}
1 Introduction, Languages & 6 While-Language \\
2 Regular Expressions, Derivatives & 7 Compilation, JVM \\
3 Automata, Regular Languages & 8 Compiling Functional Languages \\
4 Lexing, Tokenising & 9 Optimisations \\
5 Grammars, Parsing & 10 LLVM \\ \hline
\end{tabular}%
};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}<1-12>[c]
\frametitle{The Goal of this Module\ldots}
\begin{center}
\begin{tikzpicture}[scale=1,
node/.style={
rectangle,rounded corners=3mm,
very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
top color=white,bottom color=black!20,drop shadow}]
\node at (3.05, 1.8) {\Large\bf \ldots{} you write a compiler};
\node (0) at (-2.3,0) {};
\node [above=5mm of 0]
{\makebox[0mm]{\footnotesize
\begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}};
\node (A) at (0,0) [node] {};
\node [below right] at (A.north west) {lexer};
\node (B) at (3,0) [node] {};
\node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
\node (C) at (6,0) [node] {};
\node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
\node (1) at (8.4,0) {};
\node [above=5mm of 1]
{\makebox[0mm]{\footnotesize
\begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};
\draw [->,line width=4mm] (0) -- (A);
\draw [->,line width=4mm] (A) -- (B);
\draw [->,line width=4mm] (B) -- (C);
\draw [->,line width=4mm] (C) -- (1);
\end{tikzpicture}
\end{center}
\only<2,3,4>{
\begin{textblock}{1}(1,2.1)
\begin{bubble}[9.8cm]
\normalsize
lexer input: a string\smallskip\\
\hspace{5mm}\code{"read(n);"}\medskip\\
lexer output: a sequence of tokens\smallskip\\
\hspace{5mm}\code{key(read) lpar id(n) rpar semi}
\end{bubble}
\end{textblock}}
\only<3,4>{
\begin{textblock}{1}(6,7.8)
\begin{tabular}{c}
\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm]
\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)
\end{tabular}
\end{textblock}}
\only<4>{
\begin{textblock}{1}(0.5,12)\small
\begin{tabular}{l@{}c@{}l}
\pcode{if} & $\;\Rightarrow\;$ & keyword\\
\pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\
\end{tabular}
\end{textblock}}
\only<6>{
\begin{textblock}{1}(1,1.5)
\begin{bubble}[8.5cm]
\normalsize
parser input: a sequence of tokens\smallskip\\
{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\
parser output: an abstract syntax tree\smallskip\\
\footnotesize
\hspace{2cm}\begin{tikzpicture}
\node {\code{read}}
child {node {\code{lpar}}}
child {node {\code{n}}}
child {node {\code{rpar}}};
\end{tikzpicture}
\end{bubble}
\end{textblock}}
\only<8,9>{
\begin{textblock}{1}(1,1.5)
\begin{bubble}[4cm]
\normalsize
code generation:\smallskip\\
\hspace{5mm}\code{istore 2}\\
\hspace{5mm}\code{iload 2}\\
\hspace{5mm}\code{ldc 10}\\
\hspace{5mm}\code{isub}\\
\hspace{5mm}\code{ifeq Label2}\\
\hspace{5mm}\code{iload 2}\\
\hspace{5mm}\code{...}\\
\end{bubble}
\end{textblock}}
\only<9>{
\begin{textblock}{6}(8.4,7)
\begin{bubble}[5cm]
\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]
\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs,
xlabel=n,
enlargelimits=0.05,
ybar interval=0.7, legend style=small]
\addplot file {interpreted2.data};
\addplot file {compiled2.data};
%\legend{interpreted, compiled}
\end{axis}
\end{tikzpicture}}
\end{bubble}
\end{textblock}}
\only<10>{
\begin{textblock}{6}(1,3)
\begin{bubble}[11cm]
Compiler explorers, e.g.: \url{https://gcc.godbolt.org} \;\video{https://youtu.be/ysaBmhMEyUg}
\begin{tikzpicture}[]
\node (0) at (-2.3,0) {\includegraphics[scale=0.3]{pics/csource.png}};
\node (1) [right=35mm] at (0) {\includegraphics[scale=0.3]{pics/cassmbl.png}};
\draw [->,line width=4mm, red] (0) -- (1);
\node (2) [below=20mm] at (0) {\LARGE\bf source};
\node (3) [right=40mm] at (2) {\LARGE\bf binary};
\draw [->,line width=1mm] (2) -- (3);
\end{tikzpicture}
\end{bubble}
\end{textblock}}
\only<11>{
\begin{textblock}{6}(1,3)
\begin{bubble}[11cm]
Compiler explorer for Java: \url{https://javap.yawk.at}
\begin{tikzpicture}[]
\node (0) at (-2.3,0) {\includegraphics[scale=0.4]{pics/jsource.png}};
\node (1) [right=35mm] at (0) {\includegraphics[scale=0.4]{pics/jassmbl.png}};
\draw [->,line width=4mm, red] (0) -- (1);
\node (2) [below=20mm] at (0) {\LARGE\bf source};
\node (3) [right=40mm] at (2) {\LARGE\bf byte code};
\draw [->,line width=1mm] (2) -- (3);
\end{tikzpicture}
\end{bubble}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Why Study Compilers?}
John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}
\here{https://blog.regehr.org/archives/1419}
\smallskip\\
\begin{bubble}[10.5cm]
\bf ``\ldots{}It’s effectively a perpetual
employment act for solid compiler hackers.''
\end{bubble}
\onslide<1->{
\only<2>{
\begin{itemize}
\item {\bf Hardware is getting weirder
rather than getting clocked faster.}
\begin{itemize}
\item[] ``Almost all processors are multicores nowadays and it looks
like there is increasing asymmetry in resources across cores.
Processors come with vector units, crypto accelerators etc. We have
DSPs, GPUs, ARM big.little, and Xeon Phi. This is only scratching the
surface.''
\end{itemize}
\end{itemize}}
\only<3>{
\begin{itemize}
\item {\bf We’re getting tired of low-level languages and
their associated security disasters.}
\begin{itemize}
\item [] ``We want to write new code, to whatever extent possible, in
safer, higher-level languages. Compilers are caught right in the
middle of these opposing trends: one of their main jobs is to help
bridge the large and growing gap between increasingly high-level
languages and increasingly wacky platforms.''
\end{itemize}
\end{itemize}}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Why Bother with Compilers?}
\textbf{Boeing 777's}: First flight in 1994. They want to achieve
triple redundancy for potential hardware faults.
\here{http://www.citemaster.net/get/db3a81c6-548e-11e5-9d2e-00163e009cc7/R8.pdf}\bigskip
They compile 1 Ada program to\medskip
\begin{itemize}
\item Intel 80486
\item Motorola 68040 (old Macintosh's)
\item AMD 29050 (RISC chips used often in laser printers)
\end{itemize}\medskip\medskip
using 3 independent compilers.\bigskip\pause
\small Airbus uses C and static analysers. Recently started using CompCert.
\only<1->{%
\begin{textblock}{6}(8,4.5)
\includegraphics[scale=0.28]{../pics/777.png}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{What Do Compilers Do?}
Remember BF*** from PEP?
\begin{center}
\begin{tabular}{rcl}
\bl{\texttt{>}} & $\Rightarrow$ & move one cell right\\
\bl{\texttt{<}} & $\Rightarrow$ & move one cell left\\
\bl{\texttt{+}} & $\Rightarrow$ & increase cell by one\\
\bl{\texttt{-}} & $\Rightarrow$ & decrease cell by one\\
\bl{\texttt{.}} & $\Rightarrow$ & print current cell\\
\bl{\texttt{,}} & $\Rightarrow$ & input current cell\\
\bl{\texttt{[}} & $\Rightarrow$ & loop begin\\
\bl{\texttt{]}} & $\Rightarrow$ & loop end\medskip\\
& $\Rightarrow$ & everything else is a comment\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A ``Compiler'' for BF*** to C}
\begin{center}
\begin{tabular}{rcl}
\bl{\texttt{>}} & $\Rightarrow$ & \texttt{ptr++}\\
\bl{\texttt{<}} & $\Rightarrow$ & \texttt{ptr--}\\
\bl{\texttt{+}} & $\Rightarrow$ & \texttt{(*ptr)++}\\
\bl{\texttt{-}} & $\Rightarrow$ & \texttt{(*ptr)--}\\
\bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
\bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
\bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
\bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
& $\Rightarrow$ & ignore everything else\\
\end{tabular}
\end{center}\bigskip
\texttt{char field[30000]\\ char *ptr = \&field[15000]}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Another~``Compiler''~for~BF~to~C}
\begin{center}
\begin{tabular}{rcl}
\bl{\texttt{>\ldots>}} & $\Rightarrow$ & \texttt{ptr += n}\\
\bl{\texttt{<\ldots<}} & $\Rightarrow$ & \texttt{ptr -= n}\\
\bl{\texttt{+\ldots+}} & $\Rightarrow$ & \texttt{(*ptr) += n}\\
\bl{\texttt{-\ldots-}} & $\Rightarrow$ & \texttt{(*ptr) -= n}\\
\bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
\bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
\bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
\bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
& $\Rightarrow$ & ignore everything else\\
\end{tabular}
\end{center}\bigskip
\texttt{char field[30000]\\ char *ptr = \&field[15000]}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{A Brief Compiler History}
\bigskip
\begin{itemize}
\item Turing Machines, 1936 (a tape as memory)
\item Regular Expressions, 1956\\
\item The first compiler for COBOL, 1957\\ (Grace Hopper)\medskip
\item But surprisingly research papers are still published nowadays\\
\item ``Parsing: The Solved Problem That Isn't''
\here{https://tratt.net/laurie/blog/entries/parsing_the_solved_problem_that_isnt.html}
\end{itemize}
\begin{textblock}{8.5}(5,7.6)
\begin{flushright}
\includegraphics[scale=0.3]{pics/hopper.jpg}\\
\footnotesize\textcolor{gray}{Grace Hopper}\smallskip\\
{\small\textcolor{gray}{(she made it to David Letterman's Tonight Show
\here{https://youtu.be/3N_ywhx6_K0?t=31})}}
\end{flushright}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Some Housekeeping}
\textbf{Exam will be online:}\bigskip
\begin{itemize}
\item final exam in January (35\%)
\item five CWs (65\%)
\end{itemize}\bigskip\bigskip\pause
\textbf{Weekly Homework (optional):}
\begin{itemize}
\item uploaded on KEATS, send answers via email, (try to!) respond individually
\item \alert{\bf all} questions in the exam will be from the HWs!!
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Some Housekeeping}
\textbf{Coursework (5 accounting for 65\%):}\bigskip
\begin{itemize}
\item matcher (5\%)
\item lexer (10\%)
\item parser / interpreter (10\%)
\item JVM compiler (15\%)
\item LLVM compiler (25\%)
\end{itemize}\bigskip\pause
you can use \alert{any} programming language you like (Haskell, Rust)\\\pause
you can use any code I show you and is uploaded to KEATS\ldots\textbf{BUT NOTHING ELSE!}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lectures 1 - 5}
transforming strings into structured data\\[10mm]
{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
\hspace{5mm}(recognising ``words'')\\[6mm]
{\LARGE\bf Parsing}\medskip\\
\hspace{5mm}(recognising ``sentences'')
\begin{textblock}{1}(10,9.1)
\begin{tabular}{c}
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
\footnotesize Stone of Rosetta
\end{tabular}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lectures 1 - 5}
transforming strings into structured data\\[10mm]
{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
\hspace{5mm}(recognising ``words'')\\[6mm]
{\LARGE\bf Parsing}\medskip\\
\hspace{5mm}(recognising ``sentences'')
\begin{textblock}{1}(10,9.1)
\begin{tabular}{c}
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
\footnotesize Stone of Rosetta
\end{tabular}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lectures 5 - 10}
code generation for a small imperative and a small functional language\\[10mm]
{\LARGE\bf Interpreters}\medskip\\
\hspace{5mm}(directly runs a program)\\[6mm]
{\LARGE\bf Compilers}\medskip\\
\hspace{5mm}(generate JVM code and LLVM-IR code)
\begin{textblock}{1}(8.8,8.1)
\begin{tabular}{c@{}c}
\includegraphics[scale=0.4]{../pics/javaduke.png} &
\includegraphics[scale=0.23]{../pics/llvmlogo.png}
\end{tabular}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Familiar Regular Expresssions}
\small
\begin{center}
\texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
\end{center}\smallskip
\begin{center}
\begin{tabular}{@{}lp{8.5cm}@{}}
\pcode{re*} & matches 0 or more times\\
\pcode{re+} & matches 1 or more times\\
\pcode{re?} & matches 0 or 1 times\\
\pcode{re\{n\}} & matches exactly \pcode{n} number of times\\
\pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
\pcode{[...]} & matches any single character inside the brackets\\
\pcode{[^...]} & matches any single character not inside the
brackets\\
\pcode{a-z A-Z} & character ranges\\
\pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
\pcode{.} & matches every character except newline\\
\pcode{(re)} & groups regular expressions and remembers
the matched text
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Some ``innocent'' examples}
Let's try two examples
\begin{center}
\bl{\texttt{(a*)*b}}
\hspace{2cm}
\bl{\texttt{[a?]\{n\}[a]\{n\}}}
\end{center}\bigskip\pause
and match them with strings of the form
\begin{center}
\bl{\texttt{a}},
\bl{\texttt{aa}},
\bl{\texttt{aaa}},
\bl{\texttt{aaaa}},
\bl{\texttt{aaaaa}},
\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Why Bother with Regexes?}
\begin{columns}[t,onlytextwidth]
\begin{column}{1.8cm}
\mbox{}
\end{column}
\begin{column}{.5\textwidth}
\small{}Ruby, Python, Java 8\medskip\\
\begin{tikzpicture}\footnotesize
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=\textwidth,
height=4cm,
legend entries={Python,Ruby},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
\end{axis}
\end{tikzpicture}
\begin{tikzpicture}\footnotesize
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=\textwidth,
height=4cm,
legend entries={Python, Java 8, JavaScript, Swift},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
\end{axis}
\end{tikzpicture}
%
\end{column}
\begin{column}{.5\textwidth}
\small{}Us (after next lecture)\medskip\\
\begin{tikzpicture}\footnotesize
\begin{axis}[
xlabel={$n$},
x label style={at={(1.07,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5000,...,10000},
xmax=11000,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=\textwidth,
height=4cm]
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
\end{axis}
\end{tikzpicture}
\begin{tikzpicture}\footnotesize
\begin{axis}[
xlabel={$n$},
x label style={at={(1.07,0.0)}},
ylabel={time in secs},
enlargelimits=false,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=\textwidth,
height=4cm]
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
\end{axis}
\end{tikzpicture}
\end{column}
\end{columns}
\medskip
\begin{textblock}{3}(-0.1,3.3)
\small\hfill\bl{\texttt{[a?]\{n\}[a]\{n\}}}:
\end{textblock}
\begin{textblock}{3}(-0.1,8.7)
\small\hfill\bl{\texttt{(a*)*b}}:
\end{textblock}
\begin{textblock}{3}(0.3,13)
\small{}matching with strings
\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]
\frametitle{Incidents}
\begin{itemize}
\item a global outage on 2 July 2019 at \textbf{Cloudflare}
(first one for six years)\medskip
\begin{center}\small\color{blue}
\begin{verbatim}
(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
|-|~|!|{}|\|\||\+)*.*(?:.*=.*)))
\end{verbatim}
\end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip
\item on 20 July 2016 the \textbf{Stack Exchange} webpage went down
because of an evil regular expression
\here{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
\end{itemize}
\begin{textblock}{6}(6,7.6)
\includegraphics[scale=0.14]{../pics/cloudflare.png}\\
\footnotesize
It serves more web traffic than Twitter, Amazon, Apple,
Instagram, Bing \& Wikipedia combined.
\here{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Evil Regular Expressions}
\begin{itemize}
\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
\item Some evil regular expressions:\medskip
\begin{itemize}
\item \bl{\texttt{[a?]\{n\}\;[a]\{n\}}}
\item \bl{\texttt{(a*)*\;b}}
\item \bl{\texttt{([a-z]+)*}}
\item \bl{\texttt{(a + aa)*}}
\item \bl{\texttt{(a + a?)*}}
\end{itemize}
\item sometimes also called \alert{catastrophic backtracking}
\item this is a problem for \alert{N}etwork \alert{I}ntrusion
\alert{D}etection systems, Cloudflare, StackExchange, Atom editor
\item \url{https://vimeo.com/112065252}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Today}
%
%\begin{itemize}
%\item While the ultimate goal is to implement a small compiler for the JVM
% \ldots\bigskip
%\end{itemize}
%
%Let's start with:
%
%\begin{itemize}
%\item a web-crawler
%\item an email harvester
%\item \textcolor{gray}{(a web-scraper)}
%\end{itemize}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{A Web-Crawler}
%
%\mbox{}\\[10mm]
%
%\begin{enumerate}
%\item given an URL, read the corresponding webpage
%\item extract all links from it
%\item call the web-crawler again for all these links
%\end{enumerate}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{A Web-Crawler}
%
%\mbox{}\\[10mm]
%
%
%\begin{enumerate}
%\item given an URL, read the corresponding webpage
%\item if not possible print, out a problem
%\item if possible, extract all links from it
%\item call the web-crawler again for all these links
%\end{enumerate}\bigskip\pause
%
%\small (we need a bound for the number of recursive calls)
%
%\small (the purpose is to check all links on my own webpage)
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%
%\begin{textblock}{1}(2,5)
%\begin{tabular}{c}
%\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm]
%\small Server
%\end{tabular}
%\end{textblock}
%
%\begin{textblock}{1}(5.6,4)
% \begin{tikzpicture}[scale=1.1]
% \draw[white] (0,1) node (X) {};
% \draw[white] (2,1) node (Y) {};
% \draw[white] (0,0) node (X1) {};
% \draw[white] (2,0) node (Y1) {};
% \draw[white] (0,-1) node (X2) {};
% \draw[white] (2,-1) node (Y2) {};
% \draw[red, <-, line width = 2mm] (X) -- (Y);
% \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};
% \draw[red, ->, line width = 2mm] (X1) -- (Y1);
% \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};
% \draw[red, <-, line width = 2mm] (X2) -- (Y2);
% \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};
% \end{tikzpicture}
%\end{textblock}
%
%
%\begin{textblock}{1}(9,5.5)
%\begin{tabular}{c}
%\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm]
%\small Browser
%\end{tabular}
%\end{textblock}
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Scala}
%
%\small A simple Scala function for reading webpages:
%\bigskip
%
%\footnotesize
%\lstinputlisting{../progs/app0.scala}
%\medskip\pause
%
%\lstinline{get_page("""https://nms.kcl.ac.uk/christian.urban/""")}
%\bigskip\medskip\pause
%
%
%\small A slightly more complicated version for handling errors:
%\smallskip
%
%\footnotesize
%\lstinputlisting[xleftmargin=-4mm]{../progs/app1.scala}
%
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{A Regular Expression}
%
%\begin{itemize}
%\item \ldots{} is a pattern or template for specifying strings
%\end{itemize}\bigskip
%
%\begin{center}
%\only<1>{\scode{"https?://[^"]*"}}%
%\only<2>{\scode{""""https?://[^"]*"""".r}}
%\end{center}\bigskip\bigskip
%
%matches for example\smallskip\\
%\hspace{2mm}\code{"http://www.foobar.com"}\\
%\hspace{2mm}\code{"https://www.tls.org"}\smallskip\\
%
%but not\smallskip\\
%\hspace{2mm}\code{"http://www."foo"bar.com"}\\
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Finding Operations in Scala}
%
%{\bf\code{rexp.findAllIn(string)}}\medskip
%
%returns a list of all (sub)strings that match the
%regular expression
%\bigskip\bigskip
%
%
%{\bf\code{rexp.findFirstIn(string)}}\medskip
%
%returns either
%
%\begin{itemize}
%\item \code{None} if no (sub)string matches or
%\item \code{Some(s)} with the first (sub)string
%\end{itemize}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%
%\footnotesize
%\lstinputlisting{../progs/app2.scala}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%
%\small
%A version that only crawls links in ``my'' domain:\bigskip
%
%\footnotesize
%\lstinputlisting{../progs/app3.scala}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\lstset{xleftmargin=-4mm}
%\small
%A little email harvester:
%
%\footnotesize
%\lstinputlisting{../progs/app4.scala}\bigskip
%
%\tiny
%\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{(Basic) Regular Expressions}
Their inductive definition:
\begin{textblock}{6}(2,7.5)
\begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
\bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\
& \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} / $[]$\\
& \bl{$\mid$} & \bl{$c$} & character\\
& \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
& \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\
\end{tabular}
\end{textblock}
\only<2->{\footnotesize
\begin{textblock}{9}(2,0.5)
\begin{bubble}[9.8cm]
\lstinputlisting{../progs/app01.scala}
\end{bubble}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{Regular Expressions}
%
%\small
%In Scala:\bigskip
%
%\footnotesize
%\lstinputlisting{../progs/app51.scala}
%
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Strings}
\ldots are lists of characters. For example \code{"hello"}
\begin{center}
\bl{$[h, e, l, l, o]$} or just \bl{$hello$}
\end{center}
the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\
the concatenation of two strings:
\begin{center}
\bl{$s_1 \,@\, s_2$}
\end{center}
\bl{\textit{foo $@$ bar = foobar}}\\
\bl{\textit{baz $@\, []$ = baz}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Languages, Strings}
\begin{itemize}
\item \alert{\bf Strings} are lists of characters, for example
\begin{center}
\bl{$[]$},\;\bl{$abc$} \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})
\end{center}\bigskip
\item A \alert{\bf language} is a set of strings, for example\medskip
\begin{center}
\bl{$\{[], hello, \textit{foobar}, a, abc\}$}
\end{center}\bigskip
\item \alert{\bf Concatenation} of strings and languages
\begin{center}
\begin{tabular}{rcl}
\bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
\bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
\end{tabular}
\end{center}
%\item The \alert{\bf meaning} of a regular expression is a set of
% strings, or language.
\end{itemize}
\only<2>{
\begin{textblock}{4}(10.5,8)
\small
Let
\bl{$A = \{foo, bar\}$} \bl{$B = \{a, b\}$}
\[
\bl{A \,@\, B = \{fooa, foob, bara, barb\}}
\]
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Corner Cases}
\Large
\begin{center}
\bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\
\bl{$A \,@\, \{\} = \;?$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Meaning of a Regex}
...all the strings a regular expression can match.
\begin{center}
\begin{tabular}{rcl}
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\
\bl{$L(r^*)$} & \bl{$\dn$} & \\
\end{tabular}
\end{center}
\begin{textblock}{14}(1.5,13.5)\small
\bl{$L$} is a function from regular expressions to
sets of strings (languages):\smallskip\\
\bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Power Operation}
\begin{itemize}
\item The \alert{\textbf{\boldmath$n$th Power}} of a language:
\begin{center}
\begin{tabular}{lcl}
\bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
\bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
\end{tabular}
\end{center}\bigskip
\item[] For example
\begin{center}
\begin{tabular}{lcl@{\hspace{10mm}}l}
\bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
\bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
\bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Meaning of a Regex}
\begin{textblock}{15}(1,4)
\begin{tabular}{rcl}
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
\bl{$L(r^*)$} & \bl{$\dn$} & \onslide<2->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\
\end{tabular}\bigskip
%\onslide<2->{
%\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
%\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
%\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
%}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Star Operation}
\begin{itemize}
\item The \alert{\bf Kleene Star} of a \underline{language}:
\bigskip
\begin{center}
\begin{tabular}{c}
\bl{$A\star \dn \bigcup_{0\le n} A^n$}
\end{tabular}
\end{center}\bigskip
\item[] This expands to
\[
\bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots}
\]
or
\small
\[
\bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\;
A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots}
\]
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Meaning of a Regex}
\begin{textblock}{15}(1,4)
\begin{tabular}{rcl}
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
\bl{$L(r^*)$} & \bl{$\dn$} & \bl{$(L(r))\star$}\\
\end{tabular}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Meaning of Matching}
\begin{bubble}[10cm]
\large\bf
A regular expression \bl{$r$} matches a string~\bl{$s$}
provided
\begin{center}
\bl{$s \in L(r)$}\\
\end{center}
\end{bubble}\bigskip\bigskip
\ldots and the point of the next lecture is
to decide this problem as fast as possible (unlike Python,
Ruby, Java)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Questions}
\begin{itemize}
\item Say \bl{$A = \{[a],[b],[c],[d]\}$}.\bigskip
\item[]
How many strings are in \bl{$A^4$}\,?
\bigskip\medskip\pause
\item[]
What if \bl{$A = \{[a],[b],[c],[]\}$};\\
how many strings are then in \bl{$A^4$}\,?
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{Languages (Sets of Strings)}
% \begin{itemize}
% \item A \alert{\bf Language} is a set of strings, for example\medskip
% \begin{center}
% \bl{$\{[], hello, foobar, a, abc\}$}
% \end{center}\bigskip
% \item \alert{\bf Concatenation} for strings and languages
% \begin{center}
% \begin{tabular}{rcl}
% \bl{$foo\;@\;bar$} & \bl{$=$} & \bl{$foobar$}\medskip\\
% \bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
% \end{tabular}
% \end{center}
% \bigskip
% \small
% \item [] For example \bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$}
% \[
% \bl{A \,@\, B = \{fooa, foob, bara, barb\}}
% \]
% \end{itemize}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{Two Corner Cases}
% \Large
% \begin{center}
% \bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\
% \bl{$A \,@\, \{\} = \;?$}
% \end{center}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{The Meaning of a Regex}
% ...all the strings a regular expression can match.
% \begin{center}
% \begin{tabular}{rcl}
% \bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
% \bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
% \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
% \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
% \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\
% \bl{$L(r^*)$} & \bl{$\dn$} & \\
% \end{tabular}
% \end{center}
% \begin{textblock}{14}(1.5,13.5)\small
% \bl{$L$} is a function from regular expressions to
% sets of strings (languages):\smallskip\\
% \bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
% \end{textblock}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{The Power Operation}
% \begin{itemize}
% \item The \alert{\textbf{\boldmath$n$th Power}} of a language:
% \begin{center}
% \begin{tabular}{lcl}
% \bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
% \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
% \end{tabular}
% \end{center}\bigskip
% \item[] For example
% \begin{center}
% \begin{tabular}{lcl@{\hspace{10mm}}l}
% \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
% \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
% \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
% \end{tabular}
% \end{center}
% \end{itemize}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{Written Exam}
% \begin{itemize}
% \item Accounts for 80\%.\bigskip
% \item The question ``\textit{Is this relevant for
% the exam?}'' is very demotivating for the lecturer!\bigskip\\
% \item Deal: Whatever is in the homework (and is not marked
% ``\textit{optional}'') is relevant for the exam.\bigskip
% \item Each lecture has also a handout. There are also handouts about
% notation and Scala.
% \end{itemize}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[t]
% \frametitle{Coursework}
% \begin{itemize}
% \item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip
% \end{itemize}
% \begin{columns}[t]
% \begin{column}{.5\textwidth}
% \underline{\bf Strand 1}\medskip
% \begin{itemize}
% \item 4 programming tasks:
% \begin{itemize}
% \item matcher (4\%, 11.10.)
% \item lexer (5\%, 04.11.)
% \item parser (5\%, 22.11.)
% \item compiler (6\%, 13.12.)
% \end{itemize}
% \item in any lang.~you like,\\ but I want to see the\\ code
% \end{itemize}
% \end{column}
% \hspace{-45pt}\vrule{}\hspace{10pt}
% \begin{column}{.5\textwidth}
% \underline{\bf Strand 2}\smallskip\begin{itemize}
% \item one task: prove the correctness of a regular expression matcher in
% the \underline{Isabelle} theorem prover
% \item 20\%, submission on~13.12.\hspace{-5mm}\mbox{}
% \end{itemize}
% \end{column}
% \end{columns}\medskip
% \small
% \begin{itemize}
% \item Solving more than one strand will {\bf not} give you more
% marks.
% \end{itemize}
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Lecture Capture}
%
%\begin{itemize}
%\item Hope it works\ldots\pause actually no, it does not!\medskip\pause
%\item It is important to use lecture capture wisely\\ (it is only the ``baseline''):
%\begin{itemize}
%\item Lecture recordings are a study and revision aid.
%\item Statistically, there is a clear and direct link between attendance and
% attainment: students who do not attend lectures, do less well in exams.
%\end{itemize}
%
%\item Attending a lecture is more than watching it online -- if you do not
%attend, you miss out!
%
%\end{itemize}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}
\begin{tabular}{lll}
TAs: & Anton Luca-Dorin & (took the module last year)\\
& Chengsong Tan & (PhD student working on derivatives)
\end{tabular}
\mbox{}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{Coursework}
Do we need to provide instructions on running the coursework files
if we're using languages other than Scala? Thanks
\end{mybox3}\pause
\begin{mybox2}{Zip-File for Coursework}
Please, please submit a zipfile that generates a subdirectory
\begin{center}
\texttt{NameFamilyName}
\end{center}
\end{mybox2}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{Coursework}
What is the purpose of the workshop session on the timetable?
Slightly confused about how to undertake cw1 and what exactly we
should be implementing. This is more for clarification of the cw1
structure, including the implementation and questions present in
cw1.
\end{mybox3}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{What is the trick?}\small
What was the trick to improve the evil regular expressions matcher
to have such good results compared to other programming languages?
Is it working better on casual regular expressions (the ones that
Python and Java handle pretty well), too? Or was it just optimised
for these evil ones?
\end{mybox3}
\begin{mybox3}{}\small
It was shown in the lectures that the pattern matching algorithms
currently implemented in popular programming languages (Python, JS,
Java, etc) are far slower than the algorithm we are going to be
implementing in this module. My question is why do these programming
languages not implement the algorithm that we are going to implement
in this module?
\end{mybox3}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Thanks to Martin Mikusovic}
\bigskip
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.05,0.0)}},
ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
ymax=35,
ytick={0,10,...,30},
scaled ticks=false,
axis lines=left,
width=9cm,
height=5.5cm,
legend entries={Java 8, Python, JavaScript, Swift},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
\end{axis}
\end{tikzpicture}
\end{center}
Regex: \bl{$(a^*)^* \cdot b$}
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Same Example in Java 9+}
\begin{center}
\begin{tikzpicture}
\begin{axis}[
xlabel={$n$},
x label style={at={(1.09,-0.15)}},
ylabel={time in secs},
scaled x ticks=false,
enlargelimits=false,
xtick distance=10000,
xmax=44000,
ytick={0,10,...,30},
ymax=35,
axis lines=left,
width=9cm,
height=5cm,
legend entries={Java \liningnums{9}+},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
\end{axis}
\end{tikzpicture}
\end{center}
Regex: \bl{$(a^*)^* \cdot b$}
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Questions
\begin{frame}[c]
\begin{mybox3}{}
Are there any (common) languages that have a built-in regex
implementation matching the set of functions of a formal 'simple'
regular expression, as opposed to an 'extended' regular expression
implemented in most regex-supporting languages?
\end{mybox3}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{Passing Mark}
I believe the assessment is 70\% coursework (broken into 10\% weekly
stuff, 15\% mid term exam and 45\% CW in any programming language)
and 30\% January exam. However, I would like to know if we just need
40\% overall to pass the module or pass the each component
individually?
\end{mybox3}
\hfill$\Rightarrow$ 40\% overall
\end{frame}
\begin{frame}[c]
\begin{mybox3}{Regexes}
Can we determine all the possible regular expressions matching a
certain string? If we take into account all the possible ways to
combine the operations: \bl{$\ZERO$}, \bl{$\ONE$},
\bl{$r_1 + r_2$}, \bl{$r_1 \cdot r_2$}, \bl{$r^*$}?
\end{mybox3}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{\bl{$L$} + Equivalence}
When we explain why two regular expressions are not equivalent, what
method is better for us, using mathematics formulas or making an
example?
\end{mybox3}
\begin{mybox3}{}
Meaning of Regex and Operations
\end{mybox3}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{\bl{$L$}}
Can the function L be applied to anything other than regular
expressions? For example would L(L(c)) return anything?
\end{mybox3}
\hfill $\Rightarrow$ No
\end{frame}
\begin{frame}[c]
\begin{mybox3}{\bl{$(a?)\{n\} \cdot a\{n\}$}}
In the evil regexes section, is there any reason why in the regex
\texttt{[a?]\{n\}[a]\{n\}} the square brackets are used? It is defined as a
single character from the square brackets, however there is just one
character, so it seems like it is not necessary. Maybe it is just
necessary for the first part, because ? is a token instead of a
character and we need to refer to a? as a ``unit''? Could regular
brackets be used instead? Is there any difference apart from the
fact that it would create a group? Also, are the regexes
\texttt{[a?]\{n\}} and
\texttt{a\{0,3\}} equivalent?
\end{mybox3}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{Python + Parser Combinators (CW3)}\small
Hi Christian,
I don’t see a problem: you certainly have higher order functions and
it is easy to implement algebraic data types using classes. As far
as I can see that’s all you need. You don’t get the static types but
that should be obvious. Basically if you can do it in LISP you can
do it in Python. The only problem could be stack overflows due to a
lack of tail recursion optimisation. On the other hand you can
simulate laziness using generators.
Cheers,
Thorsten
\end{mybox3}
Trees \url{https://youtu.be/7tCNu4CnjVc}
Laziness \url{https://youtu.be/5jwV3zxXc8E}
\end{frame}
\begin{frame}[c]
\begin{mybox3}{}
What suggestions do you have for us to get the most out of this
module, especially in the online format? I.e. form discussion
groups, will you have office hours?
\end{mybox3}
\small
\hfill $\Rightarrow$\mbox{} Discussion Forum on KEATS
\hfill online tutorial sessions
\hfill ???
\hfill PL-groups for ``exotic'' langs
\end{frame}
\begin{frame}[c]
\small
\begin{mybox3}{}
Where do most students struggle with this module? What will the format
of the exam be? What is the most efficient way of studying for the
exam? There are plenty of resources available on KEATS, but is there
anything else you'd recommend us to study? Although (just by skimming
the headings) the module seems to be a combination of practical and
theoretical matters, exactly in what field would the syllabus be
applied? Besides these questions and the ones other students asked, is
there anything else we should know? Thank you!
\end{mybox3}
\end{frame}
\begin{frame}[c]
\end{frame}
\begin{frame}[c]
\end{frame}
\begin{frame}[c]
\end{frame}
\begin{frame}[c]
\end{frame}
\begin{frame}[c]
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: