% !TEX program = xelatex\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}\usepackage{../slides}\usepackage{../graphics}\usepackage{../langs}\usepackage{../data}\usepackage{tcolorbox}\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}\hfuzz=220pt \lstset{language=Scala, style=mystyle, numbersep=0pt, numbers=none, xleftmargin=0mm}\newcommand{\bl}[1]{\textcolor{blue}{#1}} % beamer stuff \renewcommand{\slidecaption}{CFL 01, King's College London}%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect1/main.pdf%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect2/main.pdf%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect3/main.pdf\begin{document}%\begin{frame}[t]%\begin{mybox}%A physical explanation the \emph{dynamic matrix}\\%lots of text%\end{mybox}%\begin{mybox2}{Test}%A physical explanation the \emph{dynamic matrix}\\%lots of text%\end{mybox2}%\begin{mybox3}{Test}%A physical explanation the \emph{dynamic matrix}\\%lots of text%\end{mybox3}%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{% \begin{tabular}{@ {}c@ {}} \\[-3mm] \LARGE Compilers and \\[-1mm] \LARGE Formal Languages\\[-3mm] \end{tabular}} \begin{center} %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} %\includegraphics[scale=0.31]{pics/ante2.jpg}\\ %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} \end{center} \normalsize \begin{center} \begin{tabular}{ll} Email: & christian.urban at kcl.ac.uk\\ %Office Hours: & Thursdays 12 -- 14\\ %Location: & N7.07 (North Wing, Bush House)\\ Slides \& Progs: & KEATS\\ \end{tabular} \end{center} \begin{center} \begin{tikzpicture} \node[drop shadow,fill=white,inner sep=0pt] {\footnotesize\rowcolors{1}{capri!10}{white} \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline \cellcolor{blue!50} 1 Introduction, Languages & 6 While-Language \\ 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ 4 Lexing, Tokenising & 9 Optimisations \\ 5 Grammars, Parsing & 10 LLVM \\ \hline \end{tabular}% }; \end{tikzpicture} \end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}<1-12>[c]\frametitle{The Goal of this Module\ldots}\begin{center} \begin{tikzpicture}[scale=1, node/.style={ rectangle,rounded corners=3mm, very thick,draw=black!50,minimum height=18mm, minimum width=20mm, top color=white,bottom color=black!20,drop shadow}] \node at (3.05, 1.8) {\Large\bf \ldots{} you write a compiler}; \node (0) at (-2.3,0) {}; \node [above=5mm of 0] {\makebox[0mm]{\footnotesize \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; \node (A) at (0,0) [node] {}; \node [below right] at (A.north west) {lexer}; \node (B) at (3,0) [node] {}; \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; \node (C) at (6,0) [node] {}; \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; \node (1) at (8.4,0) {}; \node [above=5mm of 1] {\makebox[0mm]{\footnotesize \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}}; \draw [->,line width=4mm] (0) -- (A); \draw [->,line width=4mm] (A) -- (B); \draw [->,line width=4mm] (B) -- (C); \draw [->,line width=4mm] (C) -- (1); \end{tikzpicture} \end{center}\only<2,3,4>{\begin{textblock}{1}(1,2.1)\begin{bubble}[9.8cm]\normalsizelexer input: a string\smallskip\\\hspace{5mm}\code{"read(n);"}\medskip\\lexer output: a sequence of tokens\smallskip\\\hspace{5mm}\code{key(read) lpar id(n) rpar semi}\end{bubble}\end{textblock}} \only<3,4>{\begin{textblock}{1}(6,7.8)\begin{tabular}{c}\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm]\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)\end{tabular}\end{textblock}}\only<4>{\begin{textblock}{1}(0.5,12)\small\begin{tabular}{l@{}c@{}l} \pcode{if} & $\;\Rightarrow\;$ & keyword\\ \pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\\end{tabular} \end{textblock}}\only<6>{\begin{textblock}{1}(1,1.5)\begin{bubble}[8.5cm]\normalsizeparser input: a sequence of tokens\smallskip\\{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\parser output: an abstract syntax tree\smallskip\\\footnotesize\hspace{2cm}\begin{tikzpicture} \node {\code{read}} child {node {\code{lpar}}} child {node {\code{n}}} child {node {\code{rpar}}};\end{tikzpicture}\end{bubble}\end{textblock}}\only<8,9>{\begin{textblock}{1}(1,1.5)\begin{bubble}[4cm]\normalsizecode generation:\smallskip\\\hspace{5mm}\code{istore 2}\\ \hspace{5mm}\code{iload 2}\\ \hspace{5mm}\code{ldc 10}\\\hspace{5mm}\code{isub}\\\hspace{5mm}\code{ifeq Label2}\\ \hspace{5mm}\code{iload 2}\\\hspace{5mm}\code{...}\\\end{bubble}\end{textblock}}\only<9>{\begin{textblock}{6}(8.4,7)\begin{bubble}[5cm]\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, xlabel=n, enlargelimits=0.05, ybar interval=0.7, legend style=small]\addplot file {interpreted2.data};\addplot file {compiled2.data};%\legend{interpreted, compiled}\end{axis}\end{tikzpicture}}\end{bubble}\end{textblock}}\only<10>{\begin{textblock}{6}(1,3) \begin{bubble}[11cm] Compiler explorers, e.g.: \url{https://gcc.godbolt.org} \;\video{https://youtu.be/ysaBmhMEyUg} \begin{tikzpicture}[] \node (0) at (-2.3,0) {\includegraphics[scale=0.3]{pics/csource.png}}; \node (1) [right=35mm] at (0) {\includegraphics[scale=0.3]{pics/cassmbl.png}}; \draw [->,line width=4mm, red] (0) -- (1); \node (2) [below=20mm] at (0) {\LARGE\bf source}; \node (3) [right=40mm] at (2) {\LARGE\bf binary}; \draw [->,line width=1mm] (2) -- (3); \end{tikzpicture}\end{bubble}\end{textblock}}\only<11>{\begin{textblock}{6}(1,3) \begin{bubble}[11cm] Compiler explorer for Java: \url{https://javap.yawk.at} \begin{tikzpicture}[] \node (0) at (-2.3,0) {\includegraphics[scale=0.4]{pics/jsource.png}}; \node (1) [right=35mm] at (0) {\includegraphics[scale=0.4]{pics/jassmbl.png}}; \draw [->,line width=4mm, red] (0) -- (1); \node (2) [below=20mm] at (0) {\LARGE\bf source}; \node (3) [right=40mm] at (2) {\LARGE\bf byte code}; \draw [->,line width=1mm] (2) -- (3); \end{tikzpicture}\end{bubble}\end{textblock}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Why Study Compilers?}John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\here{https://blog.regehr.org/archives/1419}\smallskip\\\begin{bubble}[10.5cm] \bf ``\ldots{}It’s effectively a perpetual employment act for solid compiler hackers.''\end{bubble}\onslide<1->{\only<2>{\begin{itemize}\item {\bf Hardware is getting weirder rather than getting clocked faster.}\begin{itemize}\item[] ``Almost all processors are multicores nowadays and it looks like there is increasing asymmetry in resources across cores. Processors come with vector units, crypto accelerators etc. We have DSPs, GPUs, ARM big.little, and Xeon Phi. This is only scratching the surface.''\end{itemize} \end{itemize}}\only<3>{\begin{itemize}\item {\bf We’re getting tired of low-level languages and their associated security disasters.}\begin{itemize}\item [] ``We want to write new code, to whatever extent possible, in safer, higher-level languages. Compilers are caught right in the middle of these opposing trends: one of their main jobs is to help bridge the large and growing gap between increasingly high-level languages and increasingly wacky platforms.''\end{itemize} \end{itemize}}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Why Bother with Compilers?}\textbf{Boeing 777's}: First flight in 1994. They want to achievetriple redundancy for potential hardware faults.\here{http://www.citemaster.net/get/db3a81c6-548e-11e5-9d2e-00163e009cc7/R8.pdf}\bigskipThey compile 1 Ada program to\medskip\begin{itemize} \item Intel 80486 \item Motorola 68040 (old Macintosh's) \item AMD 29050 (RISC chips used often in laser printers)\end{itemize}\medskip\medskipusing 3 independent compilers.\bigskip\pause\small Airbus uses C and static analysers. Recently started using CompCert.\only<1->{%\begin{textblock}{6}(8,4.5)\includegraphics[scale=0.28]{../pics/777.png}\end{textblock}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{What Do Compilers Do?}Remember BF*** from PEP?\begin{center}\begin{tabular}{rcl}\bl{\texttt{>}} & $\Rightarrow$ & move one cell right\\\bl{\texttt{<}} & $\Rightarrow$ & move one cell left\\\bl{\texttt{+}} & $\Rightarrow$ & increase cell by one\\\bl{\texttt{-}} & $\Rightarrow$ & decrease cell by one\\\bl{\texttt{.}} & $\Rightarrow$ & print current cell\\\bl{\texttt{,}} & $\Rightarrow$ & input current cell\\\bl{\texttt{[}} & $\Rightarrow$ & loop begin\\\bl{\texttt{]}} & $\Rightarrow$ & loop end\medskip\\ & $\Rightarrow$ & everything else is a comment\\\end{tabular} \end{center} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{A ``Compiler'' for BF*** to C} \begin{center} \begin{tabular}{rcl} \bl{\texttt{>}} & $\Rightarrow$ & \texttt{ptr++}\\ \bl{\texttt{<}} & $\Rightarrow$ & \texttt{ptr--}\\ \bl{\texttt{+}} & $\Rightarrow$ & \texttt{(*ptr)++}\\ \bl{\texttt{-}} & $\Rightarrow$ & \texttt{(*ptr)--}\\ \bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\ \bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\ \bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\ \bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\ & $\Rightarrow$ & ignore everything else\\ \end{tabular} \end{center}\bigskip \texttt{char field[30000]\\ char *ptr = \&field[15000]}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{Another~``Compiler''~for~BF~to~C} \begin{center} \begin{tabular}{rcl} \bl{\texttt{>\ldots>}} & $\Rightarrow$ & \texttt{ptr += n}\\ \bl{\texttt{<\ldots<}} & $\Rightarrow$ & \texttt{ptr -= n}\\ \bl{\texttt{+\ldots+}} & $\Rightarrow$ & \texttt{(*ptr) += n}\\ \bl{\texttt{-\ldots-}} & $\Rightarrow$ & \texttt{(*ptr) -= n}\\ \bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\ \bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\ \bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\ \bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\ & $\Rightarrow$ & ignore everything else\\ \end{tabular} \end{center}\bigskip \texttt{char field[30000]\\ char *ptr = \&field[15000]}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{A Brief Compiler History}\bigskip\begin{itemize}\item Turing Machines, 1936 (a tape as memory)\item Regular Expressions, 1956\\\item The first compiler for COBOL, 1957\\ (Grace Hopper)\medskip\item But surprisingly research papers are still published nowadays\\\item ``Parsing: The Solved Problem That Isn't'' \here{https://tratt.net/laurie/blog/entries/parsing_the_solved_problem_that_isnt.html}\end{itemize}\begin{textblock}{8.5}(5,7.6)\begin{flushright}\includegraphics[scale=0.3]{pics/hopper.jpg}\\\footnotesize\textcolor{gray}{Grace Hopper}\smallskip\\{\small\textcolor{gray}{(she made it to David Letterman's Tonight Show \here{https://youtu.be/3N_ywhx6_K0?t=31})}}\end{flushright}\end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Some Housekeeping}\textbf{Exams will be online:}\bigskip\begin{itemize}\item final exam in January (30\%)\item mid-term shortly after Reading Week (10\%)\bigskip\item weekly engagement (10\%) \end{itemize}\bigskip\bigskip\pause\textbf{Weekly Homework (optional):}\begin{itemize}\item uploaded on KEATS, send answers via email, responded individually\item \alert{\bf all} questions in the exam and mid-term will be from the HW!!\end{itemize} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Some Housekeeping}\textbf{Coursework (5 accounting for 45\%):}\bigskip\begin{itemize}\item matcher (5\%)\item lexer (8\%)\item parser / interpreter (10\%)\item JVM compiler (10\%)\item LLVM compiler (12\%) \end{itemize}\bigskip\pauseyou can use any programming language you like (Haskell, Rust)\\\pauseyou can use any code I showed you and uploaded to KEATS\ldots\textbf{BUT NOTHING ELSE!}\pause\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Lectures 1 - 5}transforming strings into structured data\\[10mm]{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\\hspace{5mm}(recognising ``words'')\\[6mm]{\LARGE\bf Parsing}\medskip\\\hspace{5mm}(recognising ``sentences'')\begin{textblock}{1}(10,9.1)\begin{tabular}{c}\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]\footnotesize Stone of Rosetta\end{tabular}\end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Lectures 1 - 5}transforming strings into structured data\\[10mm]{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\\hspace{5mm}(recognising ``words'')\\[6mm]{\LARGE\bf Parsing}\medskip\\\hspace{5mm}(recognising ``sentences'')\begin{textblock}{1}(10,9.1)\begin{tabular}{c}\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]\footnotesize Stone of Rosetta\end{tabular}\end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{Lectures 5 - 10} code generation for a small imperative and a small functional language\\[10mm] {\LARGE\bf Interpreters}\medskip\\ \hspace{5mm}(directly runs a program)\\[6mm] {\LARGE\bf Compilers}\medskip\\ \hspace{5mm}(generate JVM code and LLVM-IR code) \begin{textblock}{1}(8.8,8.1) \begin{tabular}{c@{}c} \includegraphics[scale=0.4]{../pics/javaduke.png} & \includegraphics[scale=0.23]{../pics/llvmlogo.png} \end{tabular} \end{textblock} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Familiar Regular Expresssions}\small\begin{center}\texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}\end{center}\smallskip\begin{center}\begin{tabular}{@{}lp{8.5cm}@{}}\pcode{re*} & matches 0 or more times\\\pcode{re+} & matches 1 or more times\\\pcode{re?} & matches 0 or 1 times\\\pcode{re\{n\}} & matches exactly \pcode{n} number of times\\\pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\\pcode{[...]} & matches any single character inside the brackets\\\pcode{[^...]} & matches any single character not inside the brackets\\\pcode{a-z A-Z} & character ranges\\\pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\\pcode{.} & matches every character except newline\\\pcode{(re)} & groups regular expressions and remembers the matched text\end{tabular}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Some ``innocent'' examples}Let's try two examples\begin{center} \bl{\texttt{(a*)*b}} \hspace{2cm} \bl{\texttt{[a?]\{n\}[a]\{n\}}}\end{center}\bigskip\pause and match them with strings of the form\begin{center} \bl{\texttt{a}}, \bl{\texttt{aa}}, \bl{\texttt{aaa}}, \bl{\texttt{aaaa}}, \bl{\texttt{aaaaa}}, \bl{$\underbrace{\texttt{a}...\texttt{a}}_n$} \end{center} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Why Bother with Regexes?}\begin{columns}[t,onlytextwidth]\begin{column}{1.8cm}\mbox{} \end{column} \begin{column}{.5\textwidth}\small{}Ruby, Python, Java 8\medskip\\\begin{tikzpicture}\footnotesize\begin{axis}[ xlabel={$n$}, x label style={at={(1.05,0.0)}}, ylabel={time in secs}, enlargelimits=false, xtick={0,5,...,30}, xmax=33, ymax=35, ytick={0,5,...,30}, scaled ticks=false, axis lines=left, width=\textwidth, height=4cm, legend entries={Python,Ruby}, legend pos=north west, legend cell align=left]\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};\end{axis}\end{tikzpicture}\begin{tikzpicture}\footnotesize\begin{axis}[ xlabel={$n$}, x label style={at={(1.05,0.0)}}, ylabel={time in secs}, enlargelimits=false, xtick={0,5,...,30}, xmax=33, ymax=35, ytick={0,5,...,30}, scaled ticks=false, axis lines=left, width=\textwidth, height=4cm, legend entries={Python, Java 8, JavaScript, Swift}, legend pos=north west, legend cell align=left]\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};\end{axis}\end{tikzpicture}%\end{column}\begin{column}{.5\textwidth}\small{}Us (after next lecture)\medskip\\\begin{tikzpicture}\footnotesize\begin{axis}[ xlabel={$n$}, x label style={at={(1.07,0.0)}}, ylabel={time in secs}, enlargelimits=false, xtick={0,5000,...,10000}, xmax=11000, ymax=35, ytick={0,5,...,30}, scaled ticks=false, axis lines=left, width=\textwidth, height=4cm]\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};\end{axis}\end{tikzpicture}\begin{tikzpicture}\footnotesize\begin{axis}[ xlabel={$n$}, x label style={at={(1.07,0.0)}}, ylabel={time in secs}, enlargelimits=false, ymax=35, ytick={0,5,...,30}, scaled ticks=false, axis lines=left, width=\textwidth, height=4cm]\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};\end{axis}\end{tikzpicture}\end{column}\end{columns}\medskip\begin{textblock}{3}(-0.1,3.3)\small\hfill\bl{\texttt{[a?]\{n\}[a]\{n\}}}:\end{textblock}\begin{textblock}{3}(-0.1,8.7) \small\hfill\bl{\texttt{(a*)*b}}:\end{textblock}\begin{textblock}{3}(0.3,13)\small{}matching with strings\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$} \end{textblock}\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c,fragile] \frametitle{Incidents} \begin{itemize} \item a global outage on 2 July 2019 at \textbf{Cloudflare} (first one for six years)\medskip \begin{center}\small\color{blue} \begin{verbatim} (?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false| null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s |-|~|!|{}|\|\||\+)*.*(?:.*=.*))) \end{verbatim} \end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip \item on 20 July 2016 the \textbf{Stack Exchange} webpage went down because of an evil regular expression \here{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016} \end{itemize} \begin{textblock}{6}(6,7.6) \includegraphics[scale=0.14]{../pics/cloudflare.png}\\ \footnotesize It serves more web traffic than Twitter, Amazon, Apple, Instagram, Bing \& Wikipedia combined. \here{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/} \end{textblock} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Evil Regular Expressions}\begin{itemize}\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip\item Some evil regular expressions:\medskip\begin{itemize}\item \bl{\texttt{[a?]\{n\}\;[a]\{n\}}}\item \bl{\texttt{(a*)*\;b}} \item \bl{\texttt{([a-z]+)*}} \item \bl{\texttt{(a + aa)*}}\item \bl{\texttt{(a + a?)*}}\end{itemize}\item sometimes also called \alert{catastrophic backtracking}\item this is a problem for \alert{N}etwork \alert{I}ntrusion \alert{D}etection systems, Cloudflare, StackExchange, Atom editor\item \url{https://vimeo.com/112065252} \end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%\frametitle{Today}%%\begin{itemize}%\item While the ultimate goal is to implement a small compiler for the JVM% \ldots\bigskip%\end{itemize}%%Let's start with:%%\begin{itemize}%\item a web-crawler%\item an email harvester%\item \textcolor{gray}{(a web-scraper)}%\end{itemize}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]%\frametitle{A Web-Crawler}%%\mbox{}\\[10mm]%%\begin{enumerate}%\item given an URL, read the corresponding webpage%\item extract all links from it%\item call the web-crawler again for all these links%\end{enumerate}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]%\frametitle{A Web-Crawler}%%\mbox{}\\[10mm]%%%\begin{enumerate}%\item given an URL, read the corresponding webpage%\item if not possible print, out a problem%\item if possible, extract all links from it%\item call the web-crawler again for all these links%\end{enumerate}\bigskip\pause%%\small (we need a bound for the number of recursive calls)%%\small (the purpose is to check all links on my own webpage)%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%%\begin{textblock}{1}(2,5)%\begin{tabular}{c}%\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm]%\small Server%\end{tabular}%\end{textblock}%%\begin{textblock}{1}(5.6,4)% \begin{tikzpicture}[scale=1.1]% \draw[white] (0,1) node (X) {};% \draw[white] (2,1) node (Y) {};% \draw[white] (0,0) node (X1) {};% \draw[white] (2,0) node (Y1) {};% \draw[white] (0,-1) node (X2) {};% \draw[white] (2,-1) node (Y2) {};% \draw[red, <-, line width = 2mm] (X) -- (Y);% \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};% \draw[red, ->, line width = 2mm] (X1) -- (Y1);% \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};% \draw[red, <-, line width = 2mm] (X2) -- (Y2);% \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};% \end{tikzpicture}%\end{textblock}%%%\begin{textblock}{1}(9,5.5)%\begin{tabular}{c}%\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm]%\small Browser%\end{tabular}%\end{textblock}%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%\frametitle{Scala}%%\small A simple Scala function for reading webpages:%\bigskip%%\footnotesize%\lstinputlisting{../progs/app0.scala}%\medskip\pause%%\lstinline{get_page("""https://nms.kcl.ac.uk/christian.urban/""")}%\bigskip\medskip\pause%%%\small A slightly more complicated version for handling errors:%\smallskip%%\footnotesize%\lstinputlisting[xleftmargin=-4mm]{../progs/app1.scala}%%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]%\frametitle{A Regular Expression}%%\begin{itemize}%\item \ldots{} is a pattern or template for specifying strings%\end{itemize}\bigskip% %\begin{center} %\only<1>{\scode{"https?://[^"]*"}}%%\only<2>{\scode{""""https?://[^"]*"""".r}}%\end{center}\bigskip\bigskip%%matches for example\smallskip\\ %\hspace{2mm}\code{"http://www.foobar.com"}\\%\hspace{2mm}\code{"https://www.tls.org"}\smallskip\\%%but not\smallskip\\ %\hspace{2mm}\code{"http://www."foo"bar.com"}\\%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%\frametitle{Finding Operations in Scala}%%{\bf\code{rexp.findAllIn(string)}}\medskip% %returns a list of all (sub)strings that match the %regular expression%\bigskip\bigskip % %%{\bf\code{rexp.findFirstIn(string)}}\medskip% %returns either %%\begin{itemize}%\item \code{None} if no (sub)string matches or %\item \code{Some(s)} with the first (sub)string%\end{itemize}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%%\footnotesize%\lstinputlisting{../progs/app2.scala}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%%\small%A version that only crawls links in ``my'' domain:\bigskip%%\footnotesize%\lstinputlisting{../progs/app3.scala}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%\lstset{xleftmargin=-4mm}%\small%A little email harvester:%%\footnotesize%\lstinputlisting{../progs/app4.scala}\bigskip%%\tiny%\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{(Basic) Regular Expressions}Their inductive definition:\begin{textblock}{6}(2,7.5) \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} \bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\ & \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} / $[]$\\ & \bl{$\mid$} & \bl{$c$} & character\\ & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\ \end{tabular} \end{textblock}\only<2->{\footnotesize\begin{textblock}{9}(2,0.5)\begin{bubble}[9.8cm]\lstinputlisting{../progs/app01.scala}\end{bubble}\end{textblock}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]%\frametitle{Regular Expressions}%%\small%In Scala:\bigskip%%\footnotesize%\lstinputlisting{../progs/app51.scala}%% %\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Strings}\ldots are lists of characters. For example \code{"hello"}\begin{center}\bl{$[h, e, l, l, o]$} or just \bl{$hello$}\end{center}the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\the concatenation of two strings:\begin{center}\bl{$s_1 \,@\, s_2$}\end{center}\bl{\textit{foo $@$ bar = foobar}}\\\bl{\textit{baz $@\, []$ = baz}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Languages, Strings}\begin{itemize}\item \alert{\bf Strings} are lists of characters, for example\begin{center}\bl{$[]$},\;\bl{$abc$} \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})\end{center}\bigskip\item A \alert{\bf language} is a set of strings, for example\medskip\begin{center}\bl{$\{[], hello, \textit{foobar}, a, abc\}$}\end{center}\bigskip\item \alert{\bf Concatenation} of strings and languages\begin{center}\begin{tabular}{rcl}\bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\\bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}\end{tabular}\end{center}%\item The \alert{\bf meaning} of a regular expression is a set of % strings, or language.\end{itemize} \only<2>{\begin{textblock}{4}(10.5,8)\smallLet\bl{$A = \{foo, bar\}$} \bl{$B = \{a, b\}$}\[\bl{A \,@\, B = \{fooa, foob, bara, barb\}}\]\end{textblock}} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{Two Corner Cases} \Large \begin{center} \bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\ \bl{$A \,@\, \{\} = \;?$} \end{center} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{The Meaning of a Regex} ...all the strings a regular expression can match. \begin{center} \begin{tabular}{rcl} \bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\ \bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\ \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\ \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\ \bl{$L(r^*)$} & \bl{$\dn$} & \\ \end{tabular}\end{center}\begin{textblock}{14}(1.5,13.5)\small\bl{$L$} is a function from regular expressions to sets of strings (languages):\smallskip\\\bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}\end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{The Power Operation} \begin{itemize} \item The \alert{\textbf{\boldmath$n$th Power}} of a language: \begin{center} \begin{tabular}{lcl} \bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$} \end{tabular} \end{center}\bigskip \item[] For example \begin{center} \begin{tabular}{lcl@{\hspace{10mm}}l} \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\ \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\ \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\ \end{tabular} \end{center} \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{The Meaning of a Regex}\begin{textblock}{15}(1,4) \begin{tabular}{rcl} \bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\ \bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\ \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\ \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\ \bl{$L(r^*)$} & \bl{$\dn$} & \onslide<2->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\ \end{tabular}\bigskip%\onslide<2->{%\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\%\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\%\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}%} \end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{The Star Operation} \begin{itemize} \item The \alert{\bf Kleene Star} of a \underline{language}: \bigskip \begin{center} \begin{tabular}{c} \bl{$A\star \dn \bigcup_{0\le n} A^n$} \end{tabular} \end{center}\bigskip \item[] This expands to \[ \bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots} \] or \small \[ \bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\; A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots} \] \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{The Meaning of a Regex}\begin{textblock}{15}(1,4) \begin{tabular}{rcl} \bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\ \bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\ \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\ \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\ \bl{$L(r^*)$} & \bl{$\dn$} & \bl{$(L(r))\star$}\\ \end{tabular}\end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{The Meaning of Matching}\begin{bubble}[10cm]\large\bf A regular expression \bl{$r$} matches a string~\bl{$s$} provided\begin{center}\bl{$s \in L(r)$}\\ \end{center}\end{bubble}\bigskip\bigskip\ldots and the point of the next lecture is to decide this problem as fast as possible (unlike Python,Ruby, Java)\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{Questions} \begin{itemize} \item Say \bl{$A = \{[a],[b],[c],[d]\}$}.\bigskip \item[] How many strings are in \bl{$A^4$}\,? \bigskip\medskip\pause \item[] What if \bl{$A = \{[a],[b],[c],[]\}$};\\ how many strings are then in \bl{$A^4$}\,? \end{itemize} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c]% \frametitle{Languages (Sets of Strings)}% \begin{itemize}% \item A \alert{\bf Language} is a set of strings, for example\medskip% \begin{center}% \bl{$\{[], hello, foobar, a, abc\}$}% \end{center}\bigskip% \item \alert{\bf Concatenation} for strings and languages% \begin{center}% \begin{tabular}{rcl}% \bl{$foo\;@\;bar$} & \bl{$=$} & \bl{$foobar$}\medskip\\% \bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}% \end{tabular}% \end{center}% \bigskip% \small% \item [] For example \bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$}% \[% \bl{A \,@\, B = \{fooa, foob, bara, barb\}}% \]% \end{itemize} % \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c]% \frametitle{Two Corner Cases}% \Large% \begin{center}% \bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\% \bl{$A \,@\, \{\} = \;?$}% \end{center} % \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c]% \frametitle{The Meaning of a Regex}% ...all the strings a regular expression can match. % \begin{center}% \begin{tabular}{rcl}% \bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\% \bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\% \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\% \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\% \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\% \bl{$L(r^*)$} & \bl{$\dn$} & \\% \end{tabular}% \end{center}% \begin{textblock}{14}(1.5,13.5)\small% \bl{$L$} is a function from regular expressions to % sets of strings (languages):\smallskip\\% \bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}% \end{textblock}% \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c]% \frametitle{The Power Operation}% \begin{itemize}% \item The \alert{\textbf{\boldmath$n$th Power}} of a language:% \begin{center}% \begin{tabular}{lcl}% \bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\% \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}% \end{tabular}% \end{center}\bigskip% \item[] For example% \begin{center}% \begin{tabular}{lcl@{\hspace{10mm}}l}% \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\% \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\% \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\% \end{tabular}% \end{center}% \end{itemize} % \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c]% \frametitle{Written Exam}% \begin{itemize}% \item Accounts for 80\%.\bigskip% \item The question ``\textit{Is this relevant for% the exam?}'' is very demotivating for the lecturer!\bigskip\\% \item Deal: Whatever is in the homework (and is not marked% ``\textit{optional}'') is relevant for the exam.\bigskip% \item Each lecture has also a handout. There are also handouts about% notation and Scala. % \end{itemize}% \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t]% \frametitle{Coursework}% \begin{itemize}% \item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip% \end{itemize}% \begin{columns}[t]% \begin{column}{.5\textwidth}% \underline{\bf Strand 1}\medskip% \begin{itemize}% \item 4 programming tasks:% \begin{itemize}% \item matcher (4\%, 11.10.) % \item lexer (5\%, 04.11.)% \item parser (5\%, 22.11.)% \item compiler (6\%, 13.12.)% \end{itemize}% \item in any lang.~you like,\\ but I want to see the\\ code% \end{itemize}% \end{column}% \hspace{-45pt}\vrule{}\hspace{10pt}% \begin{column}{.5\textwidth}% \underline{\bf Strand 2}\smallskip\begin{itemize}% \item one task: prove the correctness of a regular expression matcher in % the \underline{Isabelle} theorem prover% \item 20\%, submission on~13.12.\hspace{-5mm}\mbox{}% \end{itemize}% \end{column}% \end{columns}\medskip% \small% \begin{itemize}% \item Solving more than one strand will {\bf not} give you more % marks.% \end{itemize}% \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]%\frametitle{Lecture Capture}%%\begin{itemize}%\item Hope it works\ldots\pause actually no, it does not!\medskip\pause%\item It is important to use lecture capture wisely\\ (it is only the ``baseline''):%\begin{itemize} %\item Lecture recordings are a study and revision aid.%\item Statistically, there is a clear and direct link between attendance and% attainment: students who do not attend lectures, do less well in exams.%\end{itemize}%%\item Attending a lecture is more than watching it online -- if you do not%attend, you miss out! % %\end{itemize}%%\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}\begin{tabular}{lll} TAs: & Anton Luca-Dorin & (took the module last year)\\ & Chengsong Tan & (PhD student working on derivatives) \end{tabular} \mbox{}\end{frame}\begin{frame}[c]\begin{mybox3}{Coursework} Do we need to provide instructions on running the coursework files if we're using languages other than Scala? Thanks\end{mybox3}\pause\begin{mybox2}{Zip-File for Coursework} Please, please submit a zipfile that generates a subdirectory \begin{center} \texttt{NameFamilyName} \end{center} \end{mybox2}\end{frame}\begin{frame}[c]\begin{mybox3}{Coursework} What is the purpose of the workshop session on the timetable? Slightly confused about how to undertake cw1 and what exactly we should be implementing. This is more for clarification of the cw1 structure, including the implementation and questions present in cw1.\end{mybox3}\end{frame}\begin{frame}[c]\begin{mybox3}{What is the trick?}\small What was the trick to improve the evil regular expressions matcher to have such good results compared to other programming languages? Is it working better on casual regular expressions (the ones that Python and Java handle pretty well), too? Or was it just optimised for these evil ones?\end{mybox3}\begin{mybox3}{}\small It was shown in the lectures that the pattern matching algorithms currently implemented in popular programming languages (Python, JS, Java, etc) are far slower than the algorithm we are going to be implementing in this module. My question is why do these programming languages not implement the algorithm that we are going to implement in this module?\end{mybox3}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c] \frametitle{Thanks to Martin Mikusovic}\bigskip \begin{center}\begin{tikzpicture} \begin{axis}[ xlabel={$n$}, x label style={at={(1.05,0.0)}}, ylabel={time in secs}, enlargelimits=false, xtick={0,5,...,30}, xmax=33, ymax=35, ytick={0,10,...,30}, scaled ticks=false, axis lines=left, width=9cm, height=5.5cm, legend entries={Java 8, Python, JavaScript, Swift}, legend pos=north west, legend cell align=left]\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};\end{axis}\end{tikzpicture}\end{center}Regex: \bl{$(a^*)^* \cdot b$}Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Same Example in Java 9+}\begin{center}\begin{tikzpicture} \begin{axis}[ xlabel={$n$}, x label style={at={(1.09,-0.15)}}, ylabel={time in secs}, scaled x ticks=false, enlargelimits=false, xtick distance=10000, xmax=44000, ytick={0,10,...,30}, ymax=35, axis lines=left, width=9cm, height=5cm, legend entries={Java \liningnums{9}+}, legend pos=north west, legend cell align=left]\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};\end{axis}\end{tikzpicture}\end{center}Regex: \bl{$(a^*)^* \cdot b$}Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\begin{mybox3}{} Are there any (common) languages that have a built-in regex implementation matching the set of functions of a formal 'simple' regular expression, as opposed to an 'extended' regular expression implemented in most regex-supporting languages?\end{mybox3}\end{frame}\begin{frame}[c]\begin{mybox3}{Passing Mark} I believe the assessment is 70\% coursework (broken into 10\% weekly stuff, 15\% mid term exam and 45\% CW in any programming language) and 30\% January exam. However, I would like to know if we just need 40\% overall to pass the module or pass the each component individually?\end{mybox3}\hfill$\Rightarrow$ 40\% overall\end{frame}\begin{frame}[c]\begin{mybox3}{Regexes} Can we determine all the possible regular expressions matching a certain string? If we take into account all the possible ways to combine the operations: \bl{$\ZERO$}, \bl{$\ONE$}, \bl{$r_1 + r_2$}, \bl{$r_1 \cdot r_2$}, \bl{$r^*$}?\end{mybox3}\end{frame}\begin{frame}[c]\begin{mybox3}{\bl{$L$} + Equivalence} When we explain why two regular expressions are not equivalent, what method is better for us, using mathematics formulas or making an example? \end{mybox3}\begin{mybox3}{} Meaning of Regex and Operations\end{mybox3}\end{frame}\begin{frame}[c]\begin{mybox3}{\bl{$L$}} Can the function L be applied to anything other than regular expressions? For example would L(L(c)) return anything?\end{mybox3}\hfill $\Rightarrow$ No\end{frame} \begin{frame}[c]\begin{mybox3}{\bl{$(a?)\{n\} \cdot a\{n\}$}} In the evil regexes section, is there any reason why in the regex \texttt{[a?]\{n\}[a]\{n\}} the square brackets are used? It is defined as a single character from the square brackets, however there is just one character, so it seems like it is not necessary. Maybe it is just necessary for the first part, because ? is a token instead of a character and we need to refer to a? as a ``unit''? Could regular brackets be used instead? Is there any difference apart from the fact that it would create a group? Also, are the regexes \texttt{[a?]\{n\}} and \texttt{a\{0,3\}} equivalent?\end{mybox3}\end{frame} \begin{frame}[c]\begin{mybox3}{Python + Parser Combinators (CW3)}\small Hi Christian, I don’t see a problem: you certainly have higher order functions and it is easy to implement algebraic data types using classes. As far as I can see that’s all you need. You don’t get the static types but that should be obvious. Basically if you can do it in LISP you can do it in Python. The only problem could be stack overflows due to a lack of tail recursion optimisation. On the other hand you can simulate laziness using generators. Cheers, Thorsten \end{mybox3}Trees \url{https://youtu.be/7tCNu4CnjVc}Laziness \url{https://youtu.be/5jwV3zxXc8E}\end{frame}\begin{frame}[c]\begin{mybox3}{} What suggestions do you have for us to get the most out of this module, especially in the online format? I.e. form discussion groups, will you have office hours?\end{mybox3}\small\hfill $\Rightarrow$\mbox{} Discussion Forum on KEATS\hfill online tutorial sessions\hfill ???\hfill PL-groups for ``exotic'' langs\end{frame}\begin{frame}[c] \small\begin{mybox3}{}Where do most students struggle with this module? What will the formatof the exam be? What is the most efficient way of studying for theexam? There are plenty of resources available on KEATS, but is thereanything else you'd recommend us to study? Although (just by skimmingthe headings) the module seems to be a combination of practical andtheoretical matters, exactly in what field would the syllabus beapplied? Besides these questions and the ones other students asked, isthere anything else we should know? Thank you!\end{mybox3}\end{frame}\begin{frame}[c]\end{frame}\begin{frame}[c]\end{frame}\begin{frame}[c]\end{frame}\begin{frame}[c]\end{frame}\begin{frame}[c]\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{document}%%% Local Variables: %%% mode: latex%%% TeX-master: t%%% End: