slides/slides01.tex
author Christian Urban <urbanc@in.tum.de>
Thu, 14 Nov 2019 01:21:02 +0000
changeset 686 05cfce0fdef7
parent 637 27f71d2755f0
child 721 e3c64f22dd31
permissions -rw-r--r--
updated

% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t,xelatex]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}

\hfuzz=220pt 

\lstset{language=Scala,
        style=mystyle,
        numbersep=0pt,
        numbers=none,
        xleftmargin=0mm}

\newcommand{\bl}[1]{\textcolor{blue}{#1}}     
 
% beamer stuff 
\renewcommand{\slidecaption}{CFL 01, King's College London}


\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%  
  \begin{tabular}{@ {}c@ {}}
  \\[-3mm]
  \LARGE Compilers and \\[-1mm] 
  \LARGE Formal Languages (1)\\[-3mm] 
  \end{tabular}}

  \begin{center}
  %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
  %\includegraphics[scale=0.31]{pics/ante2.jpg}\\
  %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)}
  \end{center}

  \normalsize
  \begin{center}
  \begin{tabular}{ll}
  Email:  & christian.urban at kcl.ac.uk\\
  Office Hours: & Thursdays 12 -- 14\\
  Location: & N7.07 (North Wing, Bush House)\\
  Slides \& Progs: & KEATS\\
  \end{tabular}
  \end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Why Study Compilers?}

John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\

\begin{bubble}[10.5cm]
  \bf ``\ldots{}It’s effectively a perpetual
  employment act for solid compiler hackers.''
\end{bubble}

\onslide<1->{
\only<2>{
\begin{itemize}
\item {\bf Hardware is getting weirder
  rather than getting clocked faster.}

\begin{itemize}
\item[]  ``Almost all processors are multicores nowadays and it looks
  like there is increasing asymmetry in resources across cores.
  Processors come with vector units, crypto accelerators etc. We have
  DSPs, GPUs, ARM big.little, and Xeon Phi. This is only scratching the
  surface.''
\end{itemize}  
\end{itemize}}
\only<3>{
\begin{itemize}
\item {\bf We’re getting tired of low-level languages and
    their associated security disasters.}
  
\begin{itemize}
\item [] ``We want to write new code, to whatever extent possible, in
  safer, higher-level languages. Compilers are caught right in the
  middle of these opposing trends: one of their main jobs is to help
  bridge the large and growing gap between increasingly high-level
  languages and increasingly wacky platforms.''
\end{itemize}  
\end{itemize}}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
  \frametitle{What are Compilers?}
 
\begin{center}
\begin{tikzpicture}[]
  \node (0) at (-2.3,0) {\includegraphics[scale=0.3]{pics/csource.png}};
  \node (1) [right=35mm] at (0) {\includegraphics[scale=0.3]{pics/cassmbl.png}}; 
  \draw [->,line width=4mm, red] (0) -- (1);   
  \node (2) [below=25mm] at (0) {\LARGE\bf``source''};
  \node (3) [right=35mm] at (2) {\LARGE\bf``binary''};
  \draw [->,line width=1mm] (2) -- (3);   
\end{tikzpicture}
\end{center}

\begin{textblock}{10}(1,13.5)
Compiler explorers, e.g.: \url{https://gcc.godbolt.org}
\end{textblock}

\end{frame} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Why Bother?\\[-2mm] Compilers \& Boeings 777\end{tabular}}
  
First flight in 1994. They want to achieve triple redundancy in hardware
faults.\bigskip
  
They compile 1 Ada program to\medskip
  
\begin{itemize}
  \item Intel 80486
  \item Motorola 68040 (old Macintosh's)
  \item AMD 29050 (RISC chips used often in laser printers)
\end{itemize}\medskip\medskip
  
using 3 independent compilers.\bigskip\pause
  
\small Airbus uses C and static analysers. Recently started using CompCert.
  
\end{frame}
%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Why Bother?}

\begin{columns}[t]
\begin{column}{.5\textwidth}
Ruby, Python, Java 8\medskip\\
\begin{tikzpicture}\footnotesize
\begin{axis}[
    xlabel={$n$},
    x label style={at={(1.05,0.0)}},
    ylabel={time in secs},
    enlargelimits=false,
    xtick={0,5,...,30},
    xmax=33,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5.5cm,
    height=4cm, 
    legend entries={Python,Ruby},  
    legend pos=north west,
    legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
\end{axis}
\end{tikzpicture}
\begin{tikzpicture}\footnotesize
\begin{axis}[
    xlabel={$n$},
    x label style={at={(1.05,0.0)}},
    ylabel={time in secs},
    enlargelimits=false,
    xtick={0,5,...,30},
    xmax=33,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5.5cm,
    height=4cm, 
    legend entries={Python, Java 8, JavaScript},  
    legend pos=north west,
    legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};   
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
\end{axis}
\end{tikzpicture}

\end{column}
\begin{column}{.5\textwidth}
Us (after next lecture)\medskip\\
\begin{tikzpicture}\footnotesize
\begin{axis}[
    xlabel={$n$},
    x label style={at={(1.07,0.0)}},
    ylabel={time in secs},
    enlargelimits=false,
    xtick={0,5000,...,10000},
    xmax=11000,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5.5cm,
    height=4cm]
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
\end{axis}
\end{tikzpicture}
\begin{tikzpicture}\footnotesize
\begin{axis}[
    xlabel={$n$},
    x label style={at={(1.07,0.0)}},
    ylabel={time in secs},
    enlargelimits=false,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=5.5cm,
    height=4cm]
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
\end{axis}
\end{tikzpicture}
\end{column}
\end{columns}\bigskip

\small\centering
matching \bl{\texttt{[a?]\{n\}[a]\{n\}}} and \bl{\texttt{(a*)*b}}
against \bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}
\end{frame} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
    
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]
  \frametitle{Incidents}
    
  \begin{itemize}
  \item a global outage on 2 July 2019 at \textbf{Cloudflare} 
  (first one for six years)\medskip
  
  \begin{center}\small\color{blue}
  \begin{verbatim}  
  (?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
  null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
  |-|~|!|{}|\|\||\+)*.*(?:.*=.*)))  
  \end{verbatim}
  \end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip    
  
  \item on 20 July 2016 the \textbf{Stack Exchange} webpage went down
  because of an evil regular expression
  \end{itemize}
  
  \begin{textblock}{6}(9,7.6)
    \includegraphics[scale=0.14]{cloudflare.png}\\
    \footnotesize
    It serves more web traffic than Twitter, Amazon, Apple, Instagram, Bing \& Wikipedia combined.
    \end{textblock}
  
  \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
    
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Evil Regular Expressions}

\begin{itemize}
\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
\item Evil regular expressions\medskip
\begin{itemize}
\item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}
\item \bl{$(a^*)^*\cdot b$}
\item \bl{$([a-z]^+)^*$}
\item \bl{$(a + a \cdot a)^*$}
\item \bl{$(a + a^?)^*$}
\end{itemize}

\item sometimes also called \alert{catastrophic backtracking}
\item this is a problem for \alert{N}etwork \alert{I}ntrusion
  \alert{D}etection systems, Cloudflare, StackExchange, Atom editor
\item \url{https://vimeo.com/112065252}  
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Goal of this Module}

\begin{center}
  \begin{tikzpicture}[scale=1,
                      node/.style={
                      rectangle,rounded corners=3mm,
                      very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
                      top color=white,bottom color=black!20}]

  \node at (3.05, 1.8) {\Large\bf write a compiler};

  \node (0) at (-2.3,0) {};  
  \node [above=5mm of 0]
  {\makebox[0mm]{\footnotesize
      \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; 
     
  \node (A) at (0,0)  [node] {};
  \node [below right] at (A.north west) {lexer};

  \node (B) at (3,0)  [node] {};
  \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};

  \node (C) at (6,0)  [node] {};
  \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};

  \node (1) at (8.4,0) {};
  \node [above=5mm of 1]
  {\makebox[0mm]{\footnotesize
      \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};

  \draw [->,line width=4mm] (0) -- (A); 
  \draw [->,line width=4mm] (A) -- (B); 
  \draw [->,line width=4mm] (B) -- (C); 
  \draw [->,line width=4mm] (C) -- (1); 
  \end{tikzpicture}
  \end{center}

\only<2,3,4>{
\begin{textblock}{1}(1,2.1)
\begin{bubble}[9.8cm]
\normalsize
lexer input: a string\smallskip\\
\hspace{5mm}\code{"read(n);"}\medskip\\
lexer output: a sequence of tokens\smallskip\\
\hspace{5mm}\code{key(read) lpar id(n) rpar semi}
\end{bubble}
\end{textblock}} 

\only<3,4>{
\begin{textblock}{1}(6,7.8)
\begin{tabular}{c}
\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm]
\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)
\end{tabular}
\end{textblock}}

\only<4>{
\begin{textblock}{1}(0.5,12)\small
\begin{tabular}{l@{}c@{}l}
  \pcode{if}    & $\;\Rightarrow\;$ & keyword\\
  \pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\
\end{tabular}  
\end{textblock}}

\only<5>{
\begin{textblock}{1}(1,1.5)
\begin{bubble}[8.5cm]
\normalsize
parser input: a sequence of tokens\smallskip\\

{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\

parser output: an abstract syntax tree\smallskip\\
\footnotesize
\hspace{2cm}\begin{tikzpicture}
  \node {\code{read}}
    child {node {\code{lpar}}}
    child {node {\code{n}}}
    child {node {\code{rpar}}};
\end{tikzpicture}
\end{bubble}
\end{textblock}}

\only<6,7>{
\begin{textblock}{1}(1,1.5)
\begin{bubble}[4cm]
\normalsize
code generator:\smallskip\\
\hspace{5mm}\code{istore 2}\\ 
\hspace{5mm}\code{iload 2}\\ 
\hspace{5mm}\code{ldc 10}\\
\hspace{5mm}\code{isub}\\
\hspace{5mm}\code{ifeq Label2}\\ 
\hspace{5mm}\code{iload 2}\\
\hspace{5mm}\code{...}\\
\end{bubble}
\end{textblock}}

\only<7>{
\begin{textblock}{6}(8.4,7)
\begin{bubble}[5cm]
\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]
\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs,
    xlabel=n,
    enlargelimits=0.05,
    ybar interval=0.7, legend style=small]
\addplot file {interpreted2.data};
\addplot file {compiled2.data};
%\legend{interpreted, compiled}
\end{axis}
\end{tikzpicture}}
\end{bubble}
\end{textblock}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Acad.~Subject is Mature}

\bigskip
\begin{itemize}
\item Turing Machines, 1936 (a tape as memory)
\item Regular Expressions, 1956\\
\item The first compiler for COBOL, 1957\\ (Grace Hopper)\medskip
\item But surprisingly research papers are still published nowadays\\
\item ``Parsing: The Solved Problem That Isn't''
\end{itemize}

\begin{flushright}
\includegraphics[scale=0.3]{pics/hopper.jpg}\\
\footnotesize\textcolor{gray}{Grace Hopper}
\end{flushright}


\begin{flushright}
\mbox{}\\[-6mm]
{\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show,\\[-2mm]
 \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}}
\end{flushright}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Remember BF*** from PEP?}

\begin{center}
\begin{tabular}{lcl}
\bl{\texttt{>}} & $\Rightarrow$ & move one cell right\\
\bl{\texttt{<}} & $\Rightarrow$ & move one cell left\\
\bl{\texttt{+}} & $\Rightarrow$ & increase cell by one\\
\bl{\texttt{-}} & $\Rightarrow$ & decrease cell by one\\
\bl{\texttt{.}} & $\Rightarrow$ & print current cell\\
\bl{\texttt{,}} & $\Rightarrow$ & input current cell\\
\bl{\texttt{[}} & $\Rightarrow$ & loop begin\\
\bl{\texttt{]}} & $\Rightarrow$ & loop end\medskip\\
                & $\Rightarrow$ & everything else is a comment\\
\end{tabular}  
\end{center}  

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
  \frametitle{A ``Compiler'' for BF***}
  
  \begin{center}
  \begin{tabular}{lcl}
  \bl{\texttt{>}} & $\Rightarrow$ & \texttt{ptr++}\\
  \bl{\texttt{<}} & $\Rightarrow$ & \texttt{ptr--}\\
  \bl{\texttt{+}} & $\Rightarrow$ & \texttt{(*ptr)++}\\
  \bl{\texttt{-}} & $\Rightarrow$ & \texttt{(*ptr)--}\\
  \bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
  \bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
  \bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
  \bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
                  & $\Rightarrow$ & ignore everything else\\
  \end{tabular}  
  \end{center}\bigskip  
  
  \texttt{char field[30000]\\ char *ptr = &field[15000]}
  
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
    

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lectures 1 - 5}

transforming strings into structured data\\[10mm]

{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
\hspace{5mm}(recognising ``words'')\\[6mm]

{\LARGE\bf Parsing}\medskip\\
\hspace{5mm}(recognising ``sentences'')

\begin{textblock}{1}(10,9.1)
\begin{tabular}{c}
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
\footnotesize Stone of Rosetta
\end{tabular}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
  \frametitle{Lectures 5 - 10}
  
  code generation for a small imperative and a small functional languages\\[10mm]
  
  {\LARGE\bf Interpreters}\medskip\\
  \hspace{5mm}(directly runs a program)\\[6mm]
  
  {\LARGE\bf Compilers}\medskip\\
  \hspace{5mm}(generates JVM code)
  
  \begin{textblock}{1}(10,8.1)
  \begin{tabular}{c}
  \includegraphics[scale=0.4]{../pics/javaduke.png}
  \end{tabular}
  \end{textblock}
  
  \end{frame}
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
  


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Familiar Regular Expr.}
\small
\begin{center}
\texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
\end{center}\smallskip

\begin{center}
\begin{tabular}{@{}lp{8.5cm}@{}}
\pcode{re*} & matches 0 or more times\\
\pcode{re+} & matches 1 or more times\\
\pcode{re?} & matches 0 or 1 times\\
\pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\
\pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
\pcode{[...]} & matches any single character inside the brackets\\
\pcode{[^...]} & matches any single character not inside the 
brackets\\
\pcode{a-z A-Z} & character ranges\\
\pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
\pcode{.} & matches every character except newline\\
\pcode{(re)}	& groups regular expressions and remembers 
the matched text
\end{tabular}
\end{center}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Today}
%
%\begin{itemize}
%\item While the ultimate goal is to implement a small compiler for the JVM
%  \ldots\bigskip
%\end{itemize}
%
%Let's start with:
%
%\begin{itemize}
%\item a web-crawler
%\item an email harvester
%\item \textcolor{gray}{(a web-scraper)}
%\end{itemize}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{A Web-Crawler}
%
%\mbox{}\\[10mm]
%
%\begin{enumerate}
%\item given an URL, read the corresponding webpage
%\item extract all links from it
%\item call the web-crawler again for all these links
%\end{enumerate}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{A Web-Crawler}
%
%\mbox{}\\[10mm]
%
%
%\begin{enumerate}
%\item given an URL, read the corresponding webpage
%\item if not possible print, out a problem
%\item if possible, extract all links from it
%\item call the web-crawler again for all these links
%\end{enumerate}\bigskip\pause
%
%\small (we need a bound for the number of recursive calls)
%
%\small (the purpose is to check all links on my own webpage)
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%
%\begin{textblock}{1}(2,5)
%\begin{tabular}{c}
%\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm]
%\small Server
%\end{tabular}
%\end{textblock}
%
%\begin{textblock}{1}(5.6,4)
%  \begin{tikzpicture}[scale=1.1]
%  \draw[white] (0,1) node (X) {};
%  \draw[white] (2,1) node (Y) {};
%   \draw[white] (0,0) node (X1) {};
%  \draw[white] (2,0) node (Y1) {};
%   \draw[white] (0,-1) node (X2) {};
%  \draw[white] (2,-1) node (Y2) {};
%  \draw[red, <-, line width = 2mm] (X) -- (Y);
%  \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};
%  \draw[red, ->, line width = 2mm] (X1) -- (Y1);
%  \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};
%  \draw[red, <-, line width = 2mm] (X2) -- (Y2);
%  \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};
%  \end{tikzpicture}
%\end{textblock}
%
%
%\begin{textblock}{1}(9,5.5)
%\begin{tabular}{c}
%\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm]
%\small Browser
%\end{tabular}
%\end{textblock}
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Scala}
%
%\small A simple Scala function for reading webpages:
%\bigskip
%
%\footnotesize
%\lstinputlisting{../progs/app0.scala}
%\medskip\pause
%
%\lstinline{get_page("""https://nms.kcl.ac.uk/christian.urban/""")}
%\bigskip\medskip\pause
%
%
%\small A slightly more complicated version for handling errors:
%\smallskip
%
%\footnotesize
%\lstinputlisting[xleftmargin=-4mm]{../progs/app1.scala}
%
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{A Regular Expression}

\begin{itemize}
\item \ldots{} is a pattern or template for specifying strings
\end{itemize}\bigskip
  
\begin{center}  
\only<1>{\scode{"https?://[^"]*"}}%
\only<2>{\scode{""""https?://[^"]*"""".r}}
\end{center}\bigskip\bigskip

matches for example\smallskip\\  
\hspace{2mm}\code{"http://www.foobar.com"}\\
\hspace{2mm}\code{"https://www.tls.org"}\smallskip\\

but not\smallskip\\  
\hspace{2mm}\code{"http://www."foo"bar.com"}\\

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{Finding Operations in Scala}
%
%{\bf\code{rexp.findAllIn(string)}}\medskip
%  
%returns a list of all (sub)strings that match the 
%regular expression
%\bigskip\bigskip  
%  
%
%{\bf\code{rexp.findFirstIn(string)}}\medskip
% 
%returns either 
%
%\begin{itemize}
%\item \code{None} if no (sub)string matches or 
%\item \code{Some(s)} with the first (sub)string
%\end{itemize}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%
%\footnotesize
%\lstinputlisting{../progs/app2.scala}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%
%\small
%A version that only crawls links in ``my'' domain:\bigskip
%
%\footnotesize
%\lstinputlisting{../progs/app3.scala}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\lstset{xleftmargin=-4mm}
%\small
%A little email harvester:
%
%\footnotesize
%\lstinputlisting{../progs/app4.scala}\bigskip
%
%\tiny
%\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Regular Expressions}

Their inductive definition:


\begin{textblock}{6}(2,7.5)
  \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
  \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
         & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} / $[]$\\
         & \bl{$\mid$} & \bl{$c$}                         & character\\
         & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
         & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
         & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
  \end{tabular}
  \end{textblock}
  
  
\only<2->{\footnotesize
\begin{textblock}{9}(2,0.5)
\begin{bubble}[9.8cm]
\lstinputlisting{../progs/app01.scala}
\end{bubble}
\end{textblock}}
  
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[t]
%\frametitle{Regular Expressions}
%
%\small
%In Scala:\bigskip
%
%\footnotesize
%\lstinputlisting{../progs/app51.scala}
%
%  
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Strings}

\ldots are lists of characters. For example \code{"hello"}

\begin{center}
\bl{$[h, e, l, l, o]$} or just \bl{$hello$}
\end{center}

the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\

the concatenation of two strings:

\begin{center}
\bl{$s_1 \,@\, s_2$}
\end{center}

\bl{\textit{foo $@$ bar = foobar}}\\
\bl{\textit{baz $@\, []$ = baz}}
  
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Languages, Strings}

\begin{itemize}
\item \alert{\bf Strings} are lists of characters, for example
\begin{center}
\bl{$[]$},\;\bl{$abc$}  \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})
\end{center}\bigskip


\item A \alert{\bf language} is a set of strings, for example\medskip
\begin{center}
\bl{$\{[], hello, \textit{foobar}, a, abc\}$}
\end{center}\bigskip

\item \alert{\bf Concatenation} of strings and languages

\begin{center}
\begin{tabular}{rcl}
\bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
\bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
\end{tabular}
\end{center}

%\item The \alert{\bf meaning} of a regular expression is a set of 
%  strings, or language.
\end{itemize}  

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] 
  Regular Expression\end{tabular}}

\begin{textblock}{15}(1,4)
 \begin{tabular}{rcl}
 \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
 \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
 \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
 \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
 \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
 \bl{$L(r^*)$}           & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\
  \end{tabular}\bigskip
  
\onslide<2->{
\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
}  
    \end{textblock}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Meaning of Matching}

\begin{bubble}[10cm]
\large\bf 
A regular expression \bl{$r$} matches a string~\bl{$s$} 
provided

\begin{center}
\bl{$s \in L(r)$}\\ 
\end{center}
\end{bubble}\bigskip\bigskip

\ldots and the point of the next lecture is 
to decide this problem as fast as possible (unlike Python,
Ruby, Java)

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
  \frametitle{The Power Operation}
  
  \begin{itemize}
  \item The \alert{\textbf{\boldmath$n$th Power}} of a language:
  
  \begin{center}
  \begin{tabular}{lcl}
  \bl{$A^0$}    & \bl{$\dn$} & \bl{$\{[]\}$}\\
  \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
  \end{tabular}
  \end{center}\bigskip
  
  \item[] For example
  
  \begin{center}
  \begin{tabular}{lcl@{\hspace{10mm}}l}
  \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
  \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
  \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
  \end{tabular}
  \end{center}
  
  \end{itemize}  
  
  \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
  \frametitle{Questions}
  
  \begin{itemize}
  \item Say \bl{$A = \{[a],[b],[c],[d]\}$}.\bigskip
  
  \item[]
  How many strings are in \bl{$A^4$}\,?
  \bigskip\medskip\pause
  
  
 \item[]
  What if \bl{$A = \{[a],[b],[c],[]\}$};\\ 
  how many strings are then in \bl{$A^4$}\,?
  \end{itemize}  
  
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
  \frametitle{The Star Operation}
  
  \begin{itemize}
  \item The \alert{\bf Kleene Star} of a \underline{language}:
  \bigskip
  
  \begin{center}
  \begin{tabular}{c}
  \bl{$A\star \dn \bigcup_{0\le n} A^n$}
  \end{tabular}
  \end{center}\bigskip
  
  \item[] This expands to 
  
  \[
  \bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots}
  \]
  
  or
  
  \small
  \[
  \bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\; 
    A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots}
  \]
  
  \end{itemize}  
  
  \end{frame}
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
  


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Written Exam}

\begin{itemize}
\item Accounts for 80\%.\bigskip

\item The question ``\textit{Is this relevant for
      the exam?}'' is very demotivating for the lecturer!\bigskip\\

\item Deal: Whatever is in the homework (and is not marked
      ``\textit{optional}'') is relevant for the exam.\bigskip
      
\item Each lecture has also a handout. There are also handouts about
notation and Scala.      
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Coursework}

\begin{itemize}
\item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip
\end{itemize}

\begin{columns}[t]
\begin{column}{.5\textwidth}
\underline{\bf Strand 1}\medskip
\begin{itemize}
\item 4 programming tasks:
\begin{itemize}
\item matcher (4\%, 11.10.) 
\item lexer (5\%, 04.11.)
\item parser (5\%, 22.11.)
\item compiler (6\%, 13.12.)
\end{itemize}
\item in any lang.~you like,\\ but I want to see the\\ code
\end{itemize}
\end{column}

\hspace{-45pt}\vrule{}\hspace{10pt}
\begin{column}{.5\textwidth}
\underline{\bf Strand 2}\smallskip\begin{itemize}
\item one task: prove the correctness of a regular expression matcher in 
the \underline{Isabelle} theorem prover
\item 20\%, submission on~13.12.\hspace{-5mm}\mbox{}
\end{itemize}
\end{column}
\end{columns}\medskip

\small
\begin{itemize}
\item Solving more than one strand will {\bf not} give you more 
marks.

\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lecture Capture}

\begin{itemize}
\item Hope it works\ldots\pause actually no, it does not!\medskip\pause
\item It is important to use lecture capture wisely\\ (it is only the ``baseline''):
\begin{itemize}  
\item Lecture recordings are a study and revision aid.
\item Statistically, there is a clear and direct link between attendance and
  attainment: students who do not attend lectures, do less well in exams.
\end{itemize}

\item Attending a lecture is more than watching it online -- if you do not
attend, you miss out!  
  
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}

\mbox{}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
\end{document}

%%% Local Variables:  
%%% mode: latex
%%% TeX-master: t
%%% End: