% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\hfuzz=220pt
\pgfplotsset{compat=1.11}
% a hand written lexer for SML
% https://ponyo.org/
% https://github.com/eatonphil/ponyo/blob/master/src/Sml/Lexer.sml
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
\renewcommand{\slidecaption}{CFL 04, King's College London}
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
\begin{tabular}{@ {}c@ {}}
\\[-3mm]
\LARGE Compilers and \\[-2mm]
\LARGE Formal Languages (4)\\[3mm]
\end{tabular}}
\normalsize
\begin{center}
\begin{tabular}{ll}
Email: & christian.urban at kcl.ac.uk\\
Office Hours: & Thursdays 12 -- 14\\
Location: & N7.07 (North Wing, Bush House)\\
Slides \& Progs: & KEATS (also homework is there)\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{center}
\begin{tikzpicture}[scale=2,>=stealth',very thick,
every state/.style={minimum size=0pt,draw=blue!50,very thick,fill=blue!20},]
\only<-7>{\node[state, initial] (q0) at ( 0,1) {$\mbox{Q}_0$};}
\only<8->{\node[state, initial,accepting] (q0) at ( 0,1) {$\mbox{Q}_0$};}
\only<-7>{\node[state] (q1) at ( 1,1) {$\mbox{Q}_1$};}
\only<8->{\node[state,accepting] (q1) at ( 1,1) {$\mbox{Q}_1$};}
\node[state] (q2) at ( 2,1) {$\mbox{Q}_2$};
\path[->] (q0) edge[bend left] node[above] {\alert{$a$}} (q1)
(q1) edge[bend left] node[above] {\alert{$b$}} (q0)
(q2) edge[bend left=50] node[below] {\alert{$b$}} (q0)
(q1) edge node[above] {\alert{$a$}} (q2)
(q2) edge [loop right] node {\alert{$a$}} ()
(q0) edge [loop below] node {\alert{$b$}} ()
;
\end{tikzpicture}
\end{center}\bigskip
\begin{center}
\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$\ONE + \mbox{Q}_0\,b + \mbox{Q}_1\,b + \mbox{Q}_2\,b$}\\
\bl{$\mbox{Q}_1$} & \bl{$=$} & \bl{$\mbox{Q}_0\,a$}\\
\bl{$\mbox{Q}_2$} & \bl{$=$} & \bl{$\mbox{Q}_1\,a + \mbox{Q}_2\,a$}\\
\end{tabular}
\end{center}
Arden's Lemma:
\begin{center}
If \bl{$q = q\,r + s$}\; then\; \bl{$q = s\, r^*$}
\end{center}
\only<2-6>{\small
\begin{textblock}{6}(1,0.8)
\begin{bubble}[6.7cm]
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
\multicolumn{3}{@{}l}{substitute \bl{$\mbox{Q}_1$} into \bl{$\mbox{Q}_0$} \& \bl{$\mbox{Q}_2$}:}\\
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$\ONE + \mbox{Q}_0\,b + \mbox{Q}_0\,a\,b + \mbox{Q}_2\,b$}\\
\bl{$\mbox{Q}_2$} & \bl{$=$} & \bl{$\mbox{Q}_0\,a\,a + \mbox{Q}_2\,a$}
\end{tabular}
\end{bubble}
\end{textblock}}
\only<3-6>{\small
\begin{textblock}{6}(2,4.15)
\begin{bubble}[6.7cm]
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
\multicolumn{3}{@{}l}{simplifying \bl{$\mbox{Q}_0$}:}\\
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$\ONE + \mbox{Q}_0\,(b + a\,b) + \mbox{Q}_2\,b$}\\
\bl{$\mbox{Q}_2$} & \bl{$=$} & \bl{$\mbox{Q}_0\,a\,a + \mbox{Q}_2\,a$}
\end{tabular}
\end{bubble}
\end{textblock}}
\only<4-6>{\small
\begin{textblock}{6}(3,7.55)
\begin{bubble}[6.7cm]
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
\multicolumn{3}{@{}l}{Arden for \bl{$\mbox{Q}_2$}:}\\
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$\ONE + \mbox{Q}_0\,(b + a\,b) + \mbox{Q}_2\,b$}\\
\bl{$\mbox{Q}_2$} & \bl{$=$} & \bl{$\mbox{Q}_0\,a\,a\,(a^*)$}
\end{tabular}
\end{bubble}
\end{textblock}}
\only<5-6>{\small
\begin{textblock}{6}(4,10.9)
\begin{bubble}[7.5cm]
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
\multicolumn{3}{@{}l}{Substitute \bl{$\mbox{Q}_2$} and simplify:}\\
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$\ONE + \mbox{Q}_0\,(b + a\,b + a\,a\,(a^*)\,b)$}\\
\end{tabular}
\end{bubble}
\end{textblock}}
\only<6>{\small
\begin{textblock}{6}(5,13.4)
\begin{bubble}[7.5cm]
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
\multicolumn{3}{@{}l}{Arden again for \bl{$\mbox{Q}_0$}:}\\
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$(b + a\,b + a\,a\,(a^*)\,b)^*$}\\
\end{tabular}
\end{bubble}
\end{textblock}}
\only<7->{\small
\begin{textblock}{6}(6,11.5)
\begin{bubble}[6.7cm]
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
\multicolumn{3}{@{}l}{Finally:}\\
\bl{$\mbox{Q}_0$} & \bl{$=$} & \bl{$(b + a\,b + a\,a\,(a^*)\,b)^*$}\\
\bl{$\mbox{Q}_1$} & \bl{$=$} & \bl{$(b + a\,b + a\,a\,(a^*)\,b)^*\,a$}\\
\bl{$\mbox{Q}_2$} & \bl{$=$} & \bl{$(b + a\,b + a\,a\,(a^*)\,b)^*\,a\,a\,(a^*)$}\\
\end{tabular}
\end{bubble}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Coursework}
\begin{itemize}
\item \bl{$\der\,c\,(r^+) \dn \der\,c (r\cdot r^*)$}\quad given
that \bl{$r^+ \dn r\cdot r^*$}
\end{itemize}\bigskip\pause
\begin{center}
\begin{tabular}{lcl}
\bl{$\der\,c\,(r\cdot r^*)$} & \bl{$\dn$} &
\only<2-4>{if \bl{$nullable\,r$}}%
\only<5>{\bl{$(\der\,c\,r)\cdot r^*$}}\\
& &
\only<2>{then \bl{$(\der\,c\,r)\cdot r^* \,+\, \der\,c\,(r^*)$}}%
\only<3>{then \bl{$(\der\,c\,r)\cdot r^* \,+\, (\der\,c\,r)\cdot r^*$}}%
\only<4>{then \bl{$(\der\,c\,r)\cdot r^*$}}\\
& & \only<2-4>{else \bl{$(\der\,c\,r)\cdot r^*$}}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Coursework (2)}
\begin{itemize}
\item \bl{\texttt{CFUN(f: Char => Boolean)}}
\end{itemize}\medskip
\begin{center}
\begin{tabular}{l}
\bl{\texttt{CHAR(c: Char)}} \bl{$\dn$}\\
\quad\bl{\texttt{CFUN(\_ == c)}}\medskip\\
\bl{\texttt{RANGE(cs: Set[Char])}} \bl{$\dn$}\\
\quad\bl{\texttt{CFUN(cs.contains(\_))}}\medskip\\
\bl{\texttt{ALL}} \bl{$\dn$}\\
\quad\bl{\texttt{CFUN((c: Char) => true)}}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regexps and Automata}
\begin{center}
\begin{tikzpicture}
\node (rexp) {\bl{\bf Regexps}};
\node (nfa) [right=of rexp] {\bl{\bf NFAs}};
\node (dfa) [right=of nfa] {\bl{\bf DFAs}};
\node (mdfa) [right=of dfa] {\bl{\bf \begin{tabular}{c}minimal\\ DFAs\end{tabular}}};
\path[->,red, line width=2mm] (rexp) edge node [above=4mm, black]
{\begin{tabular}{c@{\hspace{9mm}}}Thompson's\\[-1mm] construction\end{tabular}} (nfa);
\path[->,red, line width=2mm] (nfa) edge node [above=4mm, black]
{\begin{tabular}{c}subset\\[-1mm] construction\end{tabular}}(dfa);
\path[->, red, line width=2mm] (dfa) edge node [below=5mm, black] {minimisation} (mdfa);
%%\path[->, red, line width=2mm] (dfa) edge [bend left=45] (rexp);
\path[->, red, line width=2mm] (dfa) edge [bend left=45] node [below, black] {\begin{tabular}{l}Brzozowski's\\ method\end{tabular}} (rexp);
\end{tikzpicture}\\
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\bl{$a^{?\{n\}} \cdot a^{\{n\}}$}}
\begin{tikzpicture}
\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=30,
ymax=35,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=10cm,
height=7cm,
legend entries={Python,Ruby,my NFA},
legend pos=north west,
legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};
\addplot[red,mark=triangle*, mark options={fill=white}] table {nfasearch.data};
\end{axis}
\end{tikzpicture}
The punchline is that many existing libraries do depth-first search
in NFAs (backtracking).
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{DFA to Rexp}
% \begin{center}
% \begin{tikzpicture}[scale=2,>=stealth',very thick,
% every state/.style={minimum size=0pt,
% draw=blue!50,very thick,fill=blue!20},]
% \node[state, initial] (q0) at ( 0,1) {$q_0$};
% \node[state] (q1) at ( 1,1) {$q_1$};
% \node[state, accepting] (q2) at ( 2,1) {$q_2$};
% \path[->] (q0) edge[bend left] node[above] {\alert{$a$}} (q1)
% (q1) edge[bend left] node[above] {\alert{$b$}} (q0)
% (q2) edge[bend left=50] node[below] {\alert{$b$}} (q0)
% (q1) edge node[above] {\alert{$a$}} (q2)
% (q2) edge [loop right] node {\alert{$a$}} ()
% (q0) edge [loop below] node {\alert{$b$}} ();
% \end{tikzpicture}
% \end{center}\bigskip
% \begin{center}
% \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l@{\hspace{7mm}}l}
% \bl{$q_0$} & \bl{$=$} & \bl{$\ONE + q_0\,b + q_1\,b + q_2\,b$} & (start state)\\
% \bl{$q_1$} & \bl{$=$} & \bl{$q_0\,a$}\\
% \bl{$q_2$} & \bl{$=$} & \bl{$q_1\,a + q_2\,a$}\\
% \end{tabular}
% \end{center}
% Arden's Lemma:
% \begin{center}
% If \bl{$q = q\,r + s$}\; then\; \bl{$q = s\, r^*$}
% \end{center}
% \end{frame}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \frametitle{DFA Minimisation}
% \begin{enumerate}
% \item Take all pairs \bl{$(q, p)$} with \bl{$q \not= p$}
% \item Mark all pairs that accepting and non-accepting states
% \item For all unmarked pairs \bl{$(q, p)$} and all characters \bl{$c$} test whether
% \begin{center}
% \bl{$(\delta(q, c), \delta(p,c))$}
% \end{center}
% are marked. If yes, then also mark \bl{$(q, p)$}.
% \item Repeat last step until no change.
% \item All unmarked pairs can be merged.
% \end{enumerate}
% \end{frame}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \begin{frame}[c]
% \begin{center}
% \begin{tabular}{@{\hspace{-8mm}}cc@{}}
% \begin{tikzpicture}[>=stealth',very thick,auto,
% every state/.style={minimum size=0pt,inner sep=2pt,draw=blue!50,very thick,fill=blue!20},]
% \node[state,initial] (q_0) {$q_0$};
% \node[state] (q_1) [right=of q_0] {$q_1$};
% \node[state] (q_2) [below right=of q_0] {$q_2$};
% \node[state] (q_3) [right=of q_2] {$q_3$};
% \node[state, accepting] (q_4) [right=of q_1] {$q_4$};
% \path[->] (q_0) edge node [above] {\alert{$a$}} (q_1);
% \path[->] (q_1) edge node [above] {\alert{$a$}} (q_4);
% \path[->] (q_4) edge [loop right] node {\alert{$a, b$}} ();
% \path[->] (q_3) edge node [right] {\alert{$a$}} (q_4);
% \path[->] (q_2) edge node [above] {\alert{$a$}} (q_3);
% \path[->] (q_1) edge node [right] {\alert{$b$}} (q_2);
% \path[->] (q_0) edge node [above] {\alert{$b$}} (q_2);
% \path[->] (q_2) edge [loop left] node {\alert{$b$}} ();
% \path[->] (q_3) edge [bend left=95, looseness=1.3] node [below] {\alert{$b$}} (q_0);
% \end{tikzpicture}
% &
% \raisebox{9mm}{\begin{tikzpicture}[scale=0.6,line width=0.8mm]
% \draw (0,0) -- (4,0);
% \draw (0,1) -- (4,1);
% \draw (0,2) -- (3,2);
% \draw (0,3) -- (2,3);
% \draw (0,4) -- (1,4);
% \draw (0,0) -- (0, 4);
% \draw (1,0) -- (1, 4);
% \draw (2,0) -- (2, 3);
% \draw (3,0) -- (3, 2);
% \draw (4,0) -- (4, 1);
% \draw (0.5,-0.5) node {$q_0$};
% \draw (1.5,-0.5) node {$q_1$};
% \draw (2.5,-0.5) node {$q_2$};
% \draw (3.5,-0.5) node {$q_3$};
% \draw (-0.5, 3.5) node {$q_1$};
% \draw (-0.5, 2.5) node {$q_2$};
% \draw (-0.5, 1.5) node {$q_3$};
% \draw (-0.5, 0.5) node {$q_4$};
% \draw (0.5,0.5) node {\large$\star$};
% \draw (1.5,0.5) node {\large$\star$};
% \draw (2.5,0.5) node {\large$\star$};
% \draw (3.5,0.5) node {\large$\star$};
% \draw (0.5,1.5) node {\large$\star$};
% \draw (2.5,1.5) node {\large$\star$};
% \draw (0.5,3.5) node {\large$\star$};
% \draw (1.5,2.5) node {\large$\star$};
% \end{tikzpicture}}
% \end{tabular}
% \end{center}
% \mbox{}\\[-20mm]\mbox{}
% \begin{center}
% \begin{tikzpicture}[>=stealth',very thick,auto,
% every state/.style={minimum size=0pt,inner sep=2pt,draw=blue!50,very thick,fill=blue!20},]
% \node[state,initial] (q_02) {$q_{0, 2}$};
% \node[state] (q_13) [right=of q_02] {$q_{1, 3}$};
% \node[state, accepting] (q_4) [right=of q_13] {$q_{4\phantom{,0}}$};
% \path[->] (q_02) edge [bend left] node [above] {\alert{$a$}} (q_13);
% \path[->] (q_13) edge [bend left] node [below] {\alert{$b$}} (q_02);
% \path[->] (q_02) edge [loop below] node {\alert{$b$}} ();
% \path[->] (q_13) edge node [above] {\alert{$a$}} (q_4);
% \path[->] (q_4) edge [loop above] node {\alert{$a, b$}} ();
% \end{tikzpicture}\\
% minimal automaton
% \end{center}
% \end{frame}
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Languages}
Two equivalent definitions:\bigskip
\begin{quote}\rm A language is \alert{regular} iff there exists a
regular expression that recognises all its strings.
\end{quote}
\begin{quote}\rm A language is \alert{regular} iff there exists an
automaton that recognises all its strings.
\end{quote}\bigskip\bigskip
\small
for example \bl{$a^nb^n$} is not regular
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Negation}
Regular languages are closed under negation:\bigskip
\begin{center}
\begin{tikzpicture}[scale=2,>=stealth',very thick,
every state/.style={minimum size=0pt,
draw=blue!50,very thick,fill=blue!20}]
\only<1>{\node[state,initial] (q0) at ( 0,1) {$q_0$};}
\only<2>{\node[state,initial,accepting] (q0) at ( 0,1) {$q_0$};}
\only<1>{\node[state] (q1) at ( 1,1) {$q_1$};}
\only<2>{\node[state,accepting] (q1) at ( 1,1) {$q_1$};}
\only<1>{\node[state, accepting] (q2) at ( 2,1) {$q_2$};}
\only<2>{\node[state] (q2) at ( 2,1) {$q_2$};}
\path[->] (q0) edge[bend left] node[above] {\alert{$a$}} (q1)
(q1) edge[bend left] node[above] {\alert{$b$}} (q0)
(q2) edge[bend left=50] node[below] {\alert{$b$}} (q0)
(q1) edge node[above] {\alert{$a$}} (q2)
(q2) edge [loop right] node {\alert{$a$}} ()
(q0) edge [loop below] node {\alert{$b$}} ();
\end{tikzpicture}
\end{center}\bigskip\bigskip
But requires that the automaton is \alert{completed}!
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{The Goal of this Course}
\mbox{}\\[-26mm]\mbox{}
\begin{center}
\begin{tikzpicture}[scale=1,
node/.style={
rectangle,rounded corners=3mm,
very thick,draw=black!50,
minimum height=18mm, minimum width=20mm,
top color=white,bottom color=black!20}]
\node at (3.05, 1.8) {\Large\bf Write a compiler};
\node (0) at (-2.3,0) {};
\node (A) at (0,0) [node] {};
\node [below right] at (A.north west) {lexer};
\node (B) at (3,0) [node] {};
\node [below right=1mm] at (B.north west)
{\mbox{}\hspace{-1mm}parser};
\node (C) at (6,0) [node] {};
\node [below right] at (C.north west)
{\mbox{}\hspace{-1mm}code gen};
\node (1) at (8.4,0) {};
\draw [->,line width=4mm] (0) -- (A);
\draw [->,line width=4mm] (A) -- (B);
\draw [->,line width=4mm] (B) -- (C);
\draw [->,line width=4mm] (C) -- (1);
\end{tikzpicture}
\end{center}
Today a lexer.
\only<2>{
\begin{textblock}{1}(6,9.8)
\begin{tabular}{c}
\includegraphics[scale=0.13]{../pics/rosetta.jpg}\\[-2mm]
\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)
\end{tabular}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Expressions}
In programming languages they are often used to recognise:\medskip
\begin{itemize}
\item operands, digits
\item identifiers
\item numbers (non-leading zeros)
\item keywords
\item comments
\end{itemize}\bigskip
\mbox{}\hfill\bl{\url{http://www.regexper.com}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lexing: Test Case}
\mbox{\lstinputlisting[language=While]{../progs/fib.while}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%\frametitle{?? Test Case}
%
%\mbox{\lstinputlisting[language=While]{../progs/collatz.while}}
%
%\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\tt
\begin{center}\large
\code{"if true then then 42 else +"}
\end{center}
\begin{tabular}{@{}l}
KEYWORD: \\
\hspace{5mm}{if}, {then}, {else},\\
WHITESPACE:\\
\hspace{5mm}{" "}, {$\backslash$n},\\
IDENTIFIER:\\
\hspace{5mm}LETTER $\cdot$ (LETTER + DIGIT + {\_})$^*$\\
NUM:\\
\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + {0}\\
OP:\\
\hspace{5mm}+, -, *, \%, <, <=\\
COMMENT:\\
\hspace{5mm}{$\slash$*} $\cdot$ $\sim$(ALL$^*$ $\cdot$ (*$\slash$) $\cdot$ ALL$^*$) $\cdot$ {*$\slash$}
\end{tabular}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\tt
\begin{center}\large
\code{"if true then then 42 else +"}
\end{center}
\only<1>{
\small\begin{tabular}{l}
KEYWORD(if),\\
WHITESPACE,\\
IDENT(true),\\
WHITESPACE,\\
KEYWORD(then),\\
WHITESPACE,\\
KEYWORD(then),\\
WHITESPACE,\\
NUM(42),\\
WHITESPACE,\\
KEYWORD(else),\\
WHITESPACE,\\
OP(+)
\end{tabular}}
\only<2>{
\small\begin{tabular}{l}
KEYWORD(if),\\
IDENT(true),\\
KEYWORD(then),\\
KEYWORD(then),\\
NUM(42),\\
KEYWORD(else),\\
OP(+)
\end{tabular}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
There is one small problem with the tokenizer. How should we
tokenize\ldots?
\begin{center}
\large\code{"x-3"}
\end{center}
\tt
\begin{tabular}{@{}l}
ID: \ldots\\
OP:\\
\hspace{5mm}\texttt{"+"}, \texttt{"-"}\\
NUM:\\
\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + {''0''}\\
NUMBER:\\
\hspace{5mm}NUM + (\texttt{"-"} $\cdot$ NUM)\\
\end{tabular}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
The same problem with\medskip
\[
\bl{(ab + a) \cdot (c + bc)}
\]\medskip
and the string $\bl{abc}$.\pause\bigskip
Or, keywords are \code{if} etc and identifiers are
letters followed by ``letters + numbers + \_''$^*$
\[
\bl{if}\qquad\bl{iffoo}
\]
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{POSIX: Two Rules}
\begin{itemize}
\item Longest match rule (``maximal munch rule''): The
longest initial substring matched by any regular expression is taken
as the next token.\bigskip
\item Rule priority:
For a particular longest initial substring, the first regular
expression that can match determines the token.
\end{itemize}\bigskip\bigskip\pause
\small
\hfill most posix matchers are buggy\\
\footnotesize
\hfill
\url{http://www.haskell.org/haskellwiki/Regex_Posix}
\smallskip\pause
\hfill \small traditional lexers are fast, but hairy
%\url{http://www.technologyreview.com/tr10/?year=2011}
%finite deterministic automata/ nondeterministic automaton
%\item problem with infix operations, for example i-12
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Sulzmann \& Lu Matcher}
We want to match the string \bl{$abc$} using \bl{$r_1$}:
\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};\pause
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};\pause
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};\pause
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable?$}}};\pause
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);\pause
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};\pause
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};\pause
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};\pause
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regexes and Values}
Regular expressions and their corresponding values:
\begin{center}
\begin{columns}
\begin{column}{3cm}
\begin{tabular}{@{}rrl@{}}
\bl{$r$} & \bl{$::=$} & \bl{$\ZERO$}\\
& \bl{$\mid$} & \bl{$\ONE$} \\
& \bl{$\mid$} & \bl{$c$} \\
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
& \bl{$\mid$} & \bl{$r_1 + r_2$} \\
\\
& \bl{$\mid$} & \bl{$r^*$} \\
\\
\end{tabular}
\end{column}
\begin{column}{3cm}
\begin{tabular}{@{\hspace{-7mm}}rrl@{}}
\bl{$v$} & \bl{$::=$} & \\
& & \bl{$Empty$} \\
& \bl{$\mid$} & \bl{$Char(c)$} \\
& \bl{$\mid$} & \bl{$Seq(v_1,v_2)$}\\
& \bl{$\mid$} & \bl{$Left(v)$} \\
& \bl{$\mid$} & \bl{$Right(v)$} \\
& \bl{$\mid$} & \bl{$Stars\,[]$} \\
& \bl{$\mid$} & \bl{$Stars\,[v_1,\ldots\,v_n]$} \\
\end{tabular}
\end{column}
\end{columns}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\small
{\small\lstinputlisting[language=Scala,numbers=none,
xleftmargin=-5mm] {../progs/app01.scala}}
{\small\lstinputlisting[language=Scala,numbers=none,
xleftmargin=-5mm] {../progs/app02.scala}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{textblock}{10}(3,5)
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{textblock}
\only<1->{
\begin{textblock}{6}(1,0.8)
\begin{bubble}[6cm]
\small
\begin{tabular}{ll}
\bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\
\bl{$r_2$}: & \bl{$\ONE \cdot (b \cdot c)$}\\
\bl{$r_3$}: & \bl{$(\ZERO \cdot (b \cdot c)) + (\ONE \cdot c)$}\\
\bl{$r_4$}: & \bl{$(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$}\\
\end{tabular}
\end{bubble}
\end{textblock}}
\only<2->{
\begin{textblock}{6}(5,11.4)
\begin{bubble}[7.6cm]
\small
\begin{tabular}{ll}
\bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\
\bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\
\bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\
\bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\
\end{tabular}
\end{bubble}
\end{textblock}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Flatten}
Obtaining the string underlying a value:
\begin{center}
\begin{tabular}{lcl}
\bl{$|Empty|$} & \bl{$\dn$} & \bl{$[]$}\\
\bl{$|Char(c)|$} & \bl{$\dn$} & \bl{$[c]$}\\
\bl{$|Left(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\
\bl{$|Right(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\
\bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\
\bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{textblock}{10}(3,5)
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{textblock}
\begin{textblock}{6}(1,0.8)
\begin{bubble}[6cm]
\small
\begin{tabular}{ll}
\bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\
\bl{$r_2$}: & \bl{$\ONE \cdot (b \cdot c)$}\\
\bl{$r_3$}: & \bl{$(\ZERO \cdot (b \cdot c)) + (\ONE \cdot c)$}\\
\bl{$r_4$}: & \bl{$(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$}\\
\end{tabular}
\end{bubble}
\end{textblock}
\begin{textblock}{6}(1,11.4)
\begin{bubble}[7.6cm]
\small
\begin{tabular}{ll}
\bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\
\bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\
\bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\
\bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\
\end{tabular}
\end{bubble}
\end{textblock}
\begin{textblock}{6}(12,11.4)
\begin{bubble}[2cm]
\small
\begin{tabular}{ll}
\bl{$|v_1|$}: & \bl{$abc$}\\
\bl{$|v_2|$}: & \bl{$bc$}\\
\bl{$|v_3|$}: & \bl{$c$}\\
\bl{$|v_4|$}: & \bl{$[]$}
\end{tabular}
\end{bubble}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Mkeps}
Finding a (posix) value for recognising the empty string:
\begin{center}
\begin{tabular}{lcl}
\bl{$mkeps\,(\ONE)$} & \bl{$\dn$} & \bl{$Empty$}\\
\bl{$mkeps\,(r_1 + r_2)$} & \bl{$\dn$} & \bl{if $nullable(r_1)$} \\
& & \bl{then $Left(mkeps(r_1))$}\\
& & \bl{else $Right(mkeps(r_2))$}\\
\bl{$mkeps\,(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),mkeps(r_2))$}\\
\bl{$mkeps\,(r^*)$} & \bl{$\dn$} & \bl{$Stars\,[]$} \\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Inject}
Injecting (``Adding'') a character to a value\\[-12mm]\mbox{}
\begin{center}
\begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
\bl{$inj\,(c)\,c\,(Empty)$} & \bl{$\dn$} & \bl{$Char\,c$}\\
\bl{$inj\,(r_1 + r_2)\,c\,(Left(v))$} & \bl{$\dn$} & \bl{$Left(inj\,r_1\,c\,v)$}\\
\bl{$inj\,(r_1 + r_2)\,c\,(Right(v))$} & \bl{$\dn$} & \bl{$Right(inj\,r_2\,c\,v)$}\\
\bl{$inj\,(r_1 \cdot r_2)\,c\,(Seq(v_1,v_2))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
\bl{$inj\,(r_1 \cdot r_2)\,c\,(Left(Seq(v_1,v_2)))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
\bl{$inj\,(r_1 \cdot r_2)\,c\,(Right(v))$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\
\bl{$inj\,(r^*)\,c\,(Seq(v,Stars\,vs))$} & \bl{$\dn$} & \bl{$Stars\,(inj\,r\,c\,v\,::\,vs)$}\\
\end{tabular}
\end{center}\bigskip
\footnotesize
\begin{tabular}{l@{\hspace{2mm}}l}
\bl{$inj$}: & 1st arg $\mapsto$ a rexp; 2nd arg $\mapsto$ a character;
3rd arg $\mapsto$ a value\\
& result $\mapsto$ a value
\end{tabular}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lexing}
\begin{center}
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
\bl{$lex\,r\,[]$} & \bl{$\dn$} & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
\bl{$lex\,r\,c::s$} & \bl{$\dn$} & \bl{$inj\,r\,c\,lex(der(c,r), s)$}\\
\end{tabular}
\end{center}
\footnotesize
\bl{$lex$}: returns a value
\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.0cm,every node/.style={minimum size=6mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Records}
\begin{itemize}
\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: \bl{$Rec(x,v)$}\medskip\pause
\item \bl{$nullable(x:r) \dn nullable(r)$}
\item \bl{$der\,c\,(x:r) \dn der\,c\,r$}
\item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$}
\item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$}
\end{itemize}\bigskip\bigskip\pause
\small
for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{itemize}
\item A regular expression for email addresses
\begin{center}
\begin{tabular}{l}
(name: \bl{$[a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+$})\bl{$\cdot @\cdot$}\\
\qquad(domain: \bl{$[a\mbox{-}z0\mbox{-}9\,.-]^+$}) \bl{$\cdot .\cdot$}\\
\qquad\qquad(top\_level: \bl{$[a\mbox{-}z\,.]^{\{2,6\}}$})
\end{tabular}
\end{center}
\bl{\[
\texttt{christian.urban@kcl.ac.uk}
\]}
\item the result environment:
\begin{center}
\begin{tabular}{l}
\bl{$[(name:\texttt{christian.urban}),$}\\
\bl{$\phantom{[}(domain:\texttt{kcl}),$}\\
\bl{$\phantom{[}(top\_level:\texttt{ac.uk})]$}
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{While Tokens}
\begin{center}
\begin{tabular}{rcl}
\pcode{WHILE\_REGS} & $\dn$ & \raisebox{-1mm}{\large(}\pcode{("k" : KEYWORD)} +\\
& & \phantom{(}\pcode{("i" : ID)} +\\
& & \phantom{(}\pcode{("o" : OP)} + \\
& & \phantom{(}\pcode{("n" : NUM)} + \\
& & \phantom{(}\pcode{("s" : SEMI)} +\\
& & \phantom{(}\pcode{("p" : (LPAREN + RPAREN))} +\\
& & \phantom{(}\pcode{("b" : (BEGIN + END))} +\\
& & \phantom{(}\pcode{("w" : WHITESPACE)}\raisebox{-1mm}{\large)$^*$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Simplification}
\begin{itemize}
\item If we simplify after the derivative, then we are building the
value for the simplified regular expression, but \alert{\textbf{not}} for the original
regular expression.
\end{itemize}
\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}
\small
\hspace{4.5cm}\bl{$(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$}
$\mapsto$
\bl{$\ONE$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Normally we would have
\begin{center}
\bl{$(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$}
\end{center}
and answer how this regular expression matches the empty string
with the value
\begin{center}
\bl{$Right(Right(Empty))$}
\end{center}\bigskip
But now we simplify this to \bl{$\ONE$} and would produce \bl{$Empty$} (see \textit{mkeps}).
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Rectification}
\def\arraystretch{1.05}
\begin{center}
\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l@{\hspace{8mm}}l}
& & & \hspace{5mm}rectification \\
& & & \hspace{5mm}functions:\\
\bl{$r \cdot \ZERO$} & $\mapsto$ & \bl{$\ZERO$} & \\
\bl{$\ZERO \cdot r$} & $\mapsto$ & \bl{$\ZERO$} & \\
\bl{$r \cdot \ONE$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Seq(f_1\,v, f_2\,Empty)$}\\
\bl{$\ONE \cdot r$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Seq(f_1\,Empty, f_2\,v)$}\\
\bl{$r + \ZERO$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Left(f_1\,v)$}\\
\bl{$\ZERO + r$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Right(f_2\,v)$}\\
\bl{$r + r$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Left(f_1\,v)$}
\end{tabular}
\end{center}\medskip\pause
\small
old \bl{$simp$} returns a rexp;\\
new \bl{$simp$} returns a rexp and a rectification~function.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Rectification}
\begin{center}
\begin{tabular}{l}
\bl{$simp(r)$}:\\
\quad case \bl{$r = r_1 + r_2$}\\
\qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\
\qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\
\qquad case \bl{$r_{1s} = \ZERO$}:
return \bl{$(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$}\\
\qquad case \bl{$r_{2s} = \ZERO$}:
return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\
\qquad case \bl{$r_{1s} = r_{2s}$}:
return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\
\qquad otherwise:
return \bl{$(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$}\\
\end{tabular}
\end{center}
\small
\begin{center}
\begin{tabular}{l@{\hspace{1mm}}l}
\bl{$f_{alt}(f_1, f_2) \dn$}\\
\qquad \bl{$\lambda v.\,$} case \bl{$v = Left(v')$}:
& return \bl{$Left(f_1(v'))$}\\
\qquad \phantom{$\lambda v.\,$} case \bl{$v = Right(v')$}:
& return \bl{$Right(f_2(v'))$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
{\footnotesize\lstinputlisting[language=Scala,numbers=none,
xleftmargin=-5mm] {../progs/app05.scala}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Rectification}
\begin{center}
\begin{tabular}{@{\hspace{-3mm}}l}
\bl{$simp(r)$}:\ldots\\
\quad case \bl{$r = r_1 \cdot r_2$}\\
\qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\
\qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\
\qquad case \bl{$r_{1s} = \ZERO$}:
return \bl{$(\ZERO, f_{error})$}\\
\qquad case \bl{$r_{2s} = \ZERO$}:
return \bl{$(\ZERO, f_{error})$}\\
\qquad case \bl{$r_{1s} = \ONE$}:
return \bl{$(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$}\\
\qquad case \bl{$r_{2s} = \ONE$}:
return \bl{$(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$}\\
\qquad otherwise:
return \bl{$(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$}\\
\end{tabular}
\end{center}
\small
\begin{center}
\begin{tabular}{l@{\hspace{1mm}}l}
\bl{$f_{seq}(f_1, f_2) \dn$}\\
\qquad \bl{$\lambda v.\,$ case $v = Seq(v_1, v_2)$}:
& return \bl{$Seq(f_1(v_1), f_2(v_2))$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
{\footnotesize\lstinputlisting[language=Scala,numbers=none,
xleftmargin=-5mm] {../progs/app06.scala}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Rectification Example}
\begin{center}
\bl{$\only<1>{(b \cdot c)}%
\only<2-3>{(\underline{b \cdot c})}%
\only<1-3>{+}%
\only<1>{(\ZERO + \ONE)}%
\only<2-3>{(\underline{\ZERO + \ONE})}$}%
\only<4->{%
\bl{$\underline{(b \cdot c) + (\ZERO + \ONE)}$}%
}
$\mapsto$
\bl{$(b \cdot c) + \ONE$}
\end{center}\bigskip
\onslide<3->{%
\begin{center}
\begin{tabular}{lcl}
\bl{$f_{s1}$} & \bl{$=$} & \bl{$\lambda v.v$}\\
\bl{$f_{s2}$} & \bl{$=$} & \bl{$\lambda v. \textit{Right}(v)$}
\end{tabular}
\end{center}}
\only<4>{%
\begin{center}
\begin{tabular}{@{}l@{\hspace{1mm}}l@{}}
\bl{$f_{alt}(f_{s1}, f_{s2}) \dn$}\\
\quad \bl{$\lambda v.\,$}
case \bl{$v = Left(v')$}:
& return \bl{$Left(f_{s1}(v'))$}\\
\quad \phantom{$\lambda v.\,$}
case \bl{$v = Right(v')$}:
& return \bl{$Right(f_{s2}(v'))$}\\
\end{tabular}
\end{center}}%
\only<5->{%
\begin{center}
\begin{tabular}{@{}l@{\hspace{1mm}}l@{}}
\only<5->{\phantom{\bl{$f_{alt}(f_{s1}, f_{s2}) \dn$}}}\\
\quad \bl{$\lambda v.\,$}
case \bl{$v = Left(v')$}:
& return \bl{$Left(v')$}\\
\quad \phantom{$\lambda v.\,$}
case \bl{$v = Right(v')$}:
& return \bl{$Right(Right(v'))$}\\
\end{tabular}
\end{center}}%
\only<6->{%
\begin{center}
\begin{tabular}{@{}l@{\hspace{4mm}}l@{}}
\bl{$\textit{mkeps}$} simplified case: &
\bl{$\textit{Right}(\textit{Empty})$}\\
rectified case: &
\bl{$\textit{Right}(\textit{Right}(\textit{Empty}))$}
\end{tabular}
\end{center}}%
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lexing with Simplification}
\begin{center}
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
\bl{$lex\,r\,[]$} & \bl{$\dn$} & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
\bl{$lex\,r\,c::s$} & \bl{$\dn$} & \bl{let $(r', frect) = simp(der(c, r))$}\smallskip\\
& & \bl{$inj\,r\,c\,(frect(lex(r', s)))$}\\
\end{tabular}
\end{center}\bigskip
\begin{center}\small
\begin{tikzpicture}[node distance=1.1cm,every node/.style={minimum size=7mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Environments}
Obtaining the ``recorded'' parts of a value:
\begin{center}
\begin{tabular}{lcl}
\bl{$env(Empty)$} & \bl{$\dn$} & \bl{$[]$}\\
\bl{$env(Char(c))$} & \bl{$\dn$} & \bl{$[]$}\\
\bl{$env(Left(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\
\bl{$env(Right(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\
\bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\
\bl{$env(Stars\,[v_1,\ldots ,v_n])$} & \bl{$\dn$} &
\bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\
\bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{While Tokens}
\begin{center}
\begin{tabular}{@{}r@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
\pcode{WHILE\_REGS} & $\dn$ & \raisebox{-1mm}{\large(}\pcode{("k" : KEYWORD)} +\\
& & \phantom{(}\pcode{("i" : ID)} +\\
& & \phantom{(}\pcode{("o" : OP)} + \\
& & \phantom{(}\pcode{("n" : NUM)} + \\
& & \phantom{(}\pcode{("s" : SEMI)} +\\
& & \phantom{(}\pcode{("p" : (LPAREN + RPAREN))} +\\
& & \phantom{(}\pcode{("b" : (BEGIN + END))} +\\
& & \phantom{(}\pcode{("w" : WHITESPACE)}\raisebox{-1mm}{\large)$^*$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\consolas
\begin{center}
\code{"if true then then 42 else +"}
\end{center}
\only<1>{
\small\begin{tabular}{l}
KEYWORD(if),\\
WHITESPACE,\\
IDENT(true),\\
WHITESPACE,\\
KEYWORD(then),\\
WHITESPACE,\\
KEYWORD(then),\\
WHITESPACE,\\
NUM(42),\\
WHITESPACE,\\
KEYWORD(else),\\
WHITESPACE,\\
OP(+)
\end{tabular}}
\only<2>{
\small\begin{tabular}{l}
KEYWORD(if),\\
IDENT(true),\\
KEYWORD(then),\\
KEYWORD(then),\\
NUM(42),\\
KEYWORD(else),\\
OP(+)
\end{tabular}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lexer: Two Rules}
\begin{itemize}
\item Longest match rule (``maximal munch rule''): The
longest initial substring matched by any regular expression is taken
as next token.\bigskip
\item Rule priority:
For a particular longest initial substring, the first regular
expression that can match determines the token.
\end{itemize}
%\url{http://www.technologyreview.com/tr10/?year=2011}
%finite deterministic automata/ nondeterministic automaton
%\item problem with infix operations, for example i-12
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
\bl{$zeroable(\ZERO)$} & \bl{$\dn$} & \bl{$true$}\\
\bl{$zeroable(\ONE)$} & \bl{$\dn$} & \bl{$\textit{false}$}\\
\bl{$zeroable(c)$} & \bl{$\dn$} & \bl{$\textit{false}$}\\
\bl{$zeroable(r_1 + r_2)$} & \bl{$\dn$} & \bl{$zeroable(r_1) \wedge zeroable(r_2)$}\\
\bl{$zeroable(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$zeroable(r_1) \vee zeroable(r_2)$}\\
\bl{$zeroable(r^*)$} & \bl{$\dn$} & \bl{$\textit{false}$}\\
\end{tabular}
\end{center}
\begin{center}
\bl{$zeroable(r)$} if and only if \bl{$L(r) = \{\}$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: