\documentclass[dvipsnames,14pt,t]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\usepackage{../grammar}
\hfuzz=220pt
\pgfplotsset{compat=1.11}
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
% beamer stuff
\renewcommand{\slidecaption}{AFL 05, King's College London}
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
\begin{tabular}{@ {}c@ {}}
\\[-3mm]
\LARGE Automata and \\[-2mm]
\LARGE Formal Languages (5)\\[3mm]
\end{tabular}}
\normalsize
\begin{center}
\begin{tabular}{ll}
Email: & christian.urban at kcl.ac.uk\\
Office: & S1.27 (1st floor Strand Building)\\
Slides: & KEATS (also home work is there)\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Last Week\\[-1mm]
Regexes and Values\end{tabular}}
Regular expressions and their corresponding values:
\begin{center}
\begin{columns}
\begin{column}{3cm}
\begin{tabular}{@{}rrl@{}}
\bl{$r$} & \bl{$::=$} & \bl{$\varnothing$}\\
& \bl{$\mid$} & \bl{$\epsilon$} \\
& \bl{$\mid$} & \bl{$c$} \\
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
& \bl{$\mid$} & \bl{$r_1 + r_2$} \\
\\
& \bl{$\mid$} & \bl{$r^*$} \\
\end{tabular}
\end{column}
\begin{column}{3cm}
\begin{tabular}{@{\hspace{-7mm}}rrl@{}}
\bl{$v$} & \bl{$::=$} & \\
& & \bl{$Empty$} \\
& \bl{$\mid$} & \bl{$Char(c)$} \\
& \bl{$\mid$} & \bl{$Seq(v_1,v_2)$}\\
& \bl{$\mid$} & \bl{$Left(v)$} \\
& \bl{$\mid$} & \bl{$Right(v)$} \\
& \bl{$\mid$} & \bl{$[v_1,\ldots\,v_n]$} \\
\end{tabular}
\end{column}
\end{columns}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{textblock}{10}(3,5)
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{textblock}
\begin{textblock}{6}(1,0.8)
\begin{bubble}[6cm]
\small
\begin{tabular}{ll}
\bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\
\bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\
\bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\
\bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\
\end{tabular}
\end{bubble}
\end{textblock}
\begin{textblock}{6}(1,11.4)
\begin{bubble}[7.6cm]
\small
\begin{tabular}{ll}
\bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\
\bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\
\bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\
\bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\
\end{tabular}
\end{bubble}
\end{textblock}
\begin{textblock}{6}(12,11.4)
\begin{bubble}[2cm]
\small
\begin{tabular}{ll}
\bl{$|v_1|$}: & \bl{$abc$}\\
\bl{$|v_2|$}: & \bl{$bc$}\\
\bl{$|v_3|$}: & \bl{$c$}\\
\bl{$|v_4|$}: & \bl{$[]$}
\end{tabular}
\end{bubble}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Simplification}
\begin{itemize}
\item If we simplify after the derivative, then we are builing the
value for the simplified regular expression, but \emph{not} for the original
regular expression.
\end{itemize}
\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm] (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}
\small
\hspace{4.5cm}\bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}
$\mapsto$
\bl{$\epsilon$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Records}
\begin{itemize}
\item new regex: \bl{$(x:r)$}\hspace{7mm}new value:
\bl{$Rec(x,v)$}\medskip
\item \bl{$nullable(x:r) \dn nullable(r)$}
\item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$}
\item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$}
\item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$}
\end{itemize}\bigskip\bigskip
\small
for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Environments}
Obtaining the ``recorded'' parts of a value:
\begin{center}
\begin{tabular}{lcl}
\bl{$env(Empty)$} & \bl{$\dn$} & \bl{$[]$}\\
\bl{$env(Char(c))$} & \bl{$\dn$} & \bl{$[]$}\\
\bl{$env(Left(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\
\bl{$env(Right(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\
\bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\
\bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} &
\bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\
\bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{While Tokens}
\begin{center}
\begin{tabular}{@{}r@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
\pcode{WHILE\_REGS} & $\dn$ & \raisebox{-1mm}{\large(}\pcode{("k" : KEYWORD)} +\\
& & \phantom{(}\pcode{("i" : ID)} +\\
& & \phantom{(}\pcode{("o" : OP)} + \\
& & \phantom{(}\pcode{("n" : NUM)} + \\
& & \phantom{(}\pcode{("s" : SEMI)} +\\
& & \phantom{(}\pcode{("p" : (LPAREN + RPAREN))} +\\
& & \phantom{(}\pcode{("b" : (BEGIN + END))} +\\
& & \phantom{(}\pcode{("w" : WHITESPACE)}\raisebox{-1mm}{\large)$^*$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\consolas
\begin{center}
\code{"if true then then 42 else +"}
\end{center}
\only<1>{
\small\begin{tabular}{l}
KEYWORD(if),\\
WHITESPACE,\\
IDENT(true),\\
WHITESPACE,\\
KEYWORD(then),\\
WHITESPACE,\\
KEYWORD(then),\\
WHITESPACE,\\
NUM(42),\\
WHITESPACE,\\
KEYWORD(else),\\
WHITESPACE,\\
OP(+)
\end{tabular}}
\only<2>{
\small\begin{tabular}{l}
KEYWORD(if),\\
IDENT(true),\\
KEYWORD(then),\\
KEYWORD(then),\\
NUM(42),\\
KEYWORD(else),\\
OP(+)
\end{tabular}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Rules}
\begin{itemize}
\item Longest match rule (``maximal munch rule''): The
longest initial substring matched by any regular expression is taken
as next token.\bigskip
\item Rule priority:
For a particular longest initial substring, the first regular
expression that can match determines the token.
\end{itemize}
%\url{http://www.technologyreview.com/tr10/?year=2011}
%finite deterministic automata/ nondeterministic automaton
%\item problem with infix operations, for example i-12
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\begin{itemize}
\item Regular expression for email addresses
\begin{center}
\begin{tabular}{l}
(name: \bl{$[a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+$})\bl{$\cdot @\cdot$}\\
\qquad(domain: \bl{$[a\mbox{-}z0\mbox{-}9\,.-]^+$}) \bl{$\cdot .\cdot$}\\
\qquad\qquad(top\_level: \bl{$[a\mbox{-}z\,.]^{\{2,6\}}$})
\end{tabular}
\end{center}
\bl{\[
\texttt{christian.urban@kcl.ac.uk}
\]}
\item result environment:
\begin{center}
\begin{tabular}{l}
\bl{$[(name:\texttt{christian.urban}),$}\\
\bl{$\phantom{[}(domain:\texttt{kcl}),$}\\
\bl{$\phantom{[}(top\_level:\texttt{ac.uk})]$}
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Coursework}
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
\bl{$nullable([c_1 c_2 \ldots c_n])$} & \bl{$\dn$} & $?$\\
\bl{$nullable(r^+)$} & \bl{$\dn$} & $?$\\
\bl{$nullable(r^?)$} & \bl{$\dn$} & $?$\\
\bl{$nullable(r^{\{n,m\}})$} & \bl{$\dn$} & $?$\\
\bl{$nullable(\sim{}r)$} & \bl{$\dn$} & $?$\medskip\\
\bl{$der\, c\, ([c_1 c_2 \ldots c_n])$} & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (r^+)$} & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (r^?)$} & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (r^{\{n,m\}})$} & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (\sim{}r)$} & \bl{$\dn$} & $?$\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Languages}
While regular expressions are very useful for lexing, there is
no regular expression that can recognise the language
\bl{$a^nb^n$}.\bigskip
\begin{center}
\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}
\end{center}\bigskip\bigskip
\small
\noindent So we cannot find out with regular expressions
whether parentheses are matched or unmatched. Also regular
expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Hierarchy of Languages}
\begin{center}
\begin{tikzpicture}
[rect/.style={draw=black!50,
top color=white,
bottom color=black!20,
rectangle,
very thick,
rounded corners}, scale=1.2]
\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
\draw (0,-1.4) node [rect] {regular languages};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Grammars}
A (context-free) grammar \bl{$G$} consists of
\begin{itemize}
\item a finite set of nonterminal symbols (upper case)
\item a finite terminal symbols or tokens (lower case)
\item a start symbol (which must be a nonterminal)
\item a set of rules
\begin{center}
\bl{$A \rightarrow \textit{rhs}$}
\end{center}
where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,
including the empty sequence \bl{$\epsilon$}.\medskip\pause
We also allow rules
\begin{center}
\bl{$A \rightarrow \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Palindromes}
A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:
\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ & $\epsilon$ \\
$S$ & $\rightarrow$ & $a\cdot S\cdot a$ \\
$S$ & $\rightarrow$ & $b\cdot S\cdot b$ \\
\end{tabular}}
\end{center}\pause
or
\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\
\end{tabular}}
\end{center}\pause\bigskip
\small
Can you find the grammar rules for matched parentheses?
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Arithmetic Expressions}
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & $num\_token$ \\
$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ & $( \cdot E \cdot )$
\end{tabular}}
\end{center}\pause
\bl{\texttt{1 + 2 * 3 + 4}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A CFG Derivation}
\begin{enumerate}
\item Begin with a string containing only the start symbol, say \bl{$S$}\bigskip
\item Replace any nonterminal \bl{$X$} in the string by the
right-hand side of some production \bl{$X \rightarrow \textit{rhs}$}\bigskip
\item Repeat 2 until there are no nonterminals
\end{enumerate}
\begin{center}
\bl{$S \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Example Derivation}
\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\
\end{tabular}}
\end{center}\bigskip
\begin{center}
\begin{tabular}{lcl}
\bl{$S$} & \bl{$\rightarrow$} & \bl{$aSa$}\\
& \bl{$\rightarrow$} & \bl{$abSba$}\\
& \bl{$\rightarrow$} & \bl{$abaSaba$}\\
& \bl{$\rightarrow$} & \bl{$abaaba$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Example Derivation}
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & $num\_token$ \\
$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ & $( \cdot E \cdot )$
\end{tabular}}
\end{center}\bigskip
\begin{center}
\begin{tabular}{@{}c@{}c@{}}
\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l}
\bl{$E$} & \bl{$\rightarrow$} & \bl{$E*E$}\\
& \bl{$\rightarrow$} & \bl{$E+E*E$}\\
& \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\
& \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular} &\pause
\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l}
\bl{$E$} & \bl{$\rightarrow$} & \bl{$E+E$}\\
& \bl{$\rightarrow$} & \bl{$E+E+E$}\\
& \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\
& \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Context Sensitive Grms}
\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\Rightarrow$ & $bSAA\;|\; \epsilon$\\
$A$ & $\Rightarrow$ & $a$\\
$bA$ & $\Rightarrow$ & $Ab$\\
\end{tabular}}
\end{center}\pause
\begin{center}
\bl{$S \Rightarrow\ldots\Rightarrow^? "ababaa"$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Language of a CFG}
Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}.
Then the language \bl{$L(G)$} is:
\begin{center}
\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$}
\end{center}\pause
\begin{itemize}
\item Terminals, because there are no rules for replacing them.
\item Once generated, terminals are ``permanent''.
\item Terminals ought to be tokens of the language\\
(but can also be strings).
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Parse Trees}
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\
$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\
$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\
\end{tabular}}
\end{center}
\begin{center}
\begin{tikzpicture}[level distance=8mm, blue]
\node {$E$}
child {node {$F$}
child {node {$T$}
child {node {(\,$E$\,)}
child {node{$F$ *{} $F$}
child {node {$T$} child {node {2}}}
child {node {$T$} child {node {3}}}
}
}
}
child {node {+}}
child {node {$T$}
child {node {(\,$E$\,)}
child {node {$F$}
child {node {$T$ +{} $T$}
child {node {3}}
child {node {4}}
}
}}
}};
\end{tikzpicture}
\end{center}
\begin{textblock}{5}(1, 6.5)
\bl{\texttt{(2*3)+(3+4)}}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Arithmetic Expressions}
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & $num\_token$ \\
$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ & $( \cdot E \cdot )$
\end{tabular}}
\end{center}\pause\bigskip
A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$E$} such
that \bl{$E \rightarrow^+ E\cdot \ldots$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Ambiguous Grammars}
A grammar is \alert{\bf ambiguous} if there is a string that
has at least two different parse trees.
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & $num\_token$ \\
$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ & $( \cdot E \cdot )$
\end{tabular}}
\end{center}
\bl{\texttt{1 + 2 * 3 + 4}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Dangling Else}
Another ambiguous grammar:\bigskip
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & if $E$ then $E$\\
& $|$ & if $E$ then $E$ else $E$ \\
& $|$ & \ldots
\end{tabular}}
\end{center}\bigskip
\bl{\texttt{if a then if x then y else c}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Parser Combinators}
One of the simplest ways to implement a parser, see
{\small\url{https://vimeo.com/142341803}}\bigskip
Parser combinators: \bigskip
\begin{minipage}{1.1\textwidth}
\begin{center}
\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$}
$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$
\end{center}
\end{minipage}\bigskip
\begin{itemize}
\item atomic parsers
\item sequencing
\item alternative
\item semantic action
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Atomic parsers, for example, number tokens
\begin{center}
\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$}
\end{center}\bigskip
\begin{itemize}
\item you consume one or more token from the\\
input (stream)
\item also works for characters and strings
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Alternative parser (code \bl{$p\;||\;q$})\bigskip
\begin{itemize}
\item apply \bl{$p$} and also \bl{$q$}; then combine
the outputs
\end{itemize}
\begin{center}
\large \bl{$p(\text{input}) \cup q(\text{input})$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Sequence parser (code \bl{$p\sim q$})\bigskip
\begin{itemize}
\item apply first \bl{$p$} producing a set of pairs
\item then apply \bl{$q$} to the unparsed parts
\item then combine the results:\medskip
\begin{center}
((output$_1$, output$_2$), unparsed part)
\end{center}
\end{itemize}
\begin{center}
\begin{tabular}{l}
\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm]
\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Function parser (code \bl{$p \Rightarrow f\;$})\bigskip
\begin{itemize}
\item apply \bl{$p$} producing a set of pairs
\item then apply the function \bl{$f$} to each first component
\end{itemize}
\begin{center}
\begin{tabular}{l}
\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
\end{tabular}
\end{center}\bigskip\bigskip\pause
\bl{$f$} is the semantic action (``what to do with the parsed input'')
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}
Addition
\begin{center}
\bl{$T \sim + \sim E \Rightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}
\end{center}\pause
Multiplication
\begin{center}
\bl{$F \sim * \sim T \Rightarrow f((x,y), z) \Rightarrow x * z$}
\end{center}\pause
Parenthesis
\begin{center}
\bl{$\text{(} \sim E \sim \text{)} \Rightarrow f((x,y), z) \Rightarrow y$}
\end{center}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Types of Parsers}
\begin{itemize}
\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},
then \bl{$p \sim q$} returns results of type
\begin{center}
\bl{$T \times S$}
\end{center}\pause
\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$},
and \bl{$p \;||\; q$} returns results of type
\begin{center}
\bl{$T$}
\end{center}\pause
\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from
\bl{$T$} to \bl{$S$}, then
\bl{$p \Rightarrow f$} returns results of type
\begin{center}
\bl{$S$}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Input Types of Parsers}
\begin{itemize}
\item input: \alert{token list}
\item output: set of (output\_type, \alert{token list})
\end{itemize}\bigskip\pause
actually it can be any input type as long as it is a kind of
sequence (for example a string)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Scannerless Parsers}
\begin{itemize}
\item input: \alert{string}
\item output: set of (output\_type, \alert{string})
\end{itemize}\bigskip
but lexers are better when whitespaces or comments need to be
filtered out; then input is a sequence of tokens
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Successful Parses}
\begin{itemize}
\item input: string
\item output: \alert{set of} (output\_type, string)
\end{itemize}\bigskip
a parse is successful whenever the input has been fully
``consumed'' (that is the second component is empty)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Abstract Parser Class}
\small
\lstinputlisting[language=Scala,xleftmargin=1mm]
{../progs/app7.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\small
\fontsize{10}{12}\selectfont
\lstinputlisting[language=Scala,xleftmargin=1mm]
{../progs/app8.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Grammars}
Which languages are recognised by the following two grammars?
\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\
& $|$ & $\epsilon$
\end{tabular}}
\end{center}\bigskip
\begin{center}
\bl{\begin{tabular}{lcl}
$U$ & $\rightarrow$ & $1 \cdot U$\\
& $|$ & $\epsilon$
\end{tabular}}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Ambiguous Grammars}
\begin{center}
\begin{tikzpicture}
\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
enlargelimits=false,
xtick={0,100,...,1000},
xmax=1050,
ymax=33,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=11cm,
height=7cm,
legend entries={unambiguous,ambiguous},
legend pos=north east,
legend cell align=left,
x tick label style={font=\small,/pgf/number format/1000 sep={}}]
\addplot[blue,mark=*, mark options={fill=white}]
table {s-grammar1.data};
\only<2>{
\addplot[red,mark=triangle*, mark options={fill=white}]
table {s-grammar2.data};}
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{While-Language}
\mbox{}\\[-23mm]\mbox{}
\bl{\begin{plstx}[rhs style=,one per line]
: \meta{Stmt} ::= skip
| \meta{Id} := \meta{AExp}
| if \meta{BExp} then \meta{Block} else \meta{Block}
| while \meta{BExp} do \meta{Block}\\
: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}
| \meta{Stmt}\\
: \meta{Block} ::= \{ \meta{Stmts} \}
| \meta{Stmt}\\
: \meta{AExp} ::= \ldots\\
: \meta{BExp} ::= \ldots\\
\end{plstx}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{An Interpreter}
\begin{center}
\bl{\begin{tabular}{l}
$\{$\\
\;\;$x := 5 \text{;}$\\
\;\;$y := x * 3\text{;}$\\
\;\;$y := x * 4\text{;}$\\
\;\;$x := u * 3$\\
$\}$
\end{tabular}}
\end{center}
\begin{itemize}
\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause
\item \bl{\texttt{eval(stmt, env)}}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: