% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\usepackage{../grammar}
\hfuzz=220pt
\pgfplotsset{compat=1.11}
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
% beamer stuff
\renewcommand{\slidecaption}{CFL 05, King's College London}
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
\begin{tabular}{@ {}c@ {}}
\\[-3mm]
\LARGE Compilers and \\[-2mm]
\LARGE Formal Languages\\[3mm]
\end{tabular}}
\normalsize
\begin{center}
\begin{tabular}{ll}
Email: & christian.urban at kcl.ac.uk\\
%Office Hours: & Thursdays 12 -- 14\\
%Location: & N7.07 (North Wing, Bush House)\\
Slides \& Progs: & KEATS (also homework is there)\\
\end{tabular}
\end{center}
\begin{center}
\begin{tikzpicture}
\node[drop shadow,fill=white,inner sep=0pt]
{\footnotesize\rowcolors{1}{capri!10}{white}
\begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline
1 Introduction, Languages & 6 While-Language \\
2 Regular Expressions, Derivatives & 7 Compilation, JVM \\
3 Automata, Regular Languages & 8 Compiling Functional Languages \\
4 Lexing, Tokenising & 9 Optimisations \\
\cellcolor{blue!50}
5 Grammars, Parsing & 10 LLVM \\ \hline
\end{tabular}%
};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
% \frametitle{Coursework 1: Submissions}
%
% \begin{itemize}
% \item Scala (29)
% \item Haskell (1)
% \item Kotlin (1)
% \item Rust (1)
% \end{itemize}\bigskip\bigskip
%
% \small
% Please get in contact if you intend to do CW Strand 2. No zips please.
% Give definitions also on paper if asked. BTW, simp
% can stay unchanged. Use \texttt{ders} for CW2, not \texttt{ders2}!
% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Lexer, Parser}
\mbox{}\\[-16mm]\mbox{}
\begin{center}
\begin{tikzpicture}[scale=1,
node/.style={
rectangle,rounded corners=3mm,
very thick,draw=black!50,
minimum height=18mm, minimum width=20mm,
top color=white,bottom color=black!20,drop shadow}]
\node (0) at (-2.3,0) {};
\node (A) at (0,0) [node] {};
\node [below right] at (A.north west) {lexer};
\node (B) at (3,0) [node] {};
\node [below right=1mm] at (B.north west)
{\mbox{}\hspace{-1mm}parser};
\node (C) at (6,0) [node] {};
\node [below right] at (C.north west)
{\mbox{}\hspace{-1mm}code gen};
\node (1) at (8.4,0) {};
\draw [->,line width=4mm] (0) -- (A);
\draw [->,line width=4mm] (A) -- (B);
\draw [->,line width=4mm] (B) -- (C);
\draw [->,line width=4mm] (C) -- (1);
\end{tikzpicture}
\end{center}
Today a parser.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{What Parsing is Not}
Usually parsing does not check semantic correctness, e.g.
\begin{itemize}
\item whether a function is not used before it
is defined
\item whether a function has the correct number of arguments
or are of correct type
\item whether a variable can be declared twice in a scope
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Languages}
While regular expressions are very useful for lexing, there is
no regular expression that can recognise the language
\bl{$a^nb^n$}.\bigskip
\begin{center}
\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}
\end{center}\bigskip\bigskip
\small
\noindent So we cannot find out with regular expressions
whether parentheses are matched or unmatched. Also regular
expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Hierarchy of Languages}
\begin{center}
\begin{tikzpicture}
[rect/.style={draw=black!50,
top color=white,
bottom color=black!20,
rectangle,
very thick,
rounded corners}, scale=1.2]
\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
\draw (0,-1.4) node [rect] {regular languages};
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{CF Grammars}
A \alert{\bf context-free grammar} \bl{$G$} consists of
\begin{itemize}
\item a finite set of nonterminal symbols (e.g.~$\meta{A}$ upper case)
\item a finite set terminal symbols or tokens (lower case)
\item a start symbol (which must be a nonterminal)
\item a set of rules
\begin{center}
\bl{$\meta{A} ::= \textit{rhs}$}
\end{center}
where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,
including the empty sequence \bl{$\epsilon$}.\medskip\pause
We also allow rules
\begin{center}
\bl{$\meta{A} ::= \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Palindromes}
A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:
\bl{\begin{plstx}[margin=3cm]
: \meta{S} ::= a\cdot\meta{S}\cdot a\\
: \meta{S} ::= b\cdot\meta{S}\cdot b\\
: \meta{S} ::= a\\
: \meta{S} ::= b\\
: \meta{S} ::= \epsilon\\
\end{plstx}}\pause
or
\bl{\begin{plstx}[margin=2cm]
: \meta{S} ::= a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a | b | \epsilon\\
\end{plstx}}\pause\bigskip
\small
Can you find the grammar rules for matched parentheses?
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Arithmetic Expressions}
\bl{\begin{plstx}[margin=3cm,one per line]
: \meta{E} ::= num\_token
| \meta{E} \cdot + \cdot \meta{E}
| \meta{E} \cdot - \cdot \meta{E}
| \meta{E} \cdot * \cdot \meta{E}
| ( \cdot \meta{E} \cdot ) \\
\end{plstx}}\pause
\bl{\texttt{1 + 2 * 3 + 4}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A CFG Derivation}
\begin{enumerate}
\item Begin with a string containing only the start symbol, say \bl{\meta{S}}\bigskip
\item Replace any nonterminal \bl{\meta{X}} in the string by the
right-hand side of some production \bl{$\meta{X} ::= \textit{rhs}$}\bigskip
\item Repeat 2 until there are no nonterminals left
\end{enumerate}
\begin{center}
\bl{$\meta{S} \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Example Derivation}
\bl{\begin{plstx}[margin=2cm]
: \meta{S} ::= \epsilon | a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b \\
\end{plstx}}\bigskip
\begin{center}
\begin{tabular}{lcl}
\bl{\meta{S}} & \bl{$\rightarrow$} & \bl{$a\meta{S}a$}\\
& \bl{$\rightarrow$} & \bl{$ab\meta{S}ba$}\\
& \bl{$\rightarrow$} & \bl{$aba\meta{S}aba$}\\
& \bl{$\rightarrow$} & \bl{$abaaba$}\\
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Example Derivation}
\bl{\begin{plstx}[margin=3cm,one per line]
: \meta{E} ::= num\_token
| \meta{E} \cdot + \cdot \meta{E}
| \meta{E} \cdot - \cdot \meta{E}
| \meta{E} \cdot * \cdot \meta{E}
| ( \cdot \meta{E} \cdot ) \\
\end{plstx}}
\small
\begin{center}
\begin{tabular}{@{}c@{}c@{}}
\begin{tabular}{@{\hspace{-2mm}}l@{\hspace{1mm}}l@{\hspace{1mm}}l@{\hspace{4mm}}}
\bl{\meta{E}} & \bl{$\rightarrow$} & \bl{$\meta{E}*\meta{E}$}\\
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}$}\\
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\
& \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular} &\pause
\begin{tabular}{@{}l@{\hspace{0mm}}l@{\hspace{1mm}}l}
\bl{$\meta{E}$} & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}$}\\
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}+\meta{E}$}\\
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\
& \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Context Sensitive Grammars}
It is much harder to find out whether a string is parsed
by a context sensitive grammar:
\bl{\begin{plstx}[margin=2cm]
: \meta{S} ::= b\meta{S}\meta{A}\meta{A} | \epsilon\\
: \meta{A} ::= a\\
: b\meta{A} ::= \meta{A}b\\
\end{plstx}}\pause
\begin{center}
\bl{$\meta{S} \rightarrow\ldots\rightarrow^? ababaa$}
\end{center}\pause
\begin{center}
\tt Time flies like an arrow;\\
fruit flies like bananas.
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Language of a CFG}
Let \bl{$G$} be a context-free grammar with start symbol \bl{\meta{S}}.
Then the language \bl{$L(G)$} is:
\begin{center}
\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge \meta{S} \rightarrow^* c_1\ldots c_n \}$}
\end{center}\pause
\begin{itemize}
\item Terminals, because there are no rules for replacing them.
\item Once generated, terminals are ``permanent''.
\item Terminals ought to be tokens of the language\\
(but can also be strings).
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Parse Trees}
\mbox{}\\[-12mm]
\bl{\begin{plstx}: \meta{E} ::= \meta{T} | \meta{T} \cdot + \cdot \meta{E} | \meta{T} \cdot - \cdot \meta{E}\\
: \meta{T} ::= \meta{F} | \meta{F} \cdot * \cdot \meta{T}\\
: \meta{F} ::= num\_token | ( \cdot \meta{E} \cdot )\\
\end{plstx}}
\begin{center}\small
\begin{tikzpicture}[level distance=8mm, blue]
\node {$\meta{E}$}
child {node {$\meta{T}$}
child {node {$\meta{T}$}
child {node {(\,$\meta{E}$\,)}
child {node{$\meta{F}$ *{} $\meta{F}$}
child {node {$\meta{T}$} child {node {2}}}
child {node {$\meta{T}$} child {node {3}}}
}
}
}
child {node {+}}
child {node {$\meta{T}$}
child {node {(\,$\meta{E}$\,)}
child {node {$\meta{F}$}
child {node {$\meta{T}$ +{} $\meta{T}$}
child {node {3}}
child {node {4}}
}
}}
}};
\end{tikzpicture}
\end{center}
\begin{textblock}{5}(1, 6.5)
\bl{\texttt{(2*3)+(3+4)}}
\end{textblock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Arithmetic Expressions}
\bl{\begin{plstx}[margin=3cm,one per line]
: \meta{E} ::= num\_token
| \meta{E} \cdot + \cdot \meta{E}
| \meta{E} \cdot - \cdot \meta{E}
| \meta{E} \cdot * \cdot \meta{E}
| ( \cdot \meta{E} \cdot ) \\
\end{plstx}}\pause\bigskip
A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$\meta{E}$} such
that \bl{$\meta{E} \rightarrow^+ \meta{E}\cdot \ldots$}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Ambiguous Grammars}
A grammar is \alert{\bf ambiguous} if there is a string that
has at least two different parse trees.
\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::= num\_token
| \meta{E} \cdot + \cdot \meta{E}
| \meta{E} \cdot - \cdot \meta{E}
| \meta{E} \cdot * \cdot \meta{E}
| ( \cdot \meta{E} \cdot ) \\
\end{plstx}}
\bl{\texttt{1 + 2 * 3 + 4}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{`Dangling' Else}
Another ambiguous grammar:\bigskip
\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ & if $E$ then $E$\\
& $|$ & if $E$ then $E$ else $E$ \\
& $|$ & \ldots
\end{tabular}}
\end{center}\bigskip
\bl{\texttt{if a then if x then y else c}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Parser Combinators}
One of the simplest ways to implement a parser, see
{\small\url{https://vimeo.com/142341803}}\bigskip
Parser combinators: \bigskip
\begin{minipage}{1.1\textwidth}
\begin{center}
\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$}
$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$
\end{center}
\end{minipage}\bigskip
\begin{itemize}
\item atomic parsers
\item sequencing
\item alternative
\item semantic action
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Atomic parsers, for example, number tokens
\begin{center}
\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$}
\end{center}\bigskip
\begin{itemize}
\item you consume one or more token from the\\
input (stream)
\item also works for characters and strings
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Alternative parser (code \bl{$p\;||\;q$})\bigskip
\begin{itemize}
\item apply \bl{$p$} and also \bl{$q$}; then combine
the outputs
\end{itemize}
\begin{center}
\large \bl{$p(\text{input}) \cup q(\text{input})$}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Sequence parser (code \bl{$p\sim q$})\bigskip
\begin{itemize}
\item apply first \bl{$p$} producing a set of pairs
\item then apply \bl{$q$} to the unparsed part
\item then combine the results:\medskip
\begin{center}
((output$_1$, output$_2$), unparsed part)
\end{center}
\end{itemize}
\begin{center}
\begin{tabular}{l}
\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm]
\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
Function parser (code \bl{$p \Rightarrow f\;$})\bigskip
\begin{itemize}
\item apply \bl{$p$} producing a set of pairs
\item then apply the function \bl{$f$} to each first component
\end{itemize}
\begin{center}
\begin{tabular}{l}
\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
\end{tabular}
\end{center}\bigskip\bigskip\pause
\bl{$f$} is the semantic action (``what to do with the parsed input'')
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}
Addition
\begin{center}
\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}
\end{center}\pause
Multiplication
\begin{center}
\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$}
\end{center}\pause
Parenthesis
\begin{center}
\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$}
\end{center}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Types of Parsers}
\begin{itemize}
\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},
then \bl{$p \sim q$} returns results of type
\begin{center}
\bl{$T \times S$}
\end{center}\pause
\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$},
and \bl{$p \;||\; q$} returns results of type
\begin{center}
\bl{$T$}
\end{center}\pause
\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from
\bl{$T$} to \bl{$S$}, then
\bl{$p \Rightarrow f$} returns results of type
\begin{center}
\bl{$S$}
\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Input Types of Parsers}
\begin{itemize}
\item input: \alert{token list}
\item output: set of (output\_type, \alert{token list})
\end{itemize}\bigskip\pause
actually it can be any input type as long as it is a kind of
sequence (for example a string)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Scannerless Parsers}
\begin{itemize}
\item input: \alert{string}
\item output: set of (output\_type, \alert{string})
\end{itemize}\bigskip\bigskip
but using lexers is better because whitespaces or comments can be
filtered out; then input is a sequence of tokens
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Successful Parses}
\begin{itemize}
\item input: string
\item output: \alert{set of} (output\_type, string)
\end{itemize}\bigskip
a parse is successful whenever the input has been fully
``consumed'' (that is the second component is empty)
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Abstract Parser Class}
\small
\lstinputlisting[language=Scala,xleftmargin=1mm]
{../progs/app7.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\small
\fontsize{10}{12}\selectfont
\lstinputlisting[language=Scala,xleftmargin=1mm]
{../progs/app8.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Grammars}
Which languages are recognised by the following two grammars?
\begin{center}
\bl{\begin{tabular}{lcl}
$\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\
& $|$ & $\epsilon$
\end{tabular}}
\end{center}\bigskip
\begin{center}
\bl{\begin{tabular}{lcl}
$\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\
& $|$ & $\epsilon$
\end{tabular}}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Ambiguous Grammars}
\begin{center}
\begin{tikzpicture}
\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
enlargelimits=false,
xtick={0,100,...,1000},
xmax=1050,
ymax=33,
ytick={0,5,...,30},
scaled ticks=false,
axis lines=left,
width=11cm,
height=7cm,
legend entries={unambiguous,ambiguous},
legend pos=north east,
legend cell align=left,
x tick label style={font=\small,/pgf/number format/1000 sep={}}]
\addplot[blue,mark=*, mark options={fill=white}]
table {s-grammar1.data};
\only<2>{
\addplot[red,mark=triangle*, mark options={fill=white}]
table {s-grammar2.data};}
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{While-Language}
\mbox{}\\[-23mm]\mbox{}
\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip
| \meta{Id} := \meta{AExp}
| if \meta{BExp} then \meta{Block} else \meta{Block}
| while \meta{BExp} do \meta{Block}\\
: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}
| \meta{Stmt}\\
: \meta{Block} ::= \{ \meta{Stmts} \}
| \meta{Stmt}\\
: \meta{AExp} ::= \ldots\\
: \meta{BExp} ::= \ldots\\\end{plstx}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{An Interpreter}
\begin{center}
\bl{\begin{tabular}{l}
$\{$\\
\;\;$x := 5 \text{;}$\\
\;\;$y := x * 3\text{;}$\\
\;\;$y := x * 4\text{;}$\\
\;\;$x := u * 3$\\
$\}$
\end{tabular}}
\end{center}
\begin{itemize}
\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause
\item \bl{\texttt{eval(stmt, env)}}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Interpreter\end{tabular}}
\begin{center}
\bl{\begin{tabular}{@{}lcl@{}}
$\text{eval}(n, E)$ & $\dn$ & $n$\\
$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\
$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\
$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\
$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\
$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\
$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\
$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\
\end{tabular}}
\end{center}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}}
\begin{center}
\bl{\begin{tabular}{@{}lcl@{}}
$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\
$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\
\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\;
\text{eval}(cs_1,E)$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\
\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\;
\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\
$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\
\end{tabular}}
\end{center}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Test Program}
\mbox{}\\[-18mm]\mbox{}
??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont
%\texttt{\lstinputlisting{../progs/loops.while}}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}}
\begin{center}
\begin{tikzpicture}
\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small]
\addplot+[smooth] file {interpreted.data};
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}}
\begin{itemize}
\item introduced in 1995
\item is a stack-based VM (like Postscript, CLR of .Net)
\item contains a JIT compiler
\item many languages take advantage of JVM's infrastructure (JRE)
\item is garbage collected $\Rightarrow$ no buffer overflows
\item some languages compile to the JVM: Scala, Clojure\ldots
\end{itemize}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: