+ −
% !TEX program = xelatex+ −
\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}+ −
\usepackage{../slides}+ −
\usepackage{../graphics}+ −
\usepackage{../langs}+ −
\usepackage{../data}+ −
\usepackage{../grammar}+ −
+ −
\hfuzz=220pt + −
+ −
\pgfplotsset{compat=1.11}+ −
+ −
\newcommand{\bl}[1]{\textcolor{blue}{#1}} + −
+ −
% beamer stuff + −
\renewcommand{\slidecaption}{CFL 05, King's College London}+ −
+ −
+ −
\begin{document}+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{%+ −
\begin{tabular}{@ {}c@ {}}+ −
\\[-3mm]+ −
\LARGE Compilers and \\[-2mm] + −
\LARGE Formal Languages\\[3mm] + −
\end{tabular}}+ −
+ −
\normalsize+ −
\begin{center}+ −
\begin{tabular}{ll}+ −
Email: & christian.urban at kcl.ac.uk\\+ −
%Office Hours: & Thursdays 12 -- 14\\+ −
%Location: & N7.07 (North Wing, Bush House)\\+ −
Slides \& Progs: & KEATS (also homework is there)\\ + −
\end{tabular}+ −
\end{center}+ −
+ −
\begin{center}+ −
\begin{tikzpicture}+ −
\node[drop shadow,fill=white,inner sep=0pt] + −
{\footnotesize\rowcolors{1}{capri!10}{white}+ −
\begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline+ −
1 Introduction, Languages & 6 While-Language \\+ −
2 Regular Expressions, Derivatives & 7 Compilation, JVM \\+ −
3 Automata, Regular Languages & 8 Compiling Functional Languages \\+ −
4 Lexing, Tokenising & 9 Optimisations \\+ −
\cellcolor{blue!50}+ −
5 Grammars, Parsing & 10 LLVM \\ \hline+ −
\end{tabular}%+ −
};+ −
\end{tikzpicture}+ −
\end{center}+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
%\begin{frame}[c]+ −
% \frametitle{Coursework 1: Submissions}+ −
% + −
% \begin{itemize}+ −
% \item Scala (29)+ −
% \item Haskell (1)+ −
% \item Kotlin (1)+ −
% \item Rust (1)+ −
% \end{itemize}\bigskip\bigskip + −
% + −
% \small+ −
% Please get in contact if you intend to do CW Strand 2. No zips please.+ −
% Give definitions also on paper if asked. BTW, simp + −
% can stay unchanged. Use \texttt{ders} for CW2, not \texttt{ders2}!+ −
% \end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Parser}+ −
\mbox{}\\[-16mm]\mbox{}+ −
+ −
\begin{center}+ −
\begin{tikzpicture}[scale=1,+ −
node/.style={+ −
rectangle,rounded corners=3mm,+ −
very thick,draw=black!50,+ −
minimum height=18mm, minimum width=20mm,+ −
top color=white,bottom color=black!20,drop shadow}]+ −
\node (0) at (-2.3,0) {}; + −
+ −
\node (A) at (0,0) [node] {};+ −
\node [below right] at (A.north west) {lexer};+ −
+ −
\node (B) at (3,0) [node] {};+ −
\node [below right=1mm] at (B.north west) + −
{\mbox{}\hspace{-1mm}parser};+ −
+ −
\node (C) at (6,0) [node] {};+ −
\node [below right] at (C.north west) + −
{\mbox{}\hspace{-1mm}code gen};+ −
+ −
\node (1) at (8.4,0) {}; + −
+ −
\draw [->,line width=4mm] (0) -- (A); + −
\draw [->,line width=4mm] (A) -- (B); + −
\draw [->,line width=4mm] (B) -- (C); + −
\draw [->,line width=4mm] (C) -- (1); + −
\end{tikzpicture}+ −
\end{center}+ −
+ −
+ −
\only<2>{+ −
\begin{textblock}{1}(3,6)+ −
\begin{bubble}[8.5cm]+ −
\normalsize+ −
parser input: a sequence of tokens\smallskip\\+ −
+ −
{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\+ −
+ −
parser output: an abstract syntax tree\smallskip\\+ −
\footnotesize+ −
\hspace{2cm}\begin{tikzpicture}+ −
\node {\code{read}}+ −
child {node {\code{lpar}}}+ −
child {node {\code{n}}}+ −
child {node {\code{rpar}}};+ −
\end{tikzpicture}+ −
\end{bubble}+ −
\end{textblock}}+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{What Parsing is Not}+ −
+ −
Usually parsing does not check semantic correctness, e.g.+ −
+ −
\begin{itemize}+ −
\item whether a function is not used before it+ −
is defined+ −
\item whether a function has the correct number of arguments + −
or are of correct type+ −
\item whether a variable can be declared twice in a scope + −
\end{itemize} + −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Regular Languages}+ −
+ −
While regular expressions are very useful for lexing, there is+ −
no regular expression that can recognise the language+ −
\bl{$a^nb^n$}.\bigskip+ −
+ −
\begin{center}+ −
\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}+ −
\end{center}\bigskip\bigskip+ −
+ −
\small+ −
\noindent So we cannot find out with regular expressions+ −
whether parentheses are matched or unmatched. Also regular+ −
expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Hierarchy of Languages}+ −
+ −
\begin{center}+ −
\begin{tikzpicture}+ −
[rect/.style={draw=black!50, + −
top color=white,+ −
bottom color=black!20, + −
rectangle, + −
very thick, + −
rounded corners}, scale=1.2]+ −
+ −
\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};+ −
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};+ −
\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};+ −
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};+ −
\draw (0,-1.4) node [rect] {regular languages};+ −
\end{tikzpicture}+ −
+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
\begin{frame}[c]+ −
\LARGE+ −
\begin{center}+ −
Time flies like an arrow.\\ + −
Fruit flies like bananas.+ −
\end{center} + −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{CFGs}+ −
+ −
A \alert{\bf context-free grammar} \bl{$G$} consists of+ −
+ −
\begin{itemize}+ −
\item a finite set of nonterminal symbols (e.g.~$\meta{A}$ upper case)+ −
\item a finite set terminal symbols or tokens (lower case)+ −
\item a start symbol (which must be a nonterminal)+ −
\item a set of rules+ −
\begin{center}+ −
\bl{$\meta{A} ::= \textit{rhs}$}+ −
\end{center}+ −
+ −
where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,+ −
including the empty sequence \bl{$\epsilon$}.\medskip\pause+ −
+ −
We also allow rules+ −
\begin{center}+ −
\bl{$\meta{A} ::= \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}+ −
\end{center}+ −
\end{itemize}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Palindromes}+ −
+ −
A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:+ −
+ −
\only<1>{%+ −
\bl{\begin{plstx}[margin=1cm]+ −
: \meta{S} ::= a\cdot\meta{S}\cdot a\\+ −
: \meta{S} ::= b\cdot\meta{S}\cdot b\\+ −
: \meta{S} ::= a\\+ −
: \meta{S} ::= b\\+ −
: \meta{S} ::= \epsilon\\+ −
\end{plstx}}}+ −
%+ −
\only<2>{%+ −
\bl{\begin{plstx}[margin=1cm]+ −
: \meta{S} ::= a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a | b | \epsilon\\+ −
\end{plstx}}}+ −
+ −
%\small+ −
%Can you find the grammar rules for matched parentheses?+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Arithmetic Expressions}+ −
+ −
\bl{\begin{plstx}[margin=3cm,one per line]+ −
: \meta{E} ::= 0 \mid 1 \mid 2 \mid ... \mid 9+ −
| \meta{E} \cdot + \cdot \meta{E} + −
| \meta{E} \cdot - \cdot \meta{E} + −
| \meta{E} \cdot * \cdot \meta{E} + −
| ( \cdot \meta{E} \cdot ) \\+ −
\end{plstx}}\pause+ −
+ −
\bl{\texttt{1 + 2 * 3 + 4}}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{A CFG Derivation}+ −
+ −
\begin{enumerate}+ −
\item Begin with a string containing only the start symbol, say \bl{\meta{S}}\bigskip+ −
\item Replace any nonterminal \bl{\meta{X}} in the string by the+ −
right-hand side of some production \bl{$\meta{X} ::= \textit{rhs}$}\bigskip+ −
\item Repeat 2 until there are no nonterminals left+ −
\end{enumerate}+ −
+ −
\begin{center}+ −
\bl{$\meta{S} \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $}+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Example Derivation}+ −
+ −
\bl{\begin{plstx}[margin=2cm]+ −
: \meta{S} ::= \epsilon | a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b \\+ −
\end{plstx}}\bigskip+ −
+ −
\begin{center}+ −
\begin{tabular}{lcl}+ −
\bl{\meta{S}} & \bl{$\rightarrow$} & \bl{$a\meta{S}a$}\\+ −
& \bl{$\rightarrow$} & \bl{$ab\meta{S}ba$}\\+ −
& \bl{$\rightarrow$} & \bl{$aba\meta{S}aba$}\\+ −
& \bl{$\rightarrow$} & \bl{$abaaba$}\\+ −
+ −
+ −
\end{tabular}+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Example Derivation}+ −
+ −
\bl{\begin{plstx}[margin=3cm,one per line]+ −
: \meta{E} ::= 0 \mid 1 \mid 2 \mid ... \mid 9+ −
| \meta{E} \cdot + \cdot \meta{E} + −
| \meta{E} \cdot - \cdot \meta{E} + −
| \meta{E} \cdot * \cdot \meta{E} + −
| ( \cdot \meta{E} \cdot ) \\+ −
\end{plstx}}+ −
+ −
\small+ −
\begin{center}+ −
\begin{tabular}{@{}c@{}c@{}}+ −
\begin{tabular}{@{\hspace{-2mm}}l@{\hspace{1mm}}l@{\hspace{1mm}}l@{\hspace{4mm}}}+ −
\bl{\meta{E}} & \bl{$\rightarrow$} & \bl{$\meta{E}*\meta{E}$}\\+ −
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}$}\\+ −
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\+ −
& \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\+ −
\end{tabular} &\pause+ −
\begin{tabular}{@{}l@{\hspace{0mm}}l@{\hspace{1mm}}l}+ −
\bl{$\meta{E}$} & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}$}\\+ −
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}+\meta{E}$}\\+ −
& \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\+ −
& \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\+ −
\end{tabular}+ −
\end{tabular}+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Context Sensitive Grammars}+ −
+ −
It is much harder to find out whether a string is parsed+ −
by a context sensitive grammar:+ −
+ −
\bl{\begin{plstx}[margin=2cm]+ −
: \meta{S} ::= b\meta{S}\meta{A}\meta{A} | \epsilon\\+ −
: \meta{A} ::= a\\+ −
: b\meta{A} ::= \meta{A}b\\+ −
\end{plstx}}\pause+ −
+ −
\begin{center}+ −
\bl{$\meta{S} \rightarrow\ldots\rightarrow^? ababaa$}+ −
\end{center}\pause+ −
+ −
\begin{center}+ −
\tt Time flies like an arrow;\\ + −
fruit flies like bananas.+ −
\end{center} + −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Language of a CFG}+ −
+ −
Let \bl{$G$} be a context-free grammar with start symbol \bl{\meta{S}}. + −
Then the language \bl{$L(G)$} is:+ −
+ −
\begin{center}+ −
\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge \meta{S} \rightarrow^* c_1\ldots c_n \}$}+ −
\end{center}\pause+ −
+ −
\begin{itemize}+ −
\item Terminals, because there are no rules for replacing them.+ −
\item Once generated, terminals are ``permanent''.+ −
\item Terminals ought to be tokens of the language\\+ −
(but can also be strings).+ −
\end{itemize}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Parse Trees}+ −
\mbox{}\\[-12mm]+ −
+ −
\bl{\begin{plstx}: \meta{E} ::= \meta{T} | \meta{T} \cdot + \cdot \meta{E} | \meta{T} \cdot - \cdot \meta{E}\\+ −
: \meta{T} ::= \meta{F} | \meta{F} \cdot * \cdot \meta{T}\\+ −
: \meta{F} ::= 0 ... 9 | ( \cdot \meta{E} \cdot )\\+ −
\end{plstx}}+ −
+ −
\begin{textblock}{5}(6, 5)+ −
\small+ −
\begin{tikzpicture}[level distance=10mm, blue]+ −
\node {$\meta{E}$}+ −
child {node {$\meta{T}$}+ −
child {node {$\meta{F}$} child {node {1}}}+ −
}+ −
child {node {+}}+ −
child {node {$\meta{E}$}+ −
child[sibling distance=10mm] {node {$\meta{T}$}+ −
child {node {$\meta{F}$} child {node {2}}}+ −
child {node {+}}+ −
child {node {$\meta{T}$} child {node {$\meta{F}$} child {node {3}}}}+ −
}+ −
child {node {+}}+ −
child {node {$\meta{E}$} child {node {$\meta{T}$}+ −
child {node {$\meta{F}$} child {node {4}}}}}+ −
} + −
;+ −
\end{tikzpicture}+ −
\end{textblock}+ −
+ −
\begin{textblock}{5}(1, 10)+ −
\bl{\texttt{1 + 2 * 3 + 4}}+ −
\end{textblock}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
\begin{tikzpicture}[level distance=8mm, blue]+ −
\node {$\meta{E}$}+ −
child {node {$\meta{T}$} + −
child {node {$\meta{T}$} + −
child {node {(\,$\meta{E}$\,)}+ −
child {node{$\meta{F}$ *{} $\meta{F}$}+ −
child {node {$\meta{T}$} child {node {2}}}+ −
child {node {$\meta{T}$} child {node {3}}} + −
}+ −
}+ −
}+ −
child {node {+}}+ −
child {node {$\meta{T}$}+ −
child {node {(\,$\meta{E}$\,)} + −
child {node {$\meta{F}$}+ −
child {node {$\meta{T}$ +{} $\meta{T}$}+ −
child {node {3}}+ −
child {node {4}} + −
}+ −
}}+ −
}};+ −
\end{tikzpicture}+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Arithmetic Expressions}+ −
+ −
\bl{\begin{plstx}[margin=3cm,one per line]+ −
: \meta{E} ::= 0..9 + −
| \meta{E} \cdot + \cdot \meta{E} + −
| \meta{E} \cdot - \cdot \meta{E} + −
| \meta{E} \cdot * \cdot \meta{E} + −
| ( \cdot \meta{E} \cdot ) \\+ −
\end{plstx}}\pause\bigskip+ −
+ −
A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$\meta{E}$} such+ −
that \bl{$\meta{E} \rightarrow^+ \meta{E}\cdot \ldots$}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Ambiguous Grammars}+ −
+ −
A grammar is \alert{\bf ambiguous} if there is a string that+ −
has at least two different parse trees.+ −
+ −
\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::= num\_token + −
| \meta{E} \cdot + \cdot \meta{E} + −
| \meta{E} \cdot - \cdot \meta{E} + −
| \meta{E} \cdot * \cdot \meta{E} + −
| ( \cdot \meta{E} \cdot ) \\+ −
\end{plstx}}+ −
+ −
+ −
\bl{\texttt{1 + 2 * 3 + 4}}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{`Dangling' Else}+ −
+ −
Another ambiguous grammar:\bigskip+ −
+ −
\begin{center}+ −
\bl{\begin{tabular}{lcl}+ −
$E$ & $\rightarrow$ & if $E$ then $E$\\+ −
& $|$ & if $E$ then $E$ else $E$ \\+ −
& $|$ & \ldots+ −
\end{tabular}}+ −
\end{center}\bigskip+ −
+ −
\bl{\texttt{if a then if x then y else c}}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Parser Combinators}+ −
+ −
One of the simplest ways to implement a parser, see+ −
{\small\url{https://vimeo.com/142341803}}\bigskip+ −
+ −
Parser combinators: \bigskip+ −
+ −
\begin{minipage}{1.1\textwidth}+ −
\begin{center}+ −
\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} + −
$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$+ −
\end{center}+ −
\end{minipage}\bigskip+ −
+ −
\begin{itemize}+ −
\item atomic parsers+ −
\item sequencing+ −
\item alternative+ −
\item semantic action+ −
\end{itemize}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
+ −
Atomic parsers, for example, number tokens+ −
+ −
\begin{center}+ −
\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} + −
\end{center}\bigskip+ −
+ −
\begin{itemize}+ −
\item you consume one or more token from the\\ + −
input (stream)+ −
\item also works for characters and strings+ −
\end{itemize}+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
+ −
Alternative parser (code \bl{$p\;||\;q$})\bigskip+ −
+ −
\begin{itemize}+ −
\item apply \bl{$p$} and also \bl{$q$}; then combine + −
the outputs+ −
\end{itemize}+ −
+ −
\begin{center}+ −
\large \bl{$p(\text{input}) \cup q(\text{input})$}+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
+ −
Sequence parser (code \bl{$p\sim q$})\bigskip+ −
+ −
\begin{itemize}+ −
\item apply first \bl{$p$} producing a set of pairs+ −
\item then apply \bl{$q$} to the unparsed part+ −
\item then combine the results:\medskip + −
\begin{center}+ −
((output$_1$, output$_2$), unparsed part)+ −
\end{center}+ −
\end{itemize}+ −
+ −
\begin{center}+ −
\begin{tabular}{l}+ −
\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] + −
\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]+ −
\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}+ −
\end{tabular}+ −
\end{center}+ −
+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
+ −
Function parser (code \bl{$p \Rightarrow f\;$})\bigskip+ −
+ −
\begin{itemize}+ −
\item apply \bl{$p$} producing a set of pairs+ −
\item then apply the function \bl{$f$} to each first component+ −
\end{itemize}+ −
+ −
\begin{center}+ −
\begin{tabular}{l}+ −
\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}+ −
\end{tabular}+ −
\end{center}\bigskip\bigskip\pause+ −
+ −
\bl{$f$} is the semantic action (``what to do with the parsed input'')+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\mode<presentation>{+ −
\begin{frame}[c]+ −
\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}+ −
+ −
Addition+ −
+ −
\begin{center}+ −
\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}+ −
\end{center}\pause+ −
+ −
Multiplication+ −
+ −
\begin{center}+ −
\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$}+ −
\end{center}\pause+ −
+ −
Parenthesis+ −
+ −
\begin{center}+ −
\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$}+ −
\end{center}+ −
+ −
\end{frame}}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Types of Parsers}+ −
+ −
\begin{itemize}+ −
\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},+ −
then \bl{$p \sim q$} returns results of type+ −
+ −
\begin{center}+ −
\bl{$T \times S$}+ −
\end{center}\pause+ −
+ −
\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$},+ −
and \bl{$p \;||\; q$} returns results of type+ −
+ −
\begin{center}+ −
\bl{$T$}+ −
\end{center}\pause+ −
+ −
\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from+ −
\bl{$T$} to \bl{$S$}, then+ −
\bl{$p \Rightarrow f$} returns results of type+ −
+ −
\begin{center}+ −
\bl{$S$}+ −
\end{center}+ −
+ −
\end{itemize}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Input Types of Parsers}+ −
+ −
\begin{itemize}+ −
\item input: \alert{token list}+ −
\item output: set of (output\_type, \alert{token list})+ −
\end{itemize}\bigskip\pause+ −
+ −
actually it can be any input type as long as it is a kind of+ −
sequence (for example a string)+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Scannerless Parsers}+ −
+ −
\begin{itemize}+ −
\item input: \alert{string}+ −
\item output: set of (output\_type, \alert{string})+ −
\end{itemize}\bigskip\bigskip+ −
+ −
but using lexers is better because whitespaces or comments can be+ −
filtered out; then input is a sequence of tokens+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Successful Parses}+ −
+ −
\begin{itemize}+ −
\item input: string+ −
\item output: \alert{set of} (output\_type, string)+ −
\end{itemize}\bigskip+ −
+ −
a parse is successful whenever the input has been fully+ −
``consumed'' (that is the second component is empty)+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Abstract Parser Class}+ −
+ −
\small+ −
\lstinputlisting[language=Scala,xleftmargin=1mm]+ −
{../progs/app7.scala}+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
+ −
\small+ −
\fontsize{10}{12}\selectfont+ −
\lstinputlisting[language=Scala,xleftmargin=1mm]+ −
{../progs/app8.scala}+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Two Grammars}+ −
+ −
Which languages are recognised by the following two grammars?+ −
+ −
\begin{center}+ −
\bl{\begin{tabular}{lcl}+ −
$\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\+ −
& $|$ & $\epsilon$+ −
\end{tabular}}+ −
\end{center}\bigskip+ −
+ −
\begin{center}+ −
\bl{\begin{tabular}{lcl}+ −
$\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\+ −
& $|$ & $\epsilon$+ −
\end{tabular}}+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[t]+ −
\frametitle{Ambiguous Grammars}+ −
+ −
\begin{center}+ −
\begin{tikzpicture}+ −
\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},+ −
enlargelimits=false,+ −
xtick={0,100,...,1000},+ −
xmax=1050,+ −
ymax=33,+ −
ytick={0,5,...,30},+ −
scaled ticks=false,+ −
axis lines=left,+ −
width=11cm,+ −
height=7cm, + −
legend entries={unambiguous,ambiguous}, + −
legend pos=north east,+ −
legend cell align=left,+ −
x tick label style={font=\small,/pgf/number format/1000 sep={}}]+ −
\addplot[blue,mark=*, mark options={fill=white}] + −
table {s-grammar1.data};+ −
\only<2>{+ −
\addplot[red,mark=triangle*, mark options={fill=white}] + −
table {s-grammar2.data};} + −
\end{axis}+ −
\end{tikzpicture}+ −
\end{center}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
\begin{frame}+ −
\frametitle{While-Language}+ −
\mbox{}\\[-23mm]\mbox{}+ −
+ −
\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip+ −
| \meta{Id} := \meta{AExp}+ −
| if \meta{BExp} then \meta{Block} else \meta{Block}+ −
| while \meta{BExp} do \meta{Block}\\+ −
: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}+ −
| \meta{Stmt}\\+ −
: \meta{Block} ::= \{ \meta{Stmts} \}+ −
| \meta{Stmt}\\+ −
: \meta{AExp} ::= \ldots\\+ −
: \meta{BExp} ::= \ldots\\\end{plstx}}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{An Interpreter}+ −
+ −
\begin{center}+ −
\bl{\begin{tabular}{l}+ −
$\{$\\+ −
\;\;$x := 5 \text{;}$\\+ −
\;\;$y := x * 3\text{;}$\\+ −
\;\;$y := x * 4\text{;}$\\+ −
\;\;$x := u * 3$\\+ −
$\}$+ −
\end{tabular}}+ −
\end{center}+ −
+ −
\begin{itemize}+ −
\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause+ −
\item \bl{\texttt{eval(stmt, env)}}+ −
\end{itemize}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\mode<presentation>{+ −
\begin{frame}[c]+ −
\frametitle{\begin{tabular}{c}Interpreter\end{tabular}}+ −
+ −
\begin{center}+ −
\bl{\begin{tabular}{@{}lcl@{}}+ −
$\text{eval}(n, E)$ & $\dn$ & $n$\\+ −
$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\+ −
$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\+ −
$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\+ −
$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\+ −
$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\+ −
$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\+ −
$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\+ −
\end{tabular}}+ −
\end{center}+ −
+ −
\end{frame}}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\mode<presentation>{+ −
\begin{frame}[c]+ −
\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}}+ −
+ −
\begin{center}+ −
\bl{\begin{tabular}{@{}lcl@{}}+ −
$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\+ −
$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\+ −
\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\+ −
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\;+ −
\text{eval}(cs_1,E)$}\\+ −
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\+ −
\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\+ −
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\+ −
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\;+ −
\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\+ −
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\+ −
$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\+ −
\end{tabular}}+ −
\end{center}+ −
+ −
\end{frame}}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\begin{frame}[c]+ −
\frametitle{Test Program}+ −
+ −
\mbox{}\\[-18mm]\mbox{}+ −
+ −
??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont+ −
%\texttt{\lstinputlisting{../progs/loops.while}}}+ −
+ −
\end{frame}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\mode<presentation>{+ −
\begin{frame}[t]+ −
\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}}+ −
+ −
\begin{center}+ −
\begin{tikzpicture}+ −
\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small]+ −
\addplot+[smooth] file {interpreted.data};+ −
\end{axis}+ −
\end{tikzpicture}+ −
\end{center}+ −
+ −
\end{frame}}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%+ −
\mode<presentation>{+ −
\begin{frame}[c]+ −
\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}}+ −
+ −
\begin{itemize}+ −
\item introduced in 1995+ −
\item is a stack-based VM (like Postscript, CLR of .Net)+ −
\item contains a JIT compiler+ −
\item many languages take advantage of JVM's infrastructure (JRE)+ −
\item is garbage collected $\Rightarrow$ no buffer overflows+ −
\item some languages compile to the JVM: Scala, Clojure\ldots+ −
\end{itemize}+ −
+ −
\end{frame}}+ −
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + −
+ −
+ −
\end{document}+ −
+ −
%%% Local Variables: + −
%%% mode: latex+ −
%%% TeX-master: t+ −
%%% End: + −
+ −