% !TEX program = xelatex\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}\usepackage{../slides}\usepackage{../graphics}\usepackage{../langs}\usepackage{../data}\usepackage{../grammar}\hfuzz=220pt \pgfplotsset{compat=1.11}\newcommand{\bl}[1]{\textcolor{blue}{#1}} % beamer stuff \renewcommand{\slidecaption}{CFL 05, King's College London}\begin{document}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{% \begin{tabular}{@ {}c@ {}} \\[-3mm] \LARGE Compilers and \\[-2mm] \LARGE Formal Languages\\[3mm] \end{tabular}} \normalsize \begin{center} \begin{tabular}{ll} Email: & christian.urban at kcl.ac.uk\\ %Office Hours: & Thursdays 12 -- 14\\ %Location: & N7.07 (North Wing, Bush House)\\ Slides \& Progs: & KEATS (also homework is there)\\ \end{tabular}\end{center} \begin{center} \begin{tikzpicture} \node[drop shadow,fill=white,inner sep=0pt] {\footnotesize\rowcolors{1}{capri!10}{white} \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline 1 Introduction, Languages & 6 While-Language \\ 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ 4 Lexing, Tokenising & 9 Optimisations \\ \cellcolor{blue!50} 5 Grammars, Parsing & 10 LLVM \\ \hline \end{tabular}% }; \end{tikzpicture} \end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]% \frametitle{Coursework 1: Submissions}% % \begin{itemize}% \item Scala (29)% \item Haskell (1)% \item Kotlin (1)% \item Rust (1)% \end{itemize}\bigskip\bigskip % % \small% Please get in contact if you intend to do CW Strand 2. No zips please.% Give definitions also on paper if asked. BTW, simp % can stay unchanged. Use \texttt{ders} for CW2, not \texttt{ders2}!% \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Lexer, Parser}\mbox{}\\[-16mm]\mbox{}\begin{center} \begin{tikzpicture}[scale=1, node/.style={ rectangle,rounded corners=3mm, very thick,draw=black!50, minimum height=18mm, minimum width=20mm, top color=white,bottom color=black!20,drop shadow}] \node (0) at (-2.3,0) {}; \node (A) at (0,0) [node] {}; \node [below right] at (A.north west) {lexer}; \node (B) at (3,0) [node] {}; \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; \node (C) at (6,0) [node] {}; \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; \node (1) at (8.4,0) {}; \draw [->,line width=4mm] (0) -- (A); \draw [->,line width=4mm] (A) -- (B); \draw [->,line width=4mm] (B) -- (C); \draw [->,line width=4mm] (C) -- (1); \end{tikzpicture} \end{center}Today a parser. \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{What Parsing is Not}Usually parsing does not check semantic correctness, e.g.\begin{itemize}\item whether a function is not used before it is defined\item whether a function has the correct number of arguments or are of correct type\item whether a variable can be declared twice in a scope \end{itemize} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Regular Languages}While regular expressions are very useful for lexing, there isno regular expression that can recognise the language\bl{$a^nb^n$}.\bigskip\begin{center}\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}\end{center}\bigskip\bigskip\small\noindent So we cannot find out with regular expressionswhether parentheses are matched or unmatched. Also regularexpressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Hierarchy of Languages}\begin{center}\begin{tikzpicture}[rect/.style={draw=black!50, top color=white, bottom color=black!20, rectangle, very thick, rounded corners}, scale=1.2]\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};\draw (0,-1.4) node [rect] {regular languages};\end{tikzpicture}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{CF Grammars}A \alert{\bf context-free grammar} \bl{$G$} consists of\begin{itemize}\item a finite set of nonterminal symbols (e.g.~$\meta{A}$ upper case)\item a finite set terminal symbols or tokens (lower case)\item a start symbol (which must be a nonterminal)\item a set of rules\begin{center}\bl{$\meta{A} ::= \textit{rhs}$}\end{center}where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,including the empty sequence \bl{$\epsilon$}.\medskip\pauseWe also allow rules\begin{center}\bl{$\meta{A} ::= \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}\end{center}\end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Palindromes}A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:\bl{\begin{plstx}[margin=3cm]: \meta{S} ::= a\cdot\meta{S}\cdot a\\: \meta{S} ::= b\cdot\meta{S}\cdot b\\: \meta{S} ::= a\\: \meta{S} ::= b\\: \meta{S} ::= \epsilon\\\end{plstx}}\pauseor\bl{\begin{plstx}[margin=2cm]: \meta{S} ::= a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a | b | \epsilon\\\end{plstx}}\pause\bigskip\smallCan you find the grammar rules for matched parentheses?\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Arithmetic Expressions}\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::= num\_token | \meta{E} \cdot + \cdot \meta{E} | \meta{E} \cdot - \cdot \meta{E} | \meta{E} \cdot * \cdot \meta{E} | ( \cdot \meta{E} \cdot ) \\\end{plstx}}\pause\bl{\texttt{1 + 2 * 3 + 4}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{A CFG Derivation}\begin{enumerate}\item Begin with a string containing only the start symbol, say \bl{\meta{S}}\bigskip\item Replace any nonterminal \bl{\meta{X}} in the string by theright-hand side of some production \bl{$\meta{X} ::= \textit{rhs}$}\bigskip\item Repeat 2 until there are no nonterminals left\end{enumerate}\begin{center}\bl{$\meta{S} \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Example Derivation}\bl{\begin{plstx}[margin=2cm]: \meta{S} ::= \epsilon | a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b \\\end{plstx}}\bigskip\begin{center}\begin{tabular}{lcl}\bl{\meta{S}} & \bl{$\rightarrow$} & \bl{$a\meta{S}a$}\\ & \bl{$\rightarrow$} & \bl{$ab\meta{S}ba$}\\ & \bl{$\rightarrow$} & \bl{$aba\meta{S}aba$}\\ & \bl{$\rightarrow$} & \bl{$abaaba$}\\\end{tabular}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Example Derivation}\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::= num\_token | \meta{E} \cdot + \cdot \meta{E} | \meta{E} \cdot - \cdot \meta{E} | \meta{E} \cdot * \cdot \meta{E} | ( \cdot \meta{E} \cdot ) \\\end{plstx}}\small\begin{center}\begin{tabular}{@{}c@{}c@{}}\begin{tabular}{@{\hspace{-2mm}}l@{\hspace{1mm}}l@{\hspace{1mm}}l@{\hspace{4mm}}}\bl{\meta{E}} & \bl{$\rightarrow$} & \bl{$\meta{E}*\meta{E}$}\\ & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}$}\\ & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\ & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\\end{tabular} &\pause\begin{tabular}{@{}l@{\hspace{0mm}}l@{\hspace{1mm}}l}\bl{$\meta{E}$} & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}$}\\ & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}+\meta{E}$}\\ & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\ & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\\end{tabular}\end{tabular}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Context Sensitive Grammars}It is much harder to find out whether a string is parsedby a context sensitive grammar:\bl{\begin{plstx}[margin=2cm]: \meta{S} ::= b\meta{S}\meta{A}\meta{A} | \epsilon\\: \meta{A} ::= a\\: b\meta{A} ::= \meta{A}b\\\end{plstx}}\pause\begin{center}\bl{$\meta{S} \rightarrow\ldots\rightarrow^? ababaa$}\end{center}\pause\begin{center} \tt Time flies like an arrow;\\ fruit flies like bananas. \end{center} \end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Language of a CFG}Let \bl{$G$} be a context-free grammar with start symbol \bl{\meta{S}}. Then the language \bl{$L(G)$} is:\begin{center}\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge \meta{S} \rightarrow^* c_1\ldots c_n \}$}\end{center}\pause\begin{itemize}\item Terminals, because there are no rules for replacing them.\item Once generated, terminals are ``permanent''.\item Terminals ought to be tokens of the language\\(but can also be strings).\end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Parse Trees}\mbox{}\\[-12mm]\bl{\begin{plstx}: \meta{E} ::= \meta{T} | \meta{T} \cdot + \cdot \meta{E} | \meta{T} \cdot - \cdot \meta{E}\\: \meta{T} ::= \meta{F} | \meta{F} \cdot * \cdot \meta{T}\\: \meta{F} ::= num\_token | ( \cdot \meta{E} \cdot )\\\end{plstx}}\begin{center}\small\begin{tikzpicture}[level distance=8mm, blue] \node {$\meta{E}$} child {node {$\meta{T}$} child {node {$\meta{T}$} child {node {(\,$\meta{E}$\,)} child {node{$\meta{F}$ *{} $\meta{F}$} child {node {$\meta{T}$} child {node {2}}} child {node {$\meta{T}$} child {node {3}}} } } } child {node {+}} child {node {$\meta{T}$} child {node {(\,$\meta{E}$\,)} child {node {$\meta{F}$} child {node {$\meta{T}$ +{} $\meta{T}$} child {node {3}} child {node {4}} } }} }};\end{tikzpicture}\end{center}\begin{textblock}{5}(1, 6.5)\bl{\texttt{(2*3)+(3+4)}}\end{textblock}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Arithmetic Expressions}\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::= num\_token | \meta{E} \cdot + \cdot \meta{E} | \meta{E} \cdot - \cdot \meta{E} | \meta{E} \cdot * \cdot \meta{E} | ( \cdot \meta{E} \cdot ) \\\end{plstx}}\pause\bigskipA CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$\meta{E}$} suchthat \bl{$\meta{E} \rightarrow^+ \meta{E}\cdot \ldots$}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Ambiguous Grammars}A grammar is \alert{\bf ambiguous} if there is a string thathas at least two different parse trees.\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::= num\_token | \meta{E} \cdot + \cdot \meta{E} | \meta{E} \cdot - \cdot \meta{E} | \meta{E} \cdot * \cdot \meta{E} | ( \cdot \meta{E} \cdot ) \\\end{plstx}}\bl{\texttt{1 + 2 * 3 + 4}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{`Dangling' Else}Another ambiguous grammar:\bigskip\begin{center}\bl{\begin{tabular}{lcl}$E$ & $\rightarrow$ & if $E$ then $E$\\ & $|$ & if $E$ then $E$ else $E$ \\ & $|$ & \ldots\end{tabular}}\end{center}\bigskip\bl{\texttt{if a then if x then y else c}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Parser Combinators}One of the simplest ways to implement a parser, see{\small\url{https://vimeo.com/142341803}}\bigskipParser combinators: \bigskip\begin{minipage}{1.1\textwidth}\begin{center}\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} $\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$\end{center}\end{minipage}\bigskip\begin{itemize}\item atomic parsers\item sequencing\item alternative\item semantic action\end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]Atomic parsers, for example, number tokens\begin{center}\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} \end{center}\bigskip\begin{itemize}\item you consume one or more token from the\\ input (stream)\item also works for characters and strings\end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]Alternative parser (code \bl{$p\;||\;q$})\bigskip\begin{itemize}\item apply \bl{$p$} and also \bl{$q$}; then combine the outputs\end{itemize}\begin{center}\large \bl{$p(\text{input}) \cup q(\text{input})$}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]Sequence parser (code \bl{$p\sim q$})\bigskip\begin{itemize}\item apply first \bl{$p$} producing a set of pairs\item then apply \bl{$q$} to the unparsed part\item then combine the results:\medskip \begin{center}((output$_1$, output$_2$), unparsed part)\end{center}\end{itemize}\begin{center}\begin{tabular}{l}\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] \large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}\end{tabular}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]Function parser (code \bl{$p \Rightarrow f\;$})\bigskip\begin{itemize}\item apply \bl{$p$} producing a set of pairs\item then apply the function \bl{$f$} to each first component\end{itemize}\begin{center}\begin{tabular}{l}\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}\end{tabular}\end{center}\bigskip\bigskip\pause\bl{$f$} is the semantic action (``what to do with the parsed input'')\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\mode<presentation>{\begin{frame}[c]\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}Addition\begin{center}\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}\end{center}\pauseMultiplication\begin{center}\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$}\end{center}\pauseParenthesis\begin{center}\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$}\end{center}\end{frame}}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Types of Parsers}\begin{itemize}\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},then \bl{$p \sim q$} returns results of type\begin{center}\bl{$T \times S$}\end{center}\pause\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$},and \bl{$p \;||\; q$} returns results of type\begin{center}\bl{$T$}\end{center}\pause\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from\bl{$T$} to \bl{$S$}, then\bl{$p \Rightarrow f$} returns results of type\begin{center}\bl{$S$}\end{center}\end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Input Types of Parsers}\begin{itemize}\item input: \alert{token list}\item output: set of (output\_type, \alert{token list})\end{itemize}\bigskip\pauseactually it can be any input type as long as it is a kind ofsequence (for example a string)\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Scannerless Parsers}\begin{itemize}\item input: \alert{string}\item output: set of (output\_type, \alert{string})\end{itemize}\bigskip\bigskipbut using lexers is better because whitespaces or comments can befiltered out; then input is a sequence of tokens\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Successful Parses}\begin{itemize}\item input: string\item output: \alert{set of} (output\_type, string)\end{itemize}\bigskipa parse is successful whenever the input has been fully``consumed'' (that is the second component is empty)\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Abstract Parser Class}\small\lstinputlisting[language=Scala,xleftmargin=1mm] {../progs/app7.scala}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\small\fontsize{10}{12}\selectfont\lstinputlisting[language=Scala,xleftmargin=1mm] {../progs/app8.scala}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Two Grammars}Which languages are recognised by the following two grammars?\begin{center}\bl{\begin{tabular}{lcl}$\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\ & $|$ & $\epsilon$\end{tabular}}\end{center}\bigskip\begin{center}\bl{\begin{tabular}{lcl}$\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\ & $|$ & $\epsilon$\end{tabular}}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[t]\frametitle{Ambiguous Grammars}\begin{center}\begin{tikzpicture}\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, enlargelimits=false, xtick={0,100,...,1000}, xmax=1050, ymax=33, ytick={0,5,...,30}, scaled ticks=false, axis lines=left, width=11cm, height=7cm, legend entries={unambiguous,ambiguous}, legend pos=north east, legend cell align=left, x tick label style={font=\small,/pgf/number format/1000 sep={}}]\addplot[blue,mark=*, mark options={fill=white}] table {s-grammar1.data};\only<2>{ \addplot[red,mark=triangle*, mark options={fill=white}] table {s-grammar2.data};} \end{axis}\end{tikzpicture}\end{center}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}\frametitle{While-Language}\mbox{}\\[-23mm]\mbox{}\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip | \meta{Id} := \meta{AExp} | if \meta{BExp} then \meta{Block} else \meta{Block} | while \meta{BExp} do \meta{Block}\\: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts} | \meta{Stmt}\\: \meta{Block} ::= \{ \meta{Stmts} \} | \meta{Stmt}\\: \meta{AExp} ::= \ldots\\: \meta{BExp} ::= \ldots\\\end{plstx}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{An Interpreter}\begin{center}\bl{\begin{tabular}{l}$\{$\\\;\;$x := 5 \text{;}$\\\;\;$y := x * 3\text{;}$\\\;\;$y := x * 4\text{;}$\\\;\;$x := u * 3$\\$\}$\end{tabular}}\end{center}\begin{itemize}\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause\item \bl{\texttt{eval(stmt, env)}}\end{itemize}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\mode<presentation>{\begin{frame}[c]\frametitle{\begin{tabular}{c}Interpreter\end{tabular}}\begin{center}\bl{\begin{tabular}{@{}lcl@{}}$\text{eval}(n, E)$ & $\dn$ & $n$\\$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\\end{tabular}}\end{center}\end{frame}}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\mode<presentation>{\begin{frame}[c]\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}}\begin{center}\bl{\begin{tabular}{@{}lcl@{}}$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\;\text{eval}(cs_1,E)$}\\\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\;\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\\end{tabular}}\end{center}\end{frame}}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\begin{frame}[c]\frametitle{Test Program}\mbox{}\\[-18mm]\mbox{}??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont%\texttt{\lstinputlisting{../progs/loops.while}}}\end{frame}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\mode<presentation>{\begin{frame}[t]\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}}\begin{center}\begin{tikzpicture}\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small]\addplot+[smooth] file {interpreted.data};\end{axis}\end{tikzpicture}\end{center}\end{frame}}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\mode<presentation>{\begin{frame}[c]\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}}\begin{itemize}\item introduced in 1995\item is a stack-based VM (like Postscript, CLR of .Net)\item contains a JIT compiler\item many languages take advantage of JVM's infrastructure (JRE)\item is garbage collected $\Rightarrow$ no buffer overflows\item some languages compile to the JVM: Scala, Clojure\ldots\end{itemize}\end{frame}}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{document}%%% Local Variables: %%% mode: latex%%% TeX-master: t%%% End: