diff -r aaf0bd0a211d -r 85267be9a5ed slides/slides06.tex --- a/slides/slides06.tex Fri Oct 30 01:45:03 2020 +0000 +++ b/slides/slides06.tex Wed Nov 04 17:34:52 2020 +0000 @@ -55,6 +55,479 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Parser Combinators} + +One of the simplest ways to implement a parser, see +{\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip + +\begin{itemize} +\item[$\bullet$] build-in library in Scala +\item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite +\item[$\bullet$] possible exponential runtime behaviour +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Parser Combinators} + +Parser combinators: \bigskip + +\begin{minipage}{1.1\textwidth} +\begin{center} +\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} +$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ +\end{center} +\end{minipage}\bigskip + +\begin{itemize} +\item atomic parsers +\item sequencing +\item alternative +\item semantic action (map-parser) +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Atomic parsers, for example, number tokens + +\begin{center} +\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} +\end{center}\bigskip + +\begin{itemize} +\item you consume one or more token from the\\ + input (stream) +\item also works for characters and strings +\end{itemize} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Alternative parser (code \bl{$p\;||\;q$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} and also \bl{$q$}; then combine + the outputs +\end{itemize} + +\begin{center} +\large \bl{$p(\text{input}) \cup q(\text{input})$} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Sequence parser (code \bl{$p\sim q$})\bigskip + +\begin{itemize} +\item apply first \bl{$p$} producing a set of pairs +\item then apply \bl{$q$} to the unparsed part +\item then combine the results:\medskip +\begin{center} +((output$_1$, output$_2$), unparsed part) +\end{center} +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} +\end{tabular} +\end{center} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Map-parser (code \bl{$p.map(f)\;$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} producing a set of pairs +\item then apply the function \bl{$f$} to each first component +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} +\end{tabular} +\end{center}\bigskip\bigskip\pause + +\bl{$f$} is the semantic action (``what to do with the parsed input'') + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} + +Addition + +\begin{center} +\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} +\end{center}\pause + +Multiplication + +\begin{center} +\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$} +\end{center}\pause + +Parenthesis + +\begin{center} +\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Types of Parsers} + +\begin{itemize} +\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, +then \bl{$p \sim q$} returns results of type + +\begin{center} +\bl{$T \times S$} +\end{center}\pause + +\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, +and \bl{$p \;||\; q$} returns results of type + +\begin{center} +\bl{$T$} +\end{center}\pause + +\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from +\bl{$T$} to \bl{$S$}, then +\bl{$p \Rightarrow f$} returns results of type + +\begin{center} +\bl{$S$} +\end{center} + +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Input Types of Parsers} + +\begin{itemize} +\item input: \alert{token list} +\item output: set of (output\_type, \alert{token list}) +\end{itemize}\bigskip\pause + +actually it can be any input type as long as it is a kind of +sequence (for example a string) + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Scannerless Parsers} + +\begin{itemize} +\item input: \alert{string} +\item output: set of (output\_type, \alert{string}) +\end{itemize}\bigskip\bigskip + +but using lexers is better because whitespaces or comments can be +filtered out; then input is a sequence of tokens + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Successful Parses} + +\begin{itemize} +\item input: string +\item output: \alert{set of} (output\_type, string) +\end{itemize}\bigskip + +a parse is successful whenever the input has been fully +``consumed'' (that is the second component is empty) + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Abstract Parser Class} + +\small +\lstinputlisting[language=Scala,xleftmargin=1mm] + {../progs/app7.scala} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +\small +\fontsize{10}{12}\selectfont +\lstinputlisting[language=Scala,xleftmargin=1mm] + {../progs/app8.scala} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Two Grammars} + +Which languages are recognised by the following two grammars? + +\begin{center} +\bl{\begin{tabular}{lcl} +$\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center}\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Ambiguous Grammars} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, + enlargelimits=false, + xtick={0,100,...,1000}, + xmax=1050, + ymax=33, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=11cm, + height=7cm, + legend entries={unambiguous,ambiguous}, + legend pos=north east, + legend cell align=left, + x tick label style={font=\small,/pgf/number format/1000 sep={}}] +\addplot[blue,mark=*, mark options={fill=white}] + table {s-grammar1.data}; +\only<2>{ + \addplot[red,mark=triangle*, mark options={fill=white}] + table {s-grammar2.data};} +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame} +\frametitle{While-Language} +\mbox{}\\[-23mm]\mbox{} + +\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip + | \meta{Id} := \meta{AExp} + | if \meta{BExp} then \meta{Block} else \meta{Block} + | while \meta{BExp} do \meta{Block}\\ +: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts} + | \meta{Stmt}\\ +: \meta{Block} ::= \{ \meta{Stmts} \} + | \meta{Stmt}\\ +: \meta{AExp} ::= \ldots\\ +: \meta{BExp} ::= \ldots\\\end{plstx}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{An Interpreter} + +\begin{center} +\bl{\begin{tabular}{l} +$\{$\\ +\;\;$x := 5 \text{;}$\\ +\;\;$y := x * 3\text{;}$\\ +\;\;$y := x * 4\text{;}$\\ +\;\;$x := u * 3$\\ +$\}$ +\end{tabular}} +\end{center} + +\begin{itemize} +\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause +\item \bl{\texttt{eval(stmt, env)}} +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Interpreter\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$\text{eval}(n, E)$ & $\dn$ & $n$\\ +$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\ +$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\ +$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\ +$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\ +$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\ +$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\ +$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\ +$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\ +\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\; +\text{eval}(cs_1,E)$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\ +\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\; +\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\ +$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Test Program} + +\mbox{}\\[-18mm]\mbox{} + +??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont +%\texttt{\lstinputlisting{../progs/loops.while}}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] +\addplot+[smooth] file {interpreted.data}; +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}} + +\begin{itemize} +\item introduced in 1995 +\item is a stack-based VM (like Postscript, CLR of .Net) +\item contains a JIT compiler +\item many languages take advantage of JVM's infrastructure (JRE) +\item is garbage collected $\Rightarrow$ no buffer overflows +\item some languages compile to the JVM: Scala, Clojure\ldots +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Compilers and \\[-2mm] + \LARGE Formal Languages\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + %Office Hours: & Thursdays 12 -- 14\\ + %Location: & N7.07 (North Wing, Bush House)\\ + Slides \& Progs: & KEATS (also homework is there)\\ + \end{tabular} + \end{center} + + \begin{center} + \begin{tikzpicture} + \node[drop shadow,fill=white,inner sep=0pt] + {\footnotesize\rowcolors{1}{capri!10}{white} + \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline + 1 Introduction, Languages & \cellcolor{blue!50} 6 While-Language \\ + 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ + 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ + 4 Lexing, Tokenising & 9 Optimisations \\ + 5 Grammars, Parsing & 10 LLVM \\ \hline + \end{tabular}% + }; + \end{tikzpicture} + \end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % \begin{frame}[c] % \small % \mbox{}\\[5mm]