slides/slides06.tex
changeset 799 85267be9a5ed
parent 750 e93a9e74ca8e
child 803 d4fb8c7fc3bf
--- a/slides/slides06.tex	Fri Oct 30 01:45:03 2020 +0000
+++ b/slides/slides06.tex	Wed Nov 04 17:34:52 2020 +0000
@@ -55,6 +55,479 @@
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Parser Combinators}
+
+One of the simplest ways to implement a parser, see
+{\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip
+
+\begin{itemize}
+\item[$\bullet$] build-in library in Scala
+\item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite
+\item[$\bullet$] possible exponential runtime behaviour  
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Parser Combinators}
+
+Parser combinators: \bigskip
+
+\begin{minipage}{1.1\textwidth}
+\begin{center}
+\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} 
+$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$
+\end{center}
+\end{minipage}\bigskip
+
+\begin{itemize}
+\item atomic parsers
+\item sequencing
+\item alternative
+\item semantic action (map-parser)
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Atomic parsers, for example, number tokens
+
+\begin{center}
+\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} 
+\end{center}\bigskip
+
+\begin{itemize}
+\item you consume one or more token from the\\ 
+  input (stream)
+\item also works for characters and strings
+\end{itemize}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Alternative parser (code \bl{$p\;||\;q$})\bigskip
+
+\begin{itemize}
+\item apply \bl{$p$} and also \bl{$q$}; then combine 
+  the outputs
+\end{itemize}
+
+\begin{center}
+\large \bl{$p(\text{input}) \cup q(\text{input})$}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Sequence parser (code \bl{$p\sim q$})\bigskip
+
+\begin{itemize}
+\item apply first \bl{$p$} producing a set of pairs
+\item then apply \bl{$q$} to the unparsed part
+\item then combine the results:\medskip 
+\begin{center}
+((output$_1$, output$_2$), unparsed part)
+\end{center}
+\end{itemize}
+
+\begin{center}
+\begin{tabular}{l}
+\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] 
+\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
+\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
+\end{tabular}
+\end{center}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Map-parser (code \bl{$p.map(f)\;$})\bigskip
+
+\begin{itemize}
+\item apply \bl{$p$} producing a set of pairs
+\item then apply the function \bl{$f$} to each first component
+\end{itemize}
+
+\begin{center}
+\begin{tabular}{l}
+\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
+\end{tabular}
+\end{center}\bigskip\bigskip\pause
+
+\bl{$f$} is the semantic action (``what to do with the parsed input'')
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}
+
+Addition
+
+\begin{center}
+\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}
+\end{center}\pause
+
+Multiplication
+
+\begin{center}
+\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$}
+\end{center}\pause
+
+Parenthesis
+
+\begin{center}
+\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Types of Parsers}
+
+\begin{itemize}
+\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},
+then \bl{$p \sim q$} returns results of type
+
+\begin{center}
+\bl{$T \times S$}
+\end{center}\pause
+
+\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then  \bl{$q$} \alert{must} also have results of type \bl{$T$},
+and \bl{$p \;||\; q$} returns results of type
+
+\begin{center}
+\bl{$T$}
+\end{center}\pause
+
+\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from
+\bl{$T$} to \bl{$S$}, then
+\bl{$p \Rightarrow f$} returns results of type
+
+\begin{center}
+\bl{$S$}
+\end{center}
+
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Input Types of Parsers}
+
+\begin{itemize}
+\item input: \alert{token list}
+\item output: set of (output\_type, \alert{token list})
+\end{itemize}\bigskip\pause
+
+actually it can be any input type as long as it is a kind of
+sequence (for example a string)
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Scannerless Parsers}
+
+\begin{itemize}
+\item input: \alert{string}
+\item output: set of (output\_type, \alert{string})
+\end{itemize}\bigskip\bigskip
+
+but using lexers is better because whitespaces or comments can be
+filtered out; then input is a sequence of tokens
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Successful Parses}
+
+\begin{itemize}
+\item input: string
+\item output: \alert{set of} (output\_type, string)
+\end{itemize}\bigskip
+
+a parse is successful whenever the input has been fully
+``consumed'' (that is the second component is empty)
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Abstract Parser Class}
+
+\small
+\lstinputlisting[language=Scala,xleftmargin=1mm]
+ {../progs/app7.scala}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+\small
+\fontsize{10}{12}\selectfont
+\lstinputlisting[language=Scala,xleftmargin=1mm]
+  {../progs/app8.scala}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Two Grammars}
+
+Which languages are recognised by the following two grammars?
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$\meta{S}$ & $\rightarrow$ &  $1 \cdot \meta{S} \cdot \meta{S}$\\
+        & $|$ & $\epsilon$
+\end{tabular}}
+\end{center}\bigskip
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$\meta{U}$ & $\rightarrow$ &  $1 \cdot \meta{U}$\\
+        & $|$ & $\epsilon$
+\end{tabular}}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{Ambiguous Grammars}
+
+\begin{center}
+\begin{tikzpicture}
+\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,100,...,1000},
+    xmax=1050,
+    ymax=33,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=11cm,
+    height=7cm, 
+    legend entries={unambiguous,ambiguous},  
+    legend pos=north east,
+    legend cell align=left,
+    x tick label style={font=\small,/pgf/number format/1000 sep={}}]
+\addplot[blue,mark=*, mark options={fill=white}] 
+  table {s-grammar1.data};
+\only<2>{
+  \addplot[red,mark=triangle*, mark options={fill=white}] 
+  table {s-grammar2.data};}  
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+\begin{frame}
+\frametitle{While-Language}
+\mbox{}\\[-23mm]\mbox{}
+
+\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip
+         | \meta{Id} := \meta{AExp}
+         | if \meta{BExp} then \meta{Block} else \meta{Block}
+         | while \meta{BExp} do \meta{Block}\\
+: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}
+          | \meta{Stmt}\\
+: \meta{Block} ::= \{ \meta{Stmts} \}
+          | \meta{Stmt}\\
+: \meta{AExp} ::= \ldots\\
+: \meta{BExp} ::= \ldots\\\end{plstx}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{An Interpreter}
+
+\begin{center}
+\bl{\begin{tabular}{l}
+$\{$\\
+\;\;$x := 5 \text{;}$\\
+\;\;$y := x * 3\text{;}$\\
+\;\;$y := x * 4\text{;}$\\
+\;\;$x := u * 3$\\
+$\}$
+\end{tabular}}
+\end{center}
+
+\begin{itemize}
+\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause
+\item \bl{\texttt{eval(stmt, env)}}
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Interpreter\end{tabular}}
+
+\begin{center}
+\bl{\begin{tabular}{@{}lcl@{}}
+$\text{eval}(n, E)$ & $\dn$ & $n$\\
+$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\
+$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\
+$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\
+$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\
+$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\
+$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\
+$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\
+\end{tabular}}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}}
+
+\begin{center}
+\bl{\begin{tabular}{@{}lcl@{}}
+$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\
+$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\
+\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\
+\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\;
+\text{eval}(cs_1,E)$}\\
+\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\
+\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\
+\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\
+\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\;
+\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\
+\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\
+$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\
+\end{tabular}}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Test Program}
+
+\mbox{}\\[-18mm]\mbox{}
+
+??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont
+%\texttt{\lstinputlisting{../progs/loops.while}}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}}
+
+\begin{center}
+\begin{tikzpicture}
+\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small]
+\addplot+[smooth] file {interpreted.data};
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}}
+
+\begin{itemize}
+\item introduced in 1995
+\item is a stack-based VM (like Postscript, CLR of .Net)
+\item contains a JIT compiler
+\item many languages take advantage of JVM's infrastructure (JRE)
+\item is garbage collected $\Rightarrow$ no buffer overflows
+\item some languages compile to the JVM: Scala, Clojure\ldots
+\end{itemize}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{%
+  \begin{tabular}{@ {}c@ {}}
+  \\[-3mm]
+  \LARGE Compilers and \\[-2mm] 
+  \LARGE Formal Languages\\[3mm] 
+  \end{tabular}}
+
+  \normalsize
+  \begin{center}
+  \begin{tabular}{ll}
+    Email:  & christian.urban at kcl.ac.uk\\
+    %Office Hours: & Thursdays 12 -- 14\\
+    %Location: & N7.07 (North Wing, Bush House)\\
+    Slides \& Progs: & KEATS (also homework is there)\\  
+  \end{tabular}
+  \end{center}
+
+  \begin{center}
+    \begin{tikzpicture}
+      \node[drop shadow,fill=white,inner sep=0pt] 
+      {\footnotesize\rowcolors{1}{capri!10}{white}
+        \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline
+          1 Introduction, Languages          & \cellcolor{blue!50} 6 While-Language \\
+          2 Regular Expressions, Derivatives & 7 Compilation, JVM \\
+          3 Automata, Regular Languages      & 8 Compiling Functional Languages \\
+          4 Lexing, Tokenising               & 9 Optimisations \\
+          5 Grammars, Parsing                & 10 LLVM \\ \hline
+        \end{tabular}%
+      };
+    \end{tikzpicture}
+  \end{center}
+  
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % \begin{frame}[c]
 % \small
 % \mbox{}\\[5mm]