afl-material: slides/slides05.tex@3e5f5d19f514



% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\usepackage{../grammar}

\hfuzz=220pt 

\pgfplotsset{compat=1.11}

\newcommand{\bl}[1]{\textcolor{blue}{#1}}  

% beamer stuff 
\renewcommand{\slidecaption}{CFL 05, King's College London}


\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
  \begin{tabular}{@ {}c@ {}}
  \\[-3mm]
  \LARGE Compilers and \\[-2mm] 
  \LARGE Formal Languages\\[3mm] 
  \end{tabular}}

  \normalsize
  \begin{center}
  \begin{tabular}{ll}
    Email:  & christian.urban at kcl.ac.uk\\
    %Office Hours: & Thursdays 12 -- 14\\
    %Location: & N7.07 (North Wing, Bush House)\\
    Slides \& Progs: & KEATS (also homework is there)\\  
  \end{tabular}
\end{center}

  \begin{center}
    \begin{tikzpicture}
      \node[drop shadow,fill=white,inner sep=0pt] 
      {\footnotesize\rowcolors{1}{capri!10}{white}
        \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline
          1 Introduction, Languages          & 6 While-Language \\
          2 Regular Expressions, Derivatives & 7 Compilation, JVM \\
          3 Automata, Regular Languages      & 8 Compiling Functional Languages \\
          4 Lexing, Tokenising               & 9 Optimisations \\
          \cellcolor{blue!50}
          5 Grammars, Parsing                & 10 LLVM \\ \hline
        \end{tabular}%
      };
    \end{tikzpicture}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{frame}[c]
%  \frametitle{Coursework 1: Submissions}
%  
%  \begin{itemize}
%  \item Scala (29)
%  \item Haskell (1)
%  \item Kotlin (1)
%  \item Rust (1)
%  \end{itemize}\bigskip\bigskip  
%  
%  \small
%  Please get in contact if you intend to do CW Strand 2. No zips please.
%  Give definitions also on paper if asked. BTW, simp 
%  can stay unchanged. Use \texttt{ders} for CW2, not \texttt{ders2}!
%  \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Lexer, Parser}
\mbox{}\\[-16mm]\mbox{}

\begin{center}
  \begin{tikzpicture}[scale=1,
                      node/.style={
                      rectangle,rounded corners=3mm,
                      very thick,draw=black!50,
                      minimum height=18mm, minimum width=20mm,
                      top color=white,bottom color=black!20,drop shadow}]
  \node (0) at (-2.3,0) {}; 
  
  \node (A) at (0,0)  [node] {};
  \node [below right] at (A.north west) {lexer};

  \node (B) at (3,0)  [node] {};
  \node [below right=1mm] at (B.north west) 
    {\mbox{}\hspace{-1mm}parser};

  \node (C) at (6,0)  [node] {};
  \node [below right] at (C.north west) 
    {\mbox{}\hspace{-1mm}code gen};

  \node (1) at (8.4,0) {}; 

  \draw [->,line width=4mm] (0) -- (A); 
  \draw [->,line width=4mm] (A) -- (B); 
  \draw [->,line width=4mm] (B) -- (C); 
  \draw [->,line width=4mm] (C) -- (1); 
  \end{tikzpicture}
  \end{center}
  
Today a parser.  
  
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{What Parsing is Not}

Usually parsing does not check semantic correctness, e.g.

\begin{itemize}
\item  whether a function is not used before it
  is defined
\item whether a function has the correct number of arguments 
  or are of correct type
\item whether a variable can be declared twice in a scope  
\end{itemize}  

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Languages}

While regular expressions are very useful for lexing, there is
no regular expression that can recognise the language
\bl{$a^nb^n$}.\bigskip

\begin{center}
\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}
\end{center}\bigskip\bigskip

\small
\noindent So we cannot find out with regular expressions
whether parentheses are matched or unmatched. Also regular
expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Hierarchy of Languages}

\begin{center}
\begin{tikzpicture}
[rect/.style={draw=black!50, 
              top color=white,
              bottom color=black!20, 
              rectangle, 
              very thick, 
              rounded corners}, scale=1.2]

\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
\draw (0,-1.4) node [rect] {regular languages};
\end{tikzpicture}

\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{CF Grammars}

A \alert{\bf context-free grammar} \bl{$G$} consists of

\begin{itemize}
\item a finite set of nonterminal symbols (e.g.~$\meta{A}$ upper case)
\item a finite set terminal symbols or tokens (lower case)
\item a start symbol (which must be a nonterminal)
\item a set of rules
\begin{center}
\bl{$\meta{A} ::= \textit{rhs}$}
\end{center}

where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,
including the empty sequence \bl{$\epsilon$}.\medskip\pause

We also allow rules
\begin{center}
\bl{$\meta{A} ::= \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}
\end{center}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Palindromes}

A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:

\bl{\begin{plstx}[margin=3cm]
: \meta{S} ::= a\cdot\meta{S}\cdot a\\
: \meta{S} ::= b\cdot\meta{S}\cdot b\\
: \meta{S} ::= a\\
: \meta{S} ::= b\\
: \meta{S} ::= \epsilon\\
\end{plstx}}\pause

or

\bl{\begin{plstx}[margin=2cm]
: \meta{S} ::=  a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a | b | \epsilon\\
\end{plstx}}\pause\bigskip

\small
Can you find the grammar rules for matched parentheses?

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Arithmetic Expressions}

\bl{\begin{plstx}[margin=3cm,one per line]
: \meta{E} ::=  num\_token 
   | \meta{E} \cdot + \cdot \meta{E} 
   | \meta{E} \cdot - \cdot \meta{E} 
   | \meta{E} \cdot * \cdot \meta{E} 
   | ( \cdot \meta{E} \cdot ) \\
\end{plstx}}\pause

\bl{\texttt{1 + 2 * 3 + 4}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A CFG Derivation}

\begin{enumerate}
\item Begin with a string containing only the start symbol, say \bl{\meta{S}}\bigskip
\item Replace any nonterminal \bl{\meta{X}} in the string by the
right-hand side of some production \bl{$\meta{X} ::= \textit{rhs}$}\bigskip
\item Repeat 2 until there are no nonterminals left
\end{enumerate}

\begin{center}
\bl{$\meta{S} \rightarrow \ldots \rightarrow \ldots  \rightarrow \ldots  \rightarrow \ldots $}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Example Derivation}

\bl{\begin{plstx}[margin=2cm]
: \meta{S} ::=  \epsilon | a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b \\
\end{plstx}}\bigskip

\begin{center}
\begin{tabular}{lcl}
\bl{\meta{S}} & \bl{$\rightarrow$} & \bl{$a\meta{S}a$}\\
              & \bl{$\rightarrow$} & \bl{$ab\meta{S}ba$}\\
              & \bl{$\rightarrow$} & \bl{$aba\meta{S}aba$}\\
              & \bl{$\rightarrow$} & \bl{$abaaba$}\\

 
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Example Derivation}

\bl{\begin{plstx}[margin=3cm,one per line]
: \meta{E} ::=  num\_token 
   | \meta{E} \cdot + \cdot \meta{E} 
   | \meta{E} \cdot - \cdot \meta{E} 
   | \meta{E} \cdot * \cdot \meta{E} 
   | ( \cdot \meta{E} \cdot ) \\
\end{plstx}}

\small
\begin{center}
\begin{tabular}{@{}c@{}c@{}}
\begin{tabular}{@{\hspace{-2mm}}l@{\hspace{1mm}}l@{\hspace{1mm}}l@{\hspace{4mm}}}
\bl{\meta{E}} & \bl{$\rightarrow$} & \bl{$\meta{E}*\meta{E}$}\\
              & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}$}\\
              & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\
              & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular} &\pause
\begin{tabular}{@{}l@{\hspace{0mm}}l@{\hspace{1mm}}l}
\bl{$\meta{E}$} & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}$}\\
                & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}+\meta{E}$}\\
                & \bl{$\rightarrow$} & \bl{$\meta{E}+\meta{E}*\meta{E}+\meta{E}$}\\
                & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular}
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Context Sensitive Grammars}

It is much harder to find out whether a string is parsed
by a context sensitive grammar:

\bl{\begin{plstx}[margin=2cm]
: \meta{S} ::= b\meta{S}\meta{A}\meta{A} | \epsilon\\
: \meta{A} ::= a\\
: b\meta{A} ::= \meta{A}b\\
\end{plstx}}\pause

\begin{center}
\bl{$\meta{S} \rightarrow\ldots\rightarrow^? ababaa$}
\end{center}\pause

\begin{center}
  \tt Time flies like an arrow;\\ 
  fruit flies like bananas.
  \end{center}  

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Language of a CFG}

Let \bl{$G$} be a context-free grammar with start symbol \bl{\meta{S}}. 
Then the language \bl{$L(G)$} is:

\begin{center}
\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge \meta{S} \rightarrow^* c_1\ldots c_n \}$}
\end{center}\pause

\begin{itemize}
\item Terminals, because there are no rules for replacing them.
\item Once generated, terminals are ``permanent''.
\item Terminals ought to be tokens of the language\\
(but can also be strings).
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Parse Trees}
\mbox{}\\[-12mm]

\bl{\begin{plstx}: \meta{E} ::= \meta{T} | \meta{T} \cdot + \cdot \meta{E} |  \meta{T} \cdot - \cdot \meta{E}\\
: \meta{T} ::= \meta{F} | \meta{F} \cdot * \cdot \meta{T}\\
: \meta{F} ::= num\_token | ( \cdot \meta{E} \cdot )\\
\end{plstx}}

\begin{center}\small
\begin{tikzpicture}[level distance=8mm, blue]
  \node {$\meta{E}$}
    child {node {$\meta{T}$} 
     child {node {$\meta{T}$} 
                 child {node {(\,$\meta{E}$\,)}
                            child {node{$\meta{F}$ *{} $\meta{F}$}
                                  child {node {$\meta{T}$} child {node {2}}}
                                  child {node {$\meta{T}$} child {node {3}}} 
                               }
                          }
              }
     child {node {+}}
     child {node {$\meta{T}$}
       child {node {(\,$\meta{E}$\,)} 
       child {node {$\meta{F}$}
       child {node {$\meta{T}$ +{} $\meta{T}$}
                    child {node {3}}
                    child {node {4}} 
                 }
                 }}
    }};
\end{tikzpicture}
\end{center}

\begin{textblock}{5}(1, 6.5)
\bl{\texttt{(2*3)+(3+4)}}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Arithmetic Expressions}

\bl{\begin{plstx}[margin=3cm,one per line]
: \meta{E} ::=  num\_token 
   | \meta{E} \cdot + \cdot \meta{E} 
   | \meta{E} \cdot - \cdot \meta{E} 
   | \meta{E} \cdot * \cdot \meta{E} 
   | ( \cdot \meta{E} \cdot ) \\
\end{plstx}}\pause\bigskip

A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$\meta{E}$} such
that \bl{$\meta{E} \rightarrow^+ \meta{E}\cdot \ldots$}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Ambiguous Grammars}

A grammar is \alert{\bf ambiguous} if there is a string that
has at least two different parse trees.

\bl{\begin{plstx}[margin=3cm,one per line]: \meta{E} ::=  num\_token 
   | \meta{E} \cdot + \cdot \meta{E} 
   | \meta{E} \cdot - \cdot \meta{E} 
   | \meta{E} \cdot * \cdot \meta{E} 
   | ( \cdot \meta{E} \cdot ) \\
\end{plstx}}


\bl{\texttt{1 + 2 * 3 + 4}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{`Dangling' Else}

Another ambiguous grammar:\bigskip

\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  if $E$ then $E$\\
 & $|$ &  if $E$ then $E$ else $E$ \\
 & $|$ &  \ldots
\end{tabular}}
\end{center}\bigskip

\bl{\texttt{if a then if x then y else c}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Parser Combinators}

One of the simplest ways to implement a parser, see
{\small\url{https://vimeo.com/142341803}}\bigskip

Parser combinators: \bigskip

\begin{minipage}{1.1\textwidth}
\begin{center}
\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} 
$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$
\end{center}
\end{minipage}\bigskip

\begin{itemize}
\item atomic parsers
\item sequencing
\item alternative
\item semantic action
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Atomic parsers, for example, number tokens

\begin{center}
\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} 
\end{center}\bigskip

\begin{itemize}
\item you consume one or more token from the\\ 
  input (stream)
\item also works for characters and strings
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Alternative parser (code \bl{$p\;||\;q$})\bigskip

\begin{itemize}
\item apply \bl{$p$} and also \bl{$q$}; then combine 
  the outputs
\end{itemize}

\begin{center}
\large \bl{$p(\text{input}) \cup q(\text{input})$}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Sequence parser (code \bl{$p\sim q$})\bigskip

\begin{itemize}
\item apply first \bl{$p$} producing a set of pairs
\item then apply \bl{$q$} to the unparsed part
\item then combine the results:\medskip 
\begin{center}
((output$_1$, output$_2$), unparsed part)
\end{center}
\end{itemize}

\begin{center}
\begin{tabular}{l}
\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] 
\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
\end{tabular}
\end{center}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Function parser (code \bl{$p \Rightarrow f\;$})\bigskip

\begin{itemize}
\item apply \bl{$p$} producing a set of pairs
\item then apply the function \bl{$f$} to each first component
\end{itemize}

\begin{center}
\begin{tabular}{l}
\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
\end{tabular}
\end{center}\bigskip\bigskip\pause

\bl{$f$} is the semantic action (``what to do with the parsed input'')

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}

Addition

\begin{center}
\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}
\end{center}\pause

Multiplication

\begin{center}
\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$}
\end{center}\pause

Parenthesis

\begin{center}
\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$}
\end{center}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Types of Parsers}

\begin{itemize}
\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},
then \bl{$p \sim q$} returns results of type

\begin{center}
\bl{$T \times S$}
\end{center}\pause

\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then  \bl{$q$} \alert{must} also have results of type \bl{$T$},
and \bl{$p \;||\; q$} returns results of type

\begin{center}
\bl{$T$}
\end{center}\pause

\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from
\bl{$T$} to \bl{$S$}, then
\bl{$p \Rightarrow f$} returns results of type

\begin{center}
\bl{$S$}
\end{center}

\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Input Types of Parsers}

\begin{itemize}
\item input: \alert{token list}
\item output: set of (output\_type, \alert{token list})
\end{itemize}\bigskip\pause

actually it can be any input type as long as it is a kind of
sequence (for example a string)

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Scannerless Parsers}

\begin{itemize}
\item input: \alert{string}
\item output: set of (output\_type, \alert{string})
\end{itemize}\bigskip\bigskip

but using lexers is better because whitespaces or comments can be
filtered out; then input is a sequence of tokens

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Successful Parses}

\begin{itemize}
\item input: string
\item output: \alert{set of} (output\_type, string)
\end{itemize}\bigskip

a parse is successful whenever the input has been fully
``consumed'' (that is the second component is empty)

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Abstract Parser Class}

\small
\lstinputlisting[language=Scala,xleftmargin=1mm]
 {../progs/app7.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\small
\fontsize{10}{12}\selectfont
\lstinputlisting[language=Scala,xleftmargin=1mm]
  {../progs/app8.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Grammars}

Which languages are recognised by the following two grammars?

\begin{center}
\bl{\begin{tabular}{lcl}
$\meta{S}$ & $\rightarrow$ &  $1 \cdot \meta{S} \cdot \meta{S}$\\
        & $|$ & $\epsilon$
\end{tabular}}
\end{center}\bigskip

\begin{center}
\bl{\begin{tabular}{lcl}
$\meta{U}$ & $\rightarrow$ &  $1 \cdot \meta{U}$\\
        & $|$ & $\epsilon$
\end{tabular}}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Ambiguous Grammars}

\begin{center}
\begin{tikzpicture}
\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
    enlargelimits=false,
    xtick={0,100,...,1000},
    xmax=1050,
    ymax=33,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=11cm,
    height=7cm, 
    legend entries={unambiguous,ambiguous},  
    legend pos=north east,
    legend cell align=left,
    x tick label style={font=\small,/pgf/number format/1000 sep={}}]
\addplot[blue,mark=*, mark options={fill=white}] 
  table {s-grammar1.data};
\only<2>{
  \addplot[red,mark=triangle*, mark options={fill=white}] 
  table {s-grammar2.data};}  
\end{axis}
\end{tikzpicture}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
\begin{frame}
\frametitle{While-Language}
\mbox{}\\[-23mm]\mbox{}

\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip
         | \meta{Id} := \meta{AExp}
         | if \meta{BExp} then \meta{Block} else \meta{Block}
         | while \meta{BExp} do \meta{Block}\\
: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}
          | \meta{Stmt}\\
: \meta{Block} ::= \{ \meta{Stmts} \}
          | \meta{Stmt}\\
: \meta{AExp} ::= \ldots\\
: \meta{BExp} ::= \ldots\\\end{plstx}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{An Interpreter}

\begin{center}
\bl{\begin{tabular}{l}
$\{$\\
\;\;$x := 5 \text{;}$\\
\;\;$y := x * 3\text{;}$\\
\;\;$y := x * 4\text{;}$\\
\;\;$x := u * 3$\\
$\}$
\end{tabular}}
\end{center}

\begin{itemize}
\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause
\item \bl{\texttt{eval(stmt, env)}}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Interpreter\end{tabular}}

\begin{center}
\bl{\begin{tabular}{@{}lcl@{}}
$\text{eval}(n, E)$ & $\dn$ & $n$\\
$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\
$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\
$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\
$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\
$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\
$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\
$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\
\end{tabular}}
\end{center}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}}

\begin{center}
\bl{\begin{tabular}{@{}lcl@{}}
$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\
$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\
\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\;
\text{eval}(cs_1,E)$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\
\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\;
\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\
\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\
$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\
\end{tabular}}
\end{center}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Test Program}

\mbox{}\\[-18mm]\mbox{}

??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont
%\texttt{\lstinputlisting{../progs/loops.while}}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}}

\begin{center}
\begin{tikzpicture}
\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small]
\addplot+[smooth] file {interpreted.data};
\end{axis}
\end{tikzpicture}
\end{center}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}}

\begin{itemize}
\item introduced in 1995
\item is a stack-based VM (like Postscript, CLR of .Net)
\item contains a JIT compiler
\item many languages take advantage of JVM's infrastructure (JRE)
\item is garbage collected $\Rightarrow$ no buffer overflows
\item some languages compile to the JVM: Scala, Clojure\ldots
\end{itemize}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


\end{document}

%%% Local Variables:  
%%% mode: latex
%%% TeX-master: t
%%% End:
author	Christian Urban <christian.urban@kcl.ac.uk>
	Sun, 11 Oct 2020 09:10:08 +0100
changeset 778	3e5f5d19f514
parent 743	6acabeecdf75
child 792	34132a854d03
permissions	-rw-r--r--