afl-material: slides/slides05.tex@b3129cff41e9


\documentclass[dvipsnames,14pt,t]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}
\usepackage{../grammar}

\hfuzz=220pt 

\pgfplotsset{compat=1.11}

\newcommand{\bl}[1]{\textcolor{blue}{#1}}  

% beamer stuff 
\renewcommand{\slidecaption}{AFL 05, King's College London}


\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
  \begin{tabular}{@ {}c@ {}}
  \\[-3mm]
  \LARGE Automata and \\[-2mm] 
  \LARGE Formal Languages (5)\\[3mm] 
  \end{tabular}}

  \normalsize
  \begin{center}
  \begin{tabular}{ll}
  Email:  & christian.urban at kcl.ac.uk\\
  Office: & S1.27 (1st floor Strand Building)\\
  Slides: & KEATS (also home work is there)\\
  \end{tabular}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Last Week\\[-1mm] 
            Regexes and Values\end{tabular}}

Regular expressions and their corresponding values:

\begin{center}
\begin{columns}
\begin{column}{3cm}
\begin{tabular}{@{}rrl@{}}
  \bl{$r$} & \bl{$::=$}  & \bl{$\varnothing$}\\
           & \bl{$\mid$} & \bl{$\epsilon$}   \\
           & \bl{$\mid$} & \bl{$c$}          \\
           & \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
           & \bl{$\mid$} & \bl{$r_1 + r_2$}   \\
  \\
           & \bl{$\mid$} & \bl{$r^*$}         \\
  \end{tabular}
\end{column}
\begin{column}{3cm}
\begin{tabular}{@{\hspace{-7mm}}rrl@{}}
  \bl{$v$} & \bl{$::=$}  & \\
           &             & \bl{$Empty$}   \\
           & \bl{$\mid$} & \bl{$Char(c)$}          \\
           & \bl{$\mid$} & \bl{$Seq(v_1,v_2)$}\\
           & \bl{$\mid$} & \bl{$Left(v)$}   \\
           & \bl{$\mid$} & \bl{$Right(v)$}  \\
           & \bl{$\mid$} & \bl{$[v_1,\ldots\,v_n]$} \\
  \end{tabular}
\end{column}
\end{columns}
\end{center}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\begin{textblock}{10}(3,5)
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1)  {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm]  (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm]  (r3) -- (v3);
\draw[->,line width=0.5mm]  (r2) -- (v2);
\draw[->,line width=0.5mm]  (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{textblock}

\begin{textblock}{6}(1,0.8)
\begin{bubble}[6cm]
\small
\begin{tabular}{ll}
\bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\
\bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\
\bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\
\bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\
\end{tabular}
\end{bubble}
\end{textblock}

\begin{textblock}{6}(1,11.4)
\begin{bubble}[7.6cm]
\small
\begin{tabular}{ll}
\bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\
\bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\
\bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\
\bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\
\end{tabular}
\end{bubble}
\end{textblock}

\begin{textblock}{6}(12,11.4)
\begin{bubble}[2cm]
\small
\begin{tabular}{ll}
\bl{$|v_1|$}: & \bl{$abc$}\\
\bl{$|v_2|$}: & \bl{$bc$}\\
\bl{$|v_3|$}: & \bl{$c$}\\
\bl{$|v_4|$}: & \bl{$[]$}
\end{tabular}
\end{bubble}
\end{textblock}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Simplification}

\begin{itemize}
\item If we simplify after the derivative, then we are builing the
value for the simplified regular expression, but \emph{not} for the original
regular expression.
\end{itemize}

\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1)  {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm]  (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm]  (r3) -- (v3);
\draw[->,line width=0.5mm]  (r2) -- (v2);
\draw[->,line width=0.5mm]  (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}

\small
\hspace{4.5cm}\bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}
$\mapsto$
\bl{$\epsilon$}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Records}

\begin{itemize}
\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: 
\bl{$Rec(x,v)$}\medskip

\item \bl{$nullable(x:r) \dn nullable(r)$}
\item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$}
\item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$}
\item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$}
\end{itemize}\bigskip\bigskip

\small
for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Environments}

Obtaining the ``recorded'' parts of a value: 

\begin{center}
\begin{tabular}{lcl}
  \bl{$env(Empty)$}     & \bl{$\dn$} & \bl{$[]$}\\
  \bl{$env(Char(c))$}   & \bl{$\dn$} & \bl{$[]$}\\
  \bl{$env(Left(v))$}   & \bl{$\dn$} & \bl{$env(v)$}\\
  \bl{$env(Right(v))$}  & \bl{$\dn$} & \bl{$env(v)$}\\
  \bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\
  \bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} & 
     \bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\
  \bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{While Tokens}

\begin{center}
\begin{tabular}{@{}r@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
\pcode{WHILE\_REGS} & $\dn$ & \raisebox{-1mm}{\large(}\pcode{("k" : KEYWORD)} +\\ 
                  &       & \phantom{(}\pcode{("i" : ID)} +\\ 
                  &       & \phantom{(}\pcode{("o" : OP)} + \\
                  &       & \phantom{(}\pcode{("n" : NUM)} + \\
                  &       & \phantom{(}\pcode{("s" : SEMI)} +\\ 
                  &       & \phantom{(}\pcode{("p" : (LPAREN + RPAREN))} +\\ 
                  &       & \phantom{(}\pcode{("b" : (BEGIN + END))} +\\ 
                  &       & \phantom{(}\pcode{("w" : WHITESPACE)}\raisebox{-1mm}{\large)$^*$}
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]

\consolas
\begin{center}
\code{"if true then then 42 else +"}
\end{center}

\only<1>{
\small\begin{tabular}{l}
KEYWORD(if),\\ 
WHITESPACE,\\ 
IDENT(true),\\ 
WHITESPACE,\\ 
KEYWORD(then),\\ 
WHITESPACE,\\ 
KEYWORD(then),\\ 
WHITESPACE,\\ 
NUM(42),\\ 
WHITESPACE,\\ 
KEYWORD(else),\\ 
WHITESPACE,\\ 
OP(+)
\end{tabular}}

\only<2>{
\small\begin{tabular}{l}
KEYWORD(if),\\ 
IDENT(true),\\ 
KEYWORD(then),\\ 
KEYWORD(then),\\ 
NUM(42),\\ 
KEYWORD(else),\\ 
OP(+)
\end{tabular}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Rules}

\begin{itemize}
\item Longest match rule (``maximal munch rule''): The 
longest initial substring matched by any regular expression is taken
as next token.\bigskip

\item Rule priority:
For a particular longest initial substring, the first regular
expression that can match determines the token.

\end{itemize}

%\url{http://www.technologyreview.com/tr10/?year=2011}
  
%finite deterministic automata/ nondeterministic automaton

%\item problem with infix operations, for example i-12


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\begin{itemize}
\item Regular expression for email addresses

\begin{center}
\begin{tabular}{l}
(name: \bl{$[a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+$})\bl{$\cdot @\cdot$}\\ 
\qquad(domain: \bl{$[a\mbox{-}z0\mbox{-}9\,.-]^+$}) \bl{$\cdot .\cdot$}\\ 
\qquad\qquad(top\_level: \bl{$[a\mbox{-}z\,.]^{\{2,6\}}$})
\end{tabular}
\end{center}

\bl{\[
\texttt{christian.urban@kcl.ac.uk}
\]}

\item result environment:

\begin{center}
\begin{tabular}{l}
\bl{$[(name:\texttt{christian.urban}),$}\\ 
\bl{$\phantom{[}(domain:\texttt{kcl}),$}\\ 
\bl{$\phantom{[}(top\_level:\texttt{ac.uk})]$}
\end{tabular}
\end{center}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Coursework}

\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
\bl{$nullable([c_1 c_2 \ldots c_n])$}  & \bl{$\dn$} & $?$\\
\bl{$nullable(r^+)$}                   & \bl{$\dn$} & $?$\\
\bl{$nullable(r^?)$}                   & \bl{$\dn$} & $?$\\
\bl{$nullable(r^{\{n,m\}})$}            & \bl{$\dn$} & $?$\\
\bl{$nullable(\sim{}r)$}               & \bl{$\dn$} & $?$\medskip\\
\bl{$der\, c\, ([c_1 c_2 \ldots c_n])$}  & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (r^+)$}                   & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (r^?)$}                   & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (r^{\{n,m\}})$}            & \bl{$\dn$} & $?$\\
\bl{$der\, c\, (\sim{}r)$}               & \bl{$\dn$} & $?$\\
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Languages}

While regular expressions are very useful for lexing, there is
no regular expression that can recognise the language
\bl{$a^nb^n$}.\bigskip

\begin{center}
\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}
\end{center}\bigskip\bigskip

\small
\noindent So we cannot find out with regular expressions
whether parentheses are matched or unmatched. Also regular
expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Hierarchy of Languages}

\begin{center}
\begin{tikzpicture}
[rect/.style={draw=black!50, 
              top color=white,
              bottom color=black!20, 
              rectangle, 
              very thick, 
              rounded corners}, scale=1.2]

\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
\draw (0,-1.4) node [rect] {regular languages};
\end{tikzpicture}

\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Grammars}

A (context-free) grammar \bl{$G$} consists of

\begin{itemize}
\item a finite set of nonterminal symbols (upper case)
\item a finite terminal symbols or tokens (lower case)
\item a start symbol (which must be a nonterminal)
\item a set of rules
\begin{center}
\bl{$A \rightarrow \textit{rhs}$}
\end{center}

where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,
including the empty sequence \bl{$\epsilon$}.\medskip\pause

We also allow rules
\begin{center}
\bl{$A \rightarrow \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}
\end{center}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Palindromes}

A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:

\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ &  $\epsilon$ \\
$S$ & $\rightarrow$ &  $a\cdot S\cdot a$ \\
$S$ & $\rightarrow$ &  $b\cdot S\cdot b$ \\
\end{tabular}}
\end{center}\pause

or

\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ &  $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\
\end{tabular}}
\end{center}\pause\bigskip

\small
Can you find the grammar rules for matched parentheses?

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Arithmetic Expressions}

\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  $num\_token$ \\
$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
\end{tabular}}
\end{center}\pause

\bl{\texttt{1 + 2 * 3 + 4}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A CFG Derivation}

\begin{enumerate}
\item Begin with a string containing only the start symbol, say \bl{$S$}\bigskip
\item Replace any nonterminal \bl{$X$} in the string by the
right-hand side of some production \bl{$X \rightarrow \textit{rhs}$}\bigskip
\item Repeat 2 until there are no nonterminals
\end{enumerate}

\begin{center}
\bl{$S \rightarrow \ldots \rightarrow \ldots  \rightarrow \ldots  \rightarrow \ldots $}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Example Derivation}

\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ &  $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\
\end{tabular}}
\end{center}\bigskip

\begin{center}
\begin{tabular}{lcl}
\bl{$S$} & \bl{$\rightarrow$} & \bl{$aSa$}\\
              & \bl{$\rightarrow$} & \bl{$abSba$}\\
              & \bl{$\rightarrow$} & \bl{$abaSaba$}\\
              & \bl{$\rightarrow$} & \bl{$abaaba$}\\

 
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Example Derivation}

\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  $num\_token$ \\
$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
\end{tabular}}
\end{center}\bigskip


\begin{center}
\begin{tabular}{@{}c@{}c@{}}
\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l}
\bl{$E$} & \bl{$\rightarrow$} & \bl{$E*E$}\\
              & \bl{$\rightarrow$} & \bl{$E+E*E$}\\
              & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\
              & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular} &\pause
\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l}
\bl{$E$} & \bl{$\rightarrow$} & \bl{$E+E$}\\
              & \bl{$\rightarrow$} & \bl{$E+E+E$}\\
              & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\
              & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
\end{tabular}
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Context Sensitive Grms}


\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\Rightarrow$ &  $bSAA\;|\; \epsilon$\\
$A$ & $\Rightarrow$ & $a$\\
$bA$ & $\Rightarrow$ & $Ab$\\
\end{tabular}}
\end{center}\pause

\begin{center}
\bl{$S \Rightarrow\ldots\Rightarrow^? "ababaa"$}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Language of a CFG}

Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. 
Then the language \bl{$L(G)$} is:

\begin{center}
\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$}
\end{center}\pause

\begin{itemize}
\item Terminals, because there are no rules for replacing them.
\item Once generated, terminals are ``permanent''.
\item Terminals ought to be tokens of the language\\
(but can also be strings).
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Parse Trees}

\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  $F \;|\; F \cdot * \cdot F$\\
$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\
$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\
\end{tabular}}
\end{center}

\begin{center}
\begin{tikzpicture}[level distance=8mm, blue]
  \node {$E$}
    child {node {$F$} 
     child {node {$T$} 
                 child {node {(\,$E$\,)}
                            child {node{$F$ *{} $F$}
                                  child {node {$T$} child {node {2}}}
                                  child {node {$T$} child {node {3}}} 
                               }
                          }
              }
     child {node {+}}
     child {node {$T$}
       child {node {(\,$E$\,)} 
       child {node {$F$}
       child {node {$T$ +{} $T$}
                    child {node {3}}
                    child {node {4}} 
                 }
                 }}
    }};
\end{tikzpicture}
\end{center}

\begin{textblock}{5}(1, 6.5)
\bl{\texttt{(2*3)+(3+4)}}
\end{textblock}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Arithmetic Expressions}

\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  $num\_token$ \\
$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
\end{tabular}}
\end{center}\pause\bigskip

A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$E$} such
that \bl{$E \rightarrow^+ E\cdot \ldots$}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Ambiguous Grammars}

A grammar is \alert{\bf ambiguous} if there is a string that
has at least two different parse trees.


\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  $num\_token$ \\
$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
\end{tabular}}
\end{center}

\bl{\texttt{1 + 2 * 3 + 4}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Dangling Else}

Another ambiguous grammar:\bigskip

\begin{center}
\bl{\begin{tabular}{lcl}
$E$ & $\rightarrow$ &  if $E$ then $E$\\
 & $|$ &  if $E$ then $E$ else $E$ \\
 & $|$ &  \ldots
\end{tabular}}
\end{center}\bigskip

\bl{\texttt{if a then if x then y else c}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Parser Combinators}

One of the simplest ways to implement a parser, see
{\small\url{https://vimeo.com/142341803}}\bigskip

Parser combinators: \bigskip

\begin{minipage}{1.1\textwidth}
\begin{center}
\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} 
$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$
\end{center}
\end{minipage}\bigskip

\begin{itemize}
\item atomic parsers
\item sequencing
\item alternative
\item semantic action
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Atomic parsers, for example, number tokens

\begin{center}
\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} 
\end{center}\bigskip

\begin{itemize}
\item you consume one or more token from the\\ 
  input (stream)
\item also works for characters and strings
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Alternative parser (code \bl{$p\;||\;q$})\bigskip

\begin{itemize}
\item apply \bl{$p$} and also \bl{$q$}; then combine 
  the outputs
\end{itemize}

\begin{center}
\large \bl{$p(\text{input}) \cup q(\text{input})$}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Sequence parser (code \bl{$p\sim q$})\bigskip

\begin{itemize}
\item apply first \bl{$p$} producing a set of pairs
\item then apply \bl{$q$} to the unparsed parts
\item then combine the results:\medskip 
\begin{center}
((output$_1$, output$_2$), unparsed part)
\end{center}
\end{itemize}

\begin{center}
\begin{tabular}{l}
\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] 
\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
\end{tabular}
\end{center}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

Function parser (code \bl{$p \Rightarrow f\;$})\bigskip

\begin{itemize}
\item apply \bl{$p$} producing a set of pairs
\item then apply the function \bl{$f$} to each first component
\end{itemize}

\begin{center}
\begin{tabular}{l}
\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
\end{tabular}
\end{center}\bigskip\bigskip\pause

\bl{$f$} is the semantic action (``what to do with the parsed input'')

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}

Addition

\begin{center}
\bl{$T \sim + \sim E \Rightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}
\end{center}\pause

Multiplication

\begin{center}
\bl{$F \sim * \sim T \Rightarrow f((x,y), z) \Rightarrow x * z$}
\end{center}\pause

Parenthesis

\begin{center}
\bl{$\text{(} \sim E \sim \text{)} \Rightarrow f((x,y), z) \Rightarrow y$}
\end{center}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Types of Parsers}

\begin{itemize}
\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},
then \bl{$p \sim q$} returns results of type

\begin{center}
\bl{$T \times S$}
\end{center}\pause

\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then  \bl{$q$} \alert{must} also have results of type \bl{$T$},
and \bl{$p \;||\; q$} returns results of type

\begin{center}
\bl{$T$}
\end{center}\pause

\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from
\bl{$T$} to \bl{$S$}, then
\bl{$p \Rightarrow f$} returns results of type

\begin{center}
\bl{$S$}
\end{center}

\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Input Types of Parsers}

\begin{itemize}
\item input: \alert{token list}
\item output: set of (output\_type, \alert{token list})
\end{itemize}\bigskip\pause

actually it can be any input type as long as it is a kind of
sequence (for example a string)

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Scannerless Parsers}

\begin{itemize}
\item input: \alert{string}
\item output: set of (output\_type, \alert{string})
\end{itemize}\bigskip

but lexers are better when whitespaces or comments need to be
filtered out; then input is a sequence of tokens

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Successful Parses}

\begin{itemize}
\item input: string
\item output: \alert{set of} (output\_type, string)
\end{itemize}\bigskip

a parse is successful whenever the input has been fully
``consumed'' (that is the second component is empty)

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Abstract Parser Class}

\small
\lstinputlisting[language=Scala,xleftmargin=1mm]
 {../progs/app7.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\small
\fontsize{10}{12}\selectfont
\lstinputlisting[language=Scala,xleftmargin=1mm]
  {../progs/app8.scala}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Two Grammars}

Which languages are recognised by the following two grammars?

\begin{center}
\bl{\begin{tabular}{lcl}
$S$ & $\rightarrow$ &  $1 \cdot S \cdot S$\\
        & $|$ & $\epsilon$
\end{tabular}}
\end{center}\bigskip

\begin{center}
\bl{\begin{tabular}{lcl}
$U$ & $\rightarrow$ &  $1 \cdot U$\\
        & $|$ & $\epsilon$
\end{tabular}}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{Ambiguous Grammars}

\begin{center}
\begin{tikzpicture}
\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
    enlargelimits=false,
    xtick={0,100,...,1000},
    xmax=1050,
    ymax=33,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=11cm,
    height=7cm, 
    legend entries={unambiguous,ambiguous},  
    legend pos=north east,
    legend cell align=left,
    x tick label style={font=\small,/pgf/number format/1000 sep={}}]
\addplot[blue,mark=*, mark options={fill=white}] 
  table {s-grammar1.data};
\only<2>{
  \addplot[red,mark=triangle*, mark options={fill=white}] 
  table {s-grammar2.data};}  
\end{axis}
\end{tikzpicture}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
\begin{frame}
\frametitle{While-Language}
\mbox{}\\[-23mm]\mbox{}

\bl{\begin{plstx}[rhs style=,one per line]
: \meta{Stmt} ::= skip
         | \meta{Id} := \meta{AExp}
         | if \meta{BExp} then \meta{Block} else \meta{Block}
         | while \meta{BExp} do \meta{Block}\\
: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}
          | \meta{Stmt}\\
: \meta{Block} ::= \{ \meta{Stmts} \}
          | \meta{Stmt}\\
: \meta{AExp} ::= \ldots\\
: \meta{BExp} ::= \ldots\\
\end{plstx}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{An Interpreter}

\begin{center}
\bl{\begin{tabular}{l}
$\{$\\
\;\;$x := 5 \text{;}$\\
\;\;$y := x * 3\text{;}$\\
\;\;$y := x * 4\text{;}$\\
\;\;$x := u * 3$\\
$\}$
\end{tabular}}
\end{center}

\begin{itemize}
\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause
\item \bl{\texttt{eval(stmt, env)}}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   



\end{document}

%%% Local Variables:  
%%% mode: latex
%%% TeX-master: t
%%% End:
author	Christian Urban <christian dot urban at kcl dot ac dot uk>
	Mon, 19 Oct 2015 23:49:25 +0100
changeset 358	b3129cff41e9
parent 290	3a2fa69ea675
child 360	c6c574d2ca0c
permissions	-rw-r--r--