slides/slides04.tex
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Thu, 16 Oct 2014 17:30:05 +0100
changeset 283 c14e5ebf0c3b
parent 280 267bd65b2e29
child 352 1e1b0fe66107
permissions -rw-r--r--
updated

\documentclass[dvipsnames,14pt,t]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
\usepackage{../data}

\hfuzz=220pt 

\pgfplotsset{compat=1.11}

\newcommand{\bl}[1]{\textcolor{blue}{#1}}  

\renewcommand{\slidecaption}{AFL 04, King's College London}

\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
  \begin{tabular}{@ {}c@ {}}
  \\[-3mm]
  \LARGE Automata and \\[-2mm] 
  \LARGE Formal Languages (4)\\[3mm] 
  \end{tabular}}

  \normalsize
  \begin{center}
  \begin{tabular}{ll}
  Email:  & christian.urban at kcl.ac.uk\\
  Office: & S1.27 (1st floor Strand Building)\\
  Slides: & KEATS (also home work is there)\\
  \end{tabular}
  \end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regexps and Automata}

\begin{center}
\begin{tikzpicture}
\node (rexp)  {\bl{\bf Regexps}};
\node (nfa) [right=of rexp] {\bl{\bf NFAs}};
\node (dfa) [right=of nfa] {\bl{\bf DFAs}};
\node (mdfa) [right=of dfa] {\bl{\bf \begin{tabular}{c}minimal\\ DFAs\end{tabular}}};
\path[->,red, line width=2mm] (rexp) edge node [above=4mm, black] 
     {\begin{tabular}{c@{\hspace{9mm}}}Thompson's\\[-1mm] construction\end{tabular}} (nfa);
\path[->,red, line width=2mm] (nfa) edge node [above=4mm, black] 
     {\begin{tabular}{c}subset\\[-1mm] construction\end{tabular}}(dfa);
\path[->, red, line width=2mm] (dfa) edge node [below=5mm, black] {minimisation} (mdfa);
\path[->, red, line width=2mm] (dfa) edge [bend left=45] (rexp);
\end{tikzpicture}\\
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{\bl{$(a?\{n\}) \cdot a\{n\}$}}

\begin{tikzpicture}
\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
    enlargelimits=false,
    xtick={0,5,...,30},
    xmax=30,
    ymax=35,
    ytick={0,5,...,30},
    scaled ticks=false,
    axis lines=left,
    width=10cm,
    height=7cm, 
    legend entries={Python,Ruby,my NFA},  
    legend pos=north west,
    legend cell align=left]
\addplot[blue,mark=*, mark options={fill=white}] 
  table {re-python.data};
\addplot[brown,mark=pentagon*, mark options={fill=white}] 
  table {re-ruby.data};  
\addplot[red,mark=triangle*, mark options={fill=white}] 
  table {nfasearch.data};	  
\end{axis}
\end{tikzpicture}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{DFA to Rexp}

\begin{center}
\begin{tikzpicture}[scale=2,>=stealth',very thick,
                    every state/.style={minimum size=0pt,
                    draw=blue!50,very thick,fill=blue!20},]
  \node[state, initial]        (q0) at ( 0,1) {$q_0$};
  \node[state]                    (q1) at ( 1,1) {$q_1$};
  \node[state, accepting] (q2) at ( 2,1) {$q_2$};
  \path[->] (q0) edge[bend left] node[above] {\alert{$a$}} (q1)
            (q1) edge[bend left] node[above] {\alert{$b$}} (q0)
            (q2) edge[bend left=50] node[below] {\alert{$b$}} (q0)
            (q1) edge node[above] {\alert{$a$}} (q2)
            (q2) edge [loop right] node {\alert{$a$}} ()
            (q0) edge [loop below] node {\alert{$b$}} ();
\end{tikzpicture}
\end{center}\bigskip

\begin{center}
\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l@{\hspace{7mm}}l}
\bl{$q_0$} & \bl{$=$} & \bl{$\epsilon + q_0\,b + q_1\,b +  q_2\,b$} & (start state)\\
\bl{$q_1$} & \bl{$=$} & \bl{$q_0\,a$}\\
\bl{$q_2$} & \bl{$=$} & \bl{$q_1\,a + q_2\,a$}\\

\end{tabular}
\end{center}

\onslide<2->{
Arden's Lemma:
\begin{center}
If \bl{$q = q\,r + s$}\; then\; \bl{$q = s\, r^*$}
\end{center}
}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{DFA Minimisation}

\begin{enumerate}
\item Take all pairs \bl{$(q, p)$} with \bl{$q \not= p$}
\item Mark all pairs that accepting and non-accepting states
\item For  all unmarked pairs \bl{$(q, p)$} and all characters \bl{$c$} test whether
\begin{center}
\bl{$(\delta(q, c), \delta(p,c))$}
\end{center} 
are marked. If yes, then also mark \bl{$(q, p)$}.
\item Repeat last step until no change.
\item All unmarked pairs can be merged.
\end{enumerate}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\begin{center}
\begin{tikzpicture}[>=stealth',very thick,auto,
                             every state/.style={minimum size=0pt,inner sep=2pt,draw=blue!50,very thick,fill=blue!20},]
\node[state,initial]  (q_0)  {$q_0$};
\node[state] (q_1) [right=of q_0] {$q_1$};
\node[state] (q_2) [below right=of q_0] {$q_2$};
\node[state] (q_3) [right=of q_2] {$q_3$};
\node[state, accepting] (q_4) [right=of q_1] {$q_4$};
\path[->] (q_0) edge node [above]  {\alert{$a$}} (q_1);
\path[->] (q_1) edge node [above]  {\alert{$a$}} (q_4);
\path[->] (q_4) edge [loop right] node  {\alert{$a, b$}} ();
\path[->] (q_3) edge node [right]  {\alert{$a$}} (q_4);
\path[->] (q_2) edge node [above]  {\alert{$a$}} (q_3);
\path[->] (q_1) edge node [right]  {\alert{$b$}} (q_2);
\path[->] (q_0) edge node [above]  {\alert{$b$}} (q_2);
\path[->] (q_2) edge [loop left] node  {\alert{$b$}} ();
\path[->] (q_3) edge [bend left=95, looseness=1.3] node [below]  {\alert{$b$}} (q_0);
\end{tikzpicture}
\end{center}

\mbox{}\\[-20mm]\mbox{}

\begin{center}
\begin{tikzpicture}[scale=0.8,line width=0.8mm]
\draw (0,0) -- (4,0);
\draw (0,1) -- (4,1);
\draw (0,2) -- (3,2);
\draw (0,3) -- (2,3);
\draw (0,4) -- (1,4);

\draw (0,0) -- (0, 4);
\draw (1,0) -- (1, 4);
\draw (2,0) -- (2, 3);
\draw (3,0) -- (3, 2);
\draw (4,0) -- (4, 1);

\draw (0.5,-0.5) node {$q_0$}; 
\draw (1.5,-0.5) node {$q_1$}; 
\draw (2.5,-0.5) node {$q_2$}; 
\draw (3.5,-0.5) node {$q_3$};
 
\draw (-0.5, 3.5) node {$q_1$}; 
\draw (-0.5, 2.5) node {$q_2$}; 
\draw (-0.5, 1.5) node {$q_3$}; 
\draw (-0.5, 0.5) node {$q_4$}; 

\draw (0.5,0.5) node {\large$\star$}; 
\draw (1.5,0.5) node {\large$\star$}; 
\draw (2.5,0.5) node {\large$\star$}; 
\draw (3.5,0.5) node {\large$\star$};
\end{tikzpicture}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\begin{center}
\begin{tabular}{@{\hspace{-8mm}}cc@{}}
\begin{tikzpicture}[>=stealth',very thick,auto,
                             every state/.style={minimum size=0pt,inner sep=2pt,draw=blue!50,very thick,fill=blue!20},]
\node[state,initial]  (q_0)  {$q_0$};
\node[state] (q_1) [right=of q_0] {$q_1$};
\node[state] (q_2) [below right=of q_0] {$q_2$};
\node[state] (q_3) [right=of q_2] {$q_3$};
\node[state, accepting] (q_4) [right=of q_1] {$q_4$};
\path[->] (q_0) edge node [above]  {\alert{$a$}} (q_1);
\path[->] (q_1) edge node [above]  {\alert{$a$}} (q_4);
\path[->] (q_4) edge [loop right] node  {\alert{$a, b$}} ();
\path[->] (q_3) edge node [right]  {\alert{$a$}} (q_4);
\path[->] (q_2) edge node [above]  {\alert{$a$}} (q_3);
\path[->] (q_1) edge node [right]  {\alert{$b$}} (q_2);
\path[->] (q_0) edge node [above]  {\alert{$b$}} (q_2);
\path[->] (q_2) edge [loop left] node  {\alert{$b$}} ();
\path[->] (q_3) edge [bend left=95, looseness=1.3] node [below]  {\alert{$b$}} (q_0);
\end{tikzpicture}
&
\raisebox{9mm}{\begin{tikzpicture}[scale=0.6,line width=0.8mm]
\draw (0,0) -- (4,0);
\draw (0,1) -- (4,1);
\draw (0,2) -- (3,2);
\draw (0,3) -- (2,3);
\draw (0,4) -- (1,4);

\draw (0,0) -- (0, 4);
\draw (1,0) -- (1, 4);
\draw (2,0) -- (2, 3);
\draw (3,0) -- (3, 2);
\draw (4,0) -- (4, 1);

\draw (0.5,-0.5) node {$q_0$}; 
\draw (1.5,-0.5) node {$q_1$}; 
\draw (2.5,-0.5) node {$q_2$}; 
\draw (3.5,-0.5) node {$q_3$};
 
\draw (-0.5, 3.5) node {$q_1$}; 
\draw (-0.5, 2.5) node {$q_2$}; 
\draw (-0.5, 1.5) node {$q_3$}; 
\draw (-0.5, 0.5) node {$q_4$}; 

\draw (0.5,0.5) node {\large$\star$}; 
\draw (1.5,0.5) node {\large$\star$}; 
\draw (2.5,0.5) node {\large$\star$}; 
\draw (3.5,0.5) node {\large$\star$};
\draw (0.5,1.5) node {\large$\star$}; 
\draw (2.5,1.5) node {\large$\star$}; 
\draw (0.5,3.5) node {\large$\star$}; 
\draw (1.5,2.5) node {\large$\star$}; 
\end{tikzpicture}}
\end{tabular}
\end{center}


\mbox{}\\[-20mm]\mbox{}

\begin{center}
\begin{tikzpicture}[>=stealth',very thick,auto,
                             every state/.style={minimum size=0pt,inner sep=2pt,draw=blue!50,very thick,fill=blue!20},]
\node[state,initial]  (q_02)  {$q_{0, 2}$};
\node[state] (q_13) [right=of q_02] {$q_{1, 3}$};
\node[state, accepting] (q_4) [right=of q_13] {$q_{4\phantom{,0}}$};
\path[->] (q_02) edge [bend left] node [above]  {\alert{$a$}} (q_13);
\path[->] (q_13) edge [bend left] node [below]  {\alert{$b$}} (q_02);
\path[->] (q_02) edge [loop below] node  {\alert{$b$}} ();
\path[->] (q_13) edge node [above]  {\alert{$a$}} (q_4);
\path[->] (q_4) edge [loop above] node  {\alert{$a, b$}} ();
\end{tikzpicture}\\
minimal automaton
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Alternatives}
\mbox{}\\[-17mm]\mbox{}

\begin{center}
\begin{tikzpicture}[>=stealth',very thick,auto,
                    every state/.style={minimum size=0pt,
                    inner sep=2pt,draw=blue!50,very thick,fill=blue!20}]
\only<1>{\node[state,initial]  (q_0)  {$q_0$};}
\only<2->{\node[state,accepting]  (q_0)  {$q_0$};}
\node[state] (q_1) [right=of q_0] {$q_1$};
\node[state] (q_2) [below right=of q_0] {$q_2$};
\node[state] (q_3) [right=of q_2] {$q_3$};
\only<1>{\node[state, accepting] (q_4) [right=of q_1] {$q_4$};}
\only<2->{\node[state, initial right] (q_4) [right=of q_1] {$q_4$};}
\only<1-2>{
\path[->] (q_0) edge node [above]  {\alert{$a$}} (q_1);
\path[->] (q_1) edge node [above]  {\alert{$a$}} (q_4);
\path[->] (q_4) edge [loop above] node  {\alert{$a, b$}} ();
\path[->] (q_3) edge node [right]  {\alert{$a$}} (q_4);
\path[->] (q_2) edge node [above]  {\alert{$a$}} (q_3);
\path[->] (q_1) edge node [right]  {\alert{$b$}} (q_2);
\path[->] (q_0) edge node [above]  {\alert{$b$}} (q_2);
\path[->] (q_2) edge [loop left] node  {\alert{$b$}} ();
\path[->] (q_3) edge [bend left=95, looseness=1.3] node [below]  {\alert{$b$}} (q_0);}
\only<3->{
\path[<-] (q_0) edge node [above]  {\alert{$a$}} (q_1);
\path[<-] (q_1) edge node [above]  {\alert{$a$}} (q_4);
\path[<-] (q_4) edge [loop above] node  {\alert{$a, b$}} ();
\path[<-] (q_3) edge node [right]  {\alert{$a$}} (q_4);
\path[<-] (q_2) edge node [above]  {\alert{$a$}} (q_3);
\path[<-] (q_1) edge node [right]  {\alert{$b$}} (q_2);
\path[<-] (q_0) edge node [above]  {\alert{$b$}} (q_2);
\path[<-] (q_2) edge [loop left] node  {\alert{$b$}} ();
\path[<-] (q_3) edge [bend left=95, looseness=1.3] node [below]  {\alert{$b$}} (q_0);}
\end{tikzpicture}
\end{center}
\mbox{}\\[-18mm]

\begin{itemize}
\item<2-> exchange initial / accepting states
\item<3-> reverse all edges
\item<4-> subset construction $\Rightarrow$ DFA
\item<5-> repeat once more \onslide<6->{$\Rightarrow$ minimal DFA}
\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regular Languages}

Two equivalent definitions:\bigskip

\begin{quote}\rm A language is \alert{regular} iff there exists a
regular expression that recognises all its strings.
\end{quote}

\begin{quote}\rm A language is \alert{regular} iff there exists an
automaton that recognises all its strings.
\end{quote}\bigskip\bigskip


\small
for example $a^nb^n$ is not regular
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Negation}

Regular languages are closed under negation:\bigskip

\begin{center}
\begin{tikzpicture}[scale=2,>=stealth',very thick,
                    every state/.style={minimum size=0pt,
                    draw=blue!50,very thick,fill=blue!20}]
  \only<1>{\node[state,initial] (q0) at ( 0,1) {$q_0$};}
  \only<2>{\node[state,initial,accepting] (q0) at ( 0,1) {$q_0$};}
  \only<1>{\node[state] (q1) at ( 1,1) {$q_1$};}
  \only<2>{\node[state,accepting] (q1) at ( 1,1) {$q_1$};}
  \only<1>{\node[state, accepting] (q2) at ( 2,1) {$q_2$};}
  \only<2>{\node[state] (q2) at ( 2,1) {$q_2$};}
  \path[->] (q0) edge[bend left] node[above] {\alert{$a$}} (q1)
            (q1) edge[bend left] node[above] {\alert{$b$}} (q0)
            (q2) edge[bend left=50] node[below] {\alert{$b$}} (q0)
            (q1) edge node[above] {\alert{$a$}} (q2)
            (q2) edge [loop right] node {\alert{$a$}} ()
            (q0) edge [loop below] node {\alert{$b$}} ();
\end{tikzpicture}
\end{center}\bigskip\bigskip

\onslide<2>{But requires that the automaton is \alert{completed}!}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]

\mbox{\lstinputlisting[language=While]{../progs/fib.while}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{??}

\mbox{\lstinputlisting[language=While]{../progs/collatz.while}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{A Compiler}

\begin{tikzpicture}[scale=1]
  \draw[line width=1mm] (-0.3, 0) rectangle (1.5,2);
  \draw[line width=1mm] (-1.8, 0) rectangle (-3.6,2);
  \draw (4.4,1) node {Code Gen};
  \draw (0.6,1.7) node {\small Parser};
  \draw (-2.7,1.7) node {\small Lexer};
  
  \draw[red,->,line width = 2mm] (1.7,1) -- (3.2,1);
  \draw[red,<-,line width = 2mm] (-0.6,1) -- (-1.6,1);
  \draw[red,<-,line width = 2mm] (-3.8,1) -- (-4.8,1);

  \draw (-4.6,1.7) node {\small string};
  \draw (-1.1,1.7) node {\small tokens};
  \draw ( 2.3,1.7) node {\small AST};
\end{tikzpicture}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]

\tt
\begin{center}\large
\code{"if true then then 42 else +"}
\end{center}


\begin{tabular}{@{}l}
KEYWORD: \\
\hspace{5mm}{if}, {then}, {else},\\ 
WHITESPACE:\\
\hspace{5mm}{" "}, {$\backslash$n},\\ 
IDENT:\\
\hspace{5mm}LETTER $\cdot$ (LETTER + DIGIT + {\_})$^*$\\ 
NUM:\\
\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + {0}\\
OP:\\
\hspace{5mm}{+}\\
COMMENT:\\
\hspace{5mm}{$\slash$*} $\cdot$ $\sim$(ALL$^*$ $\cdot$ (*$\slash$) $\cdot$ ALL$^*$) $\cdot$ {*$\slash$}
\end{tabular}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]

\tt
\begin{center}\large
\code{"if true then then 42 else +"}
\end{center}

\only<1>{
\small\begin{tabular}{l}
KEYWORD(if),\\ 
WHITESPACE,\\ 
IDENT(true),\\ 
WHITESPACE,\\ 
KEYWORD(then),\\ 
WHITESPACE,\\ 
KEYWORD(then),\\ 
WHITESPACE,\\ 
NUM(42),\\ 
WHITESPACE,\\ 
KEYWORD(else),\\ 
WHITESPACE,\\ 
OP(+)
\end{tabular}}

\only<2>{
\small\begin{tabular}{l}
KEYWORD(if),\\ 
IDENT(true),\\ 
KEYWORD(then),\\ 
KEYWORD(then),\\ 
NUM(42),\\ 
KEYWORD(else),\\ 
OP(+)
\end{tabular}}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]


There is one small problem with the tokenizer. How should we 
tokenize:

\begin{center}
\large\code{"x-3"}
\end{center}

\tt
\begin{tabular}{@{}l}
ID: \ldots\\
OP:\\
\hspace{5mm}\texttt{"+"}, \texttt{"-"}\\
NUM:\\
\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + {''0''}\\
NUMBER:\\
\hspace{5mm}NUM +  (\texttt{"-"} $\cdot$ NUM)\\
\end{tabular}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{POSIX: Two Rules}

\begin{itemize}
\item Longest match rule (``maximal munch rule''): The 
longest initial substring matched by any regular expression is taken
as the next token.\bigskip

\item Rule priority:
For a particular longest initial substring, the first regular
expression that can match determines the token.
\end{itemize}\bigskip\bigskip\pause

\small
\hfill most posix matchers are buggy\\
\footnotesize
\hfill \url{http://www.haskell.org/haskellwiki/Regex_Posix}

%\url{http://www.technologyreview.com/tr10/?year=2011}  
%finite deterministic automata/ nondeterministic automaton
%\item problem with infix operations, for example i-12

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Sulzmann Matcher}

We want to match the string \bl{$abc$} using \bl{$r_1$}:

\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1)  {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};\pause
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};\pause
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};\pause
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};\pause
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm]  (r4) -- (v4);\pause
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};\pause
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};\pause
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};\pause
\draw[->,line width=0.5mm]  (r3) -- (v3);
\draw[->,line width=0.5mm]  (r2) -- (v2);
\draw[->,line width=0.5mm]  (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Regexes and Values}

Regular expressions and their corresponding values:

\begin{center}
\begin{columns}
\begin{column}{3cm}
\begin{tabular}{@{}rrl@{}}
  \bl{$r$} & \bl{$::=$}  & \bl{$\varnothing$}\\
           & \bl{$\mid$} & \bl{$\epsilon$}   \\
           & \bl{$\mid$} & \bl{$c$}          \\
           & \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
           & \bl{$\mid$} & \bl{$r_1 + r_2$}   \\
  \\
           & \bl{$\mid$} & \bl{$r^*$}         \\
  \\
  \end{tabular}
\end{column}
\begin{column}{3cm}
\begin{tabular}{@{\hspace{-7mm}}rrl@{}}
  \bl{$v$} & \bl{$::=$}  & \\
           &             & \bl{$Empty$}   \\
           & \bl{$\mid$} & \bl{$Char(c)$}          \\
           & \bl{$\mid$} & \bl{$Seq(v_1,v_2)$}\\
           & \bl{$\mid$} & \bl{$Left(v)$}   \\
           & \bl{$\mid$} & \bl{$Right(v)$}  \\
           & \bl{$\mid$} & \bl{$[]$}      \\
           & \bl{$\mid$} & \bl{$[v_1,\ldots\,v_n]$} \\
  \end{tabular}
\end{column}
\end{columns}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Mkeps}

Finding a (posix) value for recognising the empty string:

\begin{center}
\begin{tabular}{lcl}
  \bl{$mkeps\,\epsilon$}  & \bl{$\dn$}  & \bl{$Empty$}\\
  \bl{$mkeps\,r_1 + r_2$} & \bl{$\dn$}  & \bl{if $nullable(r_1)$}  \\
                          &             & \bl{then $Left(mkeps(r_1))$}\\
                          &             & \bl{else $Right(mkeps(r_2))$}\\
  \bl{$mkeps\,r_1 \cdot r_2$}  & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),mkeps(r_2))$}\\
  \bl{$mkeps\,r^*$}      & \bl{$\dn$} & \bl{$[]$}  \\
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Inject}

Injecting (``Adding'') a character to a value\\

\begin{center}
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
  \bl{$inj\,(c)\,c\,Empty$} & \bl{$\dn$}  & \bl{$Char\,c$}\\
  \bl{$inj\,(r_1 + r_2)\,c\,Left(v)$} & \bl{$\dn$}  & \bl{$Left(inj\,r_1\,c\,v)$}\\
  \bl{$inj\,(r_1 + r_2)\,c\,Right(v)$} & \bl{$\dn$}  & \bl{$Right(inj\,r_2\,c\,v)$}\\
  \bl{$inj\,(r_1 \cdot r_2)\,c\,Seq(v_1,v_2)$} & \bl{$\dn$}  & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
  \bl{$inj\,(r_1 \cdot r_2)\,c\,Left(Seq(v_1,v_2))$} & \bl{$\dn$}  & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
  \bl{$inj\,(r_1 \cdot r_2)\,c\,Right(v)$} & \bl{$\dn$}  & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\
  \bl{$inj\,(r^*)\,c\,Seq(v,vs)$} & \bl{$\dn$}  & \bl{$inj\,r\,c\,v\,::\,vs$}\\
\end{tabular}
\end{center}\bigskip

\footnotesize
\bl{$inj$}: 1st arg $\mapsto$ a rexp; 2nd arg $\mapsto$ a character; 3rd arg $\mapsto$ a value 
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lexing}

\begin{center}
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
  \bl{$lex\,r\,[]$} & \bl{$\dn$}  & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
  \bl{$lex\,r\,c::s$} & \bl{$\dn$}  & \bl{$inj\,r\,c\,lex(der(c,r), s)$}\\
\end{tabular}
\end{center}

\footnotesize
\bl{$lex$}: returns a value

\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.0cm,every node/.style={minimum size=6mm}]
\node (r1)  {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm]  (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm]  (r3) -- (v3);
\draw[->,line width=0.5mm]  (r2) -- (v2);
\draw[->,line width=0.5mm]  (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Records}

\begin{itemize}
\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: \bl{$Rec(x,v)$}\medskip\pause

\item \bl{$nullable(x:r) \dn nullable(r)$}
\item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$}
\item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$}
\item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$}
\end{itemize}\bigskip\bigskip\pause

\small
for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{While Tokens}

\begin{center}
\begin{tabular}{rcl}
\pcode{WHILE\_REGS} & $\dn$ & \raisebox{-1mm}{\large(}\pcode{("k" : KEYWORD)} +\\ 
                  &       & \phantom{(}\pcode{("i" : ID)} +\\ 
                  &       & \phantom{(}\pcode{("o" : OP)} + \\
                  &       & \phantom{(}\pcode{("n" : NUM)} + \\
                  &       & \phantom{(}\pcode{("s" : SEMI)} +\\ 
                  &       & \phantom{(}\pcode{("p" : (LPAREN + RPAREN))} +\\ 
                  &       & \phantom{(}\pcode{("b" : (BEGIN + END))} +\\ 
                  &       & \phantom{(}\pcode{("w" : WHITESPACE)}\raisebox{-1mm}{\large)$^*$}
\end{tabular}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Simplification}

\begin{itemize}
\item If we simplify after the derivative, then we are builing the
value for the simplified regular expression, but \emph{not} for the original
regular expression.
\end{itemize}

\begin{center}
\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1)  {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm]  (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm]  (r3) -- (v3);
\draw[->,line width=0.5mm]  (r2) -- (v2);
\draw[->,line width=0.5mm]  (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Rectification}

\def\arraystretch{1.05}
\begin{center}
\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l@{\hspace{5mm}}l}
& & & \hspace{5mm}rectification \\
& & & \hspace{5mm}functions:\\
\bl{$r \cdot \varnothing$} & $\mapsto$ & \bl{$\varnothing$} & \\ 
\bl{$\varnothing \cdot r$} & $\mapsto$ & \bl{$\varnothing$} & \\ 
\bl{$r \cdot \epsilon$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Seq(f_1\,v, f_2\,Empty)$}\\ 
\bl{$\epsilon \cdot r$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Seq(f_1\,Empty, f_2\,v)$}\\ 
\bl{$r + \varnothing$} & $\mapsto$ & \bl{$r$}   & \bl{$\lambda f_1\,f_2\,v.\, Left(f_1\,v)$}\\ 
\bl{$\varnothing + r$} & $\mapsto$ & \bl{$r$}   & \bl{$\lambda f_1\,f_2\,v.\, Right(f_2\,v)$}\\
\bl{$r + r$} & $\mapsto$ & \bl{$r$} & \bl{$\lambda f_1\,f_2\,v.\, Left(f_1\,v)$}
\end{tabular}
\end{center}\medskip\pause

\small
old \bl{$simp$} returns a rexp;\\
new \bl{$simp$} returns a rexp and a rectification~fun.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Lexing with Simplification}

\begin{center}
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
  \bl{$lex\,r\,[]$} & \bl{$\dn$}  & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
  \bl{$lex\,r\,c::s$} & \bl{$\dn$}  & \bl{let $(r', frect) = simp(der(c, r))$}\smallskip\\
                      & & \bl{$inj\,r\,c\,(frect(lex(r', s)))$}\\
\end{tabular}
\end{center}\bigskip

\begin{center}\small
\begin{tikzpicture}[node distance=1.1cm,every node/.style={minimum size=7mm}]
\node (r1)  {\bl{$r_1$}};
\node (r2) [right=of r1] {\bl{$r_2$}};
\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
\node (r3) [right=of r2] {\bl{$r_3$}};
\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
\node (r4) [right=of r3] {\bl{$r_4$}};
\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
\node (v4) [below=of r4] {\bl{$v_4$}};
\draw[->,line width=1mm]  (r4) -- (v4);
\node (v3) [left=of v4] {\bl{$v_3$}};
\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
\node (v2) [left=of v3] {\bl{$v_2$}};
\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
\node (v1) [left=of v2] {\bl{$v_1$}};
\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
\draw[->,line width=0.5mm]  (r3) -- (v3);
\draw[->,line width=0.5mm]  (r2) -- (v2);
\draw[->,line width=0.5mm]  (r1) -- (v1);
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
\end{tikzpicture}
\end{center}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

\end{document}

%%% Local Variables:  
%%% mode: latex
%%% TeX-master: t
%%% End: