updated
authorChristian Urban <christian dot urban at kcl dot ac dot uk>
Mon, 19 Oct 2015 23:49:25 +0100
changeset 358 b3129cff41e9
parent 357 603e171a7b48
child 359 db106e5b7c4d
updated
coursework/cw01.pdf
coursework/cw01.tex
coursework/cw02.pdf
coursework/cw02.tex
coursework/cw03.pdf
coursework/cw03.tex
coursework/cw04.pdf
coursework/cw04.tex
handouts/ho05-bak.tex
handouts/ho05.pdf
handouts/ho05.tex
slides/slides05.pdf
slides/slides05.tex
Binary file coursework/cw01.pdf has changed
--- a/coursework/cw01.tex	Mon Oct 19 15:03:44 2015 +0100
+++ b/coursework/cw01.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -6,14 +6,17 @@
 
 \section*{Coursework 1 (Strand 1)}
 
-This coursework is worth 5\% and is due on 20 October at 16:00. You
-are asked to implement a regular expression matcher and submit a
-document containing the answers for the questions below. You can do
-the implementation in any programming language you like, but you need
-to submit the source code with which you answered the
-questions. However, the coursework will \emph{only} be judged
-according to the answers. You can submit your answers in a txt-file or
-pdf.
+This coursework is worth 5\% and is due on 20 October at
+16:00. You are asked to implement a regular expression matcher
+and submit a document containing the answers for the questions
+below. You can do the implementation in any programming
+language you like, but you need to submit the source code with
+which you answered the questions, otherwise a mark of 0\% will
+be awarded. However, the coursework will \emph{only} be judged
+according to the answers. You can submit your answers in a
+txt-file or pdf.
+
+
 
 \subsubsection*{Disclaimer}
 
Binary file coursework/cw02.pdf has changed
--- a/coursework/cw02.tex	Mon Oct 19 15:03:44 2015 +0100
+++ b/coursework/cw02.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -6,27 +6,28 @@
 
 \section*{Coursework 2 (Strand 1)}
 
-\noindent
-This coursework is worth 5\% and is due on 6 November at 16:00. You
-are asked to implement the Sulzmann tokeniser for the WHILE language.
-You need to submit a document containing the answers for the questions
-below. You can do the implementation in any programming language you
-like, but you need to submit the source code with which you answered
-the questions. However, the coursework will \emph{only} be judged
-according to the answers. You can submit your answers in a txt-file or
-as pdf.
+\noindent This coursework is worth 5\% and is due on 6
+November at 16:00. You are asked to implement the Sulzmann \&
+Lu tokeniser for the WHILE language. You can do the
+implementation in any programming language you like, but you
+need to submit the source code with which you answered the
+questions, otherwise a mark of 0\% will be awarded. However,
+the coursework will \emph{only} be judged according to the
+answers. You can submit your answers in a txt-file or as pdf.
 
 \subsection*{Disclaimer}
 
-It should be understood that the work you submit represents your own
-effort.  You have not copied from anyone else. An exception is the
-Scala code I showed during the lectures, which you can use.
-You can also use your own code from the CW~1.
+It should be understood that the work you submit represents
+your own effort. You have not copied from anyone else. An
+exception is the Scala code I showed during the lectures,
+which you can use. You can also use your own code from the
+CW~1.
 
 \subsection*{Question 1 (marked with 1\%)}
 
-To implement a tokeniser for the WHILE language, you first need to design 
-the appropriate regular expressions for the following eight syntactic entities:
+To implement a tokeniser for the WHILE language, you first
+need to design the appropriate regular expressions for the
+following eight syntactic entities:
 
 \begin{enumerate}
 \item keywords are
@@ -95,17 +96,17 @@
 
 \subsection*{Question 2 (marked with 3\%)}
 
-Implement the Sulzmann tokeniser from the lectures. For this you need
-to implement the functions $nullable$ and $der$ (you can use your code
-from CW 1), as well as $mkeps$ and $inj$. These functions need to be
-appropriately extended for the extended regular expressions from
-Q1. Also add the record regular expression from the lectures and
-implement a function, say \pcode{env}, that returns all assignments
-from a value (such that you can extract easily the tokens from a
-value). 
+Implement the Sulzmann \& Lu tokeniser from the lectures. For
+this you need to implement the functions $nullable$ and $der$
+(you can use your code from CW 1), as well as $mkeps$ and
+$inj$. These functions need to be appropriately extended for
+the extended regular expressions from Q1. Also add the record
+regular expression from the lectures and implement a function,
+say \pcode{env}, that returns all assignments from a value
+(such that you can extract easily the tokens from a value). 
 
-The functions $mkeps$ and $inj$ return values. Calculate
-the value for your regular expressions from Q1 and the string
+The functions $mkeps$ and $inj$ return values. Calculate the
+value for your regular expressions from Q1 and the string
 
 \begin{center}
 \code{"read n;"}
Binary file coursework/cw03.pdf has changed
--- a/coursework/cw03.tex	Mon Oct 19 15:03:44 2015 +0100
+++ b/coursework/cw03.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -6,12 +6,21 @@
 
 \section*{Coursework 3 (Strand 1)}
 
-\noindent
-This coursework is worth 5\% and is due on 26 November at 16:00. You 
-are asked to implement a parser for the WHILE language and also 
-an interpreter. You should use the lexer from the previous
-coursework for the parser. 
+\noindent This coursework is worth 5\% and is due on 27
+November at 16:00. You are asked to implement a parser for the
+WHILE language and also an interpreter. You can do the
+implementation in any programming language you like, but you
+need to submit the source code with which you answered the
+questions, otherwise a mark of 0\% will be awarded. You should
+use the lexer from the previous coursework for the parser. 
 
+\subsection*{Disclaimer}
+
+It should be understood that the work you submit represents
+your own effort. You have not copied from anyone else. An
+exception is the Scala code I showed during the lectures,
+which you can use. You can also use your own code from the
+CW~1 and CW~2.
 
 
 \subsection*{Question 1 (marked with 1\%)}
Binary file coursework/cw04.pdf has changed
--- a/coursework/cw04.tex	Mon Oct 19 15:03:44 2015 +0100
+++ b/coursework/cw04.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -12,7 +12,24 @@
 \noindent This coursework is worth 10\% and is due on 11
 December at 16:00. You are asked to implement a compiler for
 the WHILE language that targets the assembler language
-provided by Jasmin. This assembler is available from
+provided by Jasmin or Krakatau. You can do the implementation
+in any programming language you like, but you need to submit
+the source code with which you answered the questions,
+otherwise a mark of 0\% will be awarded. You should use the
+lexer and parser from the previous courseworks. 
+
+\subsection*{Disclaimer}
+
+It should be understood that the work you submit represents
+your own effort. You have not copied from anyone else. An
+exception is the Scala code I showed during the lectures,
+which you can use. You can also use your own code from the
+CW~1, CW~2 and CW~3.
+
+
+\subsection*{Assemblers}
+
+The Jasmin assembler is available from
 
 \begin{center}
 \url{http://jasmin.sourceforge.net}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/handouts/ho05-bak.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -0,0 +1,290 @@
+\documentclass{article}
+\usepackage{../style}
+\usepackage{../langs}
+\usepackage{../graphics}
+
+\begin{document}
+
+\section*{Handout 5 (Lexing)}
+
+Whenever you want to design a new programming language or
+implement a compiler for an existing language, the first task
+is to fix the basic ``words'' of the language. For example
+what are the keywords, or reserved words, of the language,
+what are permitted identifiers, numbers, expressions and so
+on. One convenient way to do this is, of course, by using
+regular expressions. 
+
+In this course we want to take a closer look at the WHILE
+programming language. This is a simple imperative programming
+language consisting of arithmetic expressions, assignments,
+if-statements and loops. For example the Fibonacci program can
+be written in this language as follows:
+
+\begin{center}
+\mbox{\lstinputlisting[language=while]{../progs/fib.while}}
+\end{center}
+
+\noindent
+The keywords in this language will be
+
+\begin{center}
+\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read}
+\end{center}
+
+\noindent
+In addition we will have some common operators, such as \texttt{<}, \texttt{>}, \texttt{:=} and so on, as well as numbers
+and strings (which we however ignore for the moment). We also need to specify what the ``whitespace''
+is in our programming language and what comments should look like.
+As a first try, we might specify the regular expressions for our language roughly as follows
+
+\begin{center}
+\begin{tabular}{lcl}
+$\textit{LETTER}$ & $:=$ & $\texttt{a} + \texttt{A} + \texttt{b} + \texttt{B} + \ldots$\\
+$\textit{DIGIT}$ & $:=$ & $\texttt{0} + \texttt{1} + \texttt{2} + \ldots$\\
+$\textit{KEYWORD}$ & $:=$ & $\texttt{while}  + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\
+$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\ 
+$\textit{OP}$      &  $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\
+$\textit{NUM}$   &  $:=$ & $\textit{DIGIT}^+$\\
+$\textit{WHITESPACE}$ & $:=$ & $("\hspace{2mm}" + \backslash\texttt{n})^+$
+\end{tabular}
+\end{center}
+
+Having the regular expressions in place, the problem we have to solve is: 
+given a string of our programming language, which regular expression 
+matches which part of the string. By solving this problem, we can split up a string 
+of our language into components. 
+For example given the input string
+
+\begin{center}
+\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}}
+\end{center}
+
+\noindent
+we expect it is split up as follows
+
+\begin{center}
+\tt
+\Grid{if}\,
+\Grid{\VS}\, 
+\Grid{true}\, 
+\Grid{\VS}\, 
+\Grid{then}\, 
+\Grid{\VS}\, 
+\Grid{x}\, 
+\Grid{+}\, 
+\Grid{2}\,
+\Grid{\VS}\, 
+\Grid{else}\,
+\Grid{\VS}\,
+\Grid{x}\,
+\Grid{+}\,
+\Grid{3}
+\end{center}
+
+\noindent
+This process of splitting up an input string into components is often called \emph{lexing} or \emph{scanning}.
+It is usually the first phase of a compiler. Note that the separation into words cannot, in general, 
+be done by just looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace
+in the example above, this is not always the case. As can be seen the three components in \texttt{x+2} are 
+not separated by any whitespace. Another reason for recognising whitespaces explicitly is
+that in some languages, for example Python, whitespaces matters, that is carry meaning. However in 
+our small language we will eventually just filter out all whitespaces and also all comments.
+
+Lexing not just separates a string into its components, but also classifies the components, meaning it explicitly records that \texttt{if} is a keyword,  \VS{} a whitespace, \texttt{true} an identifier and so on.
+For the moment, though, we will only focus on the simpler problem of just splitting up 
+a string into components.
+
+There are a few subtleties  we need to consider first. For example, say the string is
+
+\begin{center}
+\texttt{\Grid{iffoo\VS}}\;\ldots
+\end{center}
+
+\noindent
+then there are two possibilities for how it can be split up: either we regard the input as the keyword \texttt{if} followed
+by the identifier \texttt{foo} (both regular expressions match) or we regard \texttt{iffoo} as a 
+single identifier. The choice that is often made in lexers is to look for the longest possible match.
+This leaves  \texttt{iffoo} as the only match in this case (since it is longer than \texttt{if}).
+
+Unfortunately, the convention about the longest match does not yet make the process 
+of lexing completely deterministic. Consider the string
+
+\begin{center}
+\texttt{\Grid{then\VS}}\;\dots
+\end{center}
+
+\noindent
+Clearly, this string should be identified as a keyword. The problem is that also the regular expression \textit{IDENT} for identifiers matches this string. To overcome this ambiguity we need to rank our 
+regular expressions. In our running example we just use the ranking
+
+\[
+\textit{KEYWORD} < \textit{IDENT} < \textit{OP} < \ldots
+\]
+
+\noindent
+So even if both regular expressions match in the example above,
+we give preference to the regular expression for keywords.
+
+Let us see how our algorithm for lexing works in detail. In addition to the functions $nullable$
+and $der$, it will  use the function \emph{zeroable} defined as follows:
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+$zeroable(\varnothing)$      & $\dn$ & $true$\\
+$zeroable(\epsilon)$           & $\dn$ &  $f\!alse$\\
+$zeroable (c)$                    & $\dn$ &  $f\!alse$\\
+$zeroable (r_1 + r_2)$        & $\dn$ &  $zeroable(r_1) \wedge zeroable(r_2)$ \\ 
+$zeroable (r_1 \cdot r_2)$  & $\dn$ &  $zeroable(r_1) \vee zeroable(r_2)$ \\
+$zeroable (r^*)$                  & $\dn$ & $f\!alse$\\
+\end{tabular}
+\end{center}
+
+\noindent
+Recall that the function $nullable(r)$ tests whether a regular expression
+can match the empty string. The function $zeroable$, on the other hand, tests whether a regular
+expression cannot match anything at all. The mathematical way of stating this
+property is
+
+\begin{center}
+$zeroable(r)$ if and only if $L(r) = \varnothing$
+\end{center}
+
+\noindent
+For what follows let us fix a set of regular expressions $rs$ as being 
+
+\begin{center}
+\textit{KEYWORD}, \textit{IDENT}, \textit{WHITESPACE}
+\end{center}
+
+\noindent
+specifying the ``words'' 
+of our programming language. The algorithm takes as input the $rs$ and a string, say
+
+\begin{center}
+\texttt{\grid{$c_1$}\grid{$c_2$}\grid{$c_3$}\grid{$c_4$}}\;\ldots
+\end{center}
+
+\noindent
+and tries to chop off one word from the beginning of the string. If none of the
+regular expression in $rs$ matches, we will just return
+the empty string.
+
+The crucial idea in the algorithm is to build the derivatives of all regular expressions in $rs$ with respect
+to the first character $c_1$. Then we take the results and continue with 
+building the derivatives with respect to $c_2$ until we have either exhausted our 
+input string or all of the regular expressions are ``zeroable''.  Suppose the input string is 
+
+\begin{center}
+\texttt{\Grid{if2\VS}}\;\dots
+\end{center}
+
+\noindent
+then building the derivatives with respect to \texttt{i} gives
+
+\begin{center}
+\begin{tabular}{l|c}
+ & $zeroable$\\\hline
+ $der\;\texttt{i}\;(\textit{KEYWORD})$      & no\\
+ $der\;\texttt{i}\;(\textit{IDENT})$              & no\\
+ $der\;\texttt{i}\;(\textit{WHITESPACE})$ & yes\\
+\end{tabular}
+\end{center}
+
+\noindent
+We can eliminate \textit{WHITESPACE} as a potential candidate, because
+no derivative can go from $zeroable = \text{yes}$ to no. That leaves the other
+two regular expressions as potential candidate and we have to consider the
+next character, \texttt{f}, from the input string
+
+\begin{center}
+\begin{tabular}{l|c}
+ & $zeroable$\\\hline
+ $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$      & no\\
+ $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))$              & no\\
+\end{tabular}
+\end{center}
+
+\noindent
+Since both are `no', we have to continue with \texttt{2} from the input string
+
+\begin{center}
+\begin{tabular}{l|c}
+ & $zeroable$\\\hline
+ $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD})))$      & yes\\
+ $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$              & no\\
+\end{tabular}
+\end{center}
+
+\noindent
+Although we now know that the beginning is definitely an \textit{IDENT}, we do not yet
+know how much of the input string should be considered as an \textit{IDENT}. So we
+still have to continue and consider the next derivative.
+
+\begin{center}
+\begin{tabular}{l|c}
+ & $zeroable$\\\hline
+ $der\;\texttt{\VS}\;(der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))))$              & yes\\
+\end{tabular}
+\end{center}
+
+\noindent
+Since the answer is now `yes' also in this case, we can stop: once all derivatives are
+zeroable, we know the regular expressions cannot match any more letters from
+the input. In this case we only have to go back to the derivative that is nullable.  In this case it
+is
+
+\begin{center}
+$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ 
+\end{center} 
+
+\noindent
+which means we recognised an identifier. In case where there is a choice of more than one
+regular expressions that are nullable, then we choose the one with the highest precedence. 
+You can try out such a case with the input string
+
+\begin{center}
+\texttt{\Grid{then\VS}}\;\dots
+\end{center}
+
+\noindent
+which can both be recognised as a keyword, but also an identifier. 
+
+While in the example above the last nullable derivative is the one directly before
+the derivative turns zeroable, this is not always the case. Imagine, identifiers can be 
+letters, as permuted by the regular expression \textit{IDENT}, but must end with an 
+undercore.
+
+\begin{center}
+\begin{tabular}{lcl}
+$\textit{NEWIDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^* \cdot \_$\\ 
+\end{tabular}
+\end{center}
+
+\noindent
+If we use \textit{NEWIDENT} with the input string
+
+\begin{center}
+\texttt{\Grid{iffoo\VS}}\;\ldots
+\end{center}
+
+\noindent
+then it will only become $zeroable$ after the $\VS$ has been analysed. In this case we have to go back to the
+first \texttt{f} because only 
+
+\begin{center}
+ $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$  
+\end{center}
+
+\noindent
+is nullable. As a result we recognise successfully the keyword \texttt{if} and the remaining
+string needs to be consumed by other regular expressions or lead to a lexing error.
+
+
+
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
Binary file handouts/ho05.pdf has changed
--- a/handouts/ho05.tex	Mon Oct 19 15:03:44 2015 +0100
+++ b/handouts/ho05.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -1,290 +1,472 @@
 \documentclass{article}
 \usepackage{../style}
 \usepackage{../langs}
-\usepackage{../graphics}
+
 
 \begin{document}
 
-\section*{Handout 5 (Lexing)}
+\section*{Handout 6 (Parser Combinators)}
 
-Whenever you want to design a new programming language or
-implement a compiler for an existing language, the first task
-is to fix the basic ``words'' of the language. For example
-what are the keywords, or reserved words, of the language,
-what are permitted identifiers, numbers, expressions and so
-on. One convenient way to do this is, of course, by using
-regular expressions. 
-
-In this course we want to take a closer look at the WHILE
-programming language. This is a simple imperative programming
-language consisting of arithmetic expressions, assignments,
-if-statements and loops. For example the Fibonacci program can
-be written in this language as follows:
+While regular expressions are very useful for lexing and for recognising
+many patterns in strings (like email addresses), they have their limitations. For
+example there is no regular expression that can recognise the language 
+$a^nb^n$. Another example for which there exists no regular expression is the language of well-parenthesised 
+expressions.  In languages like Lisp, which use parentheses rather
+extensively, it might be of interest whether the following two expressions
+are well-parenthesised (the left one is, the right one is not):
 
 \begin{center}
-\mbox{\lstinputlisting[language=while]{../progs/fib.while}}
+$(((()()))())$  \hspace{10mm} $(((()()))()))$
 \end{center}
 
 \noindent
-The keywords in this language will be
-
-\begin{center}
-\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read}
-\end{center}
+Not being able to solve such recognition problems is a serious limitation.
+In order to solve such recognition problems, we need more powerful 
+techniques than regular expressions. We will in particular look at \emph{context-free
+languages}. They include the regular languages as the picture below shows:
 
-\noindent
-In addition we will have some common operators, such as \texttt{<}, \texttt{>}, \texttt{:=} and so on, as well as numbers
-and strings (which we however ignore for the moment). We also need to specify what the ``whitespace''
-is in our programming language and what comments should look like.
-As a first try, we might specify the regular expressions for our language roughly as follows
 
 \begin{center}
-\begin{tabular}{lcl}
-$\textit{LETTER}$ & $:=$ & $\texttt{a} + \texttt{A} + \texttt{b} + \texttt{B} + \ldots$\\
-$\textit{DIGIT}$ & $:=$ & $\texttt{0} + \texttt{1} + \texttt{2} + \ldots$\\
-$\textit{KEYWORD}$ & $:=$ & $\texttt{while}  + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\
-$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\ 
-$\textit{OP}$      &  $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\
-$\textit{NUM}$   &  $:=$ & $\textit{DIGIT}^+$\\
-$\textit{WHITESPACE}$ & $:=$ & $("\hspace{2mm}" + \backslash\texttt{n})^+$
-\end{tabular}
-\end{center}
+\begin{tikzpicture}
+[rect/.style={draw=black!50, 
+ top color=white,bottom color=black!20, 
+ rectangle, very thick, rounded corners}]
 
-Having the regular expressions in place, the problem we have to solve is: 
-given a string of our programming language, which regular expression 
-matches which part of the string. By solving this problem, we can split up a string 
-of our language into components. 
-For example given the input string
-
-\begin{center}
-\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}}
+\draw (0,0) node [rect, text depth=30mm, text width=46mm] {\small all languages};
+\draw (0,-0.4) node [rect, text depth=20mm, text width=44mm] {\small decidable languages};
+\draw (0,-0.65) node [rect, text depth=13mm] {\small context sensitive languages};
+\draw (0,-0.84) node [rect, text depth=7mm, text width=35mm] {\small context-free languages};
+\draw (0,-1.05) node [rect] {\small regular languages};
+\end{tikzpicture}
 \end{center}
 
 \noindent
-we expect it is split up as follows
+Context-free languages play an important role in `day-to-day' text processing and in
+programming languages. Context-free languages are usually specified by grammars.
+For example a grammar for well-parenthesised  expressions is
 
 \begin{center}
-\tt
-\Grid{if}\,
-\Grid{\VS}\, 
-\Grid{true}\, 
-\Grid{\VS}\, 
-\Grid{then}\, 
-\Grid{\VS}\, 
-\Grid{x}\, 
-\Grid{+}\, 
-\Grid{2}\,
-\Grid{\VS}\, 
-\Grid{else}\,
-\Grid{\VS}\,
-\Grid{x}\,
-\Grid{+}\,
-\Grid{3}
+$P \;\;\rightarrow\;\; ( \cdot  P \cdot ) \cdot P \;|\; \epsilon$
+\end{center}
+ 
+\noindent
+In general grammars consist of finitely many rules built up from \emph{terminal symbols} 
+(usually lower-case letters) and \emph{non-terminal symbols} (upper-case letters).  Rules 
+have the shape
+
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}$
+\end{center}
+ 
+\noindent
+where on the left-hand side is a single non-terminal and on the right a string consisting
+of both terminals and non-terminals including the $\epsilon$-symbol for indicating the
+empty string. We use the convention  to separate components on
+the right hand-side by using the $\cdot$ symbol, as in the grammar for well-parenthesised  expressions.
+We also use the convention to use $|$ as a shorthand notation for several rules. For example
+
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}_1 \;|\; \textit{rhs}_2$
 \end{center}
 
 \noindent
-This process of splitting up an input string into components is often called \emph{lexing} or \emph{scanning}.
-It is usually the first phase of a compiler. Note that the separation into words cannot, in general, 
-be done by just looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace
-in the example above, this is not always the case. As can be seen the three components in \texttt{x+2} are 
-not separated by any whitespace. Another reason for recognising whitespaces explicitly is
-that in some languages, for example Python, whitespaces matters, that is carry meaning. However in 
-our small language we will eventually just filter out all whitespaces and also all comments.
-
-Lexing not just separates a string into its components, but also classifies the components, meaning it explicitly records that \texttt{if} is a keyword,  \VS{} a whitespace, \texttt{true} an identifier and so on.
-For the moment, though, we will only focus on the simpler problem of just splitting up 
-a string into components.
-
-There are a few subtleties  we need to consider first. For example, say the string is
+means that the non-terminal $NT$ can be replaced by either $\textit{rhs}_1$ or $\textit{rhs}_2$.
+If there are more than one non-terminal on the left-hand side of the rules, then we need to indicate
+what is the \emph{starting} symbol of the grammar. For example the grammar for arithmetic expressions
+can be given as follows
 
 \begin{center}
-\texttt{\Grid{iffoo\VS}}\;\ldots
-\end{center}
-
-\noindent
-then there are two possibilities for how it can be split up: either we regard the input as the keyword \texttt{if} followed
-by the identifier \texttt{foo} (both regular expressions match) or we regard \texttt{iffoo} as a 
-single identifier. The choice that is often made in lexers is to look for the longest possible match.
-This leaves  \texttt{iffoo} as the only match in this case (since it is longer than \texttt{if}).
-
-Unfortunately, the convention about the longest match does not yet make the process 
-of lexing completely deterministic. Consider the string
-
-\begin{center}
-\texttt{\Grid{then\VS}}\;\dots
-\end{center}
-
-\noindent
-Clearly, this string should be identified as a keyword. The problem is that also the regular expression \textit{IDENT} for identifiers matches this string. To overcome this ambiguity we need to rank our 
-regular expressions. In our running example we just use the ranking
-
-\[
-\textit{KEYWORD} < \textit{IDENT} < \textit{OP} < \ldots
-\]
-
-\noindent
-So even if both regular expressions match in the example above,
-we give preference to the regular expression for keywords.
-
-Let us see how our algorithm for lexing works in detail. In addition to the functions $nullable$
-and $der$, it will  use the function \emph{zeroable} defined as follows:
-
-\begin{center}
-\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
-$zeroable(\varnothing)$      & $\dn$ & $true$\\
-$zeroable(\epsilon)$           & $\dn$ &  $f\!alse$\\
-$zeroable (c)$                    & $\dn$ &  $f\!alse$\\
-$zeroable (r_1 + r_2)$        & $\dn$ &  $zeroable(r_1) \wedge zeroable(r_2)$ \\ 
-$zeroable (r_1 \cdot r_2)$  & $\dn$ &  $zeroable(r_1) \vee zeroable(r_2)$ \\
-$zeroable (r^*)$                  & $\dn$ & $f\!alse$\\
+\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $N$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$\\
+$N$ & $\rightarrow$ & $N \cdot N \;|\; 0 \;|\; 1 \;|\: \ldots \;|\; 9$ 
 \end{tabular}
 \end{center}
 
 \noindent
-Recall that the function $nullable(r)$ tests whether a regular expression
-can match the empty string. The function $zeroable$, on the other hand, tests whether a regular
-expression cannot match anything at all. The mathematical way of stating this
-property is
+where $E$ is the starting symbol. A \emph{derivation} for a grammar
+starts with the staring symbol of the grammar and in each step replaces one
+non-terminal by a right-hand side of a rule. A derivation ends with a string
+in which only terminal symbols are left. For example a derivation for the
+string $(1 + 2) + 3$ is as follows:
+
+\begin{center}
+\begin{tabular}{lll}
+$E$ & $\rightarrow$ & $E+E$\\
+       & $\rightarrow$ & $(E)+E$\\
+       & $\rightarrow$ & $(E+E)+E$\\
+       & $\rightarrow$ & $(E+E)+N$\\
+       & $\rightarrow$ & $(E+E)+3$\\
+       & $\rightarrow$ & $(N+E)+3$\\	
+       & $\rightarrow^+$ & $(1+2)+3$\\
+\end{tabular} 
+\end{center}
+
+\noindent
+The \emph{language} of a context-free grammar $G$ with start symbol $S$ 
+is defined as the set of strings derivable by a derivation, that is
 
 \begin{center}
-$zeroable(r)$ if and only if $L(r) = \varnothing$
+$\{c_1\ldots c_n \;|\; S \rightarrow^* c_1\ldots c_n \;\;\text{with all} \; c_i \;\text{being non-terminals}\}$
+\end{center}
+
+\noindent
+A \emph{parse-tree} encodes how a string is derived with the starting symbol on 
+top and each non-terminal containing a subtree for how it is replaced in a derivation.
+The parse tree for the string $(1 + 23)+4$ is as follows:
+
+\begin{center}
+\begin{tikzpicture}[level distance=8mm, black]
+  \node {$E$}
+    child {node {$E$} 
+       child {node {$($}}
+       child {node {$E$}       
+         child {node {$E$} child {node {$N$} child {node {$1$}}}}
+         child {node {$+$}}
+         child {node {$E$} 
+            child {node {$N$} child {node {$2$}}}
+            child {node {$N$} child {node {$3$}}}
+            } 
+        }
+       child {node {$)$}}
+     }
+     child {node {$+$}}
+     child {node {$E$}
+        child {node {$N$} child {node {$4$}}}
+     };
+\end{tikzpicture}
 \end{center}
 
 \noindent
-For what follows let us fix a set of regular expressions $rs$ as being 
+We are often interested in these parse-trees since they encode the structure of
+how a string is derived by a grammar. Before we come to the problem of constructing
+such parse-trees, we need to consider the following two properties of grammars.
+A grammar is \emph{left-recursive} if there is a derivation starting from a non-terminal, say
+$NT$ which leads to a string which again starts with $NT$. This means a derivation of the
+form.
 
 \begin{center}
-\textit{KEYWORD}, \textit{IDENT}, \textit{WHITESPACE}
+$NT \rightarrow \ldots \rightarrow NT \cdot \ldots$
+\end{center}
+
+\noindent
+It can be easily seen that the grammar above for arithmetic expressions is left-recursive:
+for example the rules $E \rightarrow E\cdot + \cdot E$ and $N \rightarrow N\cdot N$ 
+show that this grammar is left-recursive. Some algorithms cannot cope with left-recursive 
+grammars. Fortunately every left-recursive grammar can be transformed into one that is
+not left-recursive, although this transformation might make the grammar less human-readable.
+For example if we want to give a non-left-recursive grammar for numbers we might
+specify
+
+\begin{center}
+$N \;\;\rightarrow\;\; 0\;|\;\ldots\;|\;9\;|\;1\cdot N\;|\;2\cdot N\;|\;\ldots\;|\;9\cdot N$
 \end{center}
 
 \noindent
-specifying the ``words'' 
-of our programming language. The algorithm takes as input the $rs$ and a string, say
+Using this grammar we can still derive every number string, but we will never be able 
+to derive a string of the form $\ldots \rightarrow N \cdot \ldots$.
+
+The other property we have to watch out for is when a grammar is
+\emph{ambiguous}. A grammar is said to be ambiguous if there are two parse-trees
+for one string. Again the grammar for arithmetic expressions shown above is ambiguous.
+While the shown parse tree for the string $(1 + 23) + 4$ is unique, this is not the case in
+general. For example there are two parse
+trees for the string $1 + 2 + 3$, namely
 
 \begin{center}
-\texttt{\grid{$c_1$}\grid{$c_2$}\grid{$c_3$}\grid{$c_4$}}\;\ldots
+\begin{tabular}{c@{\hspace{10mm}}c}
+\begin{tikzpicture}[level distance=8mm, black]
+  \node {$E$}
+    child {node {$E$} child {node {$N$} child {node {$1$}}}}
+    child {node {$+$}}
+    child {node {$E$}
+       child {node {$E$} child {node {$N$} child {node {$2$}}}}
+       child {node {$+$}}
+       child {node {$E$} child {node {$N$} child {node {$3$}}}}
+    }
+    ;
+\end{tikzpicture} 
+&
+\begin{tikzpicture}[level distance=8mm, black]
+  \node {$E$}
+    child {node {$E$}
+       child {node {$E$} child {node {$N$} child {node {$1$}}}}
+       child {node {$+$}}
+       child {node {$E$} child {node {$N$} child {node {$2$}}}} 
+    }
+    child {node {$+$}}
+    child {node {$E$} child {node {$N$} child {node {$3$}}}}
+    ;
+\end{tikzpicture}
+\end{tabular} 
 \end{center}
 
 \noindent
-and tries to chop off one word from the beginning of the string. If none of the
-regular expression in $rs$ matches, we will just return
-the empty string.
+In particular in programming languages we will try to avoid ambiguous
+grammars because two different parse-trees for a string mean a program can
+be interpreted in two different ways. In such cases we have to somehow make sure
+the two different ways do not matter, or disambiguate the grammar in
+some other way (for example making the $+$ left-associative). Unfortunately already 
+the problem of deciding whether a grammar
+is ambiguous or not is in general undecidable. 
 
-The crucial idea in the algorithm is to build the derivatives of all regular expressions in $rs$ with respect
-to the first character $c_1$. Then we take the results and continue with 
-building the derivatives with respect to $c_2$ until we have either exhausted our 
-input string or all of the regular expressions are ``zeroable''.  Suppose the input string is 
+Let us now turn to the problem of generating a parse-tree for a grammar and string.
+In what follows we explain \emph{parser combinators}, because they are easy
+to implement and closely resemble grammar rules. Imagine that a grammar
+describes the strings of natural numbers, such as the grammar $N$ shown above.
+For all such strings we want to generate the parse-trees or later on we actually 
+want to extract the meaning of these strings, that is the concrete integers ``behind'' 
+these strings. In Scala the parser combinators will be functions of type
+
+\begin{center}
+\texttt{I $\Rightarrow$ Set[(T, I)]}
+\end{center}
+
+\noindent 
+that is they take as input something of type \texttt{I}, typically a list of tokens or a string,
+and return a set of pairs. The first component of these pairs corresponds to what the
+parser combinator was able to process from the input and the second is the unprocessed 
+part of the input. As we shall see shortly, a parser combinator might return more than one such pair,
+with the idea that there are potentially several ways how to interpret the input. As a concrete
+example, consider the case where the input is of type string, say the string
 
 \begin{center}
-\texttt{\Grid{if2\VS}}\;\dots
+\tt\Grid{iffoo\VS testbar}
+\end{center}
+
+\noindent
+We might have a parser combinator which tries to interpret this string as a keyword (\texttt{if}) or
+an identifier (\texttt{iffoo}). Then the output will be the set
+
+\begin{center}
+$\left\{ \left(\texttt{\Grid{if}}\,,\, \texttt{\Grid{foo\VS testbar}}\right), 
+           \left(\texttt{\Grid{iffoo}}\,,\, \texttt{\Grid{\VS testbar}}\right) \right\}$
+\end{center}
+
+\noindent
+where the first pair means the parser could recognise \texttt{if} from the input and leaves 
+the rest as `unprocessed' as the second component of the pair; in the other case
+it could recognise \texttt{iffoo} and leaves \texttt{\VS testbar} as unprocessed. If the parser
+cannot recognise anything from the input then parser combinators just return the empty 
+set $\varnothing$. This will indicate something ``went wrong''.
+
+The main attraction is that we can easily build parser combinators out of smaller components
+following very closely the structure of a grammar. In order to implement this in an object
+oriented programming language, like Scala, we need to specify an abstract class for parser 
+combinators. This abstract class requires the implementation of the function
+\texttt{parse} taking an argument of type \texttt{I} and returns a set of type  
+\mbox{\texttt{Set[(T, I)]}}.
+
+\begin{center}
+\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
+abstract class Parser[I, T] {
+  def parse(ts: I): Set[(T, I)]
+
+  def parse_all(ts: I): Set[T] =
+    for ((head, tail) <- parse(ts); if (tail.isEmpty)) 
+      yield head
+}
+\end{lstlisting}
 \end{center}
 
 \noindent
-then building the derivatives with respect to \texttt{i} gives
+From the function \texttt{parse} we can then ``centrally'' derive the function \texttt{parse\_all},
+which just filters out all pairs whose second component is not empty (that is has still some
+unprocessed part). The reason is that at the end of parsing we are only interested in the
+results where all the input has been consumed and no unprocessed part is left.
+
+One of the simplest parser combinators recognises just a character, say $c$, 
+from the beginning of strings. Its behaviour is as follows:
+
+\begin{itemize}
+\item if the head of the input string starts with a $c$, it returns 
+	the set $\{(c, \textit{tail of}\; s)\}$
+\item otherwise it returns the empty set $\varnothing$	
+\end{itemize}
+
+\noindent
+The input type of this simple parser combinator for characters is
+\texttt{String} and the output type \mbox{\texttt{Set[(Char, String)]}}. 
+The code in Scala is as follows:
 
 \begin{center}
-\begin{tabular}{l|c}
- & $zeroable$\\\hline
- $der\;\texttt{i}\;(\textit{KEYWORD})$      & no\\
- $der\;\texttt{i}\;(\textit{IDENT})$              & no\\
- $der\;\texttt{i}\;(\textit{WHITESPACE})$ & yes\\
-\end{tabular}
+\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
+case class CharParser(c: Char) extends Parser[String, Char] {
+  def parse(sb: String) = 
+    if (sb.head == c) Set((c, sb.tail)) else Set()
+}
+\end{lstlisting}
 \end{center}
 
 \noindent
-We can eliminate \textit{WHITESPACE} as a potential candidate, because
-no derivative can go from $zeroable = \text{yes}$ to no. That leaves the other
-two regular expressions as potential candidate and we have to consider the
-next character, \texttt{f}, from the input string
+The \texttt{parse} function tests whether the first character of the 
+input string \texttt{sb} is equal to \texttt{c}. If yes, then it splits the
+string into the recognised part \texttt{c} and the unprocessed part
+\texttt{sb.tail}. In case \texttt{sb} does not start with \texttt{c} then
+the parser returns the empty set (in Scala \texttt{Set()}).
+
+More interesting are the parser combinators that build larger parsers
+out of smaller component parsers. For example the alternative 
+parser combinator is as follows.
 
 \begin{center}
-\begin{tabular}{l|c}
- & $zeroable$\\\hline
- $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$      & no\\
- $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))$              & no\\
+\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
+class AltParser[I, T]
+       (p: => Parser[I, T], 
+        q: => Parser[I, T]) extends Parser[I, T] {
+  def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+}
+\end{lstlisting}
+\end{center}
+
+\noindent
+The types of this parser combinator are polymorphic (we just have \texttt{I}
+for the input type, and \texttt{T} for the output type). The alternative parser
+builds a new parser out of two existing parser combinator \texttt{p} and \texttt{q}.
+Both need to be able to process input of type \texttt{I} and return the same
+output type \texttt{Set[(T, I)]}. (There is an interesting detail of Scala, namely the 
+\texttt{=>} in front of the types of \texttt{p} and \texttt{q}. They will prevent the
+evaluation of the arguments before they are used. This is often called 
+\emph{lazy evaluation} of the arguments.) The alternative parser should run
+the input with the first parser \texttt{p} (producing a set of outputs) and then
+run the same input with \texttt{q}. The result should be then just the union
+of both sets, which is the operation \texttt{++} in Scala.
+
+This parser combinator already allows us to construct a parser that either 
+a character \texttt{a} or \texttt{b}, as
+
+\begin{center}
+\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
+new AltParser(CharParser('a'), CharParser('b'))
+\end{lstlisting}
+\end{center}
+
+\noindent
+Scala allows us to introduce some more readable shorthand notation for this, like \texttt{'a' || 'b'}. 
+We can call this parser combinator with the strings
+
+\begin{center}
+\begin{tabular}{rcl}
+input string & & output\medskip\\
+\texttt{\Grid{ac}} & $\rightarrow$ & $\left\{(\texttt{\Grid{a}}, \texttt{\Grid{c}})\right\}$\\
+\texttt{\Grid{bc}} & $\rightarrow$ & $\left\{(\texttt{\Grid{b}}, \texttt{\Grid{c}})\right\}$\\
+\texttt{\Grid{cc}} & $\rightarrow$ & $\varnothing$
 \end{tabular}
 \end{center}
 
 \noindent
-Since both are `no', we have to continue with \texttt{2} from the input string
+We receive in the first two cases a successful output (that is a non-empty set).
+
+A bit more interesting is the \emph{sequence parser combinator} implemented in
+Scala as follows:
 
 \begin{center}
-\begin{tabular}{l|c}
- & $zeroable$\\\hline
- $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD})))$      & yes\\
- $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$              & no\\
-\end{tabular}
+\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
+class SeqParser[I, T, S]
+       (p: => Parser[I, T], 
+        q: => Parser[I, S]) extends Parser[I, (T, S)] {
+  def parse(sb: I) = 
+    for ((head1, tail1) <- p.parse(sb); 
+         (head2, tail2) <- q.parse(tail1)) 
+            yield ((head1, head2), tail2)
+}
+\end{lstlisting}
 \end{center}
 
 \noindent
-Although we now know that the beginning is definitely an \textit{IDENT}, we do not yet
-know how much of the input string should be considered as an \textit{IDENT}. So we
-still have to continue and consider the next derivative.
+This parser takes as input two parsers, \texttt{p} and \texttt{q}. It implements \texttt{parse} 
+as follows: let first run the parser \texttt{p} on the input producing a set of pairs (\texttt{head1}, \texttt{tail1}).
+The \texttt{tail1} stands for the unprocessed parts left over by \texttt{p}. 
+Let \texttt{q} run on these unprocessed parts
+producing again a set of pairs. The output of the sequence parser combinator is then a set
+containing pairs where the first components are again pairs, namely what the first parser could parse
+together with what the second parser could parse; the second component is the unprocessed
+part left over after running the second parser \texttt{q}. Therefore the input type of
+the sequence parser combinator is as usual \texttt{I}, but the output type is
 
 \begin{center}
-\begin{tabular}{l|c}
- & $zeroable$\\\hline
- $der\;\texttt{\VS}\;(der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))))$              & yes\\
+\texttt{Set[((T, S), I)]}
+\end{center}
+
+Scala allows us to provide some
+shorthand notation for the sequence parser combinator. So we can write for 
+example \texttt{'a'  $\sim$ 'b'}, which is the
+parser combinator that first consumes the character \texttt{a} from a string and then \texttt{b}.
+Calling this parser combinator with the strings
+
+\begin{center}
+\begin{tabular}{rcl}
+input string & & output\medskip\\
+\texttt{\Grid{abc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{a}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\
+\texttt{\Grid{bac}} & $\rightarrow$ & $\varnothing$\\
+\texttt{\Grid{ccc}} & $\rightarrow$ & $\varnothing$
 \end{tabular}
 \end{center}
 
 \noindent
-Since the answer is now `yes' also in this case, we can stop: once all derivatives are
-zeroable, we know the regular expressions cannot match any more letters from
-the input. In this case we only have to go back to the derivative that is nullable.  In this case it
-is
-
-\begin{center}
-$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ 
-\end{center} 
-
-\noindent
-which means we recognised an identifier. In case where there is a choice of more than one
-regular expressions that are nullable, then we choose the one with the highest precedence. 
-You can try out such a case with the input string
+A slightly more complicated parser is \texttt{('a'  || 'b') $\sim$ 'b'} which parses as first character either
+an \texttt{a} or \texttt{b} followed by a \texttt{b}. This parser produces the following results.
 
 \begin{center}
-\texttt{\Grid{then\VS}}\;\dots
-\end{center}
-
-\noindent
-which can both be recognised as a keyword, but also an identifier. 
-
-While in the example above the last nullable derivative is the one directly before
-the derivative turns zeroable, this is not always the case. Imagine, identifiers can be 
-letters, as permuted by the regular expression \textit{IDENT}, but must end with an 
-undercore.
-
-\begin{center}
-\begin{tabular}{lcl}
-$\textit{NEWIDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^* \cdot \_$\\ 
+\begin{tabular}{rcl}
+input string & & output\medskip\\
+\texttt{\Grid{abc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{a}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\
+\texttt{\Grid{bbc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{b}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\
+\texttt{\Grid{aac}} & $\rightarrow$ & $\varnothing$
 \end{tabular}
 \end{center}
 
-\noindent
-If we use \textit{NEWIDENT} with the input string
+Note carefully that constructing the parser \texttt{'a' || ('a' $\sim$ 'b')} will result in a tying error.
+The first parser has as output type a single character (recall the type of \texttt{CharParser}),
+but the second parser produces a pair of characters as output. The alternative parser is however
+required to have both component parsers to have the same type. We will see later how we can 
+build this parser without the typing error.
+
+The next parser combinator does not actually combine smaller parsers, but applies
+a function to the result of the parser. It is implemented in Scala as follows
 
 \begin{center}
-\texttt{\Grid{iffoo\VS}}\;\ldots
+\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
+class FunParser[I, T, S]
+         (p: => Parser[I, T], 
+          f: T => S) extends Parser[I, S] {
+  def parse(sb: I) = 
+    for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+}
+\end{lstlisting}
 \end{center}
 
+
 \noindent
-then it will only become $zeroable$ after the $\VS$ has been analysed. In this case we have to go back to the
-first \texttt{f} because only 
+This parser combinator takes a parser \texttt{p} with output type \texttt{T} as
+input as well as a function \texttt{f} with type \texttt{T => S}. The parser \texttt{p}
+produces sets of type \texttt{(T, I)}. The \texttt{FunParser} combinator then
+applies the function \texttt{f} to all the parer outputs. Since this function
+is of type \texttt{T => S}, we obtain a parser with output type \texttt{S}.
+Again Scala lets us introduce some shorthand notation for this parser combinator. 
+Therefore we will write \texttt{p ==> f} for it.
 
-\begin{center}
- $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$  
-\end{center}
-
-\noindent
-is nullable. As a result we recognise successfully the keyword \texttt{if} and the remaining
-string needs to be consumed by other regular expressions or lead to a lexing error.
+%\bigskip
+%takes advantage of the full generality---have a look
+%what it produces if we call it with the string \texttt{abc}
+%
+%\begin{center}
+%\begin{tabular}{rcl}
+%input string & & output\medskip\\
+%\texttt{\Grid{abc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{a}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\
+%\texttt{\Grid{bbc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{b}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\
+%\texttt{\Grid{aac}} & $\rightarrow$ & $\varnothing$
+%\end{tabular}
+%\end{center}
 
 
 
 \end{document}
 
 %%% Local Variables: 
-%%% mode: latex
+%%% mode: latex  
 %%% TeX-master: t
 %%% End: 
Binary file slides/slides05.pdf has changed
--- a/slides/slides05.tex	Mon Oct 19 15:03:44 2015 +0100
+++ b/slides/slides05.tex	Mon Oct 19 23:49:25 2015 +0100
@@ -3,6 +3,7 @@
 \usepackage{../graphics}
 \usepackage{../langs}
 \usepackage{../data}
+\usepackage{../grammar}
 
 \hfuzz=220pt 
 
@@ -38,7 +39,8 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
-\frametitle{\begin{tabular}{c}Last Week\\ Regexes and Values\end{tabular}}
+\frametitle{\begin{tabular}{c}Last Week\\[-1mm] 
+            Regexes and Values\end{tabular}}
 
 Regular expressions and their corresponding values:
 
@@ -73,127 +75,6 @@
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-
-\begin{textblock}{10}(3,5)
-\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
-\node (r1)  {\bl{$r_1$}};
-\node (r2) [right=of r1] {\bl{$r_2$}};
-\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
-\node (r3) [right=of r2] {\bl{$r_3$}};
-\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
-\node (r4) [right=of r3] {\bl{$r_4$}};
-\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
-\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
-\node (v4) [below=of r4] {\bl{$v_4$}};
-\draw[->,line width=1mm]  (r4) -- (v4);
-\node (v3) [left=of v4] {\bl{$v_3$}};
-\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
-\node (v2) [left=of v3] {\bl{$v_2$}};
-\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
-\node (v1) [left=of v2] {\bl{$v_1$}};
-\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
-\draw[->,line width=0.5mm]  (r3) -- (v3);
-\draw[->,line width=0.5mm]  (r2) -- (v2);
-\draw[->,line width=0.5mm]  (r1) -- (v1);
-\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
-\end{tikzpicture}
-\end{textblock}
-
-\only<2->{
-\begin{textblock}{6}(1,0.8)
-\begin{bubble}[6cm]
-\small
-\begin{tabular}{ll}
-\bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\
-\bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\
-\bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\
-\bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\
-\end{tabular}
-\end{bubble}
-\end{textblock}}
-
-\only<2->{
-\begin{textblock}{6}(5,11.4)
-\begin{bubble}[7.6cm]
-\small
-\begin{tabular}{ll}
-\bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\
-\bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\
-\bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\
-\bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\
-\end{tabular}
-\end{bubble}
-\end{textblock}}
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-\frametitle{Mkeps}
-
-Finding a (posix) value for recognising the empty string:
-
-\begin{center}
-\begin{tabular}{lcl}
-  \bl{$mkeps\,\epsilon$}  & \bl{$\dn$}  & \bl{$Empty$}\\
-  \bl{$mkeps\,r_1 + r_2$} & \bl{$\dn$}  & \bl{if $nullable(r_1)$}  \\
-                          &             & \bl{then $Left(mkeps(r_1))$}\\
-                          &             & \bl{else $Right(mkeps(r_2))$}\\
-  \bl{$mkeps\,r_1 \cdot r_2$}  & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),mkeps(r_2))$}\\
-  \bl{$mkeps\,r^*$}      & \bl{$\dn$} & \bl{$[]$}  \\
-\end{tabular}
-\end{center}
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-\frametitle{Inject}
-
-Injecting (``Adding'') a character to a value\\
-
-\begin{center}
-\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
-  \bl{$inj\,(c)\,c\,Empty$} & \bl{$\dn$}  & \bl{$Char\,c$}\\
-  \bl{$inj\,(r_1 + r_2)\,c\,Left(v)$} & \bl{$\dn$}  & \bl{$Left(inj\,r_1\,c\,v)$}\\
-  \bl{$inj\,(r_1 + r_2)\,c\,Right(v)$} & \bl{$\dn$}  & \bl{$Right(inj\,r_2\,c\,v)$}\\
-  \bl{$inj\,(r_1 \cdot r_2)\,c\,Seq(v_1,v_2)$} & \bl{$\dn$}  & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
-  \bl{$inj\,(r_1 \cdot r_2)\,c\,Left(Seq(v_1,v_2))$} & \bl{$\dn$}  & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
-  \bl{$inj\,(r_1 \cdot r_2)\,c\,Right(v)$} & \bl{$\dn$}  & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\
-  \bl{$inj\,(r^*)\,c\,Seq(v,vs)$} & \bl{$\dn$}  & \bl{$inj\,r\,c\,v\,::\,vs$}\\
-\end{tabular}
-\end{center}\bigskip
-
-\footnotesize
-\bl{$inj$}: 1st arg $\mapsto$ a rexp; 2nd arg $\mapsto$ a character; 3rd arg $\mapsto$ a value 
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-\frametitle{Flatten}
-
-Obtaining the string underlying a value:
-
-\begin{center}
-\begin{tabular}{lcl}
-  \bl{$|Empty|$}     & \bl{$\dn$} & \bl{$[]$}\\
-  \bl{$|Char(c)|$}   & \bl{$\dn$} & \bl{$[c]$}\\
-  \bl{$|Left(v)|$}   & \bl{$\dn$} & \bl{$|v|$}\\
-  \bl{$|Right(v)|$}  & \bl{$\dn$} & \bl{$|v|$}\\
-  \bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\
-  \bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\
-\end{tabular}
-\end{center}
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
 
@@ -307,127 +188,17 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
-\frametitle{Rectification}
-
-\begin{center}
-\begin{tabular}{l}
-\bl{$simp(r)$}:\\
-\quad case \bl{$r = r_1 + r_2$}\\
-\qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\
-\qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\
-\qquad case \bl{$r_{1s} = \varnothing$}: 
-       return \bl{$(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$}\\
-\qquad case \bl{$r_{2s} = \varnothing$}: 
-       return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\
-\qquad case \bl{$r_{1s} = r_{2s}$}:
-       return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\
-\qquad otherwise: 
-       return \bl{$(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$}\\
-\end{tabular}
-\end{center}
-
-\small
-\begin{center}
-\begin{tabular}{l@{\hspace{1mm}}l}
-\bl{$f_{alt}(f_1, f_2) \dn$}\\
-\qquad \bl{$\lambda v.\,$} case \bl{$v = Left(v')$}: 
-      & return \bl{$Left(f_1(v'))$}\\
-\qquad \phantom{$\lambda v.\,$} case \bl{$v = Right(v')$}: 
-      & return \bl{$Right(f_2(v'))$}\\      
-\end{tabular}
-\end{center}
-
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-\frametitle{Rectification}
-
-\begin{center}
-\begin{tabular}{@{\hspace{-3mm}}l}
-\bl{$simp(r)$}:\ldots\\
-\quad case \bl{$r = r_1 \cdot r_2$}\\
-\qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\
-\qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\
-\qquad case \bl{$r_{1s} = \varnothing$}: 
-       return \bl{$(\varnothing, f_{error})$}\\
-\qquad case \bl{$r_{2s} = \varnothing$}: 
-       return \bl{$(\varnothing, f_{error})$}\\
-\qquad case \bl{$r_{1s} = \epsilon$}: 
-return \bl{$(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$}\\
-\qquad case \bl{$r_{2s} = \epsilon$}: 
-return \bl{$(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$}\\
-\qquad otherwise: 
-       return \bl{$(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$}\\
-\end{tabular}
-\end{center}
-
-\small
-\begin{center}
-\begin{tabular}{l@{\hspace{1mm}}l}
-\bl{$f_{seq}(f_1, f_2) \dn$}\\
-\qquad \bl{$\lambda v.\,$ case $v = Seq(v_1, v_2)$}: 
-      & return \bl{$Seq(f_1(v_1), f_2(v_2))$}\\
-\end{tabular}
-\end{center}
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-\frametitle{Lexing with Simplification}
-
-\begin{center}
-\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
-  \bl{$lex\,r\,[]$} & \bl{$\dn$}  & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
-  \bl{$lex\,r\,c::s$} & \bl{$\dn$}  & \bl{let $(r', frect) = simp(der(c, r))$}\smallskip\\
-                      & & \bl{$inj\,r\,c\,(frect(lex(r', s)))$}\\
-\end{tabular}
-\end{center}\bigskip
-
-\begin{center}\small
-\begin{tikzpicture}[node distance=1.1cm,every node/.style={minimum size=7mm}]
-\node (r1)  {\bl{$r_1$}};
-\node (r2) [right=of r1] {\bl{$r_2$}};
-\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};
-\node (r3) [right=of r2] {\bl{$r_3$}};
-\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};
-\node (r4) [right=of r3] {\bl{$r_4$}};
-\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};
-\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}};
-\node (v4) [below=of r4] {\bl{$v_4$}};
-\draw[->,line width=1mm]  (r4) -- (v4);
-\node (v3) [left=of v4] {\bl{$v_3$}};
-\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};
-\node (v2) [left=of v3] {\bl{$v_2$}};
-\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};
-\node (v1) [left=of v2] {\bl{$v_1$}};
-\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};
-\draw[->,line width=0.5mm]  (r3) -- (v3);
-\draw[->,line width=0.5mm]  (r2) -- (v2);
-\draw[->,line width=0.5mm]  (r1) -- (v1);
-\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
-\end{tikzpicture}
-\end{center}
-
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
 \frametitle{Records}
 
 \begin{itemize}
-\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: \bl{$Rec(x,v)$}\medskip\pause
+\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: 
+\bl{$Rec(x,v)$}\medskip
 
 \item \bl{$nullable(x:r) \dn nullable(r)$}
 \item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$}
 \item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$}
 \item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$}
-\end{itemize}\bigskip\bigskip\pause
+\end{itemize}\bigskip\bigskip
 
 \small
 for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}
@@ -437,6 +208,29 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
+\frametitle{Environments}
+
+Obtaining the ``recorded'' parts of a value: 
+
+\begin{center}
+\begin{tabular}{lcl}
+  \bl{$env(Empty)$}     & \bl{$\dn$} & \bl{$[]$}\\
+  \bl{$env(Char(c))$}   & \bl{$\dn$} & \bl{$[]$}\\
+  \bl{$env(Left(v))$}   & \bl{$\dn$} & \bl{$env(v)$}\\
+  \bl{$env(Right(v))$}  & \bl{$\dn$} & \bl{$env(v)$}\\
+  \bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\
+  \bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} & 
+     \bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\
+  \bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\
+\end{tabular}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
 \frametitle{While Tokens}
 
 \begin{center}
@@ -455,13 +249,14 @@
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
 
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
 \begin{frame}[t]
 
 \consolas
 \begin{center}
-"if true then then 42 else +"
+\code{"if true then then 42 else +"}
 \end{center}
 
 \only<1>{
@@ -492,39 +287,12 @@
 OP(+)
 \end{tabular}}
 
-\end{frame}}
+\end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
 \begin{frame}[c]
-
-
-There is one small problem with the tokenizer. How should we 
-tokenize:
-
-\begin{center}
-{\consolas "x - 3"}
-\end{center}
-
-\consolas
-\begin{tabular}{@{}l}
-OP:\\
-\hspace{5mm}\texttt{"+"}, \texttt{"-"}\\
-NUM:\\
-\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + {''0''}\\
-NUMBER:\\
-\hspace{5mm}NUM +  (\texttt{"-"} $\cdot$ NUM)\\
-\end{tabular}
-
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Two Rules\end{tabular}}
+\frametitle{Two Rules}
 
 \begin{itemize}
 \item Longest match rule (``maximal munch rule''): The 
@@ -544,30 +312,9 @@
 %\item problem with infix operations, for example i-12
 
 
-\end{frame}}
+\end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-\frametitle{Environment}
-
-Obtaining the ``recorded'' parts of a regular expression: 
-
-\begin{center}
-\begin{tabular}{lcl}
-  \bl{$env(Empty)$}     & \bl{$\dn$} & \bl{$[]$}\\
-  \bl{$env(Char(c))$}   & \bl{$\dn$} & \bl{$[]$}\\
-  \bl{$env(Left(v))$}   & \bl{$\dn$} & \bl{$env(v)$}\\
-  \bl{$env(Right(v))$}  & \bl{$\dn$} & \bl{$env(v)$}\\
-  \bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\
-  \bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} & 
-     \bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\
-  \bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\
-\end{tabular}
-\end{center}
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
@@ -623,6 +370,681 @@
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Regular Languages}
+
+While regular expressions are very useful for lexing, there is
+no regular expression that can recognise the language
+\bl{$a^nb^n$}.\bigskip
+
+\begin{center}
+\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$}
+\end{center}\bigskip\bigskip
+
+\small
+\noindent So we cannot find out with regular expressions
+whether parentheses are matched or unmatched. Also regular
+expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}.
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Hierarchy of Languages}
+
+\begin{center}
+\begin{tikzpicture}
+[rect/.style={draw=black!50, 
+              top color=white,
+              bottom color=black!20, 
+              rectangle, 
+              very thick, 
+              rounded corners}, scale=1.2]
+
+\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
+\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
+\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages};
+\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
+\draw (0,-1.4) node [rect] {regular languages};
+\end{tikzpicture}
+
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Grammars}
+
+A (context-free) grammar \bl{$G$} consists of
+
+\begin{itemize}
+\item a finite set of nonterminal symbols (upper case)
+\item a finite terminal symbols or tokens (lower case)
+\item a start symbol (which must be a nonterminal)
+\item a set of rules
+\begin{center}
+\bl{$A \rightarrow \textit{rhs}$}
+\end{center}
+
+where \bl{\textit{rhs}} are sequences involving terminals and nonterminals,
+including the empty sequence \bl{$\epsilon$}.\medskip\pause
+
+We also allow rules
+\begin{center}
+\bl{$A \rightarrow \textit{rhs}_1 | \textit{rhs}_2 | \ldots$}
+\end{center}
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Palindromes}
+
+A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$S$ & $\rightarrow$ &  $\epsilon$ \\
+$S$ & $\rightarrow$ &  $a\cdot S\cdot a$ \\
+$S$ & $\rightarrow$ &  $b\cdot S\cdot b$ \\
+\end{tabular}}
+\end{center}\pause
+
+or
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$S$ & $\rightarrow$ &  $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\
+\end{tabular}}
+\end{center}\pause\bigskip
+
+\small
+Can you find the grammar rules for matched parentheses?
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Arithmetic Expressions}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $num\_token$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
+\end{tabular}}
+\end{center}\pause
+
+\bl{\texttt{1 + 2 * 3 + 4}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{A CFG Derivation}
+
+\begin{enumerate}
+\item Begin with a string containing only the start symbol, say \bl{$S$}\bigskip
+\item Replace any nonterminal \bl{$X$} in the string by the
+right-hand side of some production \bl{$X \rightarrow \textit{rhs}$}\bigskip
+\item Repeat 2 until there are no nonterminals
+\end{enumerate}
+
+\begin{center}
+\bl{$S \rightarrow \ldots \rightarrow \ldots  \rightarrow \ldots  \rightarrow \ldots $}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+  
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Example Derivation}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$S$ & $\rightarrow$ &  $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\
+\end{tabular}}
+\end{center}\bigskip
+
+\begin{center}
+\begin{tabular}{lcl}
+\bl{$S$} & \bl{$\rightarrow$} & \bl{$aSa$}\\
+              & \bl{$\rightarrow$} & \bl{$abSba$}\\
+              & \bl{$\rightarrow$} & \bl{$abaSaba$}\\
+              & \bl{$\rightarrow$} & \bl{$abaaba$}\\
+
+ 
+\end{tabular}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Example Derivation}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $num\_token$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
+\end{tabular}}
+\end{center}\bigskip
+
+
+\begin{center}
+\begin{tabular}{@{}c@{}c@{}}
+\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l}
+\bl{$E$} & \bl{$\rightarrow$} & \bl{$E*E$}\\
+              & \bl{$\rightarrow$} & \bl{$E+E*E$}\\
+              & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\
+              & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
+\end{tabular} &\pause
+\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l}
+\bl{$E$} & \bl{$\rightarrow$} & \bl{$E+E$}\\
+              & \bl{$\rightarrow$} & \bl{$E+E+E$}\\
+              & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\
+              & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\
+\end{tabular}
+\end{tabular}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Context Sensitive Grms}
+
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$S$ & $\Rightarrow$ &  $bSAA\;|\; \epsilon$\\
+$A$ & $\Rightarrow$ & $a$\\
+$bA$ & $\Rightarrow$ & $Ab$\\
+\end{tabular}}
+\end{center}\pause
+
+\begin{center}
+\bl{$S \Rightarrow\ldots\Rightarrow^? "ababaa"$}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Language of a CFG}
+
+Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. 
+Then the language \bl{$L(G)$} is:
+
+\begin{center}
+\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$}
+\end{center}\pause
+
+\begin{itemize}
+\item Terminals, because there are no rules for replacing them.
+\item Once generated, terminals are ``permanent''.
+\item Terminals ought to be tokens of the language\\
+(but can also be strings).
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Parse Trees}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $F \;|\; F \cdot * \cdot F$\\
+$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\
+$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\
+\end{tabular}}
+\end{center}
+
+\begin{center}
+\begin{tikzpicture}[level distance=8mm, blue]
+  \node {$E$}
+    child {node {$F$} 
+     child {node {$T$} 
+                 child {node {(\,$E$\,)}
+                            child {node{$F$ *{} $F$}
+                                  child {node {$T$} child {node {2}}}
+                                  child {node {$T$} child {node {3}}} 
+                               }
+                          }
+              }
+     child {node {+}}
+     child {node {$T$}
+       child {node {(\,$E$\,)} 
+       child {node {$F$}
+       child {node {$T$ +{} $T$}
+                    child {node {3}}
+                    child {node {4}} 
+                 }
+                 }}
+    }};
+\end{tikzpicture}
+\end{center}
+
+\begin{textblock}{5}(1, 6.5)
+\bl{\texttt{(2*3)+(3+4)}}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Arithmetic Expressions}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $num\_token$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
+\end{tabular}}
+\end{center}\pause\bigskip
+
+A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$E$} such
+that \bl{$E \rightarrow^+ E\cdot \ldots$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Ambiguous Grammars}
+
+A grammar is \alert{\bf ambiguous} if there is a string that
+has at least two different parse trees.
+
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $num\_token$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$ 
+\end{tabular}}
+\end{center}
+
+\bl{\texttt{1 + 2 * 3 + 4}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Dangling Else}
+
+Another ambiguous grammar:\bigskip
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  if $E$ then $E$\\
+ & $|$ &  if $E$ then $E$ else $E$ \\
+ & $|$ &  \ldots
+\end{tabular}}
+\end{center}\bigskip
+
+\bl{\texttt{if a then if x then y else c}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Parser Combinators}
+
+One of the simplest ways to implement a parser, see
+{\small\url{https://vimeo.com/142341803}}\bigskip
+
+Parser combinators: \bigskip
+
+\begin{minipage}{1.1\textwidth}
+\begin{center}
+\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} 
+$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$
+\end{center}
+\end{minipage}\bigskip
+
+\begin{itemize}
+\item atomic parsers
+\item sequencing
+\item alternative
+\item semantic action
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Atomic parsers, for example, number tokens
+
+\begin{center}
+\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} 
+\end{center}\bigskip
+
+\begin{itemize}
+\item you consume one or more token from the\\ 
+  input (stream)
+\item also works for characters and strings
+\end{itemize}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Alternative parser (code \bl{$p\;||\;q$})\bigskip
+
+\begin{itemize}
+\item apply \bl{$p$} and also \bl{$q$}; then combine 
+  the outputs
+\end{itemize}
+
+\begin{center}
+\large \bl{$p(\text{input}) \cup q(\text{input})$}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Sequence parser (code \bl{$p\sim q$})\bigskip
+
+\begin{itemize}
+\item apply first \bl{$p$} producing a set of pairs
+\item then apply \bl{$q$} to the unparsed parts
+\item then combine the results:\medskip 
+\begin{center}
+((output$_1$, output$_2$), unparsed part)
+\end{center}
+\end{itemize}
+
+\begin{center}
+\begin{tabular}{l}
+\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] 
+\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
+\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
+\end{tabular}
+\end{center}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Function parser (code \bl{$p \Rightarrow f\;$})\bigskip
+
+\begin{itemize}
+\item apply \bl{$p$} producing a set of pairs
+\item then apply the function \bl{$f$} to each first component
+\end{itemize}
+
+\begin{center}
+\begin{tabular}{l}
+\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
+\end{tabular}
+\end{center}\bigskip\bigskip\pause
+
+\bl{$f$} is the semantic action (``what to do with the parsed input'')
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}}
+
+Addition
+
+\begin{center}
+\bl{$T \sim + \sim E \Rightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$}
+\end{center}\pause
+
+Multiplication
+
+\begin{center}
+\bl{$F \sim * \sim T \Rightarrow f((x,y), z) \Rightarrow x * z$}
+\end{center}\pause
+
+Parenthesis
+
+\begin{center}
+\bl{$\text{(} \sim E \sim \text{)} \Rightarrow f((x,y), z) \Rightarrow y$}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Types of Parsers}
+
+\begin{itemize}
+\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$},
+then \bl{$p \sim q$} returns results of type
+
+\begin{center}
+\bl{$T \times S$}
+\end{center}\pause
+
+\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then  \bl{$q$} \alert{must} also have results of type \bl{$T$},
+and \bl{$p \;||\; q$} returns results of type
+
+\begin{center}
+\bl{$T$}
+\end{center}\pause
+
+\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from
+\bl{$T$} to \bl{$S$}, then
+\bl{$p \Rightarrow f$} returns results of type
+
+\begin{center}
+\bl{$S$}
+\end{center}
+
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Input Types of Parsers}
+
+\begin{itemize}
+\item input: \alert{token list}
+\item output: set of (output\_type, \alert{token list})
+\end{itemize}\bigskip\pause
+
+actually it can be any input type as long as it is a kind of
+sequence (for example a string)
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Scannerless Parsers}
+
+\begin{itemize}
+\item input: \alert{string}
+\item output: set of (output\_type, \alert{string})
+\end{itemize}\bigskip
+
+but lexers are better when whitespaces or comments need to be
+filtered out; then input is a sequence of tokens
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Successful Parses}
+
+\begin{itemize}
+\item input: string
+\item output: \alert{set of} (output\_type, string)
+\end{itemize}\bigskip
+
+a parse is successful whenever the input has been fully
+``consumed'' (that is the second component is empty)
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Abstract Parser Class}
+
+\small
+\lstinputlisting[language=Scala,xleftmargin=1mm]
+ {../progs/app7.scala}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+\small
+\fontsize{10}{12}\selectfont
+\lstinputlisting[language=Scala,xleftmargin=1mm]
+  {../progs/app8.scala}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Two Grammars}
+
+Which languages are recognised by the following two grammars?
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$S$ & $\rightarrow$ &  $1 \cdot S \cdot S$\\
+        & $|$ & $\epsilon$
+\end{tabular}}
+\end{center}\bigskip
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$U$ & $\rightarrow$ &  $1 \cdot U$\\
+        & $|$ & $\epsilon$
+\end{tabular}}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{Ambiguous Grammars}
+
+\begin{center}
+\begin{tikzpicture}
+\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,100,...,1000},
+    xmax=1050,
+    ymax=33,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=11cm,
+    height=7cm, 
+    legend entries={unambiguous,ambiguous},  
+    legend pos=north east,
+    legend cell align=left,
+    x tick label style={font=\small,/pgf/number format/1000 sep={}}]
+\addplot[blue,mark=*, mark options={fill=white}] 
+  table {s-grammar1.data};
+\only<2>{
+  \addplot[red,mark=triangle*, mark options={fill=white}] 
+  table {s-grammar2.data};}  
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+\begin{frame}
+\frametitle{While-Language}
+\mbox{}\\[-23mm]\mbox{}
+
+\bl{\begin{plstx}[rhs style=,one per line]
: \meta{Stmt} ::= skip
+         | \meta{Id} := \meta{AExp}
+         | if \meta{BExp} then \meta{Block} else \meta{Block}
+         | while \meta{BExp} do \meta{Block}\\
+: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts}
+          | \meta{Stmt}\\
+: \meta{Block} ::= \{ \meta{Stmts} \}
+          | \meta{Stmt}\\
+: \meta{AExp} ::= \ldots\\
+: \meta{BExp} ::= \ldots\\
\end{plstx}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{An Interpreter}
+
+\begin{center}
+\bl{\begin{tabular}{l}
+$\{$\\
+\;\;$x := 5 \text{;}$\\
+\;\;$y := x * 3\text{;}$\\
+\;\;$y := x * 4\text{;}$\\
+\;\;$x := u * 3$\\
+$\}$
+\end{tabular}}
+\end{center}
+
+\begin{itemize}
+\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause
+\item \bl{\texttt{eval(stmt, env)}}
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+
 \end{document}
 
 %%% Local Variables: