# HG changeset patch # User Christian Urban # Date 1445294965 -3600 # Node ID b3129cff41e9b21644c91f1a6d51f9b53b94a51d # Parent 603e171a7b4828167c7fc1bd20ba225e335e9cbf updated diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw01.pdf Binary file coursework/cw01.pdf has changed diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw01.tex --- a/coursework/cw01.tex Mon Oct 19 15:03:44 2015 +0100 +++ b/coursework/cw01.tex Mon Oct 19 23:49:25 2015 +0100 @@ -6,14 +6,17 @@ \section*{Coursework 1 (Strand 1)} -This coursework is worth 5\% and is due on 20 October at 16:00. You -are asked to implement a regular expression matcher and submit a -document containing the answers for the questions below. You can do -the implementation in any programming language you like, but you need -to submit the source code with which you answered the -questions. However, the coursework will \emph{only} be judged -according to the answers. You can submit your answers in a txt-file or -pdf. +This coursework is worth 5\% and is due on 20 October at +16:00. You are asked to implement a regular expression matcher +and submit a document containing the answers for the questions +below. You can do the implementation in any programming +language you like, but you need to submit the source code with +which you answered the questions, otherwise a mark of 0\% will +be awarded. However, the coursework will \emph{only} be judged +according to the answers. You can submit your answers in a +txt-file or pdf. + + \subsubsection*{Disclaimer} diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw02.pdf Binary file coursework/cw02.pdf has changed diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw02.tex --- a/coursework/cw02.tex Mon Oct 19 15:03:44 2015 +0100 +++ b/coursework/cw02.tex Mon Oct 19 23:49:25 2015 +0100 @@ -6,27 +6,28 @@ \section*{Coursework 2 (Strand 1)} -\noindent -This coursework is worth 5\% and is due on 6 November at 16:00. You -are asked to implement the Sulzmann tokeniser for the WHILE language. -You need to submit a document containing the answers for the questions -below. You can do the implementation in any programming language you -like, but you need to submit the source code with which you answered -the questions. However, the coursework will \emph{only} be judged -according to the answers. You can submit your answers in a txt-file or -as pdf. +\noindent This coursework is worth 5\% and is due on 6 +November at 16:00. You are asked to implement the Sulzmann \& +Lu tokeniser for the WHILE language. You can do the +implementation in any programming language you like, but you +need to submit the source code with which you answered the +questions, otherwise a mark of 0\% will be awarded. However, +the coursework will \emph{only} be judged according to the +answers. You can submit your answers in a txt-file or as pdf. \subsection*{Disclaimer} -It should be understood that the work you submit represents your own -effort. You have not copied from anyone else. An exception is the -Scala code I showed during the lectures, which you can use. -You can also use your own code from the CW~1. +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code I showed during the lectures, +which you can use. You can also use your own code from the +CW~1. \subsection*{Question 1 (marked with 1\%)} -To implement a tokeniser for the WHILE language, you first need to design -the appropriate regular expressions for the following eight syntactic entities: +To implement a tokeniser for the WHILE language, you first +need to design the appropriate regular expressions for the +following eight syntactic entities: \begin{enumerate} \item keywords are @@ -95,17 +96,17 @@ \subsection*{Question 2 (marked with 3\%)} -Implement the Sulzmann tokeniser from the lectures. For this you need -to implement the functions $nullable$ and $der$ (you can use your code -from CW 1), as well as $mkeps$ and $inj$. These functions need to be -appropriately extended for the extended regular expressions from -Q1. Also add the record regular expression from the lectures and -implement a function, say \pcode{env}, that returns all assignments -from a value (such that you can extract easily the tokens from a -value). +Implement the Sulzmann \& Lu tokeniser from the lectures. For +this you need to implement the functions $nullable$ and $der$ +(you can use your code from CW 1), as well as $mkeps$ and +$inj$. These functions need to be appropriately extended for +the extended regular expressions from Q1. Also add the record +regular expression from the lectures and implement a function, +say \pcode{env}, that returns all assignments from a value +(such that you can extract easily the tokens from a value). -The functions $mkeps$ and $inj$ return values. Calculate -the value for your regular expressions from Q1 and the string +The functions $mkeps$ and $inj$ return values. Calculate the +value for your regular expressions from Q1 and the string \begin{center} \code{"read n;"} diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw03.pdf Binary file coursework/cw03.pdf has changed diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw03.tex --- a/coursework/cw03.tex Mon Oct 19 15:03:44 2015 +0100 +++ b/coursework/cw03.tex Mon Oct 19 23:49:25 2015 +0100 @@ -6,12 +6,21 @@ \section*{Coursework 3 (Strand 1)} -\noindent -This coursework is worth 5\% and is due on 26 November at 16:00. You -are asked to implement a parser for the WHILE language and also -an interpreter. You should use the lexer from the previous -coursework for the parser. +\noindent This coursework is worth 5\% and is due on 27 +November at 16:00. You are asked to implement a parser for the +WHILE language and also an interpreter. You can do the +implementation in any programming language you like, but you +need to submit the source code with which you answered the +questions, otherwise a mark of 0\% will be awarded. You should +use the lexer from the previous coursework for the parser. +\subsection*{Disclaimer} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code I showed during the lectures, +which you can use. You can also use your own code from the +CW~1 and CW~2. \subsection*{Question 1 (marked with 1\%)} diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw04.pdf Binary file coursework/cw04.pdf has changed diff -r 603e171a7b48 -r b3129cff41e9 coursework/cw04.tex --- a/coursework/cw04.tex Mon Oct 19 15:03:44 2015 +0100 +++ b/coursework/cw04.tex Mon Oct 19 23:49:25 2015 +0100 @@ -12,7 +12,24 @@ \noindent This coursework is worth 10\% and is due on 11 December at 16:00. You are asked to implement a compiler for the WHILE language that targets the assembler language -provided by Jasmin. This assembler is available from +provided by Jasmin or Krakatau. You can do the implementation +in any programming language you like, but you need to submit +the source code with which you answered the questions, +otherwise a mark of 0\% will be awarded. You should use the +lexer and parser from the previous courseworks. + +\subsection*{Disclaimer} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code I showed during the lectures, +which you can use. You can also use your own code from the +CW~1, CW~2 and CW~3. + + +\subsection*{Assemblers} + +The Jasmin assembler is available from \begin{center} \url{http://jasmin.sourceforge.net} diff -r 603e171a7b48 -r b3129cff41e9 handouts/ho05-bak.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/handouts/ho05-bak.tex Mon Oct 19 23:49:25 2015 +0100 @@ -0,0 +1,290 @@ +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} +\usepackage{../graphics} + +\begin{document} + +\section*{Handout 5 (Lexing)} + +Whenever you want to design a new programming language or +implement a compiler for an existing language, the first task +is to fix the basic ``words'' of the language. For example +what are the keywords, or reserved words, of the language, +what are permitted identifiers, numbers, expressions and so +on. One convenient way to do this is, of course, by using +regular expressions. + +In this course we want to take a closer look at the WHILE +programming language. This is a simple imperative programming +language consisting of arithmetic expressions, assignments, +if-statements and loops. For example the Fibonacci program can +be written in this language as follows: + +\begin{center} +\mbox{\lstinputlisting[language=while]{../progs/fib.while}} +\end{center} + +\noindent +The keywords in this language will be + +\begin{center} +\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read} +\end{center} + +\noindent +In addition we will have some common operators, such as \texttt{<}, \texttt{>}, \texttt{:=} and so on, as well as numbers +and strings (which we however ignore for the moment). We also need to specify what the ``whitespace'' +is in our programming language and what comments should look like. +As a first try, we might specify the regular expressions for our language roughly as follows + +\begin{center} +\begin{tabular}{lcl} +$\textit{LETTER}$ & $:=$ & $\texttt{a} + \texttt{A} + \texttt{b} + \texttt{B} + \ldots$\\ +$\textit{DIGIT}$ & $:=$ & $\texttt{0} + \texttt{1} + \texttt{2} + \ldots$\\ +$\textit{KEYWORD}$ & $:=$ & $\texttt{while} + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\ +$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\ +$\textit{OP}$ & $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\ +$\textit{NUM}$ & $:=$ & $\textit{DIGIT}^+$\\ +$\textit{WHITESPACE}$ & $:=$ & $("\hspace{2mm}" + \backslash\texttt{n})^+$ +\end{tabular} +\end{center} + +Having the regular expressions in place, the problem we have to solve is: +given a string of our programming language, which regular expression +matches which part of the string. By solving this problem, we can split up a string +of our language into components. +For example given the input string + +\begin{center} +\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}} +\end{center} + +\noindent +we expect it is split up as follows + +\begin{center} +\tt +\Grid{if}\, +\Grid{\VS}\, +\Grid{true}\, +\Grid{\VS}\, +\Grid{then}\, +\Grid{\VS}\, +\Grid{x}\, +\Grid{+}\, +\Grid{2}\, +\Grid{\VS}\, +\Grid{else}\, +\Grid{\VS}\, +\Grid{x}\, +\Grid{+}\, +\Grid{3} +\end{center} + +\noindent +This process of splitting up an input string into components is often called \emph{lexing} or \emph{scanning}. +It is usually the first phase of a compiler. Note that the separation into words cannot, in general, +be done by just looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace +in the example above, this is not always the case. As can be seen the three components in \texttt{x+2} are +not separated by any whitespace. Another reason for recognising whitespaces explicitly is +that in some languages, for example Python, whitespaces matters, that is carry meaning. However in +our small language we will eventually just filter out all whitespaces and also all comments. + +Lexing not just separates a string into its components, but also classifies the components, meaning it explicitly records that \texttt{if} is a keyword, \VS{} a whitespace, \texttt{true} an identifier and so on. +For the moment, though, we will only focus on the simpler problem of just splitting up +a string into components. + +There are a few subtleties we need to consider first. For example, say the string is + +\begin{center} +\texttt{\Grid{iffoo\VS}}\;\ldots +\end{center} + +\noindent +then there are two possibilities for how it can be split up: either we regard the input as the keyword \texttt{if} followed +by the identifier \texttt{foo} (both regular expressions match) or we regard \texttt{iffoo} as a +single identifier. The choice that is often made in lexers is to look for the longest possible match. +This leaves \texttt{iffoo} as the only match in this case (since it is longer than \texttt{if}). + +Unfortunately, the convention about the longest match does not yet make the process +of lexing completely deterministic. Consider the string + +\begin{center} +\texttt{\Grid{then\VS}}\;\dots +\end{center} + +\noindent +Clearly, this string should be identified as a keyword. The problem is that also the regular expression \textit{IDENT} for identifiers matches this string. To overcome this ambiguity we need to rank our +regular expressions. In our running example we just use the ranking + +\[ +\textit{KEYWORD} < \textit{IDENT} < \textit{OP} < \ldots +\] + +\noindent +So even if both regular expressions match in the example above, +we give preference to the regular expression for keywords. + +Let us see how our algorithm for lexing works in detail. In addition to the functions $nullable$ +and $der$, it will use the function \emph{zeroable} defined as follows: + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} +$zeroable(\varnothing)$ & $\dn$ & $true$\\ +$zeroable(\epsilon)$ & $\dn$ & $f\!alse$\\ +$zeroable (c)$ & $\dn$ & $f\!alse$\\ +$zeroable (r_1 + r_2)$ & $\dn$ & $zeroable(r_1) \wedge zeroable(r_2)$ \\ +$zeroable (r_1 \cdot r_2)$ & $\dn$ & $zeroable(r_1) \vee zeroable(r_2)$ \\ +$zeroable (r^*)$ & $\dn$ & $f\!alse$\\ +\end{tabular} +\end{center} + +\noindent +Recall that the function $nullable(r)$ tests whether a regular expression +can match the empty string. The function $zeroable$, on the other hand, tests whether a regular +expression cannot match anything at all. The mathematical way of stating this +property is + +\begin{center} +$zeroable(r)$ if and only if $L(r) = \varnothing$ +\end{center} + +\noindent +For what follows let us fix a set of regular expressions $rs$ as being + +\begin{center} +\textit{KEYWORD}, \textit{IDENT}, \textit{WHITESPACE} +\end{center} + +\noindent +specifying the ``words'' +of our programming language. The algorithm takes as input the $rs$ and a string, say + +\begin{center} +\texttt{\grid{$c_1$}\grid{$c_2$}\grid{$c_3$}\grid{$c_4$}}\;\ldots +\end{center} + +\noindent +and tries to chop off one word from the beginning of the string. If none of the +regular expression in $rs$ matches, we will just return +the empty string. + +The crucial idea in the algorithm is to build the derivatives of all regular expressions in $rs$ with respect +to the first character $c_1$. Then we take the results and continue with +building the derivatives with respect to $c_2$ until we have either exhausted our +input string or all of the regular expressions are ``zeroable''. Suppose the input string is + +\begin{center} +\texttt{\Grid{if2\VS}}\;\dots +\end{center} + +\noindent +then building the derivatives with respect to \texttt{i} gives + +\begin{center} +\begin{tabular}{l|c} + & $zeroable$\\\hline + $der\;\texttt{i}\;(\textit{KEYWORD})$ & no\\ + $der\;\texttt{i}\;(\textit{IDENT})$ & no\\ + $der\;\texttt{i}\;(\textit{WHITESPACE})$ & yes\\ +\end{tabular} +\end{center} + +\noindent +We can eliminate \textit{WHITESPACE} as a potential candidate, because +no derivative can go from $zeroable = \text{yes}$ to no. That leaves the other +two regular expressions as potential candidate and we have to consider the +next character, \texttt{f}, from the input string + +\begin{center} +\begin{tabular}{l|c} + & $zeroable$\\\hline + $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$ & no\\ + $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))$ & no\\ +\end{tabular} +\end{center} + +\noindent +Since both are `no', we have to continue with \texttt{2} from the input string + +\begin{center} +\begin{tabular}{l|c} + & $zeroable$\\\hline + $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD})))$ & yes\\ + $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ & no\\ +\end{tabular} +\end{center} + +\noindent +Although we now know that the beginning is definitely an \textit{IDENT}, we do not yet +know how much of the input string should be considered as an \textit{IDENT}. So we +still have to continue and consider the next derivative. + +\begin{center} +\begin{tabular}{l|c} + & $zeroable$\\\hline + $der\;\texttt{\VS}\;(der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))))$ & yes\\ +\end{tabular} +\end{center} + +\noindent +Since the answer is now `yes' also in this case, we can stop: once all derivatives are +zeroable, we know the regular expressions cannot match any more letters from +the input. In this case we only have to go back to the derivative that is nullable. In this case it +is + +\begin{center} +$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ +\end{center} + +\noindent +which means we recognised an identifier. In case where there is a choice of more than one +regular expressions that are nullable, then we choose the one with the highest precedence. +You can try out such a case with the input string + +\begin{center} +\texttt{\Grid{then\VS}}\;\dots +\end{center} + +\noindent +which can both be recognised as a keyword, but also an identifier. + +While in the example above the last nullable derivative is the one directly before +the derivative turns zeroable, this is not always the case. Imagine, identifiers can be +letters, as permuted by the regular expression \textit{IDENT}, but must end with an +undercore. + +\begin{center} +\begin{tabular}{lcl} +$\textit{NEWIDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^* \cdot \_$\\ +\end{tabular} +\end{center} + +\noindent +If we use \textit{NEWIDENT} with the input string + +\begin{center} +\texttt{\Grid{iffoo\VS}}\;\ldots +\end{center} + +\noindent +then it will only become $zeroable$ after the $\VS$ has been analysed. In this case we have to go back to the +first \texttt{f} because only + +\begin{center} + $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$ +\end{center} + +\noindent +is nullable. As a result we recognise successfully the keyword \texttt{if} and the remaining +string needs to be consumed by other regular expressions or lead to a lexing error. + + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 603e171a7b48 -r b3129cff41e9 handouts/ho05.pdf Binary file handouts/ho05.pdf has changed diff -r 603e171a7b48 -r b3129cff41e9 handouts/ho05.tex --- a/handouts/ho05.tex Mon Oct 19 15:03:44 2015 +0100 +++ b/handouts/ho05.tex Mon Oct 19 23:49:25 2015 +0100 @@ -1,290 +1,472 @@ \documentclass{article} \usepackage{../style} \usepackage{../langs} -\usepackage{../graphics} + \begin{document} -\section*{Handout 5 (Lexing)} +\section*{Handout 6 (Parser Combinators)} -Whenever you want to design a new programming language or -implement a compiler for an existing language, the first task -is to fix the basic ``words'' of the language. For example -what are the keywords, or reserved words, of the language, -what are permitted identifiers, numbers, expressions and so -on. One convenient way to do this is, of course, by using -regular expressions. - -In this course we want to take a closer look at the WHILE -programming language. This is a simple imperative programming -language consisting of arithmetic expressions, assignments, -if-statements and loops. For example the Fibonacci program can -be written in this language as follows: +While regular expressions are very useful for lexing and for recognising +many patterns in strings (like email addresses), they have their limitations. For +example there is no regular expression that can recognise the language +$a^nb^n$. Another example for which there exists no regular expression is the language of well-parenthesised +expressions. In languages like Lisp, which use parentheses rather +extensively, it might be of interest whether the following two expressions +are well-parenthesised (the left one is, the right one is not): \begin{center} -\mbox{\lstinputlisting[language=while]{../progs/fib.while}} +$(((()()))())$ \hspace{10mm} $(((()()))()))$ \end{center} \noindent -The keywords in this language will be - -\begin{center} -\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read} -\end{center} +Not being able to solve such recognition problems is a serious limitation. +In order to solve such recognition problems, we need more powerful +techniques than regular expressions. We will in particular look at \emph{context-free +languages}. They include the regular languages as the picture below shows: -\noindent -In addition we will have some common operators, such as \texttt{<}, \texttt{>}, \texttt{:=} and so on, as well as numbers -and strings (which we however ignore for the moment). We also need to specify what the ``whitespace'' -is in our programming language and what comments should look like. -As a first try, we might specify the regular expressions for our language roughly as follows \begin{center} -\begin{tabular}{lcl} -$\textit{LETTER}$ & $:=$ & $\texttt{a} + \texttt{A} + \texttt{b} + \texttt{B} + \ldots$\\ -$\textit{DIGIT}$ & $:=$ & $\texttt{0} + \texttt{1} + \texttt{2} + \ldots$\\ -$\textit{KEYWORD}$ & $:=$ & $\texttt{while} + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\ -$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\ -$\textit{OP}$ & $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\ -$\textit{NUM}$ & $:=$ & $\textit{DIGIT}^+$\\ -$\textit{WHITESPACE}$ & $:=$ & $("\hspace{2mm}" + \backslash\texttt{n})^+$ -\end{tabular} -\end{center} +\begin{tikzpicture} +[rect/.style={draw=black!50, + top color=white,bottom color=black!20, + rectangle, very thick, rounded corners}] -Having the regular expressions in place, the problem we have to solve is: -given a string of our programming language, which regular expression -matches which part of the string. By solving this problem, we can split up a string -of our language into components. -For example given the input string - -\begin{center} -\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}} +\draw (0,0) node [rect, text depth=30mm, text width=46mm] {\small all languages}; +\draw (0,-0.4) node [rect, text depth=20mm, text width=44mm] {\small decidable languages}; +\draw (0,-0.65) node [rect, text depth=13mm] {\small context sensitive languages}; +\draw (0,-0.84) node [rect, text depth=7mm, text width=35mm] {\small context-free languages}; +\draw (0,-1.05) node [rect] {\small regular languages}; +\end{tikzpicture} \end{center} \noindent -we expect it is split up as follows +Context-free languages play an important role in `day-to-day' text processing and in +programming languages. Context-free languages are usually specified by grammars. +For example a grammar for well-parenthesised expressions is \begin{center} -\tt -\Grid{if}\, -\Grid{\VS}\, -\Grid{true}\, -\Grid{\VS}\, -\Grid{then}\, -\Grid{\VS}\, -\Grid{x}\, -\Grid{+}\, -\Grid{2}\, -\Grid{\VS}\, -\Grid{else}\, -\Grid{\VS}\, -\Grid{x}\, -\Grid{+}\, -\Grid{3} +$P \;\;\rightarrow\;\; ( \cdot P \cdot ) \cdot P \;|\; \epsilon$ +\end{center} + +\noindent +In general grammars consist of finitely many rules built up from \emph{terminal symbols} +(usually lower-case letters) and \emph{non-terminal symbols} (upper-case letters). Rules +have the shape + +\begin{center} +$NT \;\;\rightarrow\;\; \textit{rhs}$ +\end{center} + +\noindent +where on the left-hand side is a single non-terminal and on the right a string consisting +of both terminals and non-terminals including the $\epsilon$-symbol for indicating the +empty string. We use the convention to separate components on +the right hand-side by using the $\cdot$ symbol, as in the grammar for well-parenthesised expressions. +We also use the convention to use $|$ as a shorthand notation for several rules. For example + +\begin{center} +$NT \;\;\rightarrow\;\; \textit{rhs}_1 \;|\; \textit{rhs}_2$ \end{center} \noindent -This process of splitting up an input string into components is often called \emph{lexing} or \emph{scanning}. -It is usually the first phase of a compiler. Note that the separation into words cannot, in general, -be done by just looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace -in the example above, this is not always the case. As can be seen the three components in \texttt{x+2} are -not separated by any whitespace. Another reason for recognising whitespaces explicitly is -that in some languages, for example Python, whitespaces matters, that is carry meaning. However in -our small language we will eventually just filter out all whitespaces and also all comments. - -Lexing not just separates a string into its components, but also classifies the components, meaning it explicitly records that \texttt{if} is a keyword, \VS{} a whitespace, \texttt{true} an identifier and so on. -For the moment, though, we will only focus on the simpler problem of just splitting up -a string into components. - -There are a few subtleties we need to consider first. For example, say the string is +means that the non-terminal $NT$ can be replaced by either $\textit{rhs}_1$ or $\textit{rhs}_2$. +If there are more than one non-terminal on the left-hand side of the rules, then we need to indicate +what is the \emph{starting} symbol of the grammar. For example the grammar for arithmetic expressions +can be given as follows \begin{center} -\texttt{\Grid{iffoo\VS}}\;\ldots -\end{center} - -\noindent -then there are two possibilities for how it can be split up: either we regard the input as the keyword \texttt{if} followed -by the identifier \texttt{foo} (both regular expressions match) or we regard \texttt{iffoo} as a -single identifier. The choice that is often made in lexers is to look for the longest possible match. -This leaves \texttt{iffoo} as the only match in this case (since it is longer than \texttt{if}). - -Unfortunately, the convention about the longest match does not yet make the process -of lexing completely deterministic. Consider the string - -\begin{center} -\texttt{\Grid{then\VS}}\;\dots -\end{center} - -\noindent -Clearly, this string should be identified as a keyword. The problem is that also the regular expression \textit{IDENT} for identifiers matches this string. To overcome this ambiguity we need to rank our -regular expressions. In our running example we just use the ranking - -\[ -\textit{KEYWORD} < \textit{IDENT} < \textit{OP} < \ldots -\] - -\noindent -So even if both regular expressions match in the example above, -we give preference to the regular expression for keywords. - -Let us see how our algorithm for lexing works in detail. In addition to the functions $nullable$ -and $der$, it will use the function \emph{zeroable} defined as follows: - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} -$zeroable(\varnothing)$ & $\dn$ & $true$\\ -$zeroable(\epsilon)$ & $\dn$ & $f\!alse$\\ -$zeroable (c)$ & $\dn$ & $f\!alse$\\ -$zeroable (r_1 + r_2)$ & $\dn$ & $zeroable(r_1) \wedge zeroable(r_2)$ \\ -$zeroable (r_1 \cdot r_2)$ & $\dn$ & $zeroable(r_1) \vee zeroable(r_2)$ \\ -$zeroable (r^*)$ & $\dn$ & $f\!alse$\\ +\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $N$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$\\ +$N$ & $\rightarrow$ & $N \cdot N \;|\; 0 \;|\; 1 \;|\: \ldots \;|\; 9$ \end{tabular} \end{center} \noindent -Recall that the function $nullable(r)$ tests whether a regular expression -can match the empty string. The function $zeroable$, on the other hand, tests whether a regular -expression cannot match anything at all. The mathematical way of stating this -property is +where $E$ is the starting symbol. A \emph{derivation} for a grammar +starts with the staring symbol of the grammar and in each step replaces one +non-terminal by a right-hand side of a rule. A derivation ends with a string +in which only terminal symbols are left. For example a derivation for the +string $(1 + 2) + 3$ is as follows: + +\begin{center} +\begin{tabular}{lll} +$E$ & $\rightarrow$ & $E+E$\\ + & $\rightarrow$ & $(E)+E$\\ + & $\rightarrow$ & $(E+E)+E$\\ + & $\rightarrow$ & $(E+E)+N$\\ + & $\rightarrow$ & $(E+E)+3$\\ + & $\rightarrow$ & $(N+E)+3$\\ + & $\rightarrow^+$ & $(1+2)+3$\\ +\end{tabular} +\end{center} + +\noindent +The \emph{language} of a context-free grammar $G$ with start symbol $S$ +is defined as the set of strings derivable by a derivation, that is \begin{center} -$zeroable(r)$ if and only if $L(r) = \varnothing$ +$\{c_1\ldots c_n \;|\; S \rightarrow^* c_1\ldots c_n \;\;\text{with all} \; c_i \;\text{being non-terminals}\}$ +\end{center} + +\noindent +A \emph{parse-tree} encodes how a string is derived with the starting symbol on +top and each non-terminal containing a subtree for how it is replaced in a derivation. +The parse tree for the string $(1 + 23)+4$ is as follows: + +\begin{center} +\begin{tikzpicture}[level distance=8mm, black] + \node {$E$} + child {node {$E$} + child {node {$($}} + child {node {$E$} + child {node {$E$} child {node {$N$} child {node {$1$}}}} + child {node {$+$}} + child {node {$E$} + child {node {$N$} child {node {$2$}}} + child {node {$N$} child {node {$3$}}} + } + } + child {node {$)$}} + } + child {node {$+$}} + child {node {$E$} + child {node {$N$} child {node {$4$}}} + }; +\end{tikzpicture} \end{center} \noindent -For what follows let us fix a set of regular expressions $rs$ as being +We are often interested in these parse-trees since they encode the structure of +how a string is derived by a grammar. Before we come to the problem of constructing +such parse-trees, we need to consider the following two properties of grammars. +A grammar is \emph{left-recursive} if there is a derivation starting from a non-terminal, say +$NT$ which leads to a string which again starts with $NT$. This means a derivation of the +form. \begin{center} -\textit{KEYWORD}, \textit{IDENT}, \textit{WHITESPACE} +$NT \rightarrow \ldots \rightarrow NT \cdot \ldots$ +\end{center} + +\noindent +It can be easily seen that the grammar above for arithmetic expressions is left-recursive: +for example the rules $E \rightarrow E\cdot + \cdot E$ and $N \rightarrow N\cdot N$ +show that this grammar is left-recursive. Some algorithms cannot cope with left-recursive +grammars. Fortunately every left-recursive grammar can be transformed into one that is +not left-recursive, although this transformation might make the grammar less human-readable. +For example if we want to give a non-left-recursive grammar for numbers we might +specify + +\begin{center} +$N \;\;\rightarrow\;\; 0\;|\;\ldots\;|\;9\;|\;1\cdot N\;|\;2\cdot N\;|\;\ldots\;|\;9\cdot N$ \end{center} \noindent -specifying the ``words'' -of our programming language. The algorithm takes as input the $rs$ and a string, say +Using this grammar we can still derive every number string, but we will never be able +to derive a string of the form $\ldots \rightarrow N \cdot \ldots$. + +The other property we have to watch out for is when a grammar is +\emph{ambiguous}. A grammar is said to be ambiguous if there are two parse-trees +for one string. Again the grammar for arithmetic expressions shown above is ambiguous. +While the shown parse tree for the string $(1 + 23) + 4$ is unique, this is not the case in +general. For example there are two parse +trees for the string $1 + 2 + 3$, namely \begin{center} -\texttt{\grid{$c_1$}\grid{$c_2$}\grid{$c_3$}\grid{$c_4$}}\;\ldots +\begin{tabular}{c@{\hspace{10mm}}c} +\begin{tikzpicture}[level distance=8mm, black] + \node {$E$} + child {node {$E$} child {node {$N$} child {node {$1$}}}} + child {node {$+$}} + child {node {$E$} + child {node {$E$} child {node {$N$} child {node {$2$}}}} + child {node {$+$}} + child {node {$E$} child {node {$N$} child {node {$3$}}}} + } + ; +\end{tikzpicture} +& +\begin{tikzpicture}[level distance=8mm, black] + \node {$E$} + child {node {$E$} + child {node {$E$} child {node {$N$} child {node {$1$}}}} + child {node {$+$}} + child {node {$E$} child {node {$N$} child {node {$2$}}}} + } + child {node {$+$}} + child {node {$E$} child {node {$N$} child {node {$3$}}}} + ; +\end{tikzpicture} +\end{tabular} \end{center} \noindent -and tries to chop off one word from the beginning of the string. If none of the -regular expression in $rs$ matches, we will just return -the empty string. +In particular in programming languages we will try to avoid ambiguous +grammars because two different parse-trees for a string mean a program can +be interpreted in two different ways. In such cases we have to somehow make sure +the two different ways do not matter, or disambiguate the grammar in +some other way (for example making the $+$ left-associative). Unfortunately already +the problem of deciding whether a grammar +is ambiguous or not is in general undecidable. -The crucial idea in the algorithm is to build the derivatives of all regular expressions in $rs$ with respect -to the first character $c_1$. Then we take the results and continue with -building the derivatives with respect to $c_2$ until we have either exhausted our -input string or all of the regular expressions are ``zeroable''. Suppose the input string is +Let us now turn to the problem of generating a parse-tree for a grammar and string. +In what follows we explain \emph{parser combinators}, because they are easy +to implement and closely resemble grammar rules. Imagine that a grammar +describes the strings of natural numbers, such as the grammar $N$ shown above. +For all such strings we want to generate the parse-trees or later on we actually +want to extract the meaning of these strings, that is the concrete integers ``behind'' +these strings. In Scala the parser combinators will be functions of type + +\begin{center} +\texttt{I $\Rightarrow$ Set[(T, I)]} +\end{center} + +\noindent +that is they take as input something of type \texttt{I}, typically a list of tokens or a string, +and return a set of pairs. The first component of these pairs corresponds to what the +parser combinator was able to process from the input and the second is the unprocessed +part of the input. As we shall see shortly, a parser combinator might return more than one such pair, +with the idea that there are potentially several ways how to interpret the input. As a concrete +example, consider the case where the input is of type string, say the string \begin{center} -\texttt{\Grid{if2\VS}}\;\dots +\tt\Grid{iffoo\VS testbar} +\end{center} + +\noindent +We might have a parser combinator which tries to interpret this string as a keyword (\texttt{if}) or +an identifier (\texttt{iffoo}). Then the output will be the set + +\begin{center} +$\left\{ \left(\texttt{\Grid{if}}\,,\, \texttt{\Grid{foo\VS testbar}}\right), + \left(\texttt{\Grid{iffoo}}\,,\, \texttt{\Grid{\VS testbar}}\right) \right\}$ +\end{center} + +\noindent +where the first pair means the parser could recognise \texttt{if} from the input and leaves +the rest as `unprocessed' as the second component of the pair; in the other case +it could recognise \texttt{iffoo} and leaves \texttt{\VS testbar} as unprocessed. If the parser +cannot recognise anything from the input then parser combinators just return the empty +set $\varnothing$. This will indicate something ``went wrong''. + +The main attraction is that we can easily build parser combinators out of smaller components +following very closely the structure of a grammar. In order to implement this in an object +oriented programming language, like Scala, we need to specify an abstract class for parser +combinators. This abstract class requires the implementation of the function +\texttt{parse} taking an argument of type \texttt{I} and returns a set of type +\mbox{\texttt{Set[(T, I)]}}. + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +abstract class Parser[I, T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I): Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) + yield head +} +\end{lstlisting} \end{center} \noindent -then building the derivatives with respect to \texttt{i} gives +From the function \texttt{parse} we can then ``centrally'' derive the function \texttt{parse\_all}, +which just filters out all pairs whose second component is not empty (that is has still some +unprocessed part). The reason is that at the end of parsing we are only interested in the +results where all the input has been consumed and no unprocessed part is left. + +One of the simplest parser combinators recognises just a character, say $c$, +from the beginning of strings. Its behaviour is as follows: + +\begin{itemize} +\item if the head of the input string starts with a $c$, it returns + the set $\{(c, \textit{tail of}\; s)\}$ +\item otherwise it returns the empty set $\varnothing$ +\end{itemize} + +\noindent +The input type of this simple parser combinator for characters is +\texttt{String} and the output type \mbox{\texttt{Set[(Char, String)]}}. +The code in Scala is as follows: \begin{center} -\begin{tabular}{l|c} - & $zeroable$\\\hline - $der\;\texttt{i}\;(\textit{KEYWORD})$ & no\\ - $der\;\texttt{i}\;(\textit{IDENT})$ & no\\ - $der\;\texttt{i}\;(\textit{WHITESPACE})$ & yes\\ -\end{tabular} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +case class CharParser(c: Char) extends Parser[String, Char] { + def parse(sb: String) = + if (sb.head == c) Set((c, sb.tail)) else Set() +} +\end{lstlisting} \end{center} \noindent -We can eliminate \textit{WHITESPACE} as a potential candidate, because -no derivative can go from $zeroable = \text{yes}$ to no. That leaves the other -two regular expressions as potential candidate and we have to consider the -next character, \texttt{f}, from the input string +The \texttt{parse} function tests whether the first character of the +input string \texttt{sb} is equal to \texttt{c}. If yes, then it splits the +string into the recognised part \texttt{c} and the unprocessed part +\texttt{sb.tail}. In case \texttt{sb} does not start with \texttt{c} then +the parser returns the empty set (in Scala \texttt{Set()}). + +More interesting are the parser combinators that build larger parsers +out of smaller component parsers. For example the alternative +parser combinator is as follows. \begin{center} -\begin{tabular}{l|c} - & $zeroable$\\\hline - $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$ & no\\ - $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))$ & no\\ +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +class AltParser[I, T] + (p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} +\end{lstlisting} +\end{center} + +\noindent +The types of this parser combinator are polymorphic (we just have \texttt{I} +for the input type, and \texttt{T} for the output type). The alternative parser +builds a new parser out of two existing parser combinator \texttt{p} and \texttt{q}. +Both need to be able to process input of type \texttt{I} and return the same +output type \texttt{Set[(T, I)]}. (There is an interesting detail of Scala, namely the +\texttt{=>} in front of the types of \texttt{p} and \texttt{q}. They will prevent the +evaluation of the arguments before they are used. This is often called +\emph{lazy evaluation} of the arguments.) The alternative parser should run +the input with the first parser \texttt{p} (producing a set of outputs) and then +run the same input with \texttt{q}. The result should be then just the union +of both sets, which is the operation \texttt{++} in Scala. + +This parser combinator already allows us to construct a parser that either +a character \texttt{a} or \texttt{b}, as + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +new AltParser(CharParser('a'), CharParser('b')) +\end{lstlisting} +\end{center} + +\noindent +Scala allows us to introduce some more readable shorthand notation for this, like \texttt{'a' || 'b'}. +We can call this parser combinator with the strings + +\begin{center} +\begin{tabular}{rcl} +input string & & output\medskip\\ +\texttt{\Grid{ac}} & $\rightarrow$ & $\left\{(\texttt{\Grid{a}}, \texttt{\Grid{c}})\right\}$\\ +\texttt{\Grid{bc}} & $\rightarrow$ & $\left\{(\texttt{\Grid{b}}, \texttt{\Grid{c}})\right\}$\\ +\texttt{\Grid{cc}} & $\rightarrow$ & $\varnothing$ \end{tabular} \end{center} \noindent -Since both are `no', we have to continue with \texttt{2} from the input string +We receive in the first two cases a successful output (that is a non-empty set). + +A bit more interesting is the \emph{sequence parser combinator} implemented in +Scala as follows: \begin{center} -\begin{tabular}{l|c} - & $zeroable$\\\hline - $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD})))$ & yes\\ - $der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ & no\\ -\end{tabular} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +class SeqParser[I, T, S] + (p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) + yield ((head1, head2), tail2) +} +\end{lstlisting} \end{center} \noindent -Although we now know that the beginning is definitely an \textit{IDENT}, we do not yet -know how much of the input string should be considered as an \textit{IDENT}. So we -still have to continue and consider the next derivative. +This parser takes as input two parsers, \texttt{p} and \texttt{q}. It implements \texttt{parse} +as follows: let first run the parser \texttt{p} on the input producing a set of pairs (\texttt{head1}, \texttt{tail1}). +The \texttt{tail1} stands for the unprocessed parts left over by \texttt{p}. +Let \texttt{q} run on these unprocessed parts +producing again a set of pairs. The output of the sequence parser combinator is then a set +containing pairs where the first components are again pairs, namely what the first parser could parse +together with what the second parser could parse; the second component is the unprocessed +part left over after running the second parser \texttt{q}. Therefore the input type of +the sequence parser combinator is as usual \texttt{I}, but the output type is \begin{center} -\begin{tabular}{l|c} - & $zeroable$\\\hline - $der\;\texttt{\VS}\;(der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))))$ & yes\\ +\texttt{Set[((T, S), I)]} +\end{center} + +Scala allows us to provide some +shorthand notation for the sequence parser combinator. So we can write for +example \texttt{'a' $\sim$ 'b'}, which is the +parser combinator that first consumes the character \texttt{a} from a string and then \texttt{b}. +Calling this parser combinator with the strings + +\begin{center} +\begin{tabular}{rcl} +input string & & output\medskip\\ +\texttt{\Grid{abc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{a}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\ +\texttt{\Grid{bac}} & $\rightarrow$ & $\varnothing$\\ +\texttt{\Grid{ccc}} & $\rightarrow$ & $\varnothing$ \end{tabular} \end{center} \noindent -Since the answer is now `yes' also in this case, we can stop: once all derivatives are -zeroable, we know the regular expressions cannot match any more letters from -the input. In this case we only have to go back to the derivative that is nullable. In this case it -is - -\begin{center} -$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ -\end{center} - -\noindent -which means we recognised an identifier. In case where there is a choice of more than one -regular expressions that are nullable, then we choose the one with the highest precedence. -You can try out such a case with the input string +A slightly more complicated parser is \texttt{('a' || 'b') $\sim$ 'b'} which parses as first character either +an \texttt{a} or \texttt{b} followed by a \texttt{b}. This parser produces the following results. \begin{center} -\texttt{\Grid{then\VS}}\;\dots -\end{center} - -\noindent -which can both be recognised as a keyword, but also an identifier. - -While in the example above the last nullable derivative is the one directly before -the derivative turns zeroable, this is not always the case. Imagine, identifiers can be -letters, as permuted by the regular expression \textit{IDENT}, but must end with an -undercore. - -\begin{center} -\begin{tabular}{lcl} -$\textit{NEWIDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^* \cdot \_$\\ +\begin{tabular}{rcl} +input string & & output\medskip\\ +\texttt{\Grid{abc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{a}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\ +\texttt{\Grid{bbc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{b}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\ +\texttt{\Grid{aac}} & $\rightarrow$ & $\varnothing$ \end{tabular} \end{center} -\noindent -If we use \textit{NEWIDENT} with the input string +Note carefully that constructing the parser \texttt{'a' || ('a' $\sim$ 'b')} will result in a tying error. +The first parser has as output type a single character (recall the type of \texttt{CharParser}), +but the second parser produces a pair of characters as output. The alternative parser is however +required to have both component parsers to have the same type. We will see later how we can +build this parser without the typing error. + +The next parser combinator does not actually combine smaller parsers, but applies +a function to the result of the parser. It is implemented in Scala as follows \begin{center} -\texttt{\Grid{iffoo\VS}}\;\ldots +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +class FunParser[I, T, S] + (p: => Parser[I, T], + f: T => S) extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} +\end{lstlisting} \end{center} + \noindent -then it will only become $zeroable$ after the $\VS$ has been analysed. In this case we have to go back to the -first \texttt{f} because only +This parser combinator takes a parser \texttt{p} with output type \texttt{T} as +input as well as a function \texttt{f} with type \texttt{T => S}. The parser \texttt{p} +produces sets of type \texttt{(T, I)}. The \texttt{FunParser} combinator then +applies the function \texttt{f} to all the parer outputs. Since this function +is of type \texttt{T => S}, we obtain a parser with output type \texttt{S}. +Again Scala lets us introduce some shorthand notation for this parser combinator. +Therefore we will write \texttt{p ==> f} for it. -\begin{center} - $der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$ -\end{center} - -\noindent -is nullable. As a result we recognise successfully the keyword \texttt{if} and the remaining -string needs to be consumed by other regular expressions or lead to a lexing error. +%\bigskip +%takes advantage of the full generality---have a look +%what it produces if we call it with the string \texttt{abc} +% +%\begin{center} +%\begin{tabular}{rcl} +%input string & & output\medskip\\ +%\texttt{\Grid{abc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{a}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\ +%\texttt{\Grid{bbc}} & $\rightarrow$ & $\left\{((\texttt{\Grid{b}}, \texttt{\Grid{b}}), \texttt{\Grid{c}})\right\}$\\ +%\texttt{\Grid{aac}} & $\rightarrow$ & $\varnothing$ +%\end{tabular} +%\end{center} \end{document} %%% Local Variables: -%%% mode: latex +%%% mode: latex %%% TeX-master: t %%% End: diff -r 603e171a7b48 -r b3129cff41e9 slides/slides05.pdf Binary file slides/slides05.pdf has changed diff -r 603e171a7b48 -r b3129cff41e9 slides/slides05.tex --- a/slides/slides05.tex Mon Oct 19 15:03:44 2015 +0100 +++ b/slides/slides05.tex Mon Oct 19 23:49:25 2015 +0100 @@ -3,6 +3,7 @@ \usepackage{../graphics} \usepackage{../langs} \usepackage{../data} +\usepackage{../grammar} \hfuzz=220pt @@ -38,7 +39,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{\begin{tabular}{c}Last Week\\ Regexes and Values\end{tabular}} +\frametitle{\begin{tabular}{c}Last Week\\[-1mm] + Regexes and Values\end{tabular}} Regular expressions and their corresponding values: @@ -73,127 +75,6 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -\begin{textblock}{10}(3,5) -\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}] -\node (r1) {\bl{$r_1$}}; -\node (r2) [right=of r1] {\bl{$r_2$}}; -\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}}; -\node (r3) [right=of r2] {\bl{$r_3$}}; -\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}}; -\node (r4) [right=of r3] {\bl{$r_4$}}; -\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}}; -\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}}; -\node (v4) [below=of r4] {\bl{$v_4$}}; -\draw[->,line width=1mm] (r4) -- (v4); -\node (v3) [left=of v4] {\bl{$v_3$}}; -\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}}; -\node (v2) [left=of v3] {\bl{$v_2$}}; -\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}}; -\node (v1) [left=of v2] {\bl{$v_1$}}; -\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}}; -\draw[->,line width=0.5mm] (r3) -- (v3); -\draw[->,line width=0.5mm] (r2) -- (v2); -\draw[->,line width=0.5mm] (r1) -- (v1); -\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; -\end{tikzpicture} -\end{textblock} - -\only<2->{ -\begin{textblock}{6}(1,0.8) -\begin{bubble}[6cm] -\small -\begin{tabular}{ll} -\bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\ -\bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\ -\bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\ -\bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\ -\end{tabular} -\end{bubble} -\end{textblock}} - -\only<2->{ -\begin{textblock}{6}(5,11.4) -\begin{bubble}[7.6cm] -\small -\begin{tabular}{ll} -\bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\ -\bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\ -\bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\ -\bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\ -\end{tabular} -\end{bubble} -\end{textblock}} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Mkeps} - -Finding a (posix) value for recognising the empty string: - -\begin{center} -\begin{tabular}{lcl} - \bl{$mkeps\,\epsilon$} & \bl{$\dn$} & \bl{$Empty$}\\ - \bl{$mkeps\,r_1 + r_2$} & \bl{$\dn$} & \bl{if $nullable(r_1)$} \\ - & & \bl{then $Left(mkeps(r_1))$}\\ - & & \bl{else $Right(mkeps(r_2))$}\\ - \bl{$mkeps\,r_1 \cdot r_2$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),mkeps(r_2))$}\\ - \bl{$mkeps\,r^*$} & \bl{$\dn$} & \bl{$[]$} \\ -\end{tabular} -\end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Inject} - -Injecting (``Adding'') a character to a value\\ - -\begin{center} -\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l} - \bl{$inj\,(c)\,c\,Empty$} & \bl{$\dn$} & \bl{$Char\,c$}\\ - \bl{$inj\,(r_1 + r_2)\,c\,Left(v)$} & \bl{$\dn$} & \bl{$Left(inj\,r_1\,c\,v)$}\\ - \bl{$inj\,(r_1 + r_2)\,c\,Right(v)$} & \bl{$\dn$} & \bl{$Right(inj\,r_2\,c\,v)$}\\ - \bl{$inj\,(r_1 \cdot r_2)\,c\,Seq(v_1,v_2)$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\ - \bl{$inj\,(r_1 \cdot r_2)\,c\,Left(Seq(v_1,v_2))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\ - \bl{$inj\,(r_1 \cdot r_2)\,c\,Right(v)$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\ - \bl{$inj\,(r^*)\,c\,Seq(v,vs)$} & \bl{$\dn$} & \bl{$inj\,r\,c\,v\,::\,vs$}\\ -\end{tabular} -\end{center}\bigskip - -\footnotesize -\bl{$inj$}: 1st arg $\mapsto$ a rexp; 2nd arg $\mapsto$ a character; 3rd arg $\mapsto$ a value -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Flatten} - -Obtaining the string underlying a value: - -\begin{center} -\begin{tabular}{lcl} - \bl{$|Empty|$} & \bl{$\dn$} & \bl{$[]$}\\ - \bl{$|Char(c)|$} & \bl{$\dn$} & \bl{$[c]$}\\ - \bl{$|Left(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\ - \bl{$|Right(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\ - \bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\ - \bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\ -\end{tabular} -\end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] @@ -307,127 +188,17 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{Rectification} - -\begin{center} -\begin{tabular}{l} -\bl{$simp(r)$}:\\ -\quad case \bl{$r = r_1 + r_2$}\\ -\qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\ -\qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\ -\qquad case \bl{$r_{1s} = \varnothing$}: - return \bl{$(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$}\\ -\qquad case \bl{$r_{2s} = \varnothing$}: - return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\ -\qquad case \bl{$r_{1s} = r_{2s}$}: - return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\ -\qquad otherwise: - return \bl{$(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$}\\ -\end{tabular} -\end{center} - -\small -\begin{center} -\begin{tabular}{l@{\hspace{1mm}}l} -\bl{$f_{alt}(f_1, f_2) \dn$}\\ -\qquad \bl{$\lambda v.\,$} case \bl{$v = Left(v')$}: - & return \bl{$Left(f_1(v'))$}\\ -\qquad \phantom{$\lambda v.\,$} case \bl{$v = Right(v')$}: - & return \bl{$Right(f_2(v'))$}\\ -\end{tabular} -\end{center} - - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Rectification} - -\begin{center} -\begin{tabular}{@{\hspace{-3mm}}l} -\bl{$simp(r)$}:\ldots\\ -\quad case \bl{$r = r_1 \cdot r_2$}\\ -\qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\ -\qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\ -\qquad case \bl{$r_{1s} = \varnothing$}: - return \bl{$(\varnothing, f_{error})$}\\ -\qquad case \bl{$r_{2s} = \varnothing$}: - return \bl{$(\varnothing, f_{error})$}\\ -\qquad case \bl{$r_{1s} = \epsilon$}: -return \bl{$(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$}\\ -\qquad case \bl{$r_{2s} = \epsilon$}: -return \bl{$(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$}\\ -\qquad otherwise: - return \bl{$(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$}\\ -\end{tabular} -\end{center} - -\small -\begin{center} -\begin{tabular}{l@{\hspace{1mm}}l} -\bl{$f_{seq}(f_1, f_2) \dn$}\\ -\qquad \bl{$\lambda v.\,$ case $v = Seq(v_1, v_2)$}: - & return \bl{$Seq(f_1(v_1), f_2(v_2))$}\\ -\end{tabular} -\end{center} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Lexing with Simplification} - -\begin{center} -\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l} - \bl{$lex\,r\,[]$} & \bl{$\dn$} & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\ - \bl{$lex\,r\,c::s$} & \bl{$\dn$} & \bl{let $(r', frect) = simp(der(c, r))$}\smallskip\\ - & & \bl{$inj\,r\,c\,(frect(lex(r', s)))$}\\ -\end{tabular} -\end{center}\bigskip - -\begin{center}\small -\begin{tikzpicture}[node distance=1.1cm,every node/.style={minimum size=7mm}] -\node (r1) {\bl{$r_1$}}; -\node (r2) [right=of r1] {\bl{$r_2$}}; -\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}}; -\node (r3) [right=of r2] {\bl{$r_3$}}; -\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}}; -\node (r4) [right=of r3] {\bl{$r_4$}}; -\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}}; -\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}}; -\node (v4) [below=of r4] {\bl{$v_4$}}; -\draw[->,line width=1mm] (r4) -- (v4); -\node (v3) [left=of v4] {\bl{$v_3$}}; -\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}}; -\node (v2) [left=of v3] {\bl{$v_2$}}; -\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}}; -\node (v1) [left=of v2] {\bl{$v_1$}}; -\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}}; -\draw[->,line width=0.5mm] (r3) -- (v3); -\draw[->,line width=0.5mm] (r2) -- (v2); -\draw[->,line width=0.5mm] (r1) -- (v1); -\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; -\end{tikzpicture} -\end{center} - - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] \frametitle{Records} \begin{itemize} -\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: \bl{$Rec(x,v)$}\medskip\pause +\item new regex: \bl{$(x:r)$}\hspace{7mm}new value: +\bl{$Rec(x,v)$}\medskip \item \bl{$nullable(x:r) \dn nullable(r)$} \item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$} \item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$} \item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$} -\end{itemize}\bigskip\bigskip\pause +\end{itemize}\bigskip\bigskip \small for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$} @@ -437,6 +208,29 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] +\frametitle{Environments} + +Obtaining the ``recorded'' parts of a value: + +\begin{center} +\begin{tabular}{lcl} + \bl{$env(Empty)$} & \bl{$\dn$} & \bl{$[]$}\\ + \bl{$env(Char(c))$} & \bl{$\dn$} & \bl{$[]$}\\ + \bl{$env(Left(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\ + \bl{$env(Right(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\ + \bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\ + \bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} & + \bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\ + \bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\ +\end{tabular} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] \frametitle{While Tokens} \begin{center} @@ -455,13 +249,14 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[t] \consolas \begin{center} -"if true then then 42 else +" +\code{"if true then then 42 else +"} \end{center} \only<1>{ @@ -492,39 +287,12 @@ OP(+) \end{tabular}} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] - - -There is one small problem with the tokenizer. How should we -tokenize: - -\begin{center} -{\consolas "x - 3"} -\end{center} - -\consolas -\begin{tabular}{@{}l} -OP:\\ -\hspace{5mm}\texttt{"+"}, \texttt{"-"}\\ -NUM:\\ -\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + {''0''}\\ -NUMBER:\\ -\hspace{5mm}NUM + (\texttt{"-"} $\cdot$ NUM)\\ -\end{tabular} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Two Rules\end{tabular}} +\frametitle{Two Rules} \begin{itemize} \item Longest match rule (``maximal munch rule''): The @@ -544,30 +312,9 @@ %\item problem with infix operations, for example i-12 -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Environment} - -Obtaining the ``recorded'' parts of a regular expression: - -\begin{center} -\begin{tabular}{lcl} - \bl{$env(Empty)$} & \bl{$\dn$} & \bl{$[]$}\\ - \bl{$env(Char(c))$} & \bl{$\dn$} & \bl{$[]$}\\ - \bl{$env(Left(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\ - \bl{$env(Right(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\ - \bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\ - \bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} & - \bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\ - \bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\ -\end{tabular} -\end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] @@ -623,6 +370,681 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Regular Languages} + +While regular expressions are very useful for lexing, there is +no regular expression that can recognise the language +\bl{$a^nb^n$}.\bigskip + +\begin{center} +\bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$} +\end{center}\bigskip\bigskip + +\small +\noindent So we cannot find out with regular expressions +whether parentheses are matched or unmatched. Also regular +expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}. + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Hierarchy of Languages} + +\begin{center} +\begin{tikzpicture} +[rect/.style={draw=black!50, + top color=white, + bottom color=black!20, + rectangle, + very thick, + rounded corners}, scale=1.2] + +\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages}; +\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages}; +\draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages}; +\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages}; +\draw (0,-1.4) node [rect] {regular languages}; +\end{tikzpicture} + +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Grammars} + +A (context-free) grammar \bl{$G$} consists of + +\begin{itemize} +\item a finite set of nonterminal symbols (upper case) +\item a finite terminal symbols or tokens (lower case) +\item a start symbol (which must be a nonterminal) +\item a set of rules +\begin{center} +\bl{$A \rightarrow \textit{rhs}$} +\end{center} + +where \bl{\textit{rhs}} are sequences involving terminals and nonterminals, +including the empty sequence \bl{$\epsilon$}.\medskip\pause + +We also allow rules +\begin{center} +\bl{$A \rightarrow \textit{rhs}_1 | \textit{rhs}_2 | \ldots$} +\end{center} +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Palindromes} + +A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}: + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $\epsilon$ \\ +$S$ & $\rightarrow$ & $a\cdot S\cdot a$ \\ +$S$ & $\rightarrow$ & $b\cdot S\cdot b$ \\ +\end{tabular}} +\end{center}\pause + +or + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\ +\end{tabular}} +\end{center}\pause\bigskip + +\small +Can you find the grammar rules for matched parentheses? + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Arithmetic Expressions} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center}\pause + +\bl{\texttt{1 + 2 * 3 + 4}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{A CFG Derivation} + +\begin{enumerate} +\item Begin with a string containing only the start symbol, say \bl{$S$}\bigskip +\item Replace any nonterminal \bl{$X$} in the string by the +right-hand side of some production \bl{$X \rightarrow \textit{rhs}$}\bigskip +\item Repeat 2 until there are no nonterminals +\end{enumerate} + +\begin{center} +\bl{$S \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Example Derivation} + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\ +\end{tabular}} +\end{center}\bigskip + +\begin{center} +\begin{tabular}{lcl} +\bl{$S$} & \bl{$\rightarrow$} & \bl{$aSa$}\\ + & \bl{$\rightarrow$} & \bl{$abSba$}\\ + & \bl{$\rightarrow$} & \bl{$abaSaba$}\\ + & \bl{$\rightarrow$} & \bl{$abaaba$}\\ + + +\end{tabular} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Example Derivation} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center}\bigskip + + +\begin{center} +\begin{tabular}{@{}c@{}c@{}} +\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l} +\bl{$E$} & \bl{$\rightarrow$} & \bl{$E*E$}\\ + & \bl{$\rightarrow$} & \bl{$E+E*E$}\\ + & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\ + & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\ +\end{tabular} &\pause +\begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l} +\bl{$E$} & \bl{$\rightarrow$} & \bl{$E+E$}\\ + & \bl{$\rightarrow$} & \bl{$E+E+E$}\\ + & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\ + & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\ +\end{tabular} +\end{tabular} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Context Sensitive Grms} + + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\Rightarrow$ & $bSAA\;|\; \epsilon$\\ +$A$ & $\Rightarrow$ & $a$\\ +$bA$ & $\Rightarrow$ & $Ab$\\ +\end{tabular}} +\end{center}\pause + +\begin{center} +\bl{$S \Rightarrow\ldots\Rightarrow^? "ababaa"$} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Language of a CFG} + +Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. +Then the language \bl{$L(G)$} is: + +\begin{center} +\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$} +\end{center}\pause + +\begin{itemize} +\item Terminals, because there are no rules for replacing them. +\item Once generated, terminals are ``permanent''. +\item Terminals ought to be tokens of the language\\ +(but can also be strings). +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Parse Trees} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\ +$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\ +$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\ +\end{tabular}} +\end{center} + +\begin{center} +\begin{tikzpicture}[level distance=8mm, blue] + \node {$E$} + child {node {$F$} + child {node {$T$} + child {node {(\,$E$\,)} + child {node{$F$ *{} $F$} + child {node {$T$} child {node {2}}} + child {node {$T$} child {node {3}}} + } + } + } + child {node {+}} + child {node {$T$} + child {node {(\,$E$\,)} + child {node {$F$} + child {node {$T$ +{} $T$} + child {node {3}} + child {node {4}} + } + }} + }}; +\end{tikzpicture} +\end{center} + +\begin{textblock}{5}(1, 6.5) +\bl{\texttt{(2*3)+(3+4)}} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Arithmetic Expressions} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center}\pause\bigskip + +A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$E$} such +that \bl{$E \rightarrow^+ E\cdot \ldots$} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Ambiguous Grammars} + +A grammar is \alert{\bf ambiguous} if there is a string that +has at least two different parse trees. + + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center} + +\bl{\texttt{1 + 2 * 3 + 4}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Dangling Else} + +Another ambiguous grammar:\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & if $E$ then $E$\\ + & $|$ & if $E$ then $E$ else $E$ \\ + & $|$ & \ldots +\end{tabular}} +\end{center}\bigskip + +\bl{\texttt{if a then if x then y else c}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Parser Combinators} + +One of the simplest ways to implement a parser, see +{\small\url{https://vimeo.com/142341803}}\bigskip + +Parser combinators: \bigskip + +\begin{minipage}{1.1\textwidth} +\begin{center} +\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} +$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ +\end{center} +\end{minipage}\bigskip + +\begin{itemize} +\item atomic parsers +\item sequencing +\item alternative +\item semantic action +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Atomic parsers, for example, number tokens + +\begin{center} +\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} +\end{center}\bigskip + +\begin{itemize} +\item you consume one or more token from the\\ + input (stream) +\item also works for characters and strings +\end{itemize} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Alternative parser (code \bl{$p\;||\;q$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} and also \bl{$q$}; then combine + the outputs +\end{itemize} + +\begin{center} +\large \bl{$p(\text{input}) \cup q(\text{input})$} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Sequence parser (code \bl{$p\sim q$})\bigskip + +\begin{itemize} +\item apply first \bl{$p$} producing a set of pairs +\item then apply \bl{$q$} to the unparsed parts +\item then combine the results:\medskip +\begin{center} +((output$_1$, output$_2$), unparsed part) +\end{center} +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} +\end{tabular} +\end{center} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Function parser (code \bl{$p \Rightarrow f\;$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} producing a set of pairs +\item then apply the function \bl{$f$} to each first component +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} +\end{tabular} +\end{center}\bigskip\bigskip\pause + +\bl{$f$} is the semantic action (``what to do with the parsed input'') + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} + +Addition + +\begin{center} +\bl{$T \sim + \sim E \Rightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} +\end{center}\pause + +Multiplication + +\begin{center} +\bl{$F \sim * \sim T \Rightarrow f((x,y), z) \Rightarrow x * z$} +\end{center}\pause + +Parenthesis + +\begin{center} +\bl{$\text{(} \sim E \sim \text{)} \Rightarrow f((x,y), z) \Rightarrow y$} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Types of Parsers} + +\begin{itemize} +\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, +then \bl{$p \sim q$} returns results of type + +\begin{center} +\bl{$T \times S$} +\end{center}\pause + +\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, +and \bl{$p \;||\; q$} returns results of type + +\begin{center} +\bl{$T$} +\end{center}\pause + +\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from +\bl{$T$} to \bl{$S$}, then +\bl{$p \Rightarrow f$} returns results of type + +\begin{center} +\bl{$S$} +\end{center} + +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Input Types of Parsers} + +\begin{itemize} +\item input: \alert{token list} +\item output: set of (output\_type, \alert{token list}) +\end{itemize}\bigskip\pause + +actually it can be any input type as long as it is a kind of +sequence (for example a string) + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Scannerless Parsers} + +\begin{itemize} +\item input: \alert{string} +\item output: set of (output\_type, \alert{string}) +\end{itemize}\bigskip + +but lexers are better when whitespaces or comments need to be +filtered out; then input is a sequence of tokens + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Successful Parses} + +\begin{itemize} +\item input: string +\item output: \alert{set of} (output\_type, string) +\end{itemize}\bigskip + +a parse is successful whenever the input has been fully +``consumed'' (that is the second component is empty) + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Abstract Parser Class} + +\small +\lstinputlisting[language=Scala,xleftmargin=1mm] + {../progs/app7.scala} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +\small +\fontsize{10}{12}\selectfont +\lstinputlisting[language=Scala,xleftmargin=1mm] + {../progs/app8.scala} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Two Grammars} + +Which languages are recognised by the following two grammars? + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center}\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$U$ & $\rightarrow$ & $1 \cdot U$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Ambiguous Grammars} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, + enlargelimits=false, + xtick={0,100,...,1000}, + xmax=1050, + ymax=33, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=11cm, + height=7cm, + legend entries={unambiguous,ambiguous}, + legend pos=north east, + legend cell align=left, + x tick label style={font=\small,/pgf/number format/1000 sep={}}] +\addplot[blue,mark=*, mark options={fill=white}] + table {s-grammar1.data}; +\only<2>{ + \addplot[red,mark=triangle*, mark options={fill=white}] + table {s-grammar2.data};} +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame} +\frametitle{While-Language} +\mbox{}\\[-23mm]\mbox{} + +\bl{\begin{plstx}[rhs style=,one per line] : \meta{Stmt} ::= skip + | \meta{Id} := \meta{AExp} + | if \meta{BExp} then \meta{Block} else \meta{Block} + | while \meta{BExp} do \meta{Block}\\ +: \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts} + | \meta{Stmt}\\ +: \meta{Block} ::= \{ \meta{Stmts} \} + | \meta{Stmt}\\ +: \meta{AExp} ::= \ldots\\ +: \meta{BExp} ::= \ldots\\ \end{plstx}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{An Interpreter} + +\begin{center} +\bl{\begin{tabular}{l} +$\{$\\ +\;\;$x := 5 \text{;}$\\ +\;\;$y := x * 3\text{;}$\\ +\;\;$y := x * 4\text{;}$\\ +\;\;$x := u * 3$\\ +$\}$ +\end{tabular}} +\end{center} + +\begin{itemize} +\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause +\item \bl{\texttt{eval(stmt, env)}} +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + \end{document} %%% Local Variables: