# HG changeset patch # User Christian Urban # Date 1445263424 -3600 # Node ID 603e171a7b4828167c7fc1bd20ba225e335e9cbf # Parent d9c784c713058f0f36faa392b5c83ca0aa3c5283 updated diff -r d9c784c71305 -r 603e171a7b48 handouts/ho04.pdf Binary file handouts/ho04.pdf has changed diff -r d9c784c71305 -r 603e171a7b48 handouts/ho04.tex --- a/handouts/ho04.tex Mon Oct 19 14:45:50 2015 +0100 +++ b/handouts/ho04.tex Mon Oct 19 15:03:44 2015 +0100 @@ -730,27 +730,83 @@ \end{center} Recall that we want to lex a little programming language, -called the \emph{While}-language. The main keywords in this -language are \pcode{while}, \pcode{if}, \pcode{then} and -\pcode{else}. As usual we have identifiers, operators, numbers -and so on. For this we would need to design the corresponding -regular expressions to recognise these syntactic categories. I -let you do this design task. Having these regular expressions -at our disposal we can form the regular expression +called the \emph{While}-language. A simple program in this +language is shown in Figure~\ref{while}. The main keywords in +this language are \pcode{while}, \pcode{if}, \pcode{then} and +\pcode{else}. As usual we have syntactic categories for +identifiers, operators, numbers and so on. For this we would +need to design the corresponding regular expressions to +recognise these syntactic categories. I let you do this design +task. Having these regular expressions at our disposal, we can +form the regular expression + +\begin{figure}[t] +\begin{center} +\mbox{\lstinputlisting[language=while]{../progs/fib.while}} +\end{center} +\caption{The Fibonacci program in the While-language.\label{while}} +\end{figure} \begin{center} -\begin{tabular}{rcl} -\textit{WhileRegs} & $\dn$ & (($k$, KEYWORD) +\\ - & & ($i$, ID) +\\ - & & ($o$, OP) + \\ - & & ($n$, NUM) + \\ - & & ($s$, SEMI) + \\ - & & ($p$, (LPAREN + RPAREN)) +\\ - & & ($b$, (BEGIN + END)) + \\ - & & ($w$, WHITESPACE))$^*$ +$\textit{WhileRegs} \;\dn\; +\left(\begin{array}{l} + (k, KeyWords)\; +\\ + (i, Ids)\;+\\ + (o, Ops)\;+ \\ + (n, Nums)\;+ \\ + (s, Semis)\;+ \\ + (p, (LParens + RParens))\;+\\ + (b, (Begin + End))\;+ \\ + (w, WhiteSpacess) + \end{array}\right)\LARGE^\mbox{\LARGE*}$ +\end{center} + +\noindent and ask the algorithm by Sulzmann \& Lu to lex, say +the following string + +\begin{center}\large +\code{"if true then then 42 else +"} +\end{center} + +\noindent +By using the records and extracting the environment, the +result is the following list: + +\begin{center}\tt +\begin{tabular}{l} +KEYWORD(if),\\ +WHITESPACE,\\ +IDENT(true),\\ +WHITESPACE,\\ +KEYWORD(then),\\ +WHITESPACE,\\ +KEYWORD(then),\\ +WHITESPACE,\\ +NUM(42),\\ +WHITESPACE,\\ +KEYWORD(else),\\ +WHITESPACE,\\ +OP(+) \end{tabular} \end{center} +\noindent Typically we are not interested in the whitespaces +and comments and would filter them out: this gives + +\begin{center}\tt +\begin{tabular}{l} +KEYWORD(if),\\ +IDENT(true),\\ +KEYWORD(then),\\ +KEYWORD(then),\\ +NUM(42),\\ +KEYWORD(else),\\ +OP(+) +\end{tabular} +\end{center} + +\noindent +which will be the input for the next phase of our compiler. \end{document}