diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw02.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,222 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\begin{document} + +\section*{Coursework 2} + +\noindent This coursework is worth 8\% and is due on \cwTWO{} at +18:00. You are asked to implement the Sulzmann \& Lu lexer for the +WHILE language. You can do the implementation in any programming +language you like, but you need to submit the source code with which +you answered the questions, otherwise a mark of 0\% will be +awarded. You can submit your answers in a txt-file or as pdf. Code +submit as code. Please package everything in a zip-file that creates a +directory with the name \texttt{YournameYourfamilyname} on my end. Thanks! + +\subsection*{Disclaimer\alert} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code from KEATS and the code I showed +during the lectures, which you can both freely use. You can +also use your own code from the CW~1. + +\subsection*{Question 1} + +To implement a lexer for the WHILE language, you first +need to design the appropriate regular expressions for the +following eleven syntactic entities: + +\begin{enumerate} +\item keywords are + +\begin{center} +\texttt{while}, +\texttt{if}, +\texttt{then}, +\texttt{else}, +\texttt{do}, +\texttt{for}, +\texttt{to}, +\texttt{true}, +\texttt{false}, +\texttt{read}, +\texttt{write}, +\texttt{skip} +\end{center} + +\item operators are: +\texttt{+}, +\texttt{-}, +\texttt{*}, +\texttt{\%}, +\texttt{/}, +\texttt{==}, +\texttt{!=}, +\texttt{>}, +\texttt{<}, +\texttt{<=}, +\texttt{>=}, +\texttt{:=}, +\texttt{\&\&}, +\texttt{||} + +\item letters are uppercase and lowercase + +\item symbols are letters plus the characters + \texttt{.}, + \texttt{\_}, + \texttt{>}, + \texttt{<}, + \texttt{=}, + \texttt{;}, + \texttt{,} and + \texttt{:} + +\item strings are enclosed by \texttt{"\ldots"} and consisting of + symbols, whitespaces and digits +\item parentheses are \texttt{(}, \texttt{\{}, \texttt{)} and \texttt{\}} +\item there are semicolons \texttt{;} +\item whitespaces are either \texttt{" "} (one or more) or \texttt{$\backslash$n} or + \texttt{$\backslash$t} +\item identifiers are letters followed by underscores \texttt{\_\!\_}, letters +or digits +\item numbers are \pcode{0}, \pcode{1}, \ldots and so on; give +a regular expression that can recognise \pcode{0}, but not numbers +with leading zeroes, such as \pcode{001} +\item comments start with \texttt{//} and contain symbols, spaces and digits until the end of the line +\end{enumerate} + +\noindent +You can use the basic regular expressions + +\[ +\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* +\] + +\noindent +but also the following extended regular expressions + +\begin{center} +\begin{tabular}{ll} +$[c_1,c_2,\ldots,c_n]$ & a set of characters\\ +$r^+$ & one or more times $r$\\ +$r^?$ & optional $r$\\ +$r^{\{n\}}$ & n-times $r$\\ +\end{tabular} +\end{center} + +\noindent +Later on you will also need the record regular expression: + +\begin{center} +\begin{tabular}{ll} +$REC(x:r)$ & record regular expression\\ +\end{tabular} +\end{center} + +\noindent Try to design your regular expressions to be as +small as possible. For example you should use character sets +for identifiers and numbers. Feel free to use the general +character constructor \textit{CFUN} introduced in CW 1. + +\subsection*{Question 2} + +Implement the Sulzmann \& Lu lexer from the lectures. For +this you need to implement the functions $nullable$ and $der$ +(you can use your code from CW~1), as well as $mkeps$ and +$inj$. These functions need to be appropriately extended for +the extended regular expressions from Q1. Write down the +clauses for + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} +$mkeps([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ +$mkeps(r^+)$ & $\dn$ & $?$\\ +$mkeps(r^?)$ & $\dn$ & $?$\\ +$mkeps(r^{\{n\}})$ & $\dn$ & $?$\medskip\\ +$inj\, ([c_1,c_2,\ldots,c_n])\,c\,\ldots$ & $\dn$ & $?$\\ +$inj\, (r^+)\,c\,\ldots$ & $\dn$ & $?$\\ +$inj\, (r^?)\,c\,\ldots$ & $\dn$ & $?$\\ +$inj\, (r^{\{n\}})\,c\,\ldots$ & $\dn$ & $?$\\ +\end{tabular} +\end{center} + +\noindent where $inj$ takes three arguments: a regular +expression, a character and a value. Test your lexer code +with at least the two small examples below: + +\begin{center} +\begin{tabular}{ll} +regex: & string:\smallskip\\ +$a^{\{3\}}$ & $aaa$\\ +$(a + \ONE)^{\{3\}}$ & $aa$ +\end{tabular} +\end{center} + + +\noindent Both strings should be successfully lexed by the +respective regular expression, that means the lexer returns +in both examples a value. + + +Also add the record regular expression from the +lectures to your lexer and implement a function, say +\pcode{env}, that returns all assignments from a value (such +that you can extract easily the tokens from a value).\medskip + +\noindent +Finally give the tokens for your regular expressions from Q1 and the +string + +\begin{center} +\code{"read n;"} +\end{center} + +\noindent +and use your \pcode{env} function to give the token sequence. + + +\subsection*{Question 3} + +Extend your lexer from Q2 to also simplify regular expressions after +each derivation step and rectify the computed values after each +injection. Use this lexer to tokenize the programs in +Figures~\ref{fib} -- \ref{collatz}. You can find the programms also on +KEATS. Give the tokens of these programs where whitespaces are +filtered out. Make sure you can tokenise \textbf{exactly} these +programs.\bigskip + + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/fib.while}} +\caption{Fibonacci program in the WHILE language.\label{fib}} +\end{figure} + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/loops.while}} +\caption{The three-nested-loops program in the WHILE language. +(Usually used for timing measurements.)\label{loop}} +\end{figure} + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/factors.while}} +\caption{A program that calculates factors for numbers in the WHILE + language.\label{factors}} +\end{figure} + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/collatz2.while}} +\caption{A program that calculates the Collatz series for numbers + between 1 and 100.\label{collatz}} +\end{figure} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: