diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw02.tex --- a/coursework/cw02.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,222 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\begin{document} - -\section*{Coursework 2} - -\noindent This coursework is worth 8\% and is due on \cwTWO{} at -18:00. You are asked to implement the Sulzmann \& Lu lexer for the -WHILE language. You can do the implementation in any programming -language you like, but you need to submit the source code with which -you answered the questions, otherwise a mark of 0\% will be -awarded. You can submit your answers in a txt-file or as pdf. Code -submit as code. Please package everything in a zip-file that creates a -directory with the name \texttt{YournameYourfamilyname} on my end. Thanks! - -\subsection*{Disclaimer\alert} - -It should be understood that the work you submit represents -your own effort. You have not copied from anyone else. An -exception is the Scala code from KEATS and the code I showed -during the lectures, which you can both freely use. You can -also use your own code from the CW~1. - -\subsection*{Question 1} - -To implement a lexer for the WHILE language, you first -need to design the appropriate regular expressions for the -following eleven syntactic entities: - -\begin{enumerate} -\item keywords are - -\begin{center} -\texttt{while}, -\texttt{if}, -\texttt{then}, -\texttt{else}, -\texttt{do}, -\texttt{for}, -\texttt{to}, -\texttt{true}, -\texttt{false}, -\texttt{read}, -\texttt{write}, -\texttt{skip} -\end{center} - -\item operators are: -\texttt{+}, -\texttt{-}, -\texttt{*}, -\texttt{\%}, -\texttt{/}, -\texttt{==}, -\texttt{!=}, -\texttt{>}, -\texttt{<}, -\texttt{<=}, -\texttt{>=}, -\texttt{:=}, -\texttt{\&\&}, -\texttt{||} - -\item letters are uppercase and lowercase - -\item symbols are letters plus the characters - \texttt{.}, - \texttt{\_}, - \texttt{>}, - \texttt{<}, - \texttt{=}, - \texttt{;}, - \texttt{,} and - \texttt{:} - -\item strings are enclosed by \texttt{"\ldots"} and consisting of - symbols, whitespaces and digits -\item parentheses are \texttt{(}, \texttt{\{}, \texttt{)} and \texttt{\}} -\item there are semicolons \texttt{;} -\item whitespaces are either \texttt{" "} (one or more) or \texttt{$\backslash$n} or - \texttt{$\backslash$t} -\item identifiers are letters followed by underscores \texttt{\_\!\_}, letters -or digits -\item numbers are \pcode{0}, \pcode{1}, \ldots and so on; give -a regular expression that can recognise \pcode{0}, but not numbers -with leading zeroes, such as \pcode{001} -\item comments start with \texttt{//} and contain symbols, spaces and digits until the end of the line -\end{enumerate} - -\noindent -You can use the basic regular expressions - -\[ -\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* -\] - -\noindent -but also the following extended regular expressions - -\begin{center} -\begin{tabular}{ll} -$[c_1,c_2,\ldots,c_n]$ & a set of characters\\ -$r^+$ & one or more times $r$\\ -$r^?$ & optional $r$\\ -$r^{\{n\}}$ & n-times $r$\\ -\end{tabular} -\end{center} - -\noindent -Later on you will also need the record regular expression: - -\begin{center} -\begin{tabular}{ll} -$REC(x:r)$ & record regular expression\\ -\end{tabular} -\end{center} - -\noindent Try to design your regular expressions to be as -small as possible. For example you should use character sets -for identifiers and numbers. Feel free to use the general -character constructor \textit{CFUN} introduced in CW 1. - -\subsection*{Question 2} - -Implement the Sulzmann \& Lu lexer from the lectures. For -this you need to implement the functions $nullable$ and $der$ -(you can use your code from CW~1), as well as $mkeps$ and -$inj$. These functions need to be appropriately extended for -the extended regular expressions from Q1. Write down the -clauses for - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} -$mkeps([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ -$mkeps(r^+)$ & $\dn$ & $?$\\ -$mkeps(r^?)$ & $\dn$ & $?$\\ -$mkeps(r^{\{n\}})$ & $\dn$ & $?$\medskip\\ -$inj\, ([c_1,c_2,\ldots,c_n])\,c\,\ldots$ & $\dn$ & $?$\\ -$inj\, (r^+)\,c\,\ldots$ & $\dn$ & $?$\\ -$inj\, (r^?)\,c\,\ldots$ & $\dn$ & $?$\\ -$inj\, (r^{\{n\}})\,c\,\ldots$ & $\dn$ & $?$\\ -\end{tabular} -\end{center} - -\noindent where $inj$ takes three arguments: a regular -expression, a character and a value. Test your lexer code -with at least the two small examples below: - -\begin{center} -\begin{tabular}{ll} -regex: & string:\smallskip\\ -$a^{\{3\}}$ & $aaa$\\ -$(a + \ONE)^{\{3\}}$ & $aa$ -\end{tabular} -\end{center} - - -\noindent Both strings should be successfully lexed by the -respective regular expression, that means the lexer returns -in both examples a value. - - -Also add the record regular expression from the -lectures to your lexer and implement a function, say -\pcode{env}, that returns all assignments from a value (such -that you can extract easily the tokens from a value).\medskip - -\noindent -Finally give the tokens for your regular expressions from Q1 and the -string - -\begin{center} -\code{"read n;"} -\end{center} - -\noindent -and use your \pcode{env} function to give the token sequence. - - -\subsection*{Question 3} - -Extend your lexer from Q2 to also simplify regular expressions after -each derivation step and rectify the computed values after each -injection. Use this lexer to tokenize the programs in -Figures~\ref{fib} -- \ref{collatz}. You can find the programms also on -KEATS. Give the tokens of these programs where whitespaces are -filtered out. Make sure you can tokenise \textbf{exactly} these -programs.\bigskip - - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/fib.while}} -\caption{Fibonacci program in the WHILE language.\label{fib}} -\end{figure} - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/loops.while}} -\caption{The three-nested-loops program in the WHILE language. -(Usually used for timing measurements.)\label{loop}} -\end{figure} - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/factors.while}} -\caption{A program that calculates factors for numbers in the WHILE - language.\label{factors}} -\end{figure} - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/collatz2.while}} -\caption{A program that calculates the Collatz series for numbers - between 1 and 100.\label{collatz}} -\end{figure} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: