diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw02.tex
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cws/cw02.tex	Tue Sep 01 16:00:37 2020 +0100
@@ -0,0 +1,222 @@
+% !TEX program = xelatex
+\documentclass{article}
+\usepackage{../style}
+\usepackage{../langs}
+
+\begin{document}
+
+\section*{Coursework 2}
+
+\noindent This coursework is worth 8\% and is due on \cwTWO{} at
+18:00. You are asked to implement the Sulzmann \& Lu lexer for the
+WHILE language. You can do the implementation in any programming
+language you like, but you need to submit the source code with which
+you answered the questions, otherwise a mark of 0\% will be
+awarded. You can submit your answers in a txt-file or as pdf. Code
+submit as code. Please package everything in a zip-file that creates a
+directory with the name \texttt{YournameYourfamilyname} on my end. Thanks!
+
+\subsection*{Disclaimer\alert}
+
+It should be understood that the work you submit represents
+your own effort. You have not copied from anyone else. An
+exception is the Scala code from KEATS and the code I showed
+during the lectures, which you can both freely use. You can
+also use your own code from the CW~1.
+
+\subsection*{Question 1}
+
+To implement a lexer for the WHILE language, you first
+need to design the appropriate regular expressions for the
+following eleven syntactic entities:
+
+\begin{enumerate}
+\item keywords are
+
+\begin{center}
+\texttt{while}, 
+\texttt{if}, 
+\texttt{then}, 
+\texttt{else}, 
+\texttt{do}, 
+\texttt{for}, 
+\texttt{to}, 
+\texttt{true}, 
+\texttt{false}, 
+\texttt{read}, 
+\texttt{write},
+\texttt{skip}
+\end{center} 
+
+\item operators are:
+\texttt{+}, 
+\texttt{-}, 
+\texttt{*}, 
+\texttt{\%},
+\texttt{/},
+\texttt{==}, 
+\texttt{!=}, 
+\texttt{>}, 
+\texttt{<},
+\texttt{<=}, 
+\texttt{>=},
+\texttt{:=},
+\texttt{\&\&},
+\texttt{||}
+
+\item letters are uppercase and lowercase
+
+\item symbols are letters plus the characters
+  \texttt{.},
+  \texttt{\_},
+  \texttt{>},
+  \texttt{<},
+  \texttt{=},
+  \texttt{;},
+  \texttt{,} and
+  \texttt{:}
+
+\item strings are enclosed by \texttt{"\ldots"} and consisting of
+  symbols, whitespaces and digits
+\item parentheses are \texttt{(}, \texttt{\{}, \texttt{)} and \texttt{\}}
+\item there are semicolons \texttt{;}
+\item whitespaces are either \texttt{" "} (one or more) or \texttt{$\backslash$n} or
+  \texttt{$\backslash$t}
+\item identifiers are letters followed by underscores \texttt{\_\!\_}, letters
+or digits
+\item numbers are \pcode{0}, \pcode{1}, \ldots and so on; give 
+a regular expression that can recognise \pcode{0}, but not numbers 
+with leading zeroes, such as \pcode{001}
+\item comments start with \texttt{//} and contain symbols, spaces and digits until the end of the line
+\end{enumerate}
+
+\noindent
+You can use the basic regular expressions 
+
+\[
+\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^*
+\]
+
+\noindent
+but also the following extended regular expressions
+
+\begin{center}
+\begin{tabular}{ll}
+$[c_1,c_2,\ldots,c_n]$ & a set of characters\\
+$r^+$ & one or more times $r$\\
+$r^?$ & optional $r$\\
+$r^{\{n\}}$ & n-times $r$\\
+\end{tabular}
+\end{center}
+
+\noindent
+Later on you will also need the record regular expression:
+
+\begin{center}
+\begin{tabular}{ll}
+$REC(x:r)$ & record regular expression\\
+\end{tabular}
+\end{center}
+
+\noindent Try to design your regular expressions to be as
+small as possible. For example you should use character sets
+for identifiers and numbers. Feel free to use the general
+character constructor \textit{CFUN} introduced in CW 1.
+
+\subsection*{Question 2}
+
+Implement the Sulzmann \& Lu lexer from the lectures. For
+this you need to implement the functions $nullable$ and $der$
+(you can use your code from CW~1), as well as $mkeps$ and
+$inj$. These functions need to be appropriately extended for
+the extended regular expressions from Q1. Write down the 
+clauses for
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+$mkeps([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
+$mkeps(r^+)$                   & $\dn$ & $?$\\
+$mkeps(r^?)$                   & $\dn$ & $?$\\
+$mkeps(r^{\{n\}})$             & $\dn$ & $?$\medskip\\
+$inj\, ([c_1,c_2,\ldots,c_n])\,c\,\ldots$  & $\dn$ & $?$\\
+$inj\, (r^+)\,c\,\ldots$                   & $\dn$ & $?$\\
+$inj\, (r^?)\,c\,\ldots$                   & $\dn$ & $?$\\
+$inj\, (r^{\{n\}})\,c\,\ldots$             & $\dn$ & $?$\\
+\end{tabular}
+\end{center}
+
+\noindent where $inj$ takes three arguments: a regular
+expression, a character and a value. Test your lexer code
+with at least the two small examples below:
+
+\begin{center}
+\begin{tabular}{ll}
+regex: & string:\smallskip\\
+$a^{\{3\}}$ & $aaa$\\
+$(a + \ONE)^{\{3\}}$ & $aa$
+\end{tabular}
+\end{center}
+
+
+\noindent Both strings should be successfully lexed by the
+respective regular expression, that means the lexer returns 
+in both examples a value.
+
+
+Also add the record regular expression from the
+lectures to your lexer and implement a function, say
+\pcode{env}, that returns all assignments from a value (such
+that you can extract easily the tokens from a value).\medskip 
+
+\noindent
+Finally give the tokens for your regular expressions from Q1 and the
+string
+
+\begin{center}
+\code{"read n;"}
+\end{center} 
+
+\noindent
+and use your \pcode{env} function to give the token sequence.
+
+
+\subsection*{Question 3}
+
+Extend your lexer from Q2 to also simplify regular expressions after
+each derivation step and rectify the computed values after each
+injection. Use this lexer to tokenize the programs in
+Figures~\ref{fib} -- \ref{collatz}. You can find the programms also on
+KEATS. Give the tokens of these programs where whitespaces are
+filtered out. Make sure you can tokenise \textbf{exactly} these
+programs.\bigskip
+
+
+\begin{figure}[h]
+\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/fib.while}}
+\caption{Fibonacci program in the WHILE language.\label{fib}}
+\end{figure}
+
+\begin{figure}[h]
+\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/loops.while}}
+\caption{The three-nested-loops program in the WHILE language. 
+(Usually used for timing measurements.)\label{loop}}
+\end{figure}
+
+\begin{figure}[h]
+\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/factors.while}}
+\caption{A program that calculates factors for numbers in the WHILE
+  language.\label{factors}}
+\end{figure}
+
+\begin{figure}[h]
+\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/collatz2.while}}
+\caption{A program that calculates the Collatz series for numbers
+  between 1 and 100.\label{collatz}}
+\end{figure}
+
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: