diff -r 663c2a9108d1 -r 4de31fdc0d67 cws/main_cw05.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/main_cw05.tex Mon Nov 02 02:31:44 2020 +0000 @@ -0,0 +1,564 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} +\usepackage{disclaimer} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{pgfplots} +\usepackage{stackengine} +%% \usepackage{accents} +\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} + +%% change Console to scala.io.StdIn.readByte() + + +\begin{document} + +\section*{Part 10 (Scala)} + +\mbox{}\hfill\textit{``If there's one feature that makes Scala, `Scala',}\\ +\mbox{}\hfill\textit{ I would pick implicits.''}\smallskip\\ +\mbox{}\hfill\textit{ --- Martin Odersky (creator of the Scala language)}\bigskip\bigskip + + +\noindent +This part is about a small (esoteric) programming language called +brainf***. Actually, we will implement an interpreter for our own version +of this language called brainf*ck++.\bigskip + +\IMPORTANT{This part is worth 10\% and you need to submit it on \cwTEN{} at 4pm.} + +\noindent +Also note that the running time of each part will be restricted to a +maximum of 30 seconds on my laptop. + +\DISCLAIMER{} +\newpage + +\subsection*{Reference Implementation} + +As usual, this Scala assignment comes with a reference implementation in +form of two \texttt{jar}-files. You can download them from KEATS. They +allow you to run any test cases on your own computer. For example you +can call Scala on the command line with the option \texttt{-cp bf.jar} +and then query any function from the \texttt{bf.scala} template file. +You have to prefix the calls with \texttt{CW10a} and \texttt{CW10b}, +respectively. For example + + +\begin{lstlisting}[language={},xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small] +$ scala -cp bf.jar +scala> import CW10a._ +scala> run(load_bff("sierpinski.bf")) ; () + * + * * + * * + * * * * + * * + * * * * + * * * * + * * * * * * * * + * * + * * * * + * * * * + * * * * * * * * + * * * * + * * * * * * * * + * * * * * * * * + * * * * * * * * * * * * * * * * + * * + * * * * + * * * * + * * * * * * * * + * * * * + * * * * * * * * + * * * * * * * * + * * * * * * * * * * * * * * * * + * * * * + * * * * * * * * + * * * * * * * * + * * * * * * * * * * * * * * * * + * * * * * * * * + * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +\end{lstlisting}%$ + +\newpage + +\subsection*{Part A (6 Marks)} + +Coming from Java or C++, you might think Scala is a rather esoteric +programming language. But remember, some serious companies have built +their business on +Scala.\footnote{\url{https://en.wikipedia.org/wiki/Scala_(programming_language)\#Companies}} +I claim functional programming is not a fad. And there are far, far +more esoteric languages out there. One is called \emph{brainf***}. +\here{https://esolangs.org/wiki/Brainfuck} +You +are asked in this part to implement an interpreter for +a slight extension of this language. + +Urban M\"uller developed the original version of brainf*** in 1993. A close +relative of this language was already introduced in 1964 by Corado +B\"ohm, an Italian computer pioneer. The main feature of brainf*** is +its minimalistic set of instructions---just 8 instructions in total +and all of which are single characters. Despite the minimalism, this +language has been shown to be Turing complete\ldots{}if this doesn't +ring any bell with you: it roughly means that every(!) algorithm can, +in principle, be implemented in brainf***. It just takes a lot of +determination and quite a lot of memory resources. + +Some relatively sophisticated sample programs in brainf*** are given +in the file \texttt{bf.scala}, including a brainf*** program for the +Sierpinski triangle and the Mandelbrot set. There seems to be even a +dedicated Windows IDE for bf programs, though I am not sure whether +this is just an elaborate April fools' joke---judge yourself: + +\begin{center} +\url{https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5} +\end{center} \bigskip + + +\noindent +As mentioned above, the original brainf*** has 8 single-character +commands. Our version of bf++ will contain the commands \texttt{'>'}, +\texttt{'<'}, \texttt{'+'}, \texttt{'-'}, \texttt{'.'}, \texttt{'['} +and \texttt{']'} from the original, and in addition the commands +\texttt{'@'}, \texttt{'*'} and \texttt{'\#'}. Every other character +is considered a comment. + +Our interpreter for bf++ operates on memory cells containing +integers. For this it uses a single memory pointer, called +\texttt{mp}, that points at each stage to one memory cell. + +\begin{center} +\begin{tikzpicture} + \draw [line width=1mm, rounded corners] (0,0) rectangle (5, 0.5); + \draw (0.5, 0) -- (0.5, 0.5); + \draw (1.0, 0) -- (1.0, 0.5); + + \draw (2.5, 0) -- (2.5, 0.5); + \draw (2.0, 0) -- (2.0, 0.5); + + \draw (4.5, 0) -- (4.5, 0.5); + \draw (4.0, 0) -- (4.0, 0.5); + + \draw (1.5,0.25) node {$\cdots$}; + \draw (3.0,0.25) node {$\cdots$}; + + \draw [->, thick] (2.25, -0.5) -- (2.25, -0.15); + \draw (2.25,-0.8) node {\texttt{mp}}; + + \draw (0.7,0.7) node {\sf\footnotesize memory}; +\end{tikzpicture} +\end{center} + +\noindent +This pointer can be moved forward by one memory cell by using the +command \texttt{'>'}, and backward by using \texttt{'<'}. The commands +\texttt{'+'} and \texttt{'-'} increase, respectively decrease, by 1 +the content of the memory cell to which the memory pointer currently +points to. The command for output in bf++ is \texttt{'.'} whereby output works +by reading the content of the memory cell to which the memory pointer +points to and printing it out as an ASCII character.\footnote{In the + original version of bf, there is also a command for input, but we + omit it here. All our programs will be ``autonomous''.} The +commands \texttt{'['} and \texttt{']'} are looping +constructs. Everything in between \texttt{'['} and \texttt{']'} is +repeated until a counter (memory cell) reaches zero. A typical +program in brainf*** looks as follows: + +\begin{center} +\begin{verbatim} + ++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.++ + +++++..+++.>>.<-.<.+++.------.--------.>>+.>++. +\end{verbatim} +\end{center} + +\noindent +This one prints out Hello World\ldots{}obviously \texttt{;o)} We also +add 3 new commands in the bf++-version of the bf-language. The purpose +of these commands we explain later. + + +\subsubsection*{Tasks (file bf.scala)} + +\begin{itemize} +\item[(1)] Write a function that takes a filename (a string) as an argument + and requests the corresponding file from disk. It returns the + content of the file as a string. If the file does not exists, + the function should return the empty string. + \mbox{}\hfill[1 Mark] + +\item[(2)] Brainf**k++ memory is represented by a \texttt{Map} from + integers to integers. The empty memory is represented by + \texttt{Map()}, that is nothing is stored in the + memory; \texttt{Map(0 -> 1, 2 -> 3)} stores \texttt{1} at + memory location \texttt{0}, and at \texttt{2} it stores \texttt{3}. The + convention is that if we query the memory at a location that is + \emph{not} defined in the \texttt{Map}, we return \texttt{0}. Write + a `safe-read' function, \texttt{sread}, that takes a memory (a \texttt{Map}) and + a memory pointer (an \texttt{Int}) as arguments, and `safely' reads the + corresponding memory location. If the \texttt{Map} is not defined at + the memory pointer, \texttt{sread} returns \texttt{0}. + + Write another function \texttt{write}, which takes a memory, a + memory pointer and an integer value as arguments and updates the + \texttt{Map} with the value at the given memory location. As usual, + the \texttt{Map} is not updated `in-place' but a new map is created + with the same data, except the new value is stored at the given memory + pointer.\hfill[1 Mark] + +\item[(3)] Write two functions, \texttt{jumpRight} and + \texttt{jumpLeft}, that are needed to implement the loop constructs + in brainf**k++. They take a program (a \texttt{String}) and a program + counter (an \texttt{Int}) as arguments and move right (respectively + left) in the string in order to find the \textbf{matching} + opening/closing bracket. For example, given the following program + with the program counter indicated by an arrow: + + \begin{center} + \texttt{--[\barbelow{.}.+>--].>.++} + \end{center} + + then the matching closing bracket is in 9th position (counting from 0) and + \texttt{jumpRight} is supposed to return the position just after this + + \begin{center} + \texttt{--[..+>--]\barbelow{.}>.++} + \end{center} + + meaning it jumps to after the loop. Similarly, if you are in 8th position, + then \texttt{jumpLeft} is supposed to jump to just after the opening + bracket (that is jumping to the beginning of the loop): + + \begin{center} + \texttt{--[..+>-\barbelow{-}].>.++} + \qquad$\stackrel{\texttt{jumpLeft}}{\longrightarrow}$\qquad + \texttt{--[\barbelow{.}.+>--].>.++} + \end{center} + + Unfortunately we have to take into account that there might be + other opening and closing brackets on the `way' to find the + matching bracket. For example in the brain*ck++ program + + \begin{center} + \texttt{--[\barbelow{.}.[+>]--].>.++} + \end{center} + + we do not want to return the index for the \texttt{'-'} in the 9th + position, but the program counter for \texttt{'.'} in 12th + position. The easiest to find out whether a bracket is matched is by + using levels (which are the third argument in \texttt{jumpLeft} and + \texttt{jumpLeft}). In case of \texttt{jumpRight} you increase the + level by one whenever you find an opening bracket and decrease by + one for a closing bracket. Then in \texttt{jumpRight} you are looking + for the closing bracket on level \texttt{0}. For \texttt{jumpLeft} you + do the opposite. In this way you can find \textbf{matching} brackets + in strings such as + + \begin{center} + \texttt{--[\barbelow{.}.[[-]+>[.]]--].>.++} + \end{center} + + for which \texttt{jumpRight} should produce the position: + + \begin{center} + \texttt{--[..[[-]+>[.]]--]\barbelow{.}>.++} + \end{center} + + It is also possible that the position returned by \texttt{jumpRight} or + \texttt{jumpLeft} is outside the string in cases where there are + no matching brackets. For example + + \begin{center} + \texttt{--[\barbelow{.}.[[-]+>[.]]--.>.++} + \qquad$\stackrel{\texttt{jumpRight}}{\longrightarrow}$\qquad + \texttt{--[..[[-]+>[.]]-->.++\barbelow{\;\phantom{+}}} + \end{center} + \hfill[2 Marks] + + +\item[(4)] Write a recursive function \texttt{compute} that runs a + brain*u*k++ program. It takes a program, a program counter, a memory + pointer and a memory as arguments. If the program counter is outside + the program string, the execution stops and \texttt{compute} returns the + memory. If the program counter is inside the string, it reads the + corresponding character and updates the program counter \texttt{pc}, + memory pointer \texttt{mp} and memory \texttt{mem} according to the + rules shown in Figure~\ref{comms}. It then calls recursively + \texttt{compute} with the updated data. The most convenient way to + implement the brainf**k++ rules in Scala is to use pattern-matching + and to calculate a triple consisting of the updated \texttt{pc}, + \texttt{mp} and \texttt{mem}. + + Write another function \texttt{run} that calls \texttt{compute} with a + given brainfu*k++ program and memory, and the program counter and memory pointer + set to~$0$. Like \texttt{compute}, it returns the memory after the execution + of the program finishes. You can test your brainf**k++ interpreter with the + Sierpinski triangle or the Hello world programs (they seem to be particularly + useful for debugging purposes), or have a look at + + \begin{center} + \url{https://esolangs.org/wiki/Brainfuck} + \end{center} + + \noindent for more bf/bf++-programs and the test cases given in \texttt{bf.scala}.\\ + \mbox{}\hfill[2 Marks] + + \begin{figure}[p] + \begin{center} + \begin{tabular}{|@{\hspace{0.5mm}}p{0.8cm}|l|} + \hline + \hfill\texttt{'>'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp} + 1$\\ + $\bullet$ & \texttt{mem} unchanged + \end{tabular}\\\hline + \hfill\texttt{'<'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp} - 1$\\ + $\bullet$ & \texttt{mem} unchanged + \end{tabular}\\\hline + \hfill\texttt{'+'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ unchanged\\ + $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) + 1}\\ + \end{tabular}\\\hline + \hfill\texttt{'-'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ unchanged\\ + $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) - 1}\\ + \end{tabular}\\\hline + \hfill\texttt{'.'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\ + $\bullet$ & print out \,\texttt{mem(mp)} as a character\\ + \end{tabular}\\\hline + %\hfill\texttt{','} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + % $\bullet$ & $\texttt{pc} + 1$\\ + % $\bullet$ & $\texttt{mp}$ unchanged\\ + % $\bullet$ & \texttt{mem} updated with \texttt{mp -> \textrm{input}}\\ + % \multicolumn{2}{@{}l}{the input is given by \texttt{Console.in.read().toByte}} + % \end{tabular}\\\hline + \hfill\texttt{'['} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + \multicolumn{2}{@{}l}{if \texttt{mem(mp) == 0} then}\\ + $\bullet$ & $\texttt{pc = jumpRight(prog, pc + 1, 0)}$\\ + $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\medskip\\ + \multicolumn{2}{@{}l}{otherwise if \texttt{mem(mp) != 0} then}\\ + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\ + \end{tabular} + \\\hline + \hfill\texttt{']'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + \multicolumn{2}{@{}l}{if \texttt{mem(mp) != 0} then}\\ + $\bullet$ & $\texttt{pc = jumpLeft(prog, pc - 1, 0)}$\\ + $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\medskip\\ + \multicolumn{2}{@{}l}{otherwise if \texttt{mem(mp) == 0} then}\\ + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\ + \end{tabular}\\\hline + \hfill\texttt{'*'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ unchanged\\ + $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) * mem(mp - 1)}\\ + \multicolumn{2}{@{}l}{this multiplies the content of the memory cells at + \texttt{mp} and \texttt{mp - 1}}\\ + \multicolumn{2}{@{}l}{and stores the result at \texttt{mp}} + \end{tabular}\\\hline + \hfill\texttt{'@'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ unchanged\\ + $\bullet$ & \texttt{mem} updated with + \texttt{mem(mp) -> mem(mp - 1)}\\ + \multicolumn{2}{@{}l}{this updates the memory cell having the index stored at \texttt{mem(mp)},}\\ + \multicolumn{2}{@{}l}{with the value stored at \texttt{mem(mp - 1)},} + \end{tabular}\\\hline + \hfill\texttt{'\#'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\ + $\bullet$ & print out \,\texttt{mem(mp)} as a number\\ + \end{tabular}\\\hline + any other char & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}} + $\bullet$ & $\texttt{pc} + 1$\\ + $\bullet$ & \texttt{mp} and \texttt{mem} unchanged + \end{tabular}\\ + \hline + \end{tabular} + \\\mbox{}\\[-10mm]\mbox{} + \end{center} + \caption{The rules for how commands in the brainf***++ language update the + program counter \texttt{pc}, + the memory pointer \texttt{mp} and the memory \texttt{mem}.\label{comms}} + \end{figure} +\end{itemize}\bigskip + +%%\newpage + +\subsection*{Part B (4 Marks)} + +I am sure you agree while it is fun to marvel at bf++-programs, like the +Sierpinski triangle or the Mandelbrot program, being interpreted, it +is much more fun to write a compiler for the bf++-language. + + +\subsubsection*{Tasks (file bfc.scala)} + +\begin{itemize} +\item[(5)] Compilers, in general, attempt to make programs run + faster by precomputing as much information as possible + before running the program. In our case we can precompute the + addresses where we need to jump at the beginning and end of + loops. + + For this write a function \texttt{jtable} that precomputes the ``jump + table'' for a bf++-program. This function takes a bf++-program + as an argument and returns a \texttt{Map[Int, Int]}. The + purpose of this Map is to record the information, in cases + a pc-position points to a '\texttt{[}' or a '\texttt{]}', + to which pc-position do we need to jump next? + + For example for the program + + \begin{center} + \texttt{+++++[->++++++++++<]>--<+++[->>++++++++++} + \texttt{<<]>>++<<----------[+>.>.<+<]} + \end{center} + + we obtain the Map (note the precise numbers might differ depending on white + spaces etc.~in the bf-program): + + \begin{center} + \texttt{Map(69 -> 61, 5 -> 20, 60 -> 70, 27 -> 44, 43 -> 28, 19 -> 6)} + \end{center} + + This Map states that for the '\texttt{[}' on position 5, we need to + jump to position 20, which is just after the corresponding '\texttt{]}'. + Similarly, for the '\texttt{]}' on position 19, we need to jump to + position 6, which is just after the '\texttt{[}' on position 5, and so + on. The idea is to not calculate this information each time + we hit a bracket, but just look up this information in the + \texttt{jtable}. + + Then adapt the \texttt{compute} and \texttt{run} functions + from Part 1 in order to take advantage of the information + stored in the \texttt{jtable}. This means whenever \texttt{jumpLeft} + and \texttt{jumpRight} was called previously, you should look + up the jump address in the \texttt{jtable}. Feel free to reuse + the function \texttt{jumpLeft} and \texttt{jumpRight} for + calculating the \texttt{jtable}.\hfill{[1 Mark]} + +\item[(6)] Compilers try to eliminate any ``dead'' code that could + slow down programs and also perform what is often called + \emph{peephole + optimisations}.\footnote{\url{https://en.wikipedia.org/wiki/Peephole_optimization}} + For the latter consider that it is difficult for compilers to + comprehend what is intended with whole programs, but they are very good + at finding out what small snippets of code do, and then try to + generate faster code for such snippets. + + In our case, dead code is everything that is not a bf++-command. + Therefore write a function \texttt{optimise} which deletes such + dead code from a bf++-program. Moreover this function should replace every substring + of the form \pcode{[-]} by a new command \texttt{0}. + The idea is that the loop \pcode{[-]} just resets the + memory at the current location to 0. It is more efficient + to do this in a single step, rather than stepwise in a loop as in + the original bf++-programs. + + In the extended \texttt{compute3} and \texttt{run3} functions you should + implement this command by writing 0 to \pcode{mem(mp)}, that is use + \pcode{write(mem, mp, 0)} as the rule for the command \texttt{0}. + The easiest way to modify a string in this way is to use the regular + expression \pcode{"""[^<>+-.\\[\\]@\#*]"""}, which recognises everything that is + not a bf++-command. Similarly, the + regular expression \pcode{"""\\[-\\]"""} finds all occurrences of \pcode{[-]}. By using the Scala method \pcode{.replaceAll} you can replace substrings + with new strings.\\ + \mbox{}\hfill{[1 Mark]} + +\item[(7)] Finally, real compilers try to take advantage of modern + CPUs which often provide complex operations in hardware that can + combine many smaller instructions into a single faster instruction. + + In our case we can optimise the several single increments performed at a + memory cell, for example \pcode{++++}, by a single ``increment by + 4''. For this optimisation we just have to make sure these single + increments are all next to each other. Similar optimisations should apply + for the bf-commands \pcode{-}, \pcode{<} and + \pcode{>}, which can all be replaced by extended versions that take + the amount of the increment (decrement) into account. We will do + this by introducing two-character bf++-commands. For example + + \begin{center} + \begin{tabular}{l|l} + original bf-cmds & replacement\\ + \hline + \pcode{+} & \pcode{+A}\\ + \pcode{++} & \pcode{+B}\\ + \pcode{+++} & \pcode{+C}\\ + \ldots{} & \ldots{}\\ + \pcode{+++....++} & \pcode{+Z}\\ + \hspace{5mm}(these are 26 \pcode{+}'s)\\ + \end{tabular} + \end{center} + + + If there are more + than 26 \pcode{+}'s in a row, then more than one ``two-character'' + bf-commands need to be generated (the idea is that more than + 26 copies of a single bf++-command in a row is a rare occurrence in + actual bf++-programs). Similar replacements apply + for \pcode{-}, \pcode{<} and \pcode{>}, but + all other bf++-commands should be unaffected by this + change. + + For this write a function \texttt{combine} which replaces sequences + of repeated increment and decrement commands by appropriate + two-character commands. In the functions \pcode{compute4} and + \pcode{run4}, the ``combine'' and the optimisation from (6) should + be performed. Make sure that when a two-character bf++-command is + encountered you need to increase the \pcode{pc}-counter by two in + order to progress to the next command. For example + + \begin{center} + \pcode{combine(optimise(load_bff("benchmark.bf")))} + \end{center} + + generates the improved program + + \begin{center} + \pcode{>A+B[A-A]++[<+++++++++++++>-]<[[}\hspace{3mm}\ldots + \end{center} + + As you can see, the compiler bets on saving a lot of time on the + \pcode{+B} and \pcode{+M} steps so that the optimisations is + worthwhile overall (of course for the \pcode{>A}'s and so on, the compiler incurs a + penalty). Luckily, after you have performed all + optimisations in (5) - (7), you can expect that the + \pcode{benchmark.bf} program runs four to five times faster. + You can also test whether your compiler produces the correct result + by testing for example + + \begin{center} + \pcode{run(load_bff("sierpinski.bf")) == run4(load_bff("sierpinski.bf"))} + \end{center} + + which should return true for all the different compiler stages. \\ + \mbox{}\hfill{[2 Marks]} +\end{itemize} + +\end{document} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: