cws/cw04-new.tex
changeset 212 4bda49ec24da
parent 191 f78b18c4c886
equal deleted inserted replaced
211:092e0879a5ae 212:4bda49ec24da
       
     1 \documentclass{article}
       
     2 \usepackage{../style}
       
     3 \usepackage{../langs}
       
     4 \usepackage{disclaimer}
       
     5 \usepackage{tikz}
       
     6 \usepackage{pgf}
       
     7 \usepackage{pgfplots}
       
     8 \usepackage{stackengine}
       
     9 %% \usepackage{accents}
       
    10 \newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}}
       
    11 
       
    12 \begin{filecontents}{re-python2.data}
       
    13 1 0.033
       
    14 5 0.036
       
    15 10 0.034
       
    16 15 0.036
       
    17 18 0.059
       
    18 19 0.084 
       
    19 20 0.141
       
    20 21 0.248
       
    21 22 0.485
       
    22 23 0.878
       
    23 24 1.71
       
    24 25 3.40
       
    25 26 7.08
       
    26 27 14.12
       
    27 28 26.69
       
    28 \end{filecontents}
       
    29 
       
    30 \begin{filecontents}{re-java.data}
       
    31 5  0.00298
       
    32 10  0.00418
       
    33 15  0.00996
       
    34 16  0.01710
       
    35 17  0.03492
       
    36 18  0.03303
       
    37 19  0.05084
       
    38 20  0.10177
       
    39 21  0.19960
       
    40 22  0.41159
       
    41 23  0.82234
       
    42 24  1.70251
       
    43 25  3.36112
       
    44 26  6.63998
       
    45 27  13.35120
       
    46 28  29.81185
       
    47 \end{filecontents}
       
    48 
       
    49 \begin{filecontents}{re-java9.data}
       
    50 1000  0.01410
       
    51 2000  0.04882
       
    52 3000  0.10609
       
    53 4000  0.17456
       
    54 5000  0.27530
       
    55 6000  0.41116
       
    56 7000  0.53741
       
    57 8000  0.70261
       
    58 9000  0.93981
       
    59 10000 0.97419
       
    60 11000 1.28697
       
    61 12000 1.51387
       
    62 14000 2.07079
       
    63 16000 2.69846
       
    64 20000 4.41823
       
    65 24000 6.46077
       
    66 26000 7.64373
       
    67 30000 9.99446
       
    68 34000 12.966885
       
    69 38000 16.281621
       
    70 42000 19.180228
       
    71 46000 21.984721
       
    72 50000 26.950203
       
    73 60000 43.0327746
       
    74 \end{filecontents}
       
    75 
       
    76 
       
    77 \begin{document}
       
    78 
       
    79 % BF IDE
       
    80 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
       
    81   
       
    82 \section*{Coursework 8 (Regular Expressions and Brainf***)}
       
    83 
       
    84 This coursework is worth 10\%. It is about regular expressions,
       
    85 pattern matching and an interpreter. The first part is due on 30
       
    86 November at 11pm; the second, more advanced part, is due on 21
       
    87 December at 11pm. In the first part, you are asked to implement a
       
    88 regular expression matcher based on derivatives of regular
       
    89 expressions. The reason is that regular expression matching in Java
       
    90 and Python can sometimes be extremely slow. The advanced part is about
       
    91 an interpreter for a very simple programming language.\bigskip
       
    92 
       
    93 \IMPORTANT{}
       
    94 
       
    95 \noindent
       
    96 Also note that the running time of each part will be restricted to a
       
    97 maximum of 360 seconds on my laptop.
       
    98 
       
    99 \DISCLAIMER{}
       
   100 
       
   101 
       
   102 \subsection*{Part 1 (6 Marks)}
       
   103 
       
   104 The task is to implement a regular expression matcher that is based on
       
   105 derivatives of regular expressions. Most of the functions are defined by
       
   106 recursion over regular expressions and can be elegantly implemented
       
   107 using Scala's pattern-matching. The implementation should deal with the
       
   108 following regular expressions, which have been predefined in the file
       
   109 \texttt{re.scala}:
       
   110 
       
   111 \begin{center}
       
   112 \begin{tabular}{lcll}
       
   113   $r$ & $::=$ & $\ZERO$     & cannot match anything\\
       
   114       &   $|$ & $\ONE$      & can only match the empty string\\
       
   115       &   $|$ & $c$         & can match a single character (in this case $c$)\\
       
   116       &   $|$ & $r_1 + r_2$ & can match a string either with $r_1$ or with $r_2$\\
       
   117   &   $|$ & $r_1\cdot r_2$ & can match the first part of a string with $r_1$ and\\
       
   118           &  & & then the second part with $r_2$\\
       
   119       &   $|$ & $r^*$       & can match zero or more times $r$\\
       
   120 \end{tabular}
       
   121 \end{center}
       
   122 
       
   123 \noindent 
       
   124 Why? Knowing how to match regular expressions and strings will let you
       
   125 solve a lot of problems that vex other humans. Regular expressions are
       
   126 one of the fastest and simplest ways to match patterns in text, and
       
   127 are endlessly useful for searching, editing and analysing data in all
       
   128 sorts of places (for example analysing network traffic in order to
       
   129 detect security breaches). However, you need to be fast, otherwise you
       
   130 will stumble over problems such as recently reported at
       
   131 
       
   132 {\small
       
   133 \begin{itemize}
       
   134 \item[$\bullet$] \url{http://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
       
   135 \item[$\bullet$] \url{https://vimeo.com/112065252}
       
   136 \item[$\bullet$] \url{http://davidvgalbraith.com/how-i-fixed-atom/}  
       
   137 \end{itemize}}
       
   138 
       
   139 \subsubsection*{Tasks (file re.scala)}
       
   140 
       
   141 The file \texttt{re.scala} has already a definition for regular
       
   142 expressions and also defines some handy shorthand notation for
       
   143 regular expressions. The notation in this document matches up
       
   144 with the code in the file as follows:
       
   145 
       
   146 \begin{center}
       
   147   \begin{tabular}{rcl@{\hspace{10mm}}l}
       
   148     & & code: & shorthand:\smallskip \\ 
       
   149   $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
       
   150   $\ONE$  & $\mapsto$ & \texttt{ONE}\\
       
   151   $c$     & $\mapsto$ & \texttt{CHAR(c)}\\
       
   152   $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
       
   153   $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
       
   154   $r^*$ & $\mapsto$ &  \texttt{STAR(r)} & \texttt{r.\%}
       
   155 \end{tabular}    
       
   156 \end{center}  
       
   157 
       
   158 
       
   159 \begin{itemize}
       
   160 \item[(1a)] Implement a function, called \textit{nullable}, by
       
   161   recursion over regular expressions. This function tests whether a
       
   162   regular expression can match the empty string. This means given a
       
   163   regular expression it either returns true or false. The function
       
   164   \textit{nullable}
       
   165   is defined as follows:
       
   166 
       
   167 \begin{center}
       
   168 \begin{tabular}{lcl}
       
   169 $\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\
       
   170 $\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
       
   171 $\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
       
   172 $\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\
       
   173 $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
       
   174 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
       
   175 \end{tabular}
       
   176 \end{center}~\hfill[1 Mark]
       
   177 
       
   178 \item[(1b)] Implement a function, called \textit{der}, by recursion over
       
   179   regular expressions. It takes a character and a regular expression
       
   180   as arguments and calculates the derivative regular expression according
       
   181   to the rules:
       
   182 
       
   183 \begin{center}
       
   184 \begin{tabular}{lcl}
       
   185 $\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\
       
   186 $\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
       
   187 $\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
       
   188 $\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\
       
   189 $\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
       
   190       & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
       
   191       & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
       
   192 $\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
       
   193 \end{tabular}
       
   194 \end{center}
       
   195 
       
   196 For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
       
   197 w.r.t.~the characters $a$, $b$ and $c$ are
       
   198 
       
   199 \begin{center}
       
   200   \begin{tabular}{lcll}
       
   201     $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & ($= r'$)\\
       
   202     $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
       
   203     $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
       
   204   \end{tabular}
       
   205 \end{center}
       
   206 
       
   207 Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
       
   208 w.r.t.~the characters $a$, $b$ and $c$ gives
       
   209 
       
   210 \begin{center}
       
   211   \begin{tabular}{lcll}
       
   212     $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
       
   213     $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & ($= r''$)\\
       
   214     $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
       
   215   \end{tabular}
       
   216 \end{center}
       
   217 
       
   218 One more example: Let $r''$ stand for the second derivative above,
       
   219 then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
       
   220 and $c$ gives
       
   221 
       
   222 \begin{center}
       
   223   \begin{tabular}{lcll}
       
   224     $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
       
   225     $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
       
   226     $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
       
   227     (is $\textit{nullable}$)                      
       
   228   \end{tabular}
       
   229 \end{center}
       
   230 
       
   231 Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
       
   232 \mbox{}\hfill\mbox{[1 Mark]}
       
   233 
       
   234 \item[(1c)] Implement the function \textit{simp}, which recursively
       
   235   traverses a regular expression from the inside to the outside, and
       
   236   on the way simplifies every regular expression on the left (see
       
   237   below) to the regular expression on the right, except it does not
       
   238   simplify inside ${}^*$-regular expressions.
       
   239 
       
   240   \begin{center}
       
   241 \begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
       
   242 $r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ 
       
   243 $\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
       
   244 $r \cdot \ONE$ & $\mapsto$ & $r$\\ 
       
   245 $\ONE \cdot r$ & $\mapsto$ & $r$\\ 
       
   246 $r + \ZERO$ & $\mapsto$ & $r$\\ 
       
   247 $\ZERO + r$ & $\mapsto$ & $r$\\ 
       
   248 $r + r$ & $\mapsto$ & $r$\\ 
       
   249 \end{tabular}
       
   250   \end{center}
       
   251 
       
   252   For example the regular expression
       
   253   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
       
   254 
       
   255   simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
       
   256   seen as trees and there are several methods for traversing
       
   257   trees. One of them corresponds to the inside-out traversal, which is
       
   258   sometimes also called post-order traversal. Furthermore,
       
   259   remember numerical expressions from school times: there you had expressions
       
   260   like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$
       
   261   and simplification rules that looked very similar to rules
       
   262   above. You would simplify such numerical expressions by replacing
       
   263   for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then
       
   264   look whether more rules are applicable. If you organise the
       
   265   simplification in an inside-out fashion, it is always clear which
       
   266   rule should be applied next.\hfill[2 Marks]
       
   267 
       
   268 \item[(1d)] Implement two functions: The first, called \textit{ders},
       
   269   takes a list of characters and a regular expression as arguments, and
       
   270   builds the derivative w.r.t.~the list as follows:
       
   271 
       
   272 \begin{center}
       
   273 \begin{tabular}{lcl}
       
   274 $\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\
       
   275   $\textit{ders}\;(c::cs)\;r$  & $\dn$ &
       
   276     $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\
       
   277 \end{tabular}
       
   278 \end{center}
       
   279 
       
   280 Note that this function is different from \textit{der}, which only
       
   281 takes a single character.
       
   282 
       
   283 The second function, called \textit{matcher}, takes a string and a
       
   284 regular expression as arguments. It builds first the derivatives
       
   285 according to \textit{ders} and after that tests whether the resulting
       
   286 derivative regular expression can match the empty string (using
       
   287 \textit{nullable}).  For example the \textit{matcher} will produce
       
   288 true for the regular expression $(a\cdot b)\cdot c$ and the string
       
   289 $abc$, but false if you give it the string $ab$. \hfill[1 Mark]
       
   290 
       
   291 \item[(1e)] Implement a function, called \textit{size}, by recursion
       
   292   over regular expressions. If a regular expression is seen as a tree,
       
   293   then \textit{size} should return the number of nodes in such a
       
   294   tree. Therefore this function is defined as follows:
       
   295 
       
   296 \begin{center}
       
   297 \begin{tabular}{lcl}
       
   298 $\textit{size}(\ZERO)$ & $\dn$ & $1$\\
       
   299 $\textit{size}(\ONE)$  & $\dn$ & $1$\\
       
   300 $\textit{size}(c)$     & $\dn$ & $1$\\
       
   301 $\textit{size}(r_1 + r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
       
   302 $\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
       
   303 $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
       
   304 \end{tabular}
       
   305 \end{center}
       
   306 
       
   307 You can use \textit{size} in order to test how much the `evil' regular
       
   308 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
       
   309 according the letter $a$ without simplification and then compare it to
       
   310 taking the derivative, but simplify the result.  The sizes
       
   311 are given in \texttt{re.scala}. \hfill[1 Mark]
       
   312 \end{itemize}
       
   313 
       
   314 \subsection*{Background}
       
   315 
       
   316 Although easily implementable in Scala, the idea behind the derivative
       
   317 function might not so easy to be seen. To understand its purpose
       
   318 better, assume a regular expression $r$ can match strings of the form
       
   319 $c\!::\!cs$ (that means strings which start with a character $c$ and have
       
   320 some rest, or tail, $cs$). If you take the derivative of $r$ with
       
   321 respect to the character $c$, then you obtain a regular expression
       
   322 that can match all the strings $cs$.  In other words, the regular
       
   323 expression $\textit{der}\;c\;r$ can match the same strings $c\!::\!cs$
       
   324 that can be matched by $r$, except that the $c$ is chopped off.
       
   325 
       
   326 Assume now $r$ can match the string $abc$. If you take the derivative
       
   327 according to $a$ then you obtain a regular expression that can match
       
   328 $bc$ (it is $abc$ where the $a$ has been chopped off). If you now
       
   329 build the derivative $\textit{der}\;b\;(\textit{der}\;a\;r)$ you
       
   330 obtain a regular expression that can match the string $c$ (it is $bc$
       
   331 where $b$ is chopped off). If you finally build the derivative of this
       
   332 according $c$, that is
       
   333 $\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain
       
   334 a regular expression that can match the empty string. You can test
       
   335 whether this is indeed the case using the function nullable, which is
       
   336 what your matcher is doing.
       
   337 
       
   338 The purpose of the $\textit{simp}$ function is to keep the regular
       
   339 expressions small. Normally the derivative function makes the regular
       
   340 expression bigger (see the SEQ case and the example in (1b)) and the
       
   341 algorithm would be slower and slower over time. The $\textit{simp}$
       
   342 function counters this increase in size and the result is that the
       
   343 algorithm is fast throughout.  By the way, this algorithm is by Janusz
       
   344 Brzozowski who came up with the idea of derivatives in 1964 in his PhD
       
   345 thesis.
       
   346 
       
   347 \begin{center}\small
       
   348 \url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)}
       
   349 \end{center}
       
   350 
       
   351 
       
   352 If you want to see how badly the regular expression matchers do in
       
   353 Java\footnote{Version 8 and below; Version 9 does not seem to be as
       
   354   catastrophic, but still worse than the regular expression matcher
       
   355 based on derivatives.} and in Python with the `evil' regular
       
   356 expression $(a^*)^*\cdot b$, then have a look at the graphs below (you
       
   357 can try it out for yourself: have a look at the file
       
   358 \texttt{catastrophic.java} and \texttt{catastrophic.py} on
       
   359 KEATS). Compare this with the matcher you have implemented. How long
       
   360 can the string of $a$'s be in your matcher and still stay within the
       
   361 30 seconds time limit?
       
   362 
       
   363 \begin{center}
       
   364 \begin{tabular}{@{}cc@{}}
       
   365 \multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings 
       
   366            $\underbrace{a\ldots a}_{n}$}\bigskip\\
       
   367   
       
   368 \begin{tikzpicture}
       
   369 \begin{axis}[
       
   370     xlabel={$n$},
       
   371     x label style={at={(1.05,0.0)}},
       
   372     ylabel={time in secs},
       
   373     y label style={at={(0.06,0.5)}},
       
   374     enlargelimits=false,
       
   375     xtick={0,5,...,30},
       
   376     xmax=33,
       
   377     ymax=45,
       
   378     ytick={0,5,...,40},
       
   379     scaled ticks=false,
       
   380     axis lines=left,
       
   381     width=6cm,
       
   382     height=5.5cm, 
       
   383     legend entries={Python, Java 8},  
       
   384     legend pos=north west]
       
   385 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
       
   386 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   387 \end{axis}
       
   388 \end{tikzpicture}
       
   389   & 
       
   390 \begin{tikzpicture}
       
   391 \begin{axis}[
       
   392     xlabel={$n$},
       
   393     x label style={at={(1.05,0.0)}},
       
   394     ylabel={time in secs},
       
   395     y label style={at={(0.06,0.5)}},
       
   396     %enlargelimits=false,
       
   397     %xtick={0,5000,...,30000},
       
   398     xmax=65000,
       
   399     ymax=45,
       
   400     ytick={0,5,...,40},
       
   401     scaled ticks=false,
       
   402     axis lines=left,
       
   403     width=6cm,
       
   404     height=5.5cm, 
       
   405     legend entries={Java 9},  
       
   406     legend pos=north west]
       
   407 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
       
   408 \end{axis}
       
   409 \end{tikzpicture}
       
   410 \end{tabular}  
       
   411 \end{center}
       
   412 \newpage
       
   413 
       
   414 \subsection*{Part 2 (4 Marks)}
       
   415 
       
   416 Coming from Java or C++, you might think Scala is a quite esoteric
       
   417 programming language.  But remember, some serious companies have built
       
   418 their business on
       
   419 Scala.\footnote{\url{https://en.wikipedia.org/wiki/Scala_(programming_language)\#Companies}}
       
   420 And there are far, far more esoteric languages out there. One is
       
   421 called \emph{brainf***}. You are asked in this part to implement an
       
   422 interpreter for this language.
       
   423 
       
   424 Urban M\"uller developed brainf*** in 1993.  A close relative of this
       
   425 language was already introduced in 1964 by Corado B\"ohm, an Italian
       
   426 computer pioneer, who unfortunately died a few months ago. The main
       
   427 feature of brainf*** is its minimalistic set of instructions---just 8
       
   428 instructions in total and all of which are single characters. Despite
       
   429 the minimalism, this language has been shown to be Turing
       
   430 complete\ldots{}if this doesn't ring any bell with you: it roughly
       
   431 means that every algorithm we know can, in principle, be implemented in
       
   432 brainf***. It just takes a lot of determination and quite a lot of
       
   433 memory resources. Some relatively sophisticated sample programs in
       
   434 brainf*** are given in the file \texttt{bf.scala}.\bigskip
       
   435 
       
   436 \noindent
       
   437 As mentioned above, brainf*** has 8 single-character commands, namely
       
   438 \texttt{'>'}, \texttt{'<'}, \texttt{'+'}, \texttt{'-'}, \texttt{'.'},
       
   439 \texttt{','}, \texttt{'['} and \texttt{']'}. Every other character is
       
   440 considered a comment.  Brainf*** operates on memory cells containing
       
   441 integers. For this it uses a single memory pointer that points at each
       
   442 stage to one memory cell. This pointer can be moved forward by one
       
   443 memory cell by using the command \texttt{'>'}, and backward by using
       
   444 \texttt{'<'}. The commands \texttt{'+'} and \texttt{'-'} increase,
       
   445 respectively decrease, by 1 the content of the memory cell to which
       
   446 the memory pointer currently points to. The commands for input/output
       
   447 are \texttt{','} and \texttt{'.'}. Output works by reading the content
       
   448 of the memory cell to which the memory pointer points to and printing
       
   449 it out as an ASCII character. Input works the other way, taking some
       
   450 user input and storing it in the cell to which the memory pointer
       
   451 points to. The commands \texttt{'['} and \texttt{']'} are looping
       
   452 constructs. Everything in between \texttt{'['} and \texttt{']'} is
       
   453 repeated until a counter (memory cell) reaches zero.  A typical
       
   454 program in brainf*** looks as follows:
       
   455 
       
   456 \begin{center}
       
   457 \begin{verbatim}
       
   458  ++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++
       
   459  ..+++.>>.<-.<.+++.------.--------.>>+.>++.
       
   460 \end{verbatim}
       
   461 \end{center}  
       
   462 
       
   463 \noindent
       
   464 This one prints out Hello World\ldots{}obviously. 
       
   465 
       
   466 \subsubsection*{Tasks (file bf.scala)}
       
   467 
       
   468 \begin{itemize}
       
   469 \item[(2a)] Brainf*** memory is represented by a \texttt{Map} from
       
   470   integers to integers. The empty memory is represented by
       
   471   \texttt{Map()}, that is nothing is stored in the
       
   472   memory. \texttt{Map(0 -> 1, 2 -> 3)} clearly stores \texttt{1} at
       
   473   memory location \texttt{0}; at \texttt{2} it stores \texttt{3}. The
       
   474   convention is that if we query the memory at a location that is
       
   475   \emph{not} defined in the \texttt{Map}, we return \texttt{0}. Write
       
   476   a function, \texttt{sread}, that takes a memory (a \texttt{Map}) and
       
   477   a memory pointer (an \texttt{Int}) as argument, and safely reads the
       
   478   corresponding memory location. If the \texttt{Map} is not defined at
       
   479   the memory pointer, \texttt{sread} returns \texttt{0}.
       
   480 
       
   481   Write another function \texttt{write}, which takes a memory, a
       
   482   memory pointer and an integer value as argument and updates the
       
   483   \texttt{Map} with the value at the given memory location. As usual
       
   484   the \texttt{Map} is not updated `in-place' but a new map is created
       
   485   with the same data, except the value is stored at the given memory
       
   486   pointer.\hfill[1 Mark]
       
   487 
       
   488 \item[(2b)] Write two functions, \texttt{jumpRight} and
       
   489   \texttt{jumpLeft} that are needed to implement the loop constructs
       
   490   of brainf***. They take a program (a \texttt{String}) and a program
       
   491   counter (an \texttt{Int}) as argument and move right (respectively
       
   492   left) in the string in order to find the \textbf{matching}
       
   493   opening/closing bracket. For example, given the following program
       
   494   with the program counter indicated by an arrow:
       
   495 
       
   496   \begin{center}
       
   497   \texttt{--[\barbelow{.}.+>--],>,++}
       
   498   \end{center}
       
   499 
       
   500   then the matching closing bracket is in 9th position (counting from 0) and
       
   501   \texttt{jumpRight} is supposed to return the position just after this
       
   502   
       
   503   \begin{center}
       
   504   \texttt{--[..+>--]\barbelow{,}>,++}
       
   505   \end{center}
       
   506 
       
   507   meaning it jumps to after the loop. Similarly, if you are in 8th position
       
   508   then \texttt{jumpLeft} is supposed to jump to just after the opening
       
   509   bracket (that is jumping to the beginning of the loop):
       
   510 
       
   511   \begin{center}
       
   512     \texttt{--[..+>-\barbelow{-}],>,++}
       
   513     \qquad$\stackrel{\texttt{jumpLeft}}{\longrightarrow}$\qquad
       
   514     \texttt{--[\barbelow{.}.+>--],>,++}
       
   515   \end{center}
       
   516 
       
   517   Unfortunately we have to take into account that there might be
       
   518   other opening and closing brackets on the `way' to find the
       
   519   matching bracket. For example in the brainf*** program
       
   520 
       
   521   \begin{center}
       
   522   \texttt{--[\barbelow{.}.[+>]--],>,++}
       
   523   \end{center}
       
   524 
       
   525   we do not want to return the index for the \texttt{'-'} in the 9th
       
   526   position, but the program counter for \texttt{','} in 12th
       
   527   position. The easiest to find out whether a bracket is matched is by
       
   528   using levels (which are the third argument in \texttt{jumpLeft} and
       
   529   \texttt{jumpLeft}). In case of \texttt{jumpRight} you increase the
       
   530   level by one whenever you find an opening bracket and decrease by
       
   531   one for a closing bracket. Then in \texttt{jumpRight} you are looking
       
   532   for the closing bracket on level \texttt{0}. For \texttt{jumpLeft} you
       
   533   do the opposite. In this way you can find \textbf{matching} brackets
       
   534   in strings such as
       
   535 
       
   536   \begin{center}
       
   537   \texttt{--[\barbelow{.}.[[-]+>[.]]--],>,++}
       
   538   \end{center}
       
   539 
       
   540   for which \texttt{jumpRight} should produce the position:
       
   541 
       
   542   \begin{center}
       
   543   \texttt{--[..[[-]+>[.]]--]\barbelow{,}>,++}
       
   544   \end{center}
       
   545 
       
   546   It is also possible that the position returned by \texttt{jumpRight} or
       
   547   \texttt{jumpLeft} is outside the string in cases where there are
       
   548   no matching brackets. For example
       
   549 
       
   550   \begin{center}
       
   551   \texttt{--[\barbelow{.}.[[-]+>[.]]--,>,++}
       
   552   \qquad$\stackrel{\texttt{jumpRight}}{\longrightarrow}$\qquad
       
   553   \texttt{--[..[[-]+>[.]]-->,++\barbelow{\;\phantom{+}}}
       
   554   \end{center}
       
   555   \hfill[1 Mark]
       
   556 
       
   557 
       
   558 \item[(2c)] Write a recursive function \texttt{run} that executes a
       
   559   brainf*** program. It takes a program, a program counter, a memory
       
   560   pointer and a memory as arguments. If the program counter is outside
       
   561   the program string, the execution stops and \texttt{run} returns the
       
   562   memory. If the program counter is inside the string, it reads the
       
   563   corresponding character and updates the program counter \texttt{pc},
       
   564   memory pointer \texttt{mp} and memory \texttt{mem} according to the
       
   565   rules shown in Figure~\ref{comms}. It then calls recursively
       
   566   \texttt{run} with the updated data.
       
   567 
       
   568   Write another function \texttt{start} that calls \texttt{run} with a
       
   569   given brainfu** program and memory, and the program counter and memory pointer
       
   570   set to~$0$. Like \texttt{run} it returns the memory after the execution
       
   571   of the program finishes. You can test your brainf**k interpreter with the
       
   572   Sierpinski triangle or the Hello world programs or have a look at
       
   573 
       
   574   \begin{center}
       
   575   \url{https://esolangs.org/wiki/Brainfuck}
       
   576   \end{center}\hfill[2 Marks]
       
   577   
       
   578   \begin{figure}[p]
       
   579   \begin{center}
       
   580     \begin{tabular}{|@{}p{0.8cm}|l|}
       
   581       \hline
       
   582       \hfill\texttt{'>'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   583                        $\bullet$ & $\texttt{pc} + 1$\\
       
   584                        $\bullet$ & $\texttt{mp} + 1$\\
       
   585                        $\bullet$ & \texttt{mem} unchanged
       
   586                      \end{tabular}\\\hline   
       
   587       \hfill\texttt{'<'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   588                        $\bullet$ & $\texttt{pc} + 1$\\
       
   589                        $\bullet$ & $\texttt{mp} - 1$\\
       
   590                        $\bullet$ & \texttt{mem} unchanged
       
   591                      \end{tabular}\\\hline   
       
   592       \hfill\texttt{'+'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   593                        $\bullet$ & $\texttt{pc} + 1$\\
       
   594                        $\bullet$ & $\texttt{mp}$ unchanged\\
       
   595                        $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) + 1}\\
       
   596                      \end{tabular}\\\hline   
       
   597       \hfill\texttt{'-'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   598                        $\bullet$ & $\texttt{pc} + 1$\\
       
   599                        $\bullet$ & $\texttt{mp}$ unchanged\\
       
   600                        $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) - 1}\\
       
   601                      \end{tabular}\\\hline   
       
   602       \hfill\texttt{'.'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   603                        $\bullet$ & $\texttt{pc} + 1$\\
       
   604                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\
       
   605                        $\bullet$ & print out \,\texttt{mem(mp)} as a character\\
       
   606                      \end{tabular}\\\hline   
       
   607       \hfill\texttt{','} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   608                        $\bullet$ & $\texttt{pc} + 1$\\
       
   609                        $\bullet$ & $\texttt{mp}$ unchanged\\
       
   610                        $\bullet$ & \texttt{mem} updated with \texttt{mp -> \textrm{input}}\\
       
   611                        \multicolumn{2}{@{}l}{the input is given by \texttt{Console.in.read().toByte}}
       
   612                      \end{tabular}\\\hline   
       
   613       \hfill\texttt{'['} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   614                        \multicolumn{2}{@{}l}{if \texttt{mem(mp) == 0} then}\\
       
   615                        $\bullet$ & $\texttt{pc = jumpRight(prog, pc + 1, 0)}$\\
       
   616                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\medskip\\
       
   617                        \multicolumn{2}{@{}l}{otherwise if \texttt{mem(mp) != 0} then}\\
       
   618                        $\bullet$ & $\texttt{pc} + 1$\\
       
   619                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\
       
   620                      \end{tabular}
       
   621                      \\\hline   
       
   622       \hfill\texttt{']'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   623                        \multicolumn{2}{@{}l}{if \texttt{mem(mp) != 0} then}\\
       
   624                        $\bullet$ & $\texttt{pc = jumpLeft(prog, pc - 1, 0)}$\\
       
   625                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\medskip\\
       
   626                        \multicolumn{2}{@{}l}{otherwise if \texttt{mem(mp) == 0} then}\\
       
   627                        $\bullet$ & $\texttt{pc} + 1$\\
       
   628                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\
       
   629                      \end{tabular}\\\hline   
       
   630       any other char & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   631                          $\bullet$ & $\texttt{pc} + 1$\\
       
   632                          $\bullet$ & \texttt{mp} and \texttt{mem} unchanged
       
   633                        \end{tabular}\\
       
   634       \hline                 
       
   635     \end{tabular}
       
   636   \end{center}
       
   637   \caption{The rules for how commands in the brainf*** language update the program counter \texttt{pc},
       
   638     memory pointer \texttt{mp} and memory \texttt{mem}.\label{comms}}
       
   639   \end{figure}
       
   640 \end{itemize}\bigskip  
       
   641 
       
   642 
       
   643 
       
   644 
       
   645 \end{document}
       
   646 
       
   647 
       
   648 %%% Local Variables: 
       
   649 %%% mode: latex
       
   650 %%% TeX-master: t
       
   651 %%% End: