cws/cw04.tex
changeset 221 9e7897f25e13
parent 218 22705d22c105
child 222 e52cc402caee
equal deleted inserted replaced
220:3020f8c76baa 221:9e7897f25e13
    42 24  1.70251
    42 24  1.70251
    43 25  3.36112
    43 25  3.36112
    44 26  6.63998
    44 26  6.63998
    45 27  13.35120
    45 27  13.35120
    46 28  29.81185
    46 28  29.81185
       
    47 \end{filecontents}
       
    48 
       
    49 \begin{filecontents}{re-js.data}
       
    50 5   0.061
       
    51 10  0.061
       
    52 15  0.061
       
    53 20  0.070
       
    54 23  0.131
       
    55 25  0.308
       
    56 26  0.564
       
    57 28  1.994
       
    58 30  7.648
       
    59 31  15.881 
       
    60 32  32.190
    47 \end{filecontents}
    61 \end{filecontents}
    48 
    62 
    49 \begin{filecontents}{re-java9.data}
    63 \begin{filecontents}{re-java9.data}
    50 1000  0.01410
    64 1000  0.01410
    51 2000  0.04882
    65 2000  0.04882
    77 \begin{document}
    91 \begin{document}
    78 
    92 
    79 % BF IDE
    93 % BF IDE
    80 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
    94 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
    81   
    95   
    82 \section*{Coursework 8 (Regular Expressions and Brainf***)}
    96 \section*{Coursework 9 (Scala)}
    83 
    97 
    84 This coursework is worth 10\%. It is about regular expressions,
    98 This coursework is worth 10\%. It is about a regular expression
    85 pattern matching and an interpreter. The first part is due on 30
    99 matcher and the shunting yard algorithm by Dijkstra. The first part is
    86 November at 11pm; the second, more advanced part, is due on 21
   100 due on 6 December at 11pm; the second, more advanced part, is due on
    87 December at 11pm. In the first part, you are asked to implement a
   101 21 December at 11pm. In the first part, you are asked to implement a
    88 regular expression matcher based on derivatives of regular
   102 regular expression matcher based on derivatives of regular
    89 expressions. The reason is that regular expression matching in Java
   103 expressions. The background is that regular expression matching in
    90 and Python can sometimes be extremely slow. The advanced part is about
   104 languages like Java, JavaScript and Python can sometimes be excruciatingly
    91 an interpreter for a very simple programming language.\bigskip
   105 slow. The advanced part is about the shunting yard algorithm that
       
   106 transforms the usual infix notation of arithmetic expressions into the
       
   107 postfix notation, which is for example used in compilers.\bigskip
    92 
   108 
    93 \IMPORTANT{}
   109 \IMPORTANT{}
    94 
   110 
    95 \noindent
   111 \noindent
    96 Also note that the running time of each part will be restricted to a
   112 Also note that the running time of each part will be restricted to a
    97 maximum of 30 seconds on my laptop.
   113 maximum of 30 seconds on my laptop.
    98 
   114 
    99 \DISCLAIMER{}
   115 \DISCLAIMER{}
       
   116 
       
   117 \subsection*{Reference Implementation}
       
   118 
       
   119 This Scala assignment comes with three reference implementations in form of
       
   120 \texttt{jar}-files. This allows you to run any test cases on your own
       
   121 computer. For example you can call Scala on the command line with the
       
   122 option \texttt{-cp re.jar} and then query any function from the
       
   123 \texttt{re.scala} template file. As usual you have to
       
   124 prefix the calls with \texttt{CW9a}, \texttt{CW9b} and \texttt{CW9c}.
       
   125 Since some tasks are time sensitive, you can check the reference
       
   126 implementation as follows: if you want to know how long it takes
       
   127 to match strings of $a$'s using the regular expression $(a^*)^*\cdot b$
       
   128 you can querry as follows:
       
   129 
       
   130 
       
   131 
       
   132 \begin{lstlisting}[language={},xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small]
       
   133 $ scala -cp re.jar
       
   134 scala> import CW9a._  
       
   135 scala> for (i <- 0 to 5000000 by 500000) {
       
   136   | println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + "secs.")
       
   137   | }
       
   138 0 0.00002 secs.
       
   139 500000 0.10608 secs.
       
   140 1000000 0.22286 secs.
       
   141 1500000 0.35982 secs.
       
   142 2000000 0.45828 secs.
       
   143 2500000 0.59558 secs.
       
   144 3000000 0.73191 secs.
       
   145 3500000 0.83499 secs.
       
   146 4000000 0.99149 secs.
       
   147 4500000 1.15395 secs.
       
   148 5000000 1.29659 secs.
       
   149 \end{lstlisting}%$
   100 
   150 
   101 
   151 
   102 \subsection*{Part 1 (6 Marks)}
   152 \subsection*{Part 1 (6 Marks)}
   103 
   153 
   104 The task is to implement a regular expression matcher that is based on
   154 The task is to implement a regular expression matcher that is based on
   114       &   $|$ & $\ONE$      & can only match the empty string\\
   164       &   $|$ & $\ONE$      & can only match the empty string\\
   115       &   $|$ & $c$         & can match a single character (in this case $c$)\\
   165       &   $|$ & $c$         & can match a single character (in this case $c$)\\
   116       &   $|$ & $r_1 + r_2$ & can match a string either with $r_1$ or with $r_2$\\
   166       &   $|$ & $r_1 + r_2$ & can match a string either with $r_1$ or with $r_2$\\
   117   &   $|$ & $r_1\cdot r_2$ & can match the first part of a string with $r_1$ and\\
   167   &   $|$ & $r_1\cdot r_2$ & can match the first part of a string with $r_1$ and\\
   118           &  & & then the second part with $r_2$\\
   168           &  & & then the second part with $r_2$\\
   119       &   $|$ & $r^*$       & can match zero or more times $r$\\
   169       &   $|$ & $r^*$       & can match a string with zero or more copies of $r$\\
   120 \end{tabular}
   170 \end{tabular}
   121 \end{center}
   171 \end{center}
   122 
   172 
   123 \noindent 
   173 \noindent 
   124 Why? Knowing how to match regular expressions and strings will let you
   174 Why? Regular expressions are
   125 solve a lot of problems that vex other humans. Regular expressions are
   175 one of the simplest ways to match patterns in text, and
   126 one of the fastest and simplest ways to match patterns in text, and
       
   127 are endlessly useful for searching, editing and analysing data in all
   176 are endlessly useful for searching, editing and analysing data in all
   128 sorts of places (for example analysing network traffic in order to
   177 sorts of places (for example analysing network traffic in order to
   129 detect security breaches). However, you need to be fast, otherwise you
   178 detect security breaches). However, you need to be fast, otherwise you
   130 will stumble over problems such as recently reported at
   179 will stumble over problems such as recently reported at
   131 
   180 
   133 \begin{itemize}
   182 \begin{itemize}
   134 \item[$\bullet$] \url{http://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
   183 \item[$\bullet$] \url{http://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
   135 \item[$\bullet$] \url{https://vimeo.com/112065252}
   184 \item[$\bullet$] \url{https://vimeo.com/112065252}
   136 \item[$\bullet$] \url{http://davidvgalbraith.com/how-i-fixed-atom/}  
   185 \item[$\bullet$] \url{http://davidvgalbraith.com/how-i-fixed-atom/}  
   137 \end{itemize}}
   186 \end{itemize}}
       
   187 
       
   188 % Knowing how to match regular expressions and strings will let you
       
   189 % solve a lot of problems that vex other humans.
       
   190 
   138 
   191 
   139 \subsubsection*{Tasks (file re.scala)}
   192 \subsubsection*{Tasks (file re.scala)}
   140 
   193 
   141 The file \texttt{re.scala} has already a definition for regular
   194 The file \texttt{re.scala} has already a definition for regular
   142 expressions and also defines some handy shorthand notation for
   195 expressions and also defines some handy shorthand notation for
   155 \end{tabular}    
   208 \end{tabular}    
   156 \end{center}  
   209 \end{center}  
   157 
   210 
   158 
   211 
   159 \begin{itemize}
   212 \begin{itemize}
   160 \item[(1a)] Implement a function, called \textit{nullable}, by
   213 \item[(1)] Implement a function, called \textit{nullable}, by
   161   recursion over regular expressions. This function tests whether a
   214   recursion over regular expressions. This function tests whether a
   162   regular expression can match the empty string. This means given a
   215   regular expression can match the empty string. This means given a
   163   regular expression it either returns true or false. The function
   216   regular expression it either returns true or false. The function
   164   \textit{nullable}
   217   \textit{nullable}
   165   is defined as follows:
   218   is defined as follows:
   173 $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
   226 $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
   174 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
   227 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
   175 \end{tabular}
   228 \end{tabular}
   176 \end{center}~\hfill[1 Mark]
   229 \end{center}~\hfill[1 Mark]
   177 
   230 
   178 \item[(1b)] Implement a function, called \textit{der}, by recursion over
   231 \item[(2)] Implement a function, called \textit{der}, by recursion over
   179   regular expressions. It takes a character and a regular expression
   232   regular expressions. It takes a character and a regular expression
   180   as arguments and calculates the derivative regular expression according
   233   as arguments and calculates the derivative regular expression according
   181   to the rules:
   234   to the rules:
   182 
   235 
   183 \begin{center}
   236 \begin{center}
   196 For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
   249 For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
   197 w.r.t.~the characters $a$, $b$ and $c$ are
   250 w.r.t.~the characters $a$, $b$ and $c$ are
   198 
   251 
   199 \begin{center}
   252 \begin{center}
   200   \begin{tabular}{lcll}
   253   \begin{tabular}{lcll}
   201     $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & ($= r'$)\\
   254     $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
   202     $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
   255     $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
   203     $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
   256     $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
   204   \end{tabular}
   257   \end{tabular}
   205 \end{center}
   258 \end{center}
   206 
   259 
   208 w.r.t.~the characters $a$, $b$ and $c$ gives
   261 w.r.t.~the characters $a$, $b$ and $c$ gives
   209 
   262 
   210 \begin{center}
   263 \begin{center}
   211   \begin{tabular}{lcll}
   264   \begin{tabular}{lcll}
   212     $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
   265     $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
   213     $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & ($= r''$)\\
   266     $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
   214     $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
   267     $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
   215   \end{tabular}
   268   \end{tabular}
   216 \end{center}
   269 \end{center}
   217 
   270 
   218 One more example: Let $r''$ stand for the second derivative above,
   271 One more example: Let $r''$ stand for the second derivative above,
   229 \end{center}
   282 \end{center}
   230 
   283 
   231 Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
   284 Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
   232 \mbox{}\hfill\mbox{[1 Mark]}
   285 \mbox{}\hfill\mbox{[1 Mark]}
   233 
   286 
   234 \item[(1c)] Implement the function \textit{simp}, which recursively
   287 \item[(3)] Implement the function \textit{simp}, which recursively
   235   traverses a regular expression from the inside to the outside, and
   288   traverses a regular expression from the inside to the outside, and
   236   on the way simplifies every regular expression on the left (see
   289   on the way simplifies every regular expression on the left (see
   237   below) to the regular expression on the right, except it does not
   290   below) to the regular expression on the right, except it does not
   238   simplify inside ${}^*$-regular expressions.
   291   simplify inside ${}^*$-regular expressions.
   239 
   292 
   253   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
   306   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
   254 
   307 
   255   simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
   308   simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
   256   seen as trees and there are several methods for traversing
   309   seen as trees and there are several methods for traversing
   257   trees. One of them corresponds to the inside-out traversal, which is
   310   trees. One of them corresponds to the inside-out traversal, which is
   258   sometimes also called post-order traversal. Furthermore,
   311   sometimes also called post-order traversal'' you traverse inside the
       
   312   tree and on the way up, you apply simplification rules.
       
   313   Furthermore,
   259   remember numerical expressions from school times: there you had expressions
   314   remember numerical expressions from school times: there you had expressions
   260   like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$
   315   like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$
   261   and simplification rules that looked very similar to rules
   316   and simplification rules that looked very similar to rules
   262   above. You would simplify such numerical expressions by replacing
   317   above. You would simplify such numerical expressions by replacing
   263   for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then
   318   for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then
   264   look whether more rules are applicable. If you organise the
   319   look whether more rules are applicable. If you organise the
   265   simplification in an inside-out fashion, it is always clear which
   320   simplification in an inside-out fashion, it is always clear which
   266   rule should be applied next.\hfill[2 Marks]
   321   rule should be applied next.\hfill[1 Mark]
   267 
   322 
   268 \item[(1d)] Implement two functions: The first, called \textit{ders},
   323 \item[(4)] Implement two functions: The first, called \textit{ders},
   269   takes a list of characters and a regular expression as arguments, and
   324   takes a list of characters and a regular expression as arguments, and
   270   builds the derivative w.r.t.~the list as follows:
   325   builds the derivative w.r.t.~the list as follows:
   271 
   326 
   272 \begin{center}
   327 \begin{center}
   273 \begin{tabular}{lcl}
   328 \begin{tabular}{lcl}
   286 derivative regular expression can match the empty string (using
   341 derivative regular expression can match the empty string (using
   287 \textit{nullable}).  For example the \textit{matcher} will produce
   342 \textit{nullable}).  For example the \textit{matcher} will produce
   288 true for the regular expression $(a\cdot b)\cdot c$ and the string
   343 true for the regular expression $(a\cdot b)\cdot c$ and the string
   289 $abc$, but false if you give it the string $ab$. \hfill[1 Mark]
   344 $abc$, but false if you give it the string $ab$. \hfill[1 Mark]
   290 
   345 
   291 \item[(1e)] Implement a function, called \textit{size}, by recursion
   346 \item[(5)] Implement a function, called \textit{size}, by recursion
   292   over regular expressions. If a regular expression is seen as a tree,
   347   over regular expressions. If a regular expression is seen as a tree,
   293   then \textit{size} should return the number of nodes in such a
   348   then \textit{size} should return the number of nodes in such a
   294   tree. Therefore this function is defined as follows:
   349   tree. Therefore this function is defined as follows:
   295 
   350 
   296 \begin{center}
   351 \begin{center}
   307 You can use \textit{size} in order to test how much the `evil' regular
   362 You can use \textit{size} in order to test how much the `evil' regular
   308 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
   363 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
   309 according the letter $a$ without simplification and then compare it to
   364 according the letter $a$ without simplification and then compare it to
   310 taking the derivative, but simplify the result.  The sizes
   365 taking the derivative, but simplify the result.  The sizes
   311 are given in \texttt{re.scala}. \hfill[1 Mark]
   366 are given in \texttt{re.scala}. \hfill[1 Mark]
       
   367 
       
   368 \item[(6)] You do not have to implement anything specific under this
       
   369   task.  The purpose here is that you will be marked for some ``power''
       
   370   test cases. For example can your matcher decide within 30 seconds
       
   371   whether the regular expression $(a^*)^*\cdot b$ matches strings of the
       
   372   form $aaa\ldots{}aaaa$, for say 1 Million $a$'s. And does simplification
       
   373   simplify the regular expression
       
   374 
       
   375   \[
       
   376   \texttt{SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)}
       
   377   \]  
       
   378 
       
   379   \noindent correctly to just \texttt{ONE}, where \texttt{SEQ} is nested
       
   380   50 or more times.\\
       
   381   \mbox{}\hfill[1 Mark]
   312 \end{itemize}
   382 \end{itemize}
   313 
   383 
   314 \subsection*{Background}
   384 \subsection*{Background}
   315 
   385 
   316 Although easily implementable in Scala, the idea behind the derivative
   386 Although easily implementable in Scala, the idea behind the derivative
   335 whether this is indeed the case using the function nullable, which is
   405 whether this is indeed the case using the function nullable, which is
   336 what your matcher is doing.
   406 what your matcher is doing.
   337 
   407 
   338 The purpose of the $\textit{simp}$ function is to keep the regular
   408 The purpose of the $\textit{simp}$ function is to keep the regular
   339 expressions small. Normally the derivative function makes the regular
   409 expressions small. Normally the derivative function makes the regular
   340 expression bigger (see the SEQ case and the example in (1b)) and the
   410 expression bigger (see the SEQ case and the example in (2)) and the
   341 algorithm would be slower and slower over time. The $\textit{simp}$
   411 algorithm would be slower and slower over time. The $\textit{simp}$
   342 function counters this increase in size and the result is that the
   412 function counters this increase in size and the result is that the
   343 algorithm is fast throughout.  By the way, this algorithm is by Janusz
   413 algorithm is fast throughout.  By the way, this algorithm is by Janusz
   344 Brzozowski who came up with the idea of derivatives in 1964 in his PhD
   414 Brzozowski who came up with the idea of derivatives in 1964 in his PhD
   345 thesis.
   415 thesis.
   348 \url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)}
   418 \url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)}
   349 \end{center}
   419 \end{center}
   350 
   420 
   351 
   421 
   352 If you want to see how badly the regular expression matchers do in
   422 If you want to see how badly the regular expression matchers do in
   353 Java\footnote{Version 8 and below; Version 9 does not seem to be as
   423 Java\footnote{Version 8 and below; Version 9 and above does not seem to be as
   354   catastrophic, but still worse than the regular expression matcher
   424   catastrophic, but still much worse than the regular expression
   355 based on derivatives.} and in Python with the `evil' regular
   425   matcher based on derivatives.}, JavaScript and Python with the
   356 expression $(a^*)^*\cdot b$, then have a look at the graphs below (you
   426 `evil' regular expression $(a^*)^*\cdot b$, then have a look at the
   357 can try it out for yourself: have a look at the file
   427 graphs below (you can try it out for yourself: have a look at the file
   358 \texttt{catastrophic.java} and \texttt{catastrophic.py} on
   428 \texttt{catastrophic9.java}, \texttt{catastrophic.js} and
   359 KEATS). Compare this with the matcher you have implemented. How long
   429 \texttt{catastrophic.py} on KEATS). Compare this with the matcher you
   360 can the string of $a$'s be in your matcher and still stay within the
   430 have implemented. How long can the string of $a$'s be in your matcher
   361 30 seconds time limit?
   431 and still stay within the 30 seconds time limit?
   362 
   432 
   363 \begin{center}
   433 \begin{center}
   364 \begin{tabular}{@{}cc@{}}
   434 \begin{tabular}{@{}cc@{}}
   365 \multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings 
   435 \multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings 
   366            $\underbrace{a\ldots a}_{n}$}\bigskip\\
   436            $\underbrace{a\ldots a}_{n}$}\bigskip\\
   378     ytick={0,5,...,40},
   448     ytick={0,5,...,40},
   379     scaled ticks=false,
   449     scaled ticks=false,
   380     axis lines=left,
   450     axis lines=left,
   381     width=6cm,
   451     width=6cm,
   382     height=5.5cm, 
   452     height=5.5cm, 
   383     legend entries={Python, Java 8},  
   453     legend entries={Python, Java 8, JavaScript},  
   384     legend pos=north west]
   454     legend pos=north west]
   385 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
   455 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
   386 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
   456 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   457 \addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
   387 \end{axis}
   458 \end{axis}
   388 \end{tikzpicture}
   459 \end{tikzpicture}
   389   & 
   460   & 
   390 \begin{tikzpicture}
   461 \begin{tikzpicture}
   391 \begin{axis}[
   462 \begin{axis}[
   411 \end{center}
   482 \end{center}
   412 \newpage
   483 \newpage
   413 
   484 
   414 \subsection*{Part 2 (4 Marks)}
   485 \subsection*{Part 2 (4 Marks)}
   415 
   486 
   416 Coming from Java or C++, you might think Scala is a quite esoteric
   487 The \emph{Shunting Yard Algorithm} has been developed by Edsger Dijkstra,
   417 programming language.  But remember, some serious companies have built
   488 an influential computer scientist who developed many well-known
   418 their business on
   489 algorithms. This algorithm transforms the usual infix notation of
   419 Scala.\footnote{\url{https://en.wikipedia.org/wiki/Scala_(programming_language)\#Companies}}
   490 arithmetic expressions into the postfix notation, sometimes also
   420 And there are far, far more esoteric languages out there. One is
   491 called reverse Polish notation.
   421 called \emph{brainf***}. You are asked in this part to implement an
   492 
   422 interpreter for this language.
   493 Why on Earth do people use the postfix notation? It is much more
   423 
   494 convenient to work with the usual infix notation for arithmetic
   424 Urban M\"uller developed brainf*** in 1993.  A close relative of this
   495 expressions. Most modern calculators (as opposed to the ones used 20
   425 language was already introduced in 1964 by Corado B\"ohm, an Italian
   496 years ago) understand infix notation. So why on Earth? \ldots{}Well,
   426 computer pioneer, who unfortunately died a few months ago. The main
   497 many computers under the hood, even nowadays, use postfix notation
   427 feature of brainf*** is its minimalistic set of instructions---just 8
   498 extensively. For example if you give to the Java compiler the
   428 instructions in total and all of which are single characters. Despite
   499 expression $1 + ((2 * 3) + (4 - 3))$, it will generate for it the Java Byte
   429 the minimalism, this language has been shown to be Turing
   500 code
   430 complete\ldots{}if this doesn't ring any bell with you: it roughly
   501 
   431 means that every algorithm we know can, in principle, be implemented in
   502 \begin{lstlisting}[language=JVMIS,numbers=none]
   432 brainf***. It just takes a lot of determination and quite a lot of
   503 ldc 1 
   433 memory resources. Some relatively sophisticated sample programs in
   504 ldc 2 
   434 brainf*** are given in the file \texttt{bf.scala}.\bigskip
   505 ldc 3 
       
   506 imul 
       
   507 ldc 4 
       
   508 ldc 3 
       
   509 isub 
       
   510 iadd 
       
   511 iadd
       
   512 \end{lstlisting}
   435 
   513 
   436 \noindent
   514 \noindent
   437 As mentioned above, brainf*** has 8 single-character commands, namely
   515 where the command \texttt{ldc} loads a constant onto a stack, and \texttt{imul},
   438 \texttt{'>'}, \texttt{'<'}, \texttt{'+'}, \texttt{'-'}, \texttt{'.'},
   516 \texttt{isub} and \texttt{iadd} are commands acting on the stack. Clearly this
   439 \texttt{','}, \texttt{'['} and \texttt{']'}. Every other character is
   517 is the arithmetic expression in postfix notation.\bigskip
   440 considered a comment.  Brainf*** operates on memory cells containing
       
   441 integers. For this it uses a single memory pointer that points at each
       
   442 stage to one memory cell. This pointer can be moved forward by one
       
   443 memory cell by using the command \texttt{'>'}, and backward by using
       
   444 \texttt{'<'}. The commands \texttt{'+'} and \texttt{'-'} increase,
       
   445 respectively decrease, by 1 the content of the memory cell to which
       
   446 the memory pointer currently points to. The commands for input/output
       
   447 are \texttt{','} and \texttt{'.'}. Output works by reading the content
       
   448 of the memory cell to which the memory pointer points to and printing
       
   449 it out as an ASCII character. Input works the other way, taking some
       
   450 user input and storing it in the cell to which the memory pointer
       
   451 points to. The commands \texttt{'['} and \texttt{']'} are looping
       
   452 constructs. Everything in between \texttt{'['} and \texttt{']'} is
       
   453 repeated until a counter (memory cell) reaches zero.  A typical
       
   454 program in brainf*** looks as follows:
       
   455 
       
   456 \begin{center}
       
   457 \begin{verbatim}
       
   458  ++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++
       
   459  ..+++.>>.<-.<.+++.------.--------.>>+.>++.
       
   460 \end{verbatim}
       
   461 \end{center}  
       
   462 
   518 
   463 \noindent
   519 \noindent
   464 This one prints out Hello World\ldots{}obviously. 
   520 The shunting yard algorithm processes an input token list using an
   465 
   521 operator stack and an output list. The input consists of numbers,
   466 \subsubsection*{Tasks (file bf.scala)}
   522 operators ($+$, $-$, $*$, $/$) and parentheses, and for the purpose of
       
   523 the assignment we assume the input is always a well-formed expression
       
   524 in infix notation.  The algorithm uses information about the
       
   525 precedences of the operators (given in the template file). The
       
   526 algorithm processes the input token list as follows:
   467 
   527 
   468 \begin{itemize}
   528 \begin{itemize}
   469 \item[(2a)] Brainf*** memory is represented by a \texttt{Map} from
   529 \item If there is a number as input token, then this token is
   470   integers to integers. The empty memory is represented by
   530   transferred to the output list. Then the rest of the input is
   471   \texttt{Map()}, that is nothing is stored in the
   531   processed.
   472   memory. \texttt{Map(0 -> 1, 2 -> 3)} clearly stores \texttt{1} at
   532 \item If there is an operator as input token, then you need to check
   473   memory location \texttt{0}; at \texttt{2} it stores \texttt{3}. The
   533   what is on top of the operator stack. If there are operators with
   474   convention is that if we query the memory at a location that is
   534   a higher or equal precedence, these operators are first popped off
   475   \emph{not} defined in the \texttt{Map}, we return \texttt{0}. Write
   535   the stack and moved to the output list. Then the operator from the input
   476   a function, \texttt{sread}, that takes a memory (a \texttt{Map}) and
   536   is pushed onto the stack and the rest of the input is processed.
   477   a memory pointer (an \texttt{Int}) as argument, and safely reads the
   537 \item If the input is a left-parenthesis, you push it on to the stack
   478   corresponding memory location. If the \texttt{Map} is not defined at
   538   and continue processing the input.
   479   the memory pointer, \texttt{sread} returns \texttt{0}.
   539 \item If the input is a right-parenthesis, then you move all operators
   480 
   540   from the stack to the output list until you reach the left-parenthesis.
   481   Write another function \texttt{write}, which takes a memory, a
   541   Then you discharge the $($ and $)$ from the input and stack, and continue
   482   memory pointer and an integer value as argument and updates the
   542   processing.
   483   \texttt{Map} with the value at the given memory location. As usual
   543 \item If the input is empty, then you move all remaining operators
   484   the \texttt{Map} is not updated `in-place' but a new map is created
   544   from the stack to the output list.  
   485   with the same data, except the value is stored at the given memory
   545 \end{itemize}  
   486   pointer.\hfill[1 Mark]
   546 
   487 
   547 \subsubsection*{Tasks (file postfix.scala)}
   488 \item[(2b)] Write two functions, \texttt{jumpRight} and
   548 
   489   \texttt{jumpLeft} that are needed to implement the loop constructs
   549 \begin{itemize}
   490   of brainf***. They take a program (a \texttt{String}) and a program
   550 \item[(7)] Implement the shunting yard algorithm outlined above. The
   491   counter (an \texttt{Int}) as argument and move right (respectively
   551   function, called \texttt{syard}, takes a list of tokens as first
   492   left) in the string in order to find the \textbf{matching}
   552   argument. The second and third arguments are the stack and output
   493   opening/closing bracket. For example, given the following program
   553   list represented as Scala lists. The most convenient way to
   494   with the program counter indicated by an arrow:
   554   implement this algorithm is to analyse what the input list, stack
   495 
   555   and output look like in each step using pattern-matching. The
   496   \begin{center}
   556   algorithm transforms for example the input
   497   \texttt{--[\barbelow{.}.+>--],>,++}
   557 
   498   \end{center}
   558   \[
   499 
   559   \texttt{List(3, +, 4, *, (, 2, -, 1, ))}
   500   then the matching closing bracket is in 9th position (counting from 0) and
   560   \]
   501   \texttt{jumpRight} is supposed to return the position just after this
   561 
       
   562   into the postfix output
       
   563 
       
   564   \[
       
   565   \texttt{List(3, 4, 2, 1, -, *, +)}
       
   566   \]  
   502   
   567   
   503   \begin{center}
   568   You can assume the input list is always a  list representing
   504   \texttt{--[..+>--]\barbelow{,}>,++}
   569   a well-formed infix arithmetic expression.\hfill[1 Mark]
   505   \end{center}
   570 
   506 
   571 \item[(8)] Implement a compute function that takes a postfix expression
   507   meaning it jumps to after the loop. Similarly, if you are in 8th position
   572   as argument and evaluates it generating an integer as result. It uses a
   508   then \texttt{jumpLeft} is supposed to jump to just after the opening
   573   stack to evaluate the postfix expression. The operators $+$, $-$, $*$
   509   bracket (that is jumping to the beginning of the loop):
   574   are as usual; $/$ is division on integers, for example $7 / 3 = 2$.
   510 
       
   511   \begin{center}
       
   512     \texttt{--[..+>-\barbelow{-}],>,++}
       
   513     \qquad$\stackrel{\texttt{jumpLeft}}{\longrightarrow}$\qquad
       
   514     \texttt{--[\barbelow{.}.+>--],>,++}
       
   515   \end{center}
       
   516 
       
   517   Unfortunately we have to take into account that there might be
       
   518   other opening and closing brackets on the `way' to find the
       
   519   matching bracket. For example in the brainf*** program
       
   520 
       
   521   \begin{center}
       
   522   \texttt{--[\barbelow{.}.[+>]--],>,++}
       
   523   \end{center}
       
   524 
       
   525   we do not want to return the index for the \texttt{'-'} in the 9th
       
   526   position, but the program counter for \texttt{','} in 12th
       
   527   position. The easiest to find out whether a bracket is matched is by
       
   528   using levels (which are the third argument in \texttt{jumpLeft} and
       
   529   \texttt{jumpLeft}). In case of \texttt{jumpRight} you increase the
       
   530   level by one whenever you find an opening bracket and decrease by
       
   531   one for a closing bracket. Then in \texttt{jumpRight} you are looking
       
   532   for the closing bracket on level \texttt{0}. For \texttt{jumpLeft} you
       
   533   do the opposite. In this way you can find \textbf{matching} brackets
       
   534   in strings such as
       
   535 
       
   536   \begin{center}
       
   537   \texttt{--[\barbelow{.}.[[-]+>[.]]--],>,++}
       
   538   \end{center}
       
   539 
       
   540   for which \texttt{jumpRight} should produce the position:
       
   541 
       
   542   \begin{center}
       
   543   \texttt{--[..[[-]+>[.]]--]\barbelow{,}>,++}
       
   544   \end{center}
       
   545 
       
   546   It is also possible that the position returned by \texttt{jumpRight} or
       
   547   \texttt{jumpLeft} is outside the string in cases where there are
       
   548   no matching brackets. For example
       
   549 
       
   550   \begin{center}
       
   551   \texttt{--[\barbelow{.}.[[-]+>[.]]--,>,++}
       
   552   \qquad$\stackrel{\texttt{jumpRight}}{\longrightarrow}$\qquad
       
   553   \texttt{--[..[[-]+>[.]]-->,++\barbelow{\;\phantom{+}}}
       
   554   \end{center}
       
   555   \hfill[1 Mark]
   575   \hfill[1 Mark]
   556 
   576 \end{itemize}
   557 
   577 
   558 \item[(2c)] Write a recursive function \texttt{run} that executes a
   578 \subsubsection*{Task (file postfix2.scala)}
   559   brainf*** program. It takes a program, a program counter, a memory
   579 
   560   pointer and a memory as arguments. If the program counter is outside
   580 \begin{itemize}
   561   the program string, the execution stops and \texttt{run} returns the
   581 \item[(9)] Extend the code in (7) and (8) to include the power
   562   memory. If the program counter is inside the string, it reads the
   582   operator.  This requires proper account of associativity of
   563   corresponding character and updates the program counter \texttt{pc},
   583   the operators. The power operator is right-associative, whereas the
   564   memory pointer \texttt{mp} and memory \texttt{mem} according to the
   584   other operators are left-associative.  Left-associative operators
   565   rules shown in Figure~\ref{comms}. It then calls recursively
   585   are popped off if the precedence is bigger or equal, while
   566   \texttt{run} with the updated data.
   586   right-associative operators are only popped off if the precedence is
   567 
   587   bigger. The compute function in this task should use
   568   Write another function \texttt{start} that calls \texttt{run} with a
   588   \texttt{Long}s, rather than \texttt{Int}s.\hfill[2 Marks]
   569   given brainfu** program and memory, and the program counter and memory pointer
   589 \end{itemize}
   570   set to~$0$. Like \texttt{run} it returns the memory after the execution
       
   571   of the program finishes. You can test your brainf**k interpreter with the
       
   572   Sierpinski triangle or the Hello world programs or have a look at
       
   573 
       
   574   \begin{center}
       
   575   \url{https://esolangs.org/wiki/Brainfuck}
       
   576   \end{center}\hfill[2 Marks]
       
   577   
       
   578   \begin{figure}[p]
       
   579   \begin{center}
       
   580     \begin{tabular}{|@{}p{0.8cm}|l|}
       
   581       \hline
       
   582       \hfill\texttt{'>'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   583                        $\bullet$ & $\texttt{pc} + 1$\\
       
   584                        $\bullet$ & $\texttt{mp} + 1$\\
       
   585                        $\bullet$ & \texttt{mem} unchanged
       
   586                      \end{tabular}\\\hline   
       
   587       \hfill\texttt{'<'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   588                        $\bullet$ & $\texttt{pc} + 1$\\
       
   589                        $\bullet$ & $\texttt{mp} - 1$\\
       
   590                        $\bullet$ & \texttt{mem} unchanged
       
   591                      \end{tabular}\\\hline   
       
   592       \hfill\texttt{'+'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   593                        $\bullet$ & $\texttt{pc} + 1$\\
       
   594                        $\bullet$ & $\texttt{mp}$ unchanged\\
       
   595                        $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) + 1}\\
       
   596                      \end{tabular}\\\hline   
       
   597       \hfill\texttt{'-'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   598                        $\bullet$ & $\texttt{pc} + 1$\\
       
   599                        $\bullet$ & $\texttt{mp}$ unchanged\\
       
   600                        $\bullet$ & \texttt{mem} updated with \texttt{mp -> mem(mp) - 1}\\
       
   601                      \end{tabular}\\\hline   
       
   602       \hfill\texttt{'.'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   603                        $\bullet$ & $\texttt{pc} + 1$\\
       
   604                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\
       
   605                        $\bullet$ & print out \,\texttt{mem(mp)} as a character\\
       
   606                      \end{tabular}\\\hline   
       
   607       \hfill\texttt{','} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   608                        $\bullet$ & $\texttt{pc} + 1$\\
       
   609                        $\bullet$ & $\texttt{mp}$ unchanged\\
       
   610                        $\bullet$ & \texttt{mem} updated with \texttt{mp -> \textrm{input}}\\
       
   611                        \multicolumn{2}{@{}l}{the input is given by \texttt{Console.in.read().toByte}}
       
   612                      \end{tabular}\\\hline   
       
   613       \hfill\texttt{'['} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   614                        \multicolumn{2}{@{}l}{if \texttt{mem(mp) == 0} then}\\
       
   615                        $\bullet$ & $\texttt{pc = jumpRight(prog, pc + 1, 0)}$\\
       
   616                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\medskip\\
       
   617                        \multicolumn{2}{@{}l}{otherwise if \texttt{mem(mp) != 0} then}\\
       
   618                        $\bullet$ & $\texttt{pc} + 1$\\
       
   619                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\
       
   620                      \end{tabular}
       
   621                      \\\hline   
       
   622       \hfill\texttt{']'} & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   623                        \multicolumn{2}{@{}l}{if \texttt{mem(mp) != 0} then}\\
       
   624                        $\bullet$ & $\texttt{pc = jumpLeft(prog, pc - 1, 0)}$\\
       
   625                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\medskip\\
       
   626                        \multicolumn{2}{@{}l}{otherwise if \texttt{mem(mp) == 0} then}\\
       
   627                        $\bullet$ & $\texttt{pc} + 1$\\
       
   628                        $\bullet$ & $\texttt{mp}$ and \texttt{mem} unchanged\\
       
   629                      \end{tabular}\\\hline   
       
   630       any other char & \begin{tabular}[t]{@{}l@{\hspace{2mm}}l@{}}
       
   631                          $\bullet$ & $\texttt{pc} + 1$\\
       
   632                          $\bullet$ & \texttt{mp} and \texttt{mem} unchanged
       
   633                        \end{tabular}\\
       
   634       \hline                 
       
   635     \end{tabular}
       
   636   \end{center}
       
   637   \caption{The rules for how commands in the brainf*** language update the program counter \texttt{pc},
       
   638     memory pointer \texttt{mp} and memory \texttt{mem}.\label{comms}}
       
   639   \end{figure}
       
   640 \end{itemize}\bigskip  
       
   641 
   590 
   642 
   591 
   643 
   592 
   644 
   593 
   645 \end{document}
   594 \end{document}