cws/main_cw03.tex
changeset 351 591b9005157e
parent 311 a479ec3ea536
child 356 d1046d9d3213
equal deleted inserted replaced
350:c5ad0e3f2a6d 351:591b9005157e
     1 % !TEX program = xelatex
     1 % !TEX program = xelatex
     2 \documentclass{article}
     2 \documentclass{article}
     3 \usepackage{chessboard}
       
     4 \usepackage[LSBC4,T1]{fontenc}
       
     5 \let\clipbox\relax
       
     6 \usepackage{../style}
     3 \usepackage{../style}
     7 \usepackage{../langs}
     4 \usepackage{../langs}
     8 \usepackage{disclaimer}
     5 \usepackage{disclaimer}
       
     6 \usepackage{tikz}
       
     7 \usepackage{pgf}
       
     8 \usepackage{pgfplots}
       
     9 \usepackage{stackengine}
       
    10 %% \usepackage{accents}
       
    11 \newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}}
       
    12 
       
    13 \begin{filecontents}{re-python2.data}
       
    14 1 0.033
       
    15 5 0.036
       
    16 10 0.034
       
    17 15 0.036
       
    18 18 0.059
       
    19 19 0.084 
       
    20 20 0.141
       
    21 21 0.248
       
    22 22 0.485
       
    23 23 0.878
       
    24 24 1.71
       
    25 25 3.40
       
    26 26 7.08
       
    27 27 14.12
       
    28 28 26.69
       
    29 \end{filecontents}
       
    30 
       
    31 \begin{filecontents}{re-java.data}
       
    32 5  0.00298
       
    33 10  0.00418
       
    34 15  0.00996
       
    35 16  0.01710
       
    36 17  0.03492
       
    37 18  0.03303
       
    38 19  0.05084
       
    39 20  0.10177
       
    40 21  0.19960
       
    41 22  0.41159
       
    42 23  0.82234
       
    43 24  1.70251
       
    44 25  3.36112
       
    45 26  6.63998
       
    46 27  13.35120
       
    47 28  29.81185
       
    48 \end{filecontents}
       
    49 
       
    50 \begin{filecontents}{re-js.data}
       
    51 5   0.061
       
    52 10  0.061
       
    53 15  0.061
       
    54 20  0.070
       
    55 23  0.131
       
    56 25  0.308
       
    57 26  0.564
       
    58 28  1.994
       
    59 30  7.648
       
    60 31  15.881 
       
    61 32  32.190
       
    62 \end{filecontents}
       
    63 
       
    64 \begin{filecontents}{re-java9.data}
       
    65 1000  0.01410
       
    66 2000  0.04882
       
    67 3000  0.10609
       
    68 4000  0.17456
       
    69 5000  0.27530
       
    70 6000  0.41116
       
    71 7000  0.53741
       
    72 8000  0.70261
       
    73 9000  0.93981
       
    74 10000 0.97419
       
    75 11000 1.28697
       
    76 12000 1.51387
       
    77 14000 2.07079
       
    78 16000 2.69846
       
    79 20000 4.41823
       
    80 24000 6.46077
       
    81 26000 7.64373
       
    82 30000 9.99446
       
    83 34000 12.966885
       
    84 38000 16.281621
       
    85 42000 19.180228
       
    86 46000 21.984721
       
    87 50000 26.950203
       
    88 60000 43.0327746
       
    89 \end{filecontents}
       
    90 
       
    91 \begin{filecontents}{re-swift.data}
       
    92 5   0.001
       
    93 10  0.001
       
    94 15  0.009
       
    95 20  0.178
       
    96 23  1.399
       
    97 24  2.893
       
    98 25  5.671
       
    99 26  11.357
       
   100 27  22.430
       
   101 \end{filecontents}
       
   102 
       
   103 \begin{filecontents}{re-dart.data}
       
   104 20 0.042
       
   105 21 0.084
       
   106 22 0.190
       
   107 23 0.340
       
   108 24 0.678
       
   109 25 1.369
       
   110 26 2.700
       
   111 27 5.462
       
   112 28 10.908
       
   113 29 21.725
       
   114 30 43.492
       
   115 \end{filecontents}
     9 
   116 
    10 \begin{document}
   117 \begin{document}
    11 
   118 
    12 \setchessboard{smallboard,
   119 % BF IDE
    13                zero,
   120 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
    14                showmover=false,
   121   
    15                boardfontencoding=LSBC4,
   122 \section*{Part 8 (Scala, 7 Marks)}
    16                hlabelformat=\arabic{ranklabel},
   123 
    17                vlabelformat=\arabic{filelabel}}
   124 %\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\
    18 
   125 %\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\
    19 \mbox{}\\[-18mm]\mbox{}
   126 %\mbox{}\hfill\textit{other functional language.''}\smallskip\\
    20 
   127 %\mbox{}\hfill\textit{ --- Dean and Ghemawat, who designed this concept at Google}
    21 \section*{Part 8 (Scala)}
   128 %\bigskip\medskip
    22 
       
    23 \mbox{}\hfill\textit{``The problem with object-oriented languages is they’ve got all this implicit,}\\
       
    24 \mbox{}\hfill\textit{environment that they carry around with them. You wanted a banana but}\\
       
    25 \mbox{}\hfill\textit{what you got was a gorilla holding the banana and the entire jungle.''}\smallskip\\
       
    26 \mbox{}\hfill\textit{ --- Joe Armstrong (creator of the Erlang programming language)}\medskip\bigskip
       
    27 
   129 
    28 \noindent
   130 \noindent
    29 This part is about searching and backtracking. You are asked to
   131 This part is about a regular expression matcher described by
    30 implement Scala programs that solve various versions of the
   132 Brzozowski in 1964. This part is due on \cwEIGHTa{} at 5pm.  The
    31 \textit{Knight's Tour Problem} on a chessboard. The preliminary part (4\%) is
   133 background is that ``out-of-the-box'' regular expression matching in
    32 due on  \cwEIGHT{} at 4pm; the core part is due on \cwEIGHTa{} at 4pm.
   134 mainstream languages like Java, JavaScript and Python can sometimes be
    33 Note the core, more advanced, part might include material you have not
   135 excruciatingly slow.  You are supposed to implement a regular
    34 yet seen in the first three lectures. \bigskip
   136 expression matcher that is much, much faster. \bigskip
    35 
   137 
    36 \IMPORTANT{}
   138 \IMPORTANTNONE{}
       
   139 
       
   140 \noindent
    37 Also note that the running time of each part will be restricted to a
   141 Also note that the running time of each part will be restricted to a
    38 maximum of 30 seconds on my laptop: If you calculate a result once,
   142 maximum of 30 seconds on my laptop.  
    39 try to avoid to calculate the result again. Feel free to copy any code
       
    40 you need from files \texttt{knight1.scala}, \texttt{knight2.scala} and
       
    41 \texttt{knight3.scala}.
       
    42 
   143 
    43 \DISCLAIMER{}
   144 \DISCLAIMER{}
    44 
   145 
       
   146 \subsection*{Reference Implementation}
       
   147 
       
   148 This Scala assignment comes with a reference implementation in form of
       
   149 a \texttt{jar}-file. This allows you to run any test cases on your own
       
   150 computer. For example you can call Scala on the command line with the
       
   151 option \texttt{-cp re.jar} and then query any function from the
       
   152 \texttt{re.scala} template file. As usual you have to prefix the calls
       
   153 with \texttt{CW8c} or import this object.  Since some tasks
       
   154 are time sensitive, you can check the reference implementation as
       
   155 follows: if you want to know, for example, how long it takes to match
       
   156 strings of $a$'s using the regular expression $(a^*)^*\cdot b$ you can
       
   157 query as follows:
       
   158 
       
   159 
       
   160 \begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small]
       
   161 $ scala -cp re.jar
       
   162 scala> import CW8c._  
       
   163 scala> for (i <- 0 to 5000000 by 500000) {
       
   164   | println(f"$i: ${time_needed(2, matcher(EVIL, "a" * i))}%.5f secs.")
       
   165   | }
       
   166 0: 0.00002 secs.
       
   167 500000: 0.10608 secs.
       
   168 1000000: 0.22286 secs.
       
   169 1500000: 0.35982 secs.
       
   170 2000000: 0.45828 secs.
       
   171 2500000: 0.59558 secs.
       
   172 3000000: 0.73191 secs.
       
   173 3500000: 0.83499 secs.
       
   174 4000000: 0.99149 secs.
       
   175 4500000: 1.15395 secs.
       
   176 5000000: 1.29659 secs.
       
   177 \end{lstlisting}%$
       
   178 
       
   179 \subsection*{Preliminaries}
       
   180 
       
   181 The task is to implement a regular expression matcher that is based on
       
   182 derivatives of regular expressions. Most of the functions are defined by
       
   183 recursion over regular expressions and can be elegantly implemented
       
   184 using Scala's pattern-matching. The implementation should deal with the
       
   185 following regular expressions, which have been predefined in the file
       
   186 \texttt{re.scala}:
       
   187 
       
   188 \begin{center}
       
   189 \begin{tabular}{lcll}
       
   190   $r$ & $::=$ & $\ZERO$     & cannot match anything\\
       
   191       &   $|$ & $\ONE$      & can only match the empty string\\
       
   192       &   $|$ & $c$         & can match a single character (in this case $c$)\\
       
   193       &   $|$ & $r_1 + r_2$ & can match a string either with $r_1$ or with $r_2$\\
       
   194   &   $|$ & $r_1\cdot r_2$ & can match the first part of a string with $r_1$ and\\
       
   195           &  & & then the second part with $r_2$\\
       
   196       &   $|$ & $r^*$       & can match a string with zero or more copies of $r$\\
       
   197 \end{tabular}
       
   198 \end{center}
       
   199 
       
   200 \noindent 
       
   201 Why? Regular expressions are
       
   202 one of the simplest ways to match patterns in text, and
       
   203 are endlessly useful for searching, editing and analysing data in all
       
   204 sorts of places (for example analysing network traffic in order to
       
   205 detect security breaches). However, you need to be fast, otherwise you
       
   206 will stumble over problems such as recently reported at
       
   207 
       
   208 {\small
       
   209 \begin{itemize}
       
   210 \item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019}  
       
   211 \item[$\bullet$] \url{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
       
   212 \item[$\bullet$] \url{https://vimeo.com/112065252}
       
   213 \item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom}  
       
   214 \end{itemize}}
       
   215 
       
   216 % Knowing how to match regular expressions and strings will let you
       
   217 % solve a lot of problems that vex other humans.
       
   218 
       
   219 
       
   220 \subsubsection*{Tasks (file re.scala)}
       
   221 
       
   222 The file \texttt{re.scala} has already a definition for regular
       
   223 expressions and also defines some handy shorthand notation for
       
   224 regular expressions. The notation in this document matches up
       
   225 with the code in the file as follows:
       
   226 
       
   227 \begin{center}
       
   228   \begin{tabular}{rcl@{\hspace{10mm}}l}
       
   229     & & code: & shorthand:\smallskip \\ 
       
   230   $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
       
   231   $\ONE$  & $\mapsto$ & \texttt{ONE}\\
       
   232   $c$     & $\mapsto$ & \texttt{CHAR(c)}\\
       
   233   $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
       
   234   $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
       
   235   $r^*$ & $\mapsto$ &  \texttt{STAR(r)} & \texttt{r.\%}
       
   236 \end{tabular}    
       
   237 \end{center}  
       
   238 
       
   239 
       
   240 \begin{itemize}
       
   241 \item[(1)] Implement a function, called \textit{nullable}, by
       
   242   recursion over regular expressions. This function tests whether a
       
   243   regular expression can match the empty string. This means given a
       
   244   regular expression it either returns true or false. The function
       
   245   \textit{nullable}
       
   246   is defined as follows:
       
   247 
       
   248 \begin{center}
       
   249 \begin{tabular}{lcl}
       
   250 $\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\
       
   251 $\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
       
   252 $\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
       
   253 $\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\
       
   254 $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
       
   255 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
       
   256 \end{tabular}
       
   257 \end{center}~\hfill[1 Mark]
       
   258 
       
   259 \item[(2)] Implement a function, called \textit{der}, by recursion over
       
   260   regular expressions. It takes a character and a regular expression
       
   261   as arguments and calculates the derivative of a regular expression according
       
   262   to the rules:
       
   263 
       
   264 \begin{center}
       
   265 \begin{tabular}{lcl}
       
   266 $\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\
       
   267 $\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
       
   268 $\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
       
   269 $\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\
       
   270 $\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
       
   271       & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
       
   272       & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
       
   273 $\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
       
   274 \end{tabular}
       
   275 \end{center}
       
   276 
       
   277 For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
       
   278 w.r.t.~the characters $a$, $b$ and $c$ are
       
   279 
       
   280 \begin{center}
       
   281   \begin{tabular}{lcll}
       
   282     $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
       
   283     $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
       
   284     $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
       
   285   \end{tabular}
       
   286 \end{center}
       
   287 
       
   288 Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
       
   289 w.r.t.~the characters $a$, $b$ and $c$ gives
       
   290 
       
   291 \begin{center}
       
   292   \begin{tabular}{lcll}
       
   293     $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
       
   294     $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
       
   295     $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
       
   296   \end{tabular}
       
   297 \end{center}
       
   298 
       
   299 One more example: Let $r''$ stand for the second derivative above,
       
   300 then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
       
   301 and $c$ gives
       
   302 
       
   303 \begin{center}
       
   304   \begin{tabular}{lcll}
       
   305     $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
       
   306     $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
       
   307     $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
       
   308     (is $\textit{nullable}$)                      
       
   309   \end{tabular}
       
   310 \end{center}
       
   311 
       
   312 Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
       
   313 \mbox{}\hfill\mbox{[1 Mark]}
       
   314 
       
   315 \item[(3)] Implement the function \textit{simp}, which recursively
       
   316   traverses a regular expression, and on the way up simplifies every
       
   317   regular expression on the left (see below) to the regular expression
       
   318   on the right, except it does not simplify inside ${}^*$-regular
       
   319   expressions.
       
   320 
       
   321   \begin{center}
       
   322 \begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
       
   323 $r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ 
       
   324 $\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
       
   325 $r \cdot \ONE$ & $\mapsto$ & $r$\\ 
       
   326 $\ONE \cdot r$ & $\mapsto$ & $r$\\ 
       
   327 $r + \ZERO$ & $\mapsto$ & $r$\\ 
       
   328 $\ZERO + r$ & $\mapsto$ & $r$\\ 
       
   329 $r + r$ & $\mapsto$ & $r$\\ 
       
   330 \end{tabular}
       
   331   \end{center}
       
   332 
       
   333   For example the regular expression
       
   334   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
       
   335 
       
   336   simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
       
   337   seen as trees and there are several methods for traversing
       
   338   trees. One of them corresponds to the inside-out traversal, which is also
       
   339   sometimes called post-order tra\-versal: you traverse inside the
       
   340   tree and on the way up you apply simplification rules.
       
   341   \textbf{Another Hint:}
       
   342   Remember numerical expressions from school times---there you had expressions
       
   343   like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$
       
   344   and simplification rules that looked very similar to rules
       
   345   above. You would simplify such numerical expressions by replacing
       
   346   for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then
       
   347   look whether more rules are applicable. If you organise the
       
   348   simplification in an inside-out fashion, it is always clear which
       
   349   simplification should be applied next.\hfill[1 Mark]
       
   350 
       
   351 \item[(4)] Implement two functions: The first, called \textit{ders},
       
   352   takes a list of characters and a regular expression as arguments, and
       
   353   builds the derivative w.r.t.~the list as follows:
       
   354 
       
   355 \begin{center}
       
   356 \begin{tabular}{lcl}
       
   357 $\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\
       
   358   $\textit{ders}\;(c::cs)\;r$  & $\dn$ &
       
   359     $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\
       
   360 \end{tabular}
       
   361 \end{center}
       
   362 
       
   363 Note that this function is different from \textit{der}, which only
       
   364 takes a single character.
       
   365 
       
   366 The second function, called \textit{matcher}, takes a string and a
       
   367 regular expression as arguments. It builds first the derivatives
       
   368 according to \textit{ders} and after that tests whether the resulting
       
   369 derivative regular expression can match the empty string (using
       
   370 \textit{nullable}).  For example the \textit{matcher} will produce
       
   371 true for the regular expression $(a\cdot b)\cdot c$ and the string
       
   372 $abc$, but false if you give it the string $ab$. \hfill[1 Mark]
       
   373 
       
   374 \item[(5)] Implement a function, called \textit{size}, by recursion
       
   375   over regular expressions. If a regular expression is seen as a tree,
       
   376   then \textit{size} should return the number of nodes in such a
       
   377   tree. Therefore this function is defined as follows:
       
   378 
       
   379 \begin{center}
       
   380 \begin{tabular}{lcl}
       
   381 $\textit{size}(\ZERO)$ & $\dn$ & $1$\\
       
   382 $\textit{size}(\ONE)$  & $\dn$ & $1$\\
       
   383 $\textit{size}(c)$     & $\dn$ & $1$\\
       
   384 $\textit{size}(r_1 + r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
       
   385 $\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
       
   386 $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
       
   387 \end{tabular}
       
   388 \end{center}
       
   389 
       
   390 You can use \textit{size} in order to test how much the ``evil'' regular
       
   391 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
       
   392 according the letter $a$ without simplification and then compare it to
       
   393 taking the derivative, but simplify the result.  The sizes
       
   394 are given in \texttt{re.scala}. \hfill[1 Mark]
       
   395 
       
   396 \item[(6)] You do not have to implement anything specific under this
       
   397   task.  The purpose here is that you will be marked for some ``power''
       
   398   test cases. For example can your matcher decide within 30 seconds
       
   399   whether the regular expression $(a^*)^*\cdot b$ matches strings of the
       
   400   form $aaa\ldots{}aaaa$, for say 1 Million $a$'s. And does simplification
       
   401   simplify the regular expression
       
   402 
       
   403   \[
       
   404   \texttt{SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)}
       
   405   \]  
       
   406 
       
   407   \noindent correctly to just \texttt{ONE}, where \texttt{SEQ} is nested
       
   408   50 or more times?\\
       
   409   \mbox{}\hfill[2 Mark]
       
   410 \end{itemize}
       
   411 
    45 \subsection*{Background}
   412 \subsection*{Background}
    46 
   413 
    47 The \textit{Knight's Tour Problem} is about finding a tour such that
   414 Although easily implementable in Scala, the idea behind the derivative
    48 the knight visits every field on an $n\times n$ chessboard once. For
   415 function might not so easy to be seen. To understand its purpose
    49 example on a $5\times 5$ chessboard, a knight's tour is:
   416 better, assume a regular expression $r$ can match strings of the form
    50 
   417 $c\!::\!cs$ (that means strings which start with a character $c$ and have
    51 \chessboard[maxfield=d4, 
   418 some rest, or tail, $cs$). If you take the derivative of $r$ with
    52             pgfstyle= {[base,at={\pgfpoint{0pt}{-0.5ex}}]text},
   419 respect to the character $c$, then you obtain a regular expression
    53             text = \small 24, markfield=Z4,
   420 that can match all the strings $cs$.  In other words, the regular
    54             text = \small 11, markfield=a4,
   421 expression $\textit{der}\;c\;r$ can match the same strings $c\!::\!cs$
    55             text = \small  6, markfield=b4,
   422 that can be matched by $r$, except that the $c$ is chopped off.
    56             text = \small 17, markfield=c4,
   423 
    57             text = \small  0, markfield=d4,
   424 Assume now $r$ can match the string $abc$. If you take the derivative
    58             text = \small 19, markfield=Z3,
   425 according to $a$ then you obtain a regular expression that can match
    59             text = \small 16, markfield=a3,
   426 $bc$ (it is $abc$ where the $a$ has been chopped off). If you now
    60             text = \small 23, markfield=b3,
   427 build the derivative $\textit{der}\;b\;(\textit{der}\;a\;r)$ you
    61             text = \small 12, markfield=c3,
   428 obtain a regular expression that can match the string $c$ (it is $bc$
    62             text = \small  7, markfield=d3,
   429 where $b$ is chopped off). If you finally build the derivative of this
    63             text = \small 10, markfield=Z2,
   430 according $c$, that is
    64             text = \small  5, markfield=a2,
   431 $\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain
    65             text = \small 18, markfield=b2,
   432 a regular expression that can match the empty string. You can test
    66             text = \small  1, markfield=c2,
   433 whether this is indeed the case using the function nullable, which is
    67             text = \small 22, markfield=d2,
   434 what your matcher is doing.
    68             text = \small 15, markfield=Z1,
   435 
    69             text = \small 20, markfield=a1,
   436 The purpose of the $\textit{simp}$ function is to keep the regular
    70             text = \small  3, markfield=b1,
   437 expressions small. Normally the derivative function makes the regular
    71             text = \small  8, markfield=c1,
   438 expression bigger (see the SEQ case and the example in (2)) and the
    72             text = \small 13, markfield=d1,
   439 algorithm would be slower and slower over time. The $\textit{simp}$
    73             text = \small  4, markfield=Z0,
   440 function counters this increase in size and the result is that the
    74             text = \small  9, markfield=a0,
   441 algorithm is fast throughout.  By the way, this algorithm is by Janusz
    75             text = \small 14, markfield=b0,
   442 Brzozowski who came up with the idea of derivatives in 1964 in his PhD
    76             text = \small 21, markfield=c0,
   443 thesis.
    77             text = \small  2, markfield=d0
   444 
    78            ]
   445 \begin{center}\small
    79            
   446 \url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)}
    80 \noindent
   447 \end{center}
    81 This tour starts in the right-upper corner, then moves to field
   448 
    82 $(3,2)$, then $(4,0)$ and so on. There are no knight's tours on
   449 
    83 $2\times 2$, $3\times 3$ and $4\times 4$ chessboards, but for every
   450 If you want to see how badly the regular expression matchers do in
    84 bigger board there is. 
   451 Java\footnote{Version 8 and below; Version 9 and above does not seem to be as
    85 
   452   catastrophic, but still much worse than the regular expression
    86 A knight's tour is called \emph{closed}, if the last step in the tour
   453   matcher based on derivatives.}, JavaScript and Python with the
    87 is within a knight's move to the beginning of the tour. So the above
   454 `evil' regular expression $(a^*)^*\cdot b$, then have a look at the
    88 knight's tour is \underline{not} closed because the last
   455 graphs below (you can try it out for yourself: have a look at the files
    89 step on field $(0, 4)$ is not within the reach of the first step on
   456 \texttt{catastrophic9.java}, \texttt{catastrophic.js},
    90 $(4, 4)$. It turns out there is no closed knight's tour on a $5\times
   457 \texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you
    91 5$ board. But there are on a $6\times 6$ board and on bigger ones, for
   458 have implemented. How long can the string of $a$'s be in your matcher
    92 example
   459 and still stay within the 30 seconds time limit?
    93 
   460 
    94 \chessboard[maxfield=e5, 
   461 \begin{center}
    95             pgfstyle={[base,at={\pgfpoint{0pt}{-0.5ex}}]text},
   462 \begin{tabular}{@{}cc@{}}
    96             text = \small 10, markfield=Z5,
   463 \multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings 
    97             text = \small  5, markfield=a5,
   464            $\underbrace{a\ldots a}_{n}$}\bigskip\\
    98             text = \small 18, markfield=b5,
       
    99             text = \small 25, markfield=c5,
       
   100             text = \small 16, markfield=d5,
       
   101             text = \small  7, markfield=e5,
       
   102             text = \small 31, markfield=Z4,
       
   103             text = \small 26, markfield=a4,
       
   104             text = \small  9, markfield=b4,
       
   105             text = \small  6, markfield=c4,
       
   106             text = \small 19, markfield=d4,
       
   107             text = \small 24, markfield=e4,
       
   108             % 4  11  30  17   8  15 
       
   109             text = \small  4, markfield=Z3,
       
   110             text = \small 11, markfield=a3,
       
   111             text = \small 30, markfield=b3,
       
   112             text = \small 17, markfield=c3,
       
   113             text = \small  8, markfield=d3,
       
   114             text = \small 15, markfield=e3,
       
   115             %29  32  27   0  23  20 
       
   116             text = \small 29, markfield=Z2,
       
   117             text = \small 32, markfield=a2,
       
   118             text = \small 27, markfield=b2,
       
   119             text = \small  0, markfield=c2,
       
   120             text = \small 23, markfield=d2,
       
   121             text = \small 20, markfield=e2,
       
   122             %12   3  34  21  14   1 
       
   123             text = \small 12, markfield=Z1,
       
   124             text = \small  3, markfield=a1,
       
   125             text = \small 34, markfield=b1,
       
   126             text = \small 21, markfield=c1,
       
   127             text = \small 14, markfield=d1,
       
   128             text = \small  1, markfield=e1,
       
   129             %33  28  13   2  35  22 
       
   130             text = \small 33, markfield=Z0,
       
   131             text = \small 28, markfield=a0,
       
   132             text = \small 13, markfield=b0,
       
   133             text = \small  2, markfield=c0,
       
   134             text = \small 35, markfield=d0,
       
   135             text = \small 22, markfield=e0,
       
   136             vlabel=false,
       
   137             hlabel=false
       
   138            ]
       
   139 
       
   140 
       
   141 \noindent
       
   142 where the 35th move can join up again with the 0th move.
       
   143 
       
   144 If you cannot remember how a knight moves in chess, or never played
       
   145 chess, below are all potential moves indicated for two knights, one on
       
   146 field $(2, 2)$ (blue moves) and another on $(7, 7)$ (red moves):
       
   147 
       
   148 {\chessboard[maxfield=g7,
       
   149             color=blue!50,
       
   150             linewidth=0.2em,
       
   151             shortenstart=0.5ex,
       
   152             shortenend=0.5ex,
       
   153             markstyle=cross,
       
   154             markfields={a4, c4, Z3, d3, Z1, d1, a0, c0},
       
   155             color=red!50,
       
   156             markfields={f5, e6},
       
   157             setpieces={Ng7, Nb2},
       
   158             boardfontsize=12pt,labelfontsize=9pt]}
       
   159 
       
   160 \subsection*{Reference Implementation}
       
   161 
       
   162 This Scala part comes with three reference implementations in form of
       
   163 \texttt{jar}-files. This allows you to run any test cases on your own
       
   164 computer. For example you can call Scala on the command line with the
       
   165 option \texttt{-cp knight1.jar} and then query any function from the
       
   166 \texttt{knight1.scala} template file. As usual you have to
       
   167 prefix the calls with \texttt{CW8a}, \texttt{CW8b} and \texttt{CW8c}.
       
   168 Since some of the calls are time sensitive, I included some timing
       
   169 information. For example
       
   170 
       
   171 \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
       
   172 $ scala -cp knight1.jar
       
   173 scala> CW8a.enum_tours(5, List((0, 0))).length
       
   174 Time needed: 1.722 secs.
       
   175 res0: Int = 304
       
   176 
       
   177 scala> CW8a.print_board(8, CW8a.first_tour(8, List((0, 0))).get)
       
   178 Time needed: 15.411 secs.
       
   179 
       
   180  51  46  55  44  53   4  21  12 
       
   181  56  43  52   3  22  13  24   5 
       
   182  47  50  45  54  25  20  11  14 
       
   183  42  57   2  49  40  23   6  19 
       
   184  35  48  41  26  61  10  15  28 
       
   185  58   1  36  39  32  27  18   7 
       
   186  37  34  31  60   9  62  29  16 
       
   187   0  59  38  33  30  17   8  63 
       
   188 \end{lstlisting}%$
       
   189 
       
   190 
       
   191 \subsection*{Hints}
       
   192 
       
   193 \noindent
       
   194 \textbf{Preliminary Part} useful list functions: \texttt{.contains(..)} checks
       
   195 whether an element is in a list, \texttt{.flatten} turns a list of
       
   196 lists into just a list, \texttt{\_::\_} puts an element on the head of
       
   197 the list, \texttt{.head} gives you the first element of a list (make
       
   198 sure the list is not \texttt{Nil}); a useful option function:
       
   199 \texttt{.isDefined} returns true, if an option is \texttt{Some(..)};
       
   200 anonymous functions can be constructed using \texttt{(x:Int) => ...},
       
   201 this function takes an \texttt{Int} as an argument.\medskip
       
   202 
       
   203 
       
   204 \noindent
       
   205 \textbf{Core Part} a useful list function: \texttt{.sortBy} sorts a list
       
   206 according to a component given by the function; a function can be
       
   207 tested to be tail-recursive by annotation \texttt{@tailrec}, which is
       
   208 made available by importing \texttt{scala.annotation.tailrec}.\medskip
       
   209 
       
   210 
       
   211 
       
   212 
       
   213 \subsection*{Preliminary Part (4 Marks)}
       
   214 
       
   215 You are asked to implement the knight's tour problem such that the
       
   216 dimension of the board can be changed.  Therefore most functions will
       
   217 take the dimension of the board as an argument.  The fun with this
       
   218 problem is that even for small chessboard dimensions it has already an
       
   219 incredibly large search space---finding a tour is like finding a
       
   220 needle in a haystack. In the first task we want to see how far we get
       
   221 with exhaustively exploring the complete search space for small
       
   222 chessboards.\medskip
       
   223 
       
   224 \noindent
       
   225 Let us first fix the basic datastructures for the implementation.  The
       
   226 board dimension is an integer.
       
   227 A \emph{position} (or field) on the chessboard is
       
   228 a pair of integers, like $(0, 0)$. A \emph{path} is a list of
       
   229 positions. The first (or 0th move) in a path is the last element in
       
   230 this list; and the last move in the path is the first element. For
       
   231 example the path for the $5\times 5$ chessboard above is represented
       
   232 by
       
   233 
       
   234 \[
       
   235 \texttt{List($\underbrace{\texttt{(0, 4)}}_{24}$,
       
   236   $\underbrace{\texttt{(2, 3)}}_{23}$, ...,
       
   237   $\underbrace{\texttt{(3, 2)}}_1$, $\underbrace{\texttt{(4, 4)}}_0$)}
       
   238 \]
       
   239 
       
   240 \noindent
       
   241 Suppose the dimension of a chessboard is $n$, then a path is a
       
   242 \emph{tour} if the length of the path is $n \times n$, each element
       
   243 occurs only once in the path, and each move follows the rules of how a
       
   244 knight moves (see above for the rules).
       
   245 
       
   246 
       
   247 \subsubsection*{Tasks (file knight1.scala)}
       
   248 
       
   249 \begin{itemize}
       
   250 \item[(1)] Implement an \texttt{is\_legal} function that takes a
       
   251   dimension, a path and a position as arguments and tests whether the
       
   252   position is inside the board and not yet element in the
       
   253   path. \hfill[1 Mark]
       
   254 
       
   255 \item[(2)] Implement a \texttt{legal\_moves} function that calculates for a
       
   256   position all legal onward moves. If the onward moves are
       
   257   placed on a circle, you should produce them starting from
       
   258   ``12-o'clock'' following in clockwise order.  For example on an
       
   259   $8\times 8$ board for a knight at position $(2, 2)$ and otherwise
       
   260   empty board, the legal-moves function should produce the onward
       
   261   positions in this order:
       
   262 
       
   263   \begin{center}
       
   264   \texttt{List((3,4), (4,3), (4,1), (3,0), (1,0), (0,1), (0,3), (1,4))}
       
   265   \end{center}
       
   266 
       
   267   If the board is not empty, then maybe some of the moves need to be
       
   268   filtered out from this list.  For a knight on field $(7, 7)$ and an
       
   269   empty board, the legal moves are
       
   270 
       
   271   \begin{center}
       
   272   \texttt{List((6,5), (5,6))}
       
   273   \end{center}
       
   274   \mbox{}\hfill[1 Mark]
       
   275 
       
   276 \item[(3)] Implement two recursive functions (\texttt{count\_tours} and
       
   277   \texttt{enum\_tours}). They each take a dimension and a path as
       
   278   arguments. They exhaustively search for tours starting
       
   279   from the given path. The first function counts all possible 
       
   280   tours (there can be none for certain board sizes) and the second
       
   281   collects all tours in a list of paths. These functions will be
       
   282   called with a path containing a single position---the starting field.
       
   283   They are expected to extend this path so as to find all tours starting
       
   284   from the given position.\\
       
   285   \mbox{}\hfill[2 Marks]
       
   286 \end{itemize}
       
   287 
       
   288 \noindent \textbf{Test data:} For the marking, the functions in (3)
       
   289 will be called with board sizes up to $5 \times 5$. If you search
       
   290 for tours on a $5 \times 5$ board starting only from field $(0, 0)$,
       
   291 there are 304 of tours. If you try out every field of a $5 \times
       
   292 5$-board as a starting field and add up all tours, you obtain
       
   293 1728. A $6\times 6$ board is already too large to be searched
       
   294 exhaustively.\footnote{For your interest, the number of tours on
       
   295   $6\times 6$, $7\times 7$ and $8\times 8$ are 6637920, 165575218320,
       
   296   19591828170979904, respectively.}\smallskip
       
   297 
       
   298 
       
   299 \subsection*{Core Part (6 Marks)}
       
   300 
       
   301 
       
   302 \subsubsection*{Tasks (file knight1.scala cont.)}
       
   303 
       
   304 \begin{itemize}
       
   305 \item[(4)] Implement a \texttt{first}-function. This function takes a list of
       
   306   positions and a function $f$ as arguments; $f$ is the name we give to
       
   307   this argument). The function $f$ takes a position as argument and
       
   308   produces an optional path. So $f$'s type is \texttt{Pos =>
       
   309     Option[Path]}. The idea behind the \texttt{first}-function is as follows:
       
   310 
       
   311   \[
       
   312   \begin{array}{lcl}
       
   313   \textit{first}(\texttt{Nil}, f) & \dn & \texttt{None}\\  
       
   314   \textit{first}(x\!::\!xs, f) & \dn & \begin{cases}
       
   315     f(x) & \textit{if}\;f(x) \not=\texttt{None}\\
       
   316     \textit{first}(xs, f) & \textit{otherwise}\\
       
   317                               \end{cases}
       
   318   \end{array}
       
   319   \]
       
   320 
       
   321   \noindent That is, we want to find the first position where the
       
   322   result of $f$ is not \texttt{None}, if there is one. Note that
       
   323   `inside' \texttt{first}, you do not (need to) know anything about
       
   324   the argument $f$ except its type, namely \texttt{Pos =>
       
   325     Option[Path]}. If you want to find out what the result of $f$ is
       
   326   on a particular argument, say $x$, you can just write $f(x)$. 
       
   327   There is one additional point however you should
       
   328   take into account when implementing \texttt{first}: you will need to
       
   329   calculate what the result of $f(x)$ is; your code should do this
       
   330   only \textbf{once} and for as \textbf{few} elements in the list as
       
   331   possible! Do not calculate $f(x)$ for all elements and then see which 
       
   332   is the first \texttt{Some}.\\\mbox{}\hfill[1 Mark]
       
   333   
   465   
   334 \item[(5)] Implement a \texttt{first\_tour} function that uses the
   466 \begin{tikzpicture}
   335   \texttt{first}-function from (4), and searches recursively for single tour.
   467 \begin{axis}[
   336   As there might not be such a tour at all, the \texttt{first\_tour} function
   468     xlabel={$n$},
   337   needs to return a value of type
   469     x label style={at={(1.05,0.0)}},
   338   \texttt{Option[Path]}.\\\mbox{}\hfill[1 Mark]
   470     ylabel={time in secs},
   339 \end{itemize}
   471     y label style={at={(0.06,0.5)}},
   340 
   472     enlargelimits=false,
   341 \noindent
   473     xtick={0,5,...,30},
   342 \textbf{Testing:} The \texttt{first\_tour} function will be called with board
   474     xmax=33,
   343 sizes of up to $8 \times 8$.
   475     ymax=45,
   344 \bigskip
   476     ytick={0,5,...,40},
   345 
   477     scaled ticks=false,
   346 %%\newpage
   478     axis lines=left,
   347 
   479     width=6cm,
   348 \noindent
   480     height=5.5cm, 
   349 As you should have seen in the earlier parts, a naive search for tours beyond
   481     legend entries={Python, Java 8, JavaScript, Swift, Dart},  
   350 $8 \times 8$ boards and also searching for closed tours even on small
   482     legend pos=north west,
   351 boards takes too much time. There is a heuristics, called \emph{Warnsdorf's
   483     legend cell align=left]
   352 Rule} that can speed up finding a tour. This heuristics states that a
   484 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
   353 knight is moved so that it always proceeds to the field from which the
   485 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
   354 knight will have the \underline{fewest} onward moves.  For example for
   486 \addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
   355 a knight on field $(1, 3)$, the field $(0, 1)$ has the fewest possible
   487 \addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
   356 onward moves, namely 2.
   488 \addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
   357 
   489 \end{axis}
   358 \chessboard[maxfield=g7,
   490 \end{tikzpicture}
   359             pgfstyle= {[base,at={\pgfpoint{0pt}{-0.5ex}}]text},
   491   & 
   360             text = \small 3, markfield=Z5,
   492 \begin{tikzpicture}
   361             text = \small 7, markfield=b5,
   493 \begin{axis}[
   362             text = \small 7, markfield=c4,
   494     xlabel={$n$},
   363             text = \small 7, markfield=c2,
   495     x label style={at={(1.05,0.0)}},
   364             text = \small 5, markfield=b1,
   496     ylabel={time in secs},
   365             text = \small 2, markfield=Z1,
   497     y label style={at={(0.06,0.5)}},
   366             setpieces={Na3}]
   498     %enlargelimits=false,
   367 
   499     %xtick={0,5000,...,30000},
   368 \noindent
   500     xmax=65000,
   369 Warnsdorf's Rule states that the moves on the board above should be
   501     ymax=45,
   370 tried in the order
   502     ytick={0,5,...,40},
   371 
   503     scaled ticks=false,
   372 \[
   504     axis lines=left,
   373 (0, 1), (0, 5), (2, 1), (2, 5), (3, 4), (3, 2)
   505     width=6cm,
   374 \]
   506     height=5.5cm, 
   375 
   507     legend entries={Java 9},  
   376 \noindent
   508     legend pos=north west]
   377 Whenever there are ties, the corresponding onward moves can be in any
   509 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
   378 order.  When calculating the number of onward moves for each field, we
   510 \end{axis}
   379 do not count moves that revisit any field already visited.
   511 \end{tikzpicture}
   380 
   512 \end{tabular}  
   381 \subsubsection*{Tasks (file knight2.scala)}
   513 \end{center}
   382 
   514 \newpage
   383 \begin{itemize}
   515 
   384 \item[(6)] Write a function \texttt{ordered\_moves} that calculates a list of
       
   385   onward moves like in (2) but orders them according to 
       
   386   Warnsdorf’s Rule. That means moves with the fewest legal onward moves
       
   387   should come first (in order to be tried out first). \hfill[1 Mark]
       
   388   
       
   389 \item[(7)] Implement a \texttt{first\_closed\_tour\_heuristics}
       
   390   function that searches for a single
       
   391   \textbf{closed} tour on a $6\times 6$ board. It should try out
       
   392   onward moves according to
       
   393   the \texttt{ordered\_moves} function from (6). It is more likely to find
       
   394   a solution when started in the middle of the board (that is
       
   395   position $(dimension / 2, dimension / 2)$). \hfill[1 Mark]
       
   396 
       
   397 \item[(8)] Implement a \texttt{first\_tour\_heuristics} function
       
   398   for boards up to
       
   399   $30\times 30$.  It is the same function as in (7) but searches for
       
   400   tours (not just closed tours). It might be called with any field on the
       
   401   board as starting field.\\
       
   402   %You have to be careful to write a
       
   403   %tail-recursive function of the \texttt{first\_tour\_heuristics} function
       
   404   %otherwise you will get problems with stack-overflows.\\
       
   405   \mbox{}\hfill[1 Mark]
       
   406 \end{itemize}    
       
   407 
       
   408 \subsubsection*{Task (file knight3.scala)}
       
   409 \begin{itemize}
       
   410 \item[(9)] Implement a function \texttt{tour\_on\_mega\_board} which is
       
   411   the same function as in (8), \textbf{but} should be able to
       
   412   deal with boards up to
       
   413   $70\times 70$ \textbf{within 30 seconds} (on my laptop). This will be tested
       
   414   by starting from field $(0, 0)$. You have to be careful to
       
   415   write a tail-recursive function otherwise you will get problems
       
   416   with stack-overflows. Please observe the requirements about
       
   417   the submissions: no tricks involving \textbf{.par}.\medskip
       
   418 
       
   419   The timelimit of 30 seconds is with respect to the laptop on which the
       
   420   marking will happen. You can roughly estimate how well your
       
   421   implementation performs by running \texttt{knight3.jar} on your
       
   422   computer. For example the reference implementation shows
       
   423   on my laptop:
       
   424   
       
   425   \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
       
   426 $ scala -cp knight3.jar
       
   427   
       
   428 scala> CW8c.tour_on_mega_board(70, List((0, 0)))
       
   429 Time needed: 9.484 secs.
       
   430 ...<<long_list>>...
       
   431 \end{lstlisting}%$
       
   432 
       
   433   \mbox{}\hfill[1 Mark]
       
   434 \end{itemize}  
       
   435 \bigskip
       
   436 
   516 
   437 
   517 
   438 
   518 
   439 
   519 
   440 \end{document}
   520 \end{document}
       
   521 
   441 
   522 
   442 %%% Local Variables: 
   523 %%% Local Variables: 
   443 %%% mode: latex
   524 %%% mode: latex
   444 %%% TeX-master: t
   525 %%% TeX-master: t
   445 %%% End: 
   526 %%% End: