# HG changeset patch # User Christian Urban # Date 1604504131 0 # Node ID 591b9005157e7586620c42d7bc5e1ff7424f6979 # Parent c5ad0e3f2a6d7af087da6ea4fabfa259d6692b56 updated diff -r c5ad0e3f2a6d -r 591b9005157e cws/main_cw03.tex --- a/cws/main_cw03.tex Wed Nov 04 14:55:49 2020 +0000 +++ b/cws/main_cw03.tex Wed Nov 04 15:35:31 2020 +0000 @@ -1,444 +1,525 @@ % !TEX program = xelatex \documentclass{article} -\usepackage{chessboard} -\usepackage[LSBC4,T1]{fontenc} -\let\clipbox\relax \usepackage{../style} \usepackage{../langs} \usepackage{disclaimer} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{pgfplots} +\usepackage{stackengine} +%% \usepackage{accents} +\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} + +\begin{filecontents}{re-python2.data} +1 0.033 +5 0.036 +10 0.034 +15 0.036 +18 0.059 +19 0.084 +20 0.141 +21 0.248 +22 0.485 +23 0.878 +24 1.71 +25 3.40 +26 7.08 +27 14.12 +28 26.69 +\end{filecontents} + +\begin{filecontents}{re-java.data} +5 0.00298 +10 0.00418 +15 0.00996 +16 0.01710 +17 0.03492 +18 0.03303 +19 0.05084 +20 0.10177 +21 0.19960 +22 0.41159 +23 0.82234 +24 1.70251 +25 3.36112 +26 6.63998 +27 13.35120 +28 29.81185 +\end{filecontents} + +\begin{filecontents}{re-js.data} +5 0.061 +10 0.061 +15 0.061 +20 0.070 +23 0.131 +25 0.308 +26 0.564 +28 1.994 +30 7.648 +31 15.881 +32 32.190 +\end{filecontents} + +\begin{filecontents}{re-java9.data} +1000 0.01410 +2000 0.04882 +3000 0.10609 +4000 0.17456 +5000 0.27530 +6000 0.41116 +7000 0.53741 +8000 0.70261 +9000 0.93981 +10000 0.97419 +11000 1.28697 +12000 1.51387 +14000 2.07079 +16000 2.69846 +20000 4.41823 +24000 6.46077 +26000 7.64373 +30000 9.99446 +34000 12.966885 +38000 16.281621 +42000 19.180228 +46000 21.984721 +50000 26.950203 +60000 43.0327746 +\end{filecontents} + +\begin{filecontents}{re-swift.data} +5 0.001 +10 0.001 +15 0.009 +20 0.178 +23 1.399 +24 2.893 +25 5.671 +26 11.357 +27 22.430 +\end{filecontents} + +\begin{filecontents}{re-dart.data} +20 0.042 +21 
0.084 +22 0.190 +23 0.340 +24 0.678 +25 1.369 +26 2.700 +27 5.462 +28 10.908 +29 21.725 +30 43.492 +\end{filecontents} \begin{document} -\setchessboard{smallboard, - zero, - showmover=false, - boardfontencoding=LSBC4, - hlabelformat=\arabic{ranklabel}, - vlabelformat=\arabic{filelabel}} +% BF IDE +% https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5 + +\section*{Part 8 (Scala, 7 Marks)} -\mbox{}\\[-18mm]\mbox{} - -\section*{Part 8 (Scala)} - -\mbox{}\hfill\textit{``The problem with object-oriented languages is they’ve got all this implicit,}\\ -\mbox{}\hfill\textit{environment that they carry around with them. You wanted a banana but}\\ -\mbox{}\hfill\textit{what you got was a gorilla holding the banana and the entire jungle.''}\smallskip\\ -\mbox{}\hfill\textit{ --- Joe Armstrong (creator of the Erlang programming language)}\medskip\bigskip +%\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\ +%\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\ +%\mbox{}\hfill\textit{other functional language.''}\smallskip\\ +%\mbox{}\hfill\textit{ --- Dean and Ghemawat, who designed this concept at Google} +%\bigskip\medskip \noindent -This part is about searching and backtracking. You are asked to -implement Scala programs that solve various versions of the -\textit{Knight's Tour Problem} on a chessboard. The preliminary part (4\%) is -due on \cwEIGHT{} at 4pm; the core part is due on \cwEIGHTa{} at 4pm. -Note the core, more advanced, part might include material you have not -yet seen in the first three lectures. \bigskip +This part is about a regular expression matcher described by +Brzozowski in 1964. This part is due on \cwEIGHTa{} at 5pm. The +background is that ``out-of-the-box'' regular expression matching in +mainstream languages like Java, JavaScript and Python can sometimes be +excruciatingly slow. You are supposed to implement a regular +expression matcher that is much, much faster. 
\bigskip -\IMPORTANT{} +\IMPORTANTNONE{} + +\noindent Also note that the running time of each part will be restricted to a -maximum of 30 seconds on my laptop: If you calculate a result once, -try to avoid to calculate the result again. Feel free to copy any code -you need from files \texttt{knight1.scala}, \texttt{knight2.scala} and -\texttt{knight3.scala}. +maximum of 30 seconds on my laptop. \DISCLAIMER{} -\subsection*{Background} - -The \textit{Knight's Tour Problem} is about finding a tour such that -the knight visits every field on an $n\times n$ chessboard once. For -example on a $5\times 5$ chessboard, a knight's tour is: - -\chessboard[maxfield=d4, - pgfstyle= {[base,at={\pgfpoint{0pt}{-0.5ex}}]text}, - text = \small 24, markfield=Z4, - text = \small 11, markfield=a4, - text = \small 6, markfield=b4, - text = \small 17, markfield=c4, - text = \small 0, markfield=d4, - text = \small 19, markfield=Z3, - text = \small 16, markfield=a3, - text = \small 23, markfield=b3, - text = \small 12, markfield=c3, - text = \small 7, markfield=d3, - text = \small 10, markfield=Z2, - text = \small 5, markfield=a2, - text = \small 18, markfield=b2, - text = \small 1, markfield=c2, - text = \small 22, markfield=d2, - text = \small 15, markfield=Z1, - text = \small 20, markfield=a1, - text = \small 3, markfield=b1, - text = \small 8, markfield=c1, - text = \small 13, markfield=d1, - text = \small 4, markfield=Z0, - text = \small 9, markfield=a0, - text = \small 14, markfield=b0, - text = \small 21, markfield=c0, - text = \small 2, markfield=d0 - ] - -\noindent -This tour starts in the right-upper corner, then moves to field -$(3,2)$, then $(4,0)$ and so on. There are no knight's tours on -$2\times 2$, $3\times 3$ and $4\times 4$ chessboards, but for every -bigger board there is. - -A knight's tour is called \emph{closed}, if the last step in the tour -is within a knight's move to the beginning of the tour. 
So the above -knight's tour is \underline{not} closed because the last -step on field $(0, 4)$ is not within the reach of the first step on -$(4, 4)$. It turns out there is no closed knight's tour on a $5\times -5$ board. But there are on a $6\times 6$ board and on bigger ones, for -example - -\chessboard[maxfield=e5, - pgfstyle={[base,at={\pgfpoint{0pt}{-0.5ex}}]text}, - text = \small 10, markfield=Z5, - text = \small 5, markfield=a5, - text = \small 18, markfield=b5, - text = \small 25, markfield=c5, - text = \small 16, markfield=d5, - text = \small 7, markfield=e5, - text = \small 31, markfield=Z4, - text = \small 26, markfield=a4, - text = \small 9, markfield=b4, - text = \small 6, markfield=c4, - text = \small 19, markfield=d4, - text = \small 24, markfield=e4, - % 4 11 30 17 8 15 - text = \small 4, markfield=Z3, - text = \small 11, markfield=a3, - text = \small 30, markfield=b3, - text = \small 17, markfield=c3, - text = \small 8, markfield=d3, - text = \small 15, markfield=e3, - %29 32 27 0 23 20 - text = \small 29, markfield=Z2, - text = \small 32, markfield=a2, - text = \small 27, markfield=b2, - text = \small 0, markfield=c2, - text = \small 23, markfield=d2, - text = \small 20, markfield=e2, - %12 3 34 21 14 1 - text = \small 12, markfield=Z1, - text = \small 3, markfield=a1, - text = \small 34, markfield=b1, - text = \small 21, markfield=c1, - text = \small 14, markfield=d1, - text = \small 1, markfield=e1, - %33 28 13 2 35 22 - text = \small 33, markfield=Z0, - text = \small 28, markfield=a0, - text = \small 13, markfield=b0, - text = \small 2, markfield=c0, - text = \small 35, markfield=d0, - text = \small 22, markfield=e0, - vlabel=false, - hlabel=false - ] - - -\noindent -where the 35th move can join up again with the 0th move. 
- -If you cannot remember how a knight moves in chess, or never played -chess, below are all potential moves indicated for two knights, one on -field $(2, 2)$ (blue moves) and another on $(7, 7)$ (red moves): - -{\chessboard[maxfield=g7, - color=blue!50, - linewidth=0.2em, - shortenstart=0.5ex, - shortenend=0.5ex, - markstyle=cross, - markfields={a4, c4, Z3, d3, Z1, d1, a0, c0}, - color=red!50, - markfields={f5, e6}, - setpieces={Ng7, Nb2}, - boardfontsize=12pt,labelfontsize=9pt]} - \subsection*{Reference Implementation} -This Scala part comes with three reference implementations in form of -\texttt{jar}-files. This allows you to run any test cases on your own +This Scala assignment comes with a reference implementation in form of +a \texttt{jar}-file. This allows you to run any test cases on your own computer. For example you can call Scala on the command line with the -option \texttt{-cp knight1.jar} and then query any function from the -\texttt{knight1.scala} template file. As usual you have to -prefix the calls with \texttt{CW8a}, \texttt{CW8b} and \texttt{CW8c}. -Since some of the calls are time sensitive, I included some timing -information. For example - -\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] -$ scala -cp knight1.jar -scala> CW8a.enum_tours(5, List((0, 0))).length -Time needed: 1.722 secs. -res0: Int = 304 - -scala> CW8a.print_board(8, CW8a.first_tour(8, List((0, 0))).get) -Time needed: 15.411 secs. 
- - 51 46 55 44 53 4 21 12 - 56 43 52 3 22 13 24 5 - 47 50 45 54 25 20 11 14 - 42 57 2 49 40 23 6 19 - 35 48 41 26 61 10 15 28 - 58 1 36 39 32 27 18 7 - 37 34 31 60 9 62 29 16 - 0 59 38 33 30 17 8 63 -\end{lstlisting}%$ - - -\subsection*{Hints} - -\noindent -\textbf{Preliminary Part} useful list functions: \texttt{.contains(..)} checks -whether an element is in a list, \texttt{.flatten} turns a list of -lists into just a list, \texttt{\_::\_} puts an element on the head of -the list, \texttt{.head} gives you the first element of a list (make -sure the list is not \texttt{Nil}); a useful option function: -\texttt{.isDefined} returns true, if an option is \texttt{Some(..)}; -anonymous functions can be constructed using \texttt{(x:Int) => ...}, -this function takes an \texttt{Int} as an argument.\medskip - - -\noindent -\textbf{Core Part} a useful list function: \texttt{.sortBy} sorts a list -according to a component given by the function; a function can be -tested to be tail-recursive by annotation \texttt{@tailrec}, which is -made available by importing \texttt{scala.annotation.tailrec}.\medskip - - +option \texttt{-cp re.jar} and then query any function from the +\texttt{re.scala} template file. As usual you have to prefix the calls +with \texttt{CW8c} or import this object. Since some tasks +are time sensitive, you can check the reference implementation as +follows: if you want to know, for example, how long it takes to match +strings of $a$'s using the regular expression $(a^*)^*\cdot b$ you can +query as follows: -\subsection*{Preliminary Part (4 Marks)} +\begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small] +$ scala -cp re.jar +scala> import CW8c._ +scala> for (i <- 0 to 5000000 by 500000) { + | println(f"$i: ${time_needed(2, matcher(EVIL, "a" * i))}%.5f secs.") + | } +0: 0.00002 secs. +500000: 0.10608 secs. +1000000: 0.22286 secs. +1500000: 0.35982 secs. +2000000: 0.45828 secs. +2500000: 0.59558 secs. +3000000: 0.73191 secs. 
+3500000: 0.83499 secs. +4000000: 0.99149 secs. +4500000: 1.15395 secs. +5000000: 1.29659 secs. +\end{lstlisting}%$ -You are asked to implement the knight's tour problem such that the -dimension of the board can be changed. Therefore most functions will -take the dimension of the board as an argument. The fun with this -problem is that even for small chessboard dimensions it has already an -incredibly large search space---finding a tour is like finding a -needle in a haystack. In the first task we want to see how far we get -with exhaustively exploring the complete search space for small -chessboards.\medskip +\subsection*{Preliminaries} + +The task is to implement a regular expression matcher that is based on +derivatives of regular expressions. Most of the functions are defined by +recursion over regular expressions and can be elegantly implemented +using Scala's pattern-matching. The implementation should deal with the +following regular expressions, which have been predefined in the file +\texttt{re.scala}: -\noindent -Let us first fix the basic datastructures for the implementation. The -board dimension is an integer. -A \emph{position} (or field) on the chessboard is -a pair of integers, like $(0, 0)$. A \emph{path} is a list of -positions. The first (or 0th move) in a path is the last element in -this list; and the last move in the path is the first element. 
For -example the path for the $5\times 5$ chessboard above is represented -by +\begin{center} +\begin{tabular}{lcll} + $r$ & $::=$ & $\ZERO$ & cannot match anything\\ + & $|$ & $\ONE$ & can only match the empty string\\ + & $|$ & $c$ & can match a single character (in this case $c$)\\ + & $|$ & $r_1 + r_2$ & can match a string either with $r_1$ or with $r_2$\\ + & $|$ & $r_1\cdot r_2$ & can match the first part of a string with $r_1$ and\\ + & & & then the second part with $r_2$\\ + & $|$ & $r^*$ & can match a string with zero or more copies of $r$\\ +\end{tabular} +\end{center} -\[ -\texttt{List($\underbrace{\texttt{(0, 4)}}_{24}$, - $\underbrace{\texttt{(2, 3)}}_{23}$, ..., - $\underbrace{\texttt{(3, 2)}}_1$, $\underbrace{\texttt{(4, 4)}}_0$)} -\] +\noindent +Why? Regular expressions are +one of the simplest ways to match patterns in text, and +are endlessly useful for searching, editing and analysing data in all +sorts of places (for example analysing network traffic in order to +detect security breaches). However, you need to be fast, otherwise you +will stumble over problems such as recently reported at -\noindent -Suppose the dimension of a chessboard is $n$, then a path is a -\emph{tour} if the length of the path is $n \times n$, each element -occurs only once in the path, and each move follows the rules of how a -knight moves (see above for the rules). +{\small +\begin{itemize} +\item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019} +\item[$\bullet$] \url{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016} +\item[$\bullet$] \url{https://vimeo.com/112065252} +\item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom} +\end{itemize}} + +% Knowing how to match regular expressions and strings will let you +% solve a lot of problems that vex other humans. 
-\subsubsection*{Tasks (file knight1.scala)} - -\begin{itemize} -\item[(1)] Implement an \texttt{is\_legal} function that takes a - dimension, a path and a position as arguments and tests whether the - position is inside the board and not yet element in the - path. \hfill[1 Mark] +\subsubsection*{Tasks (file re.scala)} -\item[(2)] Implement a \texttt{legal\_moves} function that calculates for a - position all legal onward moves. If the onward moves are - placed on a circle, you should produce them starting from - ``12-o'clock'' following in clockwise order. For example on an - $8\times 8$ board for a knight at position $(2, 2)$ and otherwise - empty board, the legal-moves function should produce the onward - positions in this order: - - \begin{center} - \texttt{List((3,4), (4,3), (4,1), (3,0), (1,0), (0,1), (0,3), (1,4))} - \end{center} - - If the board is not empty, then maybe some of the moves need to be - filtered out from this list. For a knight on field $(7, 7)$ and an - empty board, the legal moves are +The file \texttt{re.scala} has already a definition for regular +expressions and also defines some handy shorthand notation for +regular expressions. The notation in this document matches up +with the code in the file as follows: - \begin{center} - \texttt{List((6,5), (5,6))} - \end{center} - \mbox{}\hfill[1 Mark] - -\item[(3)] Implement two recursive functions (\texttt{count\_tours} and - \texttt{enum\_tours}). They each take a dimension and a path as - arguments. They exhaustively search for tours starting - from the given path. The first function counts all possible - tours (there can be none for certain board sizes) and the second - collects all tours in a list of paths. These functions will be - called with a path containing a single position---the starting field. 
- They are expected to extend this path so as to find all tours starting - from the given position.\\ - \mbox{}\hfill[2 Marks] -\end{itemize} - -\noindent \textbf{Test data:} For the marking, the functions in (3) -will be called with board sizes up to $5 \times 5$. If you search -for tours on a $5 \times 5$ board starting only from field $(0, 0)$, -there are 304 of tours. If you try out every field of a $5 \times -5$-board as a starting field and add up all tours, you obtain -1728. A $6\times 6$ board is already too large to be searched -exhaustively.\footnote{For your interest, the number of tours on - $6\times 6$, $7\times 7$ and $8\times 8$ are 6637920, 165575218320, - 19591828170979904, respectively.}\smallskip +\begin{center} + \begin{tabular}{rcl@{\hspace{10mm}}l} + & & code: & shorthand:\smallskip \\ + $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\ + $\ONE$ & $\mapsto$ & \texttt{ONE}\\ + $c$ & $\mapsto$ & \texttt{CHAR(c)}\\ + $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\ + $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\ + $r^*$ & $\mapsto$ & \texttt{STAR(r)} & \texttt{r.\%} +\end{tabular} +\end{center} -\subsection*{Core Part (6 Marks)} +\begin{itemize} +\item[(1)] Implement a function, called \textit{nullable}, by + recursion over regular expressions. This function tests whether a + regular expression can match the empty string. This means given a + regular expression it either returns true or false. 
The function + \textit{nullable} + is defined as follows: + +\begin{center} +\begin{tabular}{lcl} +$\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\ +$\textit{nullable}(\ONE)$ & $\dn$ & $\textit{true}$\\ +$\textit{nullable}(c)$ & $\dn$ & $\textit{false}$\\ +$\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\ +$\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\ +$\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\ +\end{tabular} +\end{center}~\hfill[1 Mark] + +\item[(2)] Implement a function, called \textit{der}, by recursion over + regular expressions. It takes a character and a regular expression + as arguments and calculates the derivative of a regular expression according + to the rules: + +\begin{center} +\begin{tabular}{lcl} +$\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\ +$\textit{der}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\ +$\textit{der}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\ +$\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\ +$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ + & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\ + & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\ +$\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\ +\end{tabular} +\end{center} + +For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives +w.r.t.~the characters $a$, $b$ and $c$ are +\begin{center} + \begin{tabular}{lcll} + $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\ + $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\ + $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$ + \end{tabular} +\end{center} -\subsubsection*{Tasks (file knight1.scala cont.)} +Let $r'$ stand for the first derivative, then taking the derivatives of $r'$ +w.r.t.~the 
characters $a$, $b$ and $c$ gives + +\begin{center} + \begin{tabular}{lcll} + $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\ + $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\ + $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ + \end{tabular} +\end{center} + +One more example: Let $r''$ stand for the second derivative above, +then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$ +and $c$ gives + +\begin{center} + \begin{tabular}{lcll} + $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\ + $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\ + $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ & + (is $\textit{nullable}$) + \end{tabular} +\end{center} + +Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\ +\mbox{}\hfill\mbox{[1 Mark]} + +\item[(3)] Implement the function \textit{simp}, which recursively + traverses a regular expression, and on the way up simplifies every + regular expression on the left (see below) to the regular expression + on the right, except it does not simplify inside ${}^*$-regular + expressions. -\begin{itemize} -\item[(4)] Implement a \texttt{first}-function. This function takes a list of - positions and a function $f$ as arguments; $f$ is the name we give to - this argument). The function $f$ takes a position as argument and - produces an optional path. So $f$'s type is \texttt{Pos => - Option[Path]}. 
The idea behind the \texttt{first}-function is as follows: + \begin{center} +\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll} +$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ +$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ +$r \cdot \ONE$ & $\mapsto$ & $r$\\ +$\ONE \cdot r$ & $\mapsto$ & $r$\\ +$r + \ZERO$ & $\mapsto$ & $r$\\ +$\ZERO + r$ & $\mapsto$ & $r$\\ +$r + r$ & $\mapsto$ & $r$\\ +\end{tabular} + \end{center} + + For example the regular expression + \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\] + + simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be + seen as trees and there are several methods for traversing + trees. One of them corresponds to the inside-out traversal, which is also + sometimes called post-order tra\-versal: you traverse inside the + tree and on the way up you apply simplification rules. + \textbf{Another Hint:} + Remember numerical expressions from school times---there you had expressions + like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ + and simplification rules that looked very similar to rules + above. You would simplify such numerical expressions by replacing + for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then + look whether more rules are applicable. If you organise the + simplification in an inside-out fashion, it is always clear which + simplification should be applied next.\hfill[1 Mark] + +\item[(4)] Implement two functions: The first, called \textit{ders}, + takes a list of characters and a regular expression as arguments, and + builds the derivative w.r.t.~the list as follows: + +\begin{center} +\begin{tabular}{lcl} +$\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\ + $\textit{ders}\;(c::cs)\;r$ & $\dn$ & + $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\ +\end{tabular} +\end{center} + +Note that this function is different from \textit{der}, which only +takes a single character. 
+ +The second function, called \textit{matcher}, takes a string and a +regular expression as arguments. It builds first the derivatives +according to \textit{ders} and after that tests whether the resulting +derivative regular expression can match the empty string (using +\textit{nullable}). For example the \textit{matcher} will produce +true for the regular expression $(a\cdot b)\cdot c$ and the string +$abc$, but false if you give it the string $ab$. \hfill[1 Mark] + +\item[(5)] Implement a function, called \textit{size}, by recursion + over regular expressions. If a regular expression is seen as a tree, + then \textit{size} should return the number of nodes in such a + tree. Therefore this function is defined as follows: + +\begin{center} +\begin{tabular}{lcl} +$\textit{size}(\ZERO)$ & $\dn$ & $1$\\ +$\textit{size}(\ONE)$ & $\dn$ & $1$\\ +$\textit{size}(c)$ & $\dn$ & $1$\\ +$\textit{size}(r_1 + r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\ +$\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\ +$\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\ +\end{tabular} +\end{center} + +You can use \textit{size} in order to test how much the ``evil'' regular +expression $(a^*)^* \cdot b$ grows when taking successive derivatives +according to the letter $a$ without simplification and then compare it to +taking the derivative, but simplifying the result. The sizes +are given in \texttt{re.scala}. \hfill[1 Mark] + +\item[(6)] You do not have to implement anything specific under this + task. The purpose here is that you will be marked for some ``power'' + test cases. For example can your matcher decide within 30 seconds + whether the regular expression $(a^*)^*\cdot b$ matches strings of the + form $aaa\ldots{}aaaa$, for say 1 Million $a$'s?
And does simplification + simplify the regular expression \[ - \begin{array}{lcl} - \textit{first}(\texttt{Nil}, f) & \dn & \texttt{None}\\ - \textit{first}(x\!::\!xs, f) & \dn & \begin{cases} - f(x) & \textit{if}\;f(x) \not=\texttt{None}\\ - \textit{first}(xs, f) & \textit{otherwise}\\ - \end{cases} - \end{array} - \] + \texttt{SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)} + \] - \noindent That is, we want to find the first position where the - result of $f$ is not \texttt{None}, if there is one. Note that - `inside' \texttt{first}, you do not (need to) know anything about - the argument $f$ except its type, namely \texttt{Pos => - Option[Path]}. If you want to find out what the result of $f$ is - on a particular argument, say $x$, you can just write $f(x)$. - There is one additional point however you should - take into account when implementing \texttt{first}: you will need to - calculate what the result of $f(x)$ is; your code should do this - only \textbf{once} and for as \textbf{few} elements in the list as - possible! Do not calculate $f(x)$ for all elements and then see which - is the first \texttt{Some}.\\\mbox{}\hfill[1 Mark] - -\item[(5)] Implement a \texttt{first\_tour} function that uses the - \texttt{first}-function from (4), and searches recursively for single tour. - As there might not be such a tour at all, the \texttt{first\_tour} function - needs to return a value of type - \texttt{Option[Path]}.\\\mbox{}\hfill[1 Mark] + \noindent correctly to just \texttt{ONE}, where \texttt{SEQ} is nested + 50 or more times?\\ + \mbox{}\hfill[2 Marks] \end{itemize} -\noindent -\textbf{Testing:} The \texttt{first\_tour} function will be called with board -sizes of up to $8 \times 8$. -\bigskip +\subsection*{Background} -%%\newpage +Although easily implementable in Scala, the idea behind the derivative +function might not be so easy to see.
To understand its purpose +better, assume a regular expression $r$ can match strings of the form +$c\!::\!cs$ (that means strings which start with a character $c$ and have +some rest, or tail, $cs$). If you take the derivative of $r$ with +respect to the character $c$, then you obtain a regular expression +that can match all the strings $cs$. In other words, the regular +expression $\textit{der}\;c\;r$ can match the same strings $c\!::\!cs$ +that can be matched by $r$, except that the $c$ is chopped off. -\noindent -As you should have seen in the earlier parts, a naive search for tours beyond -$8 \times 8$ boards and also searching for closed tours even on small -boards takes too much time. There is a heuristics, called \emph{Warnsdorf's -Rule} that can speed up finding a tour. This heuristics states that a -knight is moved so that it always proceeds to the field from which the -knight will have the \underline{fewest} onward moves. For example for -a knight on field $(1, 3)$, the field $(0, 1)$ has the fewest possible -onward moves, namely 2. +Assume now $r$ can match the string $abc$. If you take the derivative +according to $a$ then you obtain a regular expression that can match +$bc$ (it is $abc$ where the $a$ has been chopped off). If you now +build the derivative $\textit{der}\;b\;(\textit{der}\;a\;r)$ you +obtain a regular expression that can match the string $c$ (it is $bc$ +where $b$ is chopped off). If you finally build the derivative of this +according to $c$, that is +$\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain +a regular expression that can match the empty string. You can test +whether this is indeed the case using the function nullable, which is +what your matcher is doing.
-\chessboard[maxfield=g7, - pgfstyle= {[base,at={\pgfpoint{0pt}{-0.5ex}}]text}, - text = \small 3, markfield=Z5, - text = \small 7, markfield=b5, - text = \small 7, markfield=c4, - text = \small 7, markfield=c2, - text = \small 5, markfield=b1, - text = \small 2, markfield=Z1, - setpieces={Na3}] +The purpose of the $\textit{simp}$ function is to keep the regular +expressions small. Normally the derivative function makes the regular +expression bigger (see the SEQ case and the example in (2)) and the +algorithm would be slower and slower over time. The $\textit{simp}$ +function counters this increase in size and the result is that the +algorithm is fast throughout. By the way, this algorithm is by Janusz +Brzozowski who came up with the idea of derivatives in 1964 in his PhD +thesis. -\noindent -Warnsdorf's Rule states that the moves on the board above should be -tried in the order +\begin{center}\small +\url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)} +\end{center} + -\[ -(0, 1), (0, 5), (2, 1), (2, 5), (3, 4), (3, 2) -\] +If you want to see how badly the regular expression matchers do in +Java\footnote{Version 8 and below; Version 9 and above does not seem to be as + catastrophic, but still much worse than the regular expression + matcher based on derivatives.}, JavaScript and Python with the +`evil' regular expression $(a^*)^*\cdot b$, then have a look at the +graphs below (you can try it out for yourself: have a look at the files +\texttt{catastrophic9.java}, \texttt{catastrophic.js}, +\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you +have implemented. How long can the string of $a$'s be in your matcher +and still stay within the 30 seconds time limit? -\noindent -Whenever there are ties, the corresponding onward moves can be in any -order. When calculating the number of onward moves for each field, we -do not count moves that revisit any field already visited. 
- -\subsubsection*{Tasks (file knight2.scala)} - -\begin{itemize} -\item[(6)] Write a function \texttt{ordered\_moves} that calculates a list of - onward moves like in (2) but orders them according to - Warnsdorf’s Rule. That means moves with the fewest legal onward moves - should come first (in order to be tried out first). \hfill[1 Mark] +\begin{center} +\begin{tabular}{@{}cc@{}} +\multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings + $\underbrace{a\ldots a}_{n}$}\bigskip\\ -\item[(7)] Implement a \texttt{first\_closed\_tour\_heuristics} - function that searches for a single - \textbf{closed} tour on a $6\times 6$ board. It should try out - onward moves according to - the \texttt{ordered\_moves} function from (6). It is more likely to find - a solution when started in the middle of the board (that is - position $(dimension / 2, dimension / 2)$). \hfill[1 Mark] - -\item[(8)] Implement a \texttt{first\_tour\_heuristics} function - for boards up to - $30\times 30$. It is the same function as in (7) but searches for - tours (not just closed tours). 
It might be called with any field on the - board as starting field.\\ - %You have to be careful to write a - %tail-recursive function of the \texttt{first\_tour\_heuristics} function - %otherwise you will get problems with stack-overflows.\\ - \mbox{}\hfill[1 Mark] -\end{itemize} +\begin{tikzpicture} +\begin{axis}[ + xlabel={$n$}, + x label style={at={(1.05,0.0)}}, + ylabel={time in secs}, + y label style={at={(0.06,0.5)}}, + enlargelimits=false, + xtick={0,5,...,30}, + xmax=33, + ymax=45, + ytick={0,5,...,40}, + scaled ticks=false, + axis lines=left, + width=6cm, + height=5.5cm, + legend entries={Python, Java 8, JavaScript, Swift, Dart}, + legend pos=north west, + legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; +\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; +\addplot[red,mark=*, mark options={fill=white}] table {re-js.data}; +\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data}; +\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data}; +\end{axis} +\end{tikzpicture} + & +\begin{tikzpicture} +\begin{axis}[ + xlabel={$n$}, + x label style={at={(1.05,0.0)}}, + ylabel={time in secs}, + y label style={at={(0.06,0.5)}}, + %enlargelimits=false, + %xtick={0,5000,...,30000}, + xmax=65000, + ymax=45, + ytick={0,5,...,40}, + scaled ticks=false, + axis lines=left, + width=6cm, + height=5.5cm, + legend entries={Java 9}, + legend pos=north west] +\addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data}; +\end{axis} +\end{tikzpicture} +\end{tabular} +\end{center} +\newpage -\subsubsection*{Task (file knight3.scala)} -\begin{itemize} -\item[(9)] Implement a function \texttt{tour\_on\_mega\_board} which is - the same function as in (8), \textbf{but} should be able to - deal with boards up to - $70\times 70$ \textbf{within 30 seconds} (on my laptop). This will be tested - by starting from field $(0, 0)$. 
You have to be careful to - write a tail-recursive function otherwise you will get problems - with stack-overflows. Please observe the requirements about - the submissions: no tricks involving \textbf{.par}.\medskip - - The timelimit of 30 seconds is with respect to the laptop on which the - marking will happen. You can roughly estimate how well your - implementation performs by running \texttt{knight3.jar} on your - computer. For example the reference implementation shows - on my laptop: - - \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] -$ scala -cp knight3.jar - -scala> CW8c.tour_on_mega_board(70, List((0, 0))) -Time needed: 9.484 secs. -...<>... -\end{lstlisting}%$ - - \mbox{}\hfill[1 Mark] -\end{itemize} -\bigskip \end{document} + %%% Local Variables: %%% mode: latex %%% TeX-master: t