cws/main_cw03.tex
changeset 428 cdfa6a293453
parent 426 b51467741af2
child 444 7a0735db4788
equal deleted inserted replaced
427:6e93040e3378 428:cdfa6a293453
   117 \begin{document}
   117 \begin{document}
   118 
   118 
   119 % BF IDE
   119 % BF IDE
   120 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
   120 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
   121   
   121   
   122 \section*{Main Part 3 (Scala, 6 Marks)}
   122 \section*{Main Part 3 (Scala, 7 Marks)}
   123 
   123 
   124 \mbox{}\hfill\textit{``Java is the most distressing thing to happen to computing since MS-DOS.''}\smallskip\\
   124 \mbox{}\hfill\textit{``Java is the most distressing thing to happen to computing since MS-DOS.''}\smallskip\\
   125 \mbox{}\hfill\textit{ --- Alan Kay, the inventor of object-oriented programming}\bigskip\medskip
   125 \mbox{}\hfill\textit{ --- Alan Kay, the inventor of object-oriented programming}\bigskip\medskip
   126 
   126 
   127 \noindent
   127 \noindent
   204 will stumble over problems such as recently reported at
   204 will stumble over problems such as recently reported at
   205 
   205 
   206 {\small
   206 {\small
   207 \begin{itemize}
   207 \begin{itemize}
   208 \item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019}  
   208 \item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019}  
   209 \item[$\bullet$] \url{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
   209 \item[$\bullet$] \texttt{\href{https://web.archive.org/web/20160801163029/https://www.stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}}
   210 \item[$\bullet$] \url{https://vimeo.com/112065252}
   210 \item[$\bullet$] \url{https://vimeo.com/112065252}
   211 \item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom}  
   211 \item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom}  
   212 \end{itemize}}
   212 \end{itemize}}
   213 
   213 
   214 % Knowing how to match regular expressions and strings will let you
   214 % Knowing how to match regular expressions and strings will let you
   216 
   216 
   217 
   217 
   218 \subsubsection*{Tasks (file re.scala)}
   218 \subsubsection*{Tasks (file re.scala)}
   219 
   219 
   220 The file \texttt{re.scala} has already a definition for regular
   220 The file \texttt{re.scala} has already a definition for regular
   221 expressions and also defines some handy shorthand notation for
   221 expressions and also defines some handy shorthand notation for regular
   222 regular expressions. The notation in this document matches up
   222 expressions. The notation in this coursework description matches up
   223 with the code in the file as follows:
   223 with the code as follows:
   224 
   224 
   225 \begin{center}
   225 \begin{center}
   226   \begin{tabular}{rcl@{\hspace{10mm}}l}
   226   \begin{tabular}{rcl@{\hspace{10mm}}l}
   227     & & code: & shorthand:\smallskip \\ 
   227     & & code: & shorthand:\smallskip \\ 
   228   $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
   228   $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
   229   $\ONE$  & $\mapsto$ & \texttt{ONE}\\
   229   $\ONE$  & $\mapsto$ & \texttt{ONE}\\
   230   $c$     & $\mapsto$ & \texttt{CHAR(c)}\\
   230   $c$     & $\mapsto$ & \texttt{CHAR(c)}\\
   231   $\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\  
   231   $\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\  
   232   $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
   232   $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
       
   233   $\prod rs$ & $\mapsto$ & \texttt{SEQs(rs)}\\  
   233   $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
   234   $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
   234   $r^*$ & $\mapsto$ &  \texttt{STAR(r)} & \texttt{r.\%}
   235   $r^*$ & $\mapsto$ &  \texttt{STAR(r)} & \texttt{r.\%}
   235 \end{tabular}    
   236 \end{tabular}    
   236 \end{center}  
   237 \end{center}  
   237 
   238 
   242 argument.  In what follows we shall use $rs$ to stand for lists of
   243 argument.  In what follows we shall use $rs$ to stand for lists of
   243 regular expressions. When the list is empty, we shall write $\sum\,[]$;
   244 regular expressions. When the list is empty, we shall write $\sum\,[]$;
   244 if it is non-empty, we sometimes write $\sum\,[r_1,..., r_n]$.
   245 if it is non-empty, we sometimes write $\sum\,[r_1,..., r_n]$.
   245 The binary alternative can be seen as an abbreviation,
   246 The binary alternative can be seen as an abbreviation,
   246 that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the
   247 that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the
   247 binary version and only implement the n-ary alternative.
   248 binary version and only implement the n-ary alternative. Similarly
       
   249 the sequence regular expression is only implemented with lists and the
       
   250 binary version can be obtained by defining $r_1 \cdot r_2 \dn \prod\,[r_1, r_2]$.
   248 
   251 
   249 \begin{itemize}
   252 \begin{itemize}
   250 \item[(1)] Implement a function, called \textit{nullable}, by
   253 \item[(1)] Implement a function, called \textit{nullable}, by
   251   recursion over regular expressions. This function tests whether a
   254   recursion over regular expressions. This function tests whether a
   252   regular expression can match the empty string. This means given a
   255   regular expression can match the empty string. This means given a
   253   regular expression it either returns true or false. The function
   256   regular expression, it either returns true or false. The function
   254   \textit{nullable}
   257   \textit{nullable}
   255   is defined as follows:
   258   is defined as follows:
   256 
   259 
   257 \begin{center}
   260 \begin{center}
   258 \begin{tabular}{lcl}
   261 \begin{tabular}{lcl}
   259 $\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\
   262 $\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\
   260 $\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
   263 $\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
   261 $\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
   264 $\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
   262 $\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\
   265 $\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\
   263 $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
   266 $\textit{nullable}(\prod rs)$ & $\dn$ & $\forall r\in rs.\;\textit{nullable}(r)$\\
   264 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
   267 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
   265 \end{tabular}
   268 \end{tabular}
   266 \end{center}~\hfill[0.5 Marks]
   269 \end{center}~\hfill[0.5 Marks]
   267 
   270 
   268 \item[(2)] Implement a function, called \textit{der}, by recursion over
   271 \item[(2)] Implement a function, called \textit{der}, by recursion over
   274 \begin{tabular}{lcl}
   277 \begin{tabular}{lcl}
   275 $\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\
   278 $\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\
   276 $\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
   279 $\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
   277 $\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
   280 $\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
   278 $\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\
   281 $\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\
   279 $\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
   282 $\textit{der}\;c\;(\prod\;[])$ & $\dn$ & $\ZERO$\\  
   280       & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
   283 $\textit{der}\;c\;(\prod\;r\!::\!rs)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r)$\\
   281       & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
   284       & & $\textit{then}\;(\prod\;(\textit{der}\;c\;r)\!::\!rs) + (\textit{der}\;c\;(\prod rs))$\\
       
   285       & & $\textit{else}\;(\prod\;(\textit{der}\;c\;r)\!::\! rs)$\\
   282 $\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
   286 $\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
   283 \end{tabular}
   287 \end{tabular}
   284 \end{center}
   288 \end{center}
   285 
       
   286 For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
       
   287 w.r.t.~the characters $a$, $b$ and $c$ are
       
   288 
       
   289 \begin{center}
       
   290   \begin{tabular}{lcll}
       
   291     $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
       
   292     $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
       
   293     $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
       
   294   \end{tabular}
       
   295 \end{center}
       
   296 
       
   297 Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
       
   298 w.r.t.~the characters $a$, $b$ and $c$ gives
       
   299 
       
   300 \begin{center}
       
   301   \begin{tabular}{lcll}
       
   302     $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
       
   303     $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
       
   304     $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
       
   305   \end{tabular}
       
   306 \end{center}
       
   307 
       
   308 One more example: Let $r''$ stand for the second derivative above,
       
   309 then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
       
   310 and $c$ gives
       
   311 
       
   312 \begin{center}
       
   313   \begin{tabular}{lcll}
       
   314     $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
       
   315     $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
       
   316     $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
       
   317     (is $\textit{nullable}$)                      
       
   318   \end{tabular}
       
   319 \end{center}
       
   320 
       
   321 Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
       
   322 \mbox{}\hfill\mbox{[1 Mark]}
   289 \mbox{}\hfill\mbox{[1 Mark]}
   323 
   290 
   324 \item[(3)] We next want to simplify regular expressions: essentially
   291 \item[(3)] We next want to simplify regular expressions: essentially
   325   we want to remove $\ZERO$ in regular expressions like $r + \ZERO$
   292   we want to remove $\ZERO$ in regular expressions like $r + \ZERO$
   326   and $\ZERO + r$.  However, our n-ary alternative takes a list of
   293   and $\ZERO + r$.  However, our n-ary alternative takes a list of
   327   regular expressions as argument, we therefore need a more general
   294   regular expressions as argument, and we therefore need a more general
   328   ``flatten'' function, called \texttt{flts}. This function should
   295   ``denesting'' function, which deletes $\ZERO$s and ``spills out'' nested
   329   analyse a list of regular expressions, say $rs$, as follows:
   296   $\sum$s.  This function, called \texttt{denest}, should analyse a
       
   297   list of regular expressions, say $rs$, as follows:
   330 
   298 
   331   \begin{center}
   299   \begin{center}
   332     \begin{tabular}{lllll}
   300     \begin{tabular}{lllll}
   333       1) &$rs = []$                & $\dn$ & $[]$ & (empty list)\\
   301       1) &$rs = []$                & $\dn$ & $[]$ & (empty list)\\
   334       2) &$rs = \ZERO :: rest$     & $\dn$ & $\textrm{flatten}\;rest$\\
   302       2) &$rs = \ZERO :: rest$     & $\dn$ & $\texttt{denest}\;rest$ & (throw away $\ZERO$)\\
   335       3) &$rs = (\sum rs_1) :: rest$ & $\dn$ & $rs_1 ::: \textrm{flatten}\;rest$\\
   303       3) &$rs = (\sum rs) :: rest$ & $\dn$ & $rs ::: \texttt{denest}\;rest$ & (spill out $\sum$)\\
   336       4) &$rs = r :: rest$         & $\dn$ & $r :: \textrm{flatten}\;rest$ & (otherwise)\\
   304       4) &$rs = r :: rest$         & $\dn$ & $r :: \texttt{denest}\;rest$ & (otherwise)\\
   337     \end{tabular}  
   305     \end{tabular}  
   338   \end{center}  
   306   \end{center}  
   339 
   307 
   340   The first clause states that empty lists cannot be further
   308   The first clause states that empty lists cannot be further
   341   flattened. The second removes the first $\ZERO$ from the list and recurses.
   309   denested. The second removes the first $\ZERO$ from the list and recurses.
   342   The third is when the first regular expression is an \texttt{ALTs}, then
   310   The third is when the first regular expression is an \texttt{ALTs}, then
   343   the content of this alternative should be spilled out and appended
   311   the content of this alternative should be spilled out and appended
   344   with the flattened rest of the list. The last case is for all other
   312   with the denested rest of the list. The last case is for all other
   345   cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs},
   313   cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs},
   346   then we just keep the head of the list and flatten the rest.
   314   then we just keep the head of the list and denest the rest.\\
   347   \mbox{}\hfill\mbox{[1 Mark]}
   315   \mbox{}\hfill\mbox{[1 Mark]}
   348 
   316 
   349 \item[(4)] Implement the function \textit{simp}, which recursively
   317 \item[(4)] Implement the function \texttt{flts} which flattens our
       
   318   n-ary sequence regular expression $\prod$. Like \texttt{denest}, this
       
   319   function is intended to delete $\ONE$s and spill out nested $\prod$s.
       
   320   Unfortunately, there is a special case to do with $\ZERO$: If this function encounters a $\ZERO$, then
       
   321   the whole ``product'' should be $\ZERO$. The problem is that the $\ZERO$ can be anywhere
       
   322   inside the list. The easiest way to implement this function is therefore by using an
       
   323   accumulator, which when called is set to \texttt{Nil}. This means \textit{flts} takes
       
   324   two arguments (which are both lists of regular expressions)
       
   325 
       
   326   \[
       
   327   \texttt{flts}\;rs\;acc  
       
   328   \]
       
   329 
       
   330   This function analyses the list $rs$ as follows
       
   331 
       
   332   \begin{center}
       
   333     \begin{tabular}{@{}lllll@{}}
       
   334       1) &$rs = []$                   & $\dn$ & $acc$ & (empty list)\\
       
   335       2) &$rs = \ZERO :: rest$        & $\dn$ & $[\ZERO]$ & (special case for $\ZERO$)\\
       
   336       3) &$rs = \ONE :: rest$         & $\dn$ & $\texttt{flts}\,rest\,acc$ & (throw away $\ONE$)\\
       
   337       4) &$rs = (\prod rs) :: rest$ & $\dn$ & $\texttt{flts}\;rest\,(acc ::: rs)$ & (spill out $\prod$)\\
       
   338       5) &$rs = r :: rest$            & $\dn$ & $\texttt{flts}\;rest\,(acc ::: [r])$& (otherwise)\\
       
   339     \end{tabular}  
       
   340   \end{center}
       
   341 
       
   342   In the first case we just return whatever has accumulated in
       
   343   $acc$. In the fourth case we spill out the $rs$ by appending the
       
   344   $rs$ to the end of the accumulator. Similarly in the last case we
       
   345   append the single regular expression $r$ to the end of the
       
   346   accumulator. I let you think why the ``end'' is needed. \mbox{}\hfill\mbox{[1 Mark]}
       
   347 
       
   348 \item[(5)] Before we can simplify regular expressions, we need what is often called
       
   349   \emph{smart constructors} for $\sum$ and $\prod$. While the ``normal'' constructors
       
   350   \texttt{ALTs} and \texttt{SEQs} give us alternatives and sequences, respectively, \emph{smart}
       
   351   constructors might return something different depending on what list of regular expressions
       
   352   they are given as argument.
       
   353 
       
   354   \begin{center}
       
   355   \begin{tabular}{@{}c@{\hspace{9mm}}c@{}}
       
   356     \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll}
       
   357       & $\sum^{smart}$\smallskip\\
       
   358       1) & $rs = []$  & $\dn$ & $\ZERO$\\ 
       
   359       2) & $rs = [r]$ & $\dn$ & $r$\\ \\
       
   360       3) & otherwise  & $\dn$ & $\sum\,rs$
       
   361     \end{tabular} &
       
   362     \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll}
       
   363         & $\prod^{smart}$\smallskip\\              
       
   364         1) & $rs = []$ & $\dn$ & $\ONE$\\
       
   365         2a) & $rs = [\ZERO]$ & $\dn$ & $\ZERO$\\
       
   366         2b) & $rs = [r]$ & $\dn$ & $r$\\
       
   367         3) & otherwise  & $\dn$ & $\prod\,rs$
       
   368     \end{tabular}              
       
   369   \end{tabular}    
       
   370   \end{center}
       
   371   \mbox{}\hfill\mbox{[0.5 Marks]}
       
   372   
       
   373 \item[(6)] Implement the function \textit{simp}, which recursively
   350   traverses a regular expression, and on the way up simplifies every
   374   traverses a regular expression, and on the way up simplifies every
   351   regular expression on the left (see below) to the regular expression
   375   regular expression on the left (see below) to the regular expression
   352   on the right, except it does not simplify inside ${}^*$-regular
   376   on the right, except it does not simplify inside ${}^*$-regular
   353   expressions.
   377   expressions and also does not simplify $\ZERO$, $\ONE$ and characters.
   354 
   378 
   355   \begin{center}
   379   \begin{center}
   356 \begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
   380     \begin{tabular}{@{}l@{\hspace{3mm}}c@{\hspace{3mm}}ll@{}}
   357 $r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ 
   381       LHS: & & RHS:\smallskip\\
   358 $\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
   382 $\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum^{smart}\;(\texttt{(denest + distinct)}[simp(r_1),..,simp(r_n)])$\\
   359 $r \cdot \ONE$ & $\mapsto$ & $r$\\ 
   383       $\prod\;[r_1,..,r_n]$ & $\mapsto$ & $\prod^{smart}\;(\texttt{(flts)}[simp(r_1),..,simp(r_n)])$\\
   360 $\ONE \cdot r$ & $\mapsto$ & $r$\\ 
   384       $r$ & $\mapsto$ & $r$ \quad (all other cases)
   361 $\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum\;(\texttt{(flts + distinct)}[simp(r_1),..,simp(r_n)])$\\  
       
   362 \end{tabular}
   385 \end{tabular}
   363 \end{center}
   386 \end{center}
   364 
   387 
   365 The last case is as follows: first apply $simp$ to all regular
   388 The first case is as follows: first apply $simp$ to all regular
   366 expressions $r_1,.. ,r_n$; then flatten the resulting list using
   389 expressions $r_1,.. ,r_n$; then denest the resulting list using
   367 \texttt{flts}; finally remove all duplicates in this list (this can be
   390 \texttt{denest}; after that remove all duplicates in this list (this can be
   368 done in Scala using the function
   391 done in Scala using the function
   369 \texttt{\_.distinct}). When you perform these
   392 \texttt{\_.distinct}). Finally, you end up with a list of (simplified)
   370   operations, you end up with three cases, namely where the list is
   393 regular expressions; apply the smart constructor $\sum^{smart}$ to this list.
   371   empty, contains a single element and ``otherwise''. These cases
   394 Similarly in the $\prod$ case: simplify first all regular
   372   should be processed as follows
   395 expressions $r_1,.. ,r_n$; then flatten the resulting list using \texttt{flts} and apply the
   373 \begin{center}
   396 smart constructor $\prod^{smart}$ to the result. In all other cases, just return the
   374 \begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
   397 input $r$ as is.
   375 $\sum\;[]$ & $\mapsto$ & $\ZERO$\\ 
       
   376 $\sum\;[r]$ & $\mapsto$ & $r$\\ 
       
   377 $\sum\;rs$ & $\mapsto$ & $\sum\;rs$ & ``otherwise''\\ 
       
   378 \end{tabular}
       
   379 \end{center}
       
   380 
       
   381   
   398   
   382 
   399 
   383   For example the regular expression
   400   For example the regular expression
   384   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
   401   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
   385 
   402 
   386   simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
   403   simplifies to just $r_1$. \mbox{}\hfill\mbox{[1 Mark]}
   387   seen as trees and there are several methods for traversing
   404 
   388   trees. One of them corresponds to the inside-out traversal, which is
   405 \item[(7)] Implement two functions: The first, called \textit{ders},
   389   also sometimes called post-order tra\-versal: you traverse inside
       
   390   the tree and on the way up you apply simplification rules.
       
   391   \textbf{Another Hint:} Remember numerical expressions from school
       
   392   times---there you had expressions like
       
   393   $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ and
       
   394   simplification rules that looked very similar to rules above. You
       
   395   would simplify such numerical expressions by replacing for example
       
   396   the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then look whether
       
   397   more rules are applicable. If you regard regular expressions as
       
   398   trees and if you organise the simplification in an inside-out
       
   399   fashion, it is always clear which simplification should be applied
       
   400   next.\mbox{}\hfill\mbox{[1 Mark]}
       
   401 
       
   402 \item[(5)] Implement two functions: The first, called \textit{ders},
       
   403   takes a list of characters and a regular expression as arguments, and
   406   takes a list of characters and a regular expression as arguments, and
   404   builds the derivative w.r.t.~the list as follows:
   407   builds the derivative w.r.t.~the list as follows:
   405 
   408 
   406 \begin{center}
   409 \begin{center}
   407 \begin{tabular}{lcl}
   410 \begin{tabular}{lcl}
   408 $\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\
   411 $\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\
   409   $\textit{ders}\;(c::cs)\;r$  & $\dn$ &
   412   $\textit{ders}\;(c::cs)\;r$  & $\dn$ &
   410     $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\
   413     $\textit{ders}\;cs\;(\textit{simp}\,(\textit{der}\;c\;r))$\\
   411 \end{tabular}
   414 \end{tabular}
   412 \end{center}
   415 \end{center}
   413 
   416 
   414 Note that this function is different from \textit{der}, which only
   417 Note that this function is different from \textit{der}, which only
   415 takes a single character.
   418 takes a single character.
   418 regular expression as arguments. It builds first the derivatives
   421 regular expression as arguments. It builds first the derivatives
   419 according to \textit{ders} and after that tests whether the resulting
   422 according to \textit{ders} and after that tests whether the resulting
   420 derivative regular expression can match the empty string (using
   423 derivative regular expression can match the empty string (using
   421 \textit{nullable}).  For example the \textit{matcher} will produce
   424 \textit{nullable}).  For example the \textit{matcher} will produce
   422 true for the regular expression $(a\cdot b)\cdot c$ and the string
   425 true for the regular expression $(a\cdot b)\cdot c$ and the string
   423 $abc$, but false if you give it the string $ab$. \hfill[1 Mark]
   426 $abc$, but false if you give it the string $ab$. \hfill[0.5 Mark]
   424 
   427 
   425 \item[(6)] Implement a function, called \textit{size}, by recursion
   428 \item[(8)] Implement a function, called \textit{size}, by recursion
   426   over regular expressions. If a regular expression is seen as a tree,
   429   over regular expressions. If a regular expression is seen as a tree,
   427   then \textit{size} should return the number of nodes in such a
   430   then \textit{size} should return the number of nodes in such a
   428   tree. Therefore this function is defined as follows:
   431   tree. Therefore this function is defined as follows:
   429 
   432 
   430 \begin{center}
   433 \begin{center}
   431 \begin{tabular}{lcl}
   434 \begin{tabular}{lcl}
   432 $\textit{size}(\ZERO)$ & $\dn$ & $1$\\
   435 $\textit{size}(\ZERO)$ & $\dn$ & $1$\\
   433 $\textit{size}(\ONE)$  & $\dn$ & $1$\\
   436 $\textit{size}(\ONE)$  & $\dn$ & $1$\\
   434 $\textit{size}(c)$     & $\dn$ & $1$\\
   437 $\textit{size}(c)$     & $\dn$ & $1$\\
   435 $\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
   438 $\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
   436 $\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
   439 $\textit{size}(\prod\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
   437 $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
   440 $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
   438 \end{tabular}
   441 \end{tabular}
   439 \end{center}
   442 \end{center}
   440 
   443 
   441 You can use \textit{size} in order to test how much the ``evil'' regular
   444 You can use \textit{size} in order to test how much the ``evil'' regular
   442 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
   445 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
   443 according the letter $a$ without simplification and then compare it to
   446 according the letter $a$ without simplification and then compare it to
   444 taking the derivative, but simplify the result.  The sizes
   447 taking the derivative, but simplify the result.  The sizes
   445 are given in \texttt{re.scala}. \hfill[0.5 Marks]
   448 are given in \texttt{re.scala}. \hfill[0.5 Marks]
   446 
   449 
   447 \item[(7)] You do not have to implement anything specific under this
   450 \item[(9)] You do not have to implement anything specific under this
   448   task.  The purpose here is that you will be marked for some ``power''
   451   task.  The purpose here is that you will be marked for some ``power''
   449   test cases. For example can your matcher decide within 30 seconds
   452   test cases. For example can your matcher decide within 30 seconds
   450   whether the regular expression $(a^*)^*\cdot b$ matches strings of the
   453   whether the regular expression $(a^*)^*\cdot b$ matches strings of the
   451   form $aaa\ldots{}aaaa$, for say 1 Million $a$'s. And does simplification
   454   form $aaa\ldots{}aaaa$, for say 1 Million $a$'s. And does simplification
   452   simplify the regular expression
   455   simplify the regular expression
   461 \end{itemize}
   464 \end{itemize}
   462 
   465 
   463 \subsection*{Background}
   466 \subsection*{Background}
   464 
   467 
   465 Although easily implementable in Scala (ok maybe the \texttt{simp} functions and
   468 Although easily implementable in Scala (ok maybe the \texttt{simp} functions and
   466 \texttt{ALTs} needs a bit more thinking), the idea behind the
   469 the constructors \texttt{ALTs}/\texttt{SEQs} needs a bit more thinking), the idea behind the
   467 derivative function might not so easy to be seen. To understand its
   470 derivative function might not so easy to be seen. To understand its
   468 purpose better, assume a regular expression $r$ can match strings of
   471 purpose better, assume a regular expression $r$ can match strings of
   469 the form $c\!::\!cs$ (that means strings which start with a character
   472 the form $c\!::\!cs$ (that means strings which start with a character
   470 $c$ and have some rest, or tail, $cs$). If you take the derivative of
   473 $c$ and have some rest, or tail, $cs$). If you take the derivative of
   471 $r$ with respect to the character $c$, then you obtain a regular
   474 $r$ with respect to the character $c$, then you obtain a regular
   482 where $b$ is chopped off). If you finally build the derivative of this
   485 where $b$ is chopped off). If you finally build the derivative of this
   483 according $c$, that is
   486 according $c$, that is
   484 $\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain
   487 $\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain
   485 a regular expression that can match the empty string. You can test
   488 a regular expression that can match the empty string. You can test
   486 whether this is indeed the case using the function nullable, which is
   489 whether this is indeed the case using the function nullable, which is
   487 what your matcher is doing.
   490 what the matcher you have implemented is doing.
   488 
   491 
   489 The purpose of the $\textit{simp}$ function is to keep the regular
   492 The purpose of the $\textit{simp}$ function is to keep the regular
   490 expressions small. Normally the derivative function makes the regular
   493 expressions small. Normally the derivative function makes the regular
   491 expression bigger (see the SEQ case and the example in (2)) and the
   494 expression bigger (see the \texttt{SEQs} case and the example in Task (2)) and the
   492 algorithm would be slower and slower over time. The $\textit{simp}$
   495 algorithm would be slower and slower over time. The $\textit{simp}$
   493 function counters this increase in size and the result is that the
   496 function counters this increase in size and the result is that the
   494 algorithm is fast throughout.  By the way, this algorithm is by Janusz
   497 algorithm is fast throughout.  By the way, this algorithm is by Janusz
   495 Brzozowski who came up with the idea of derivatives in 1964 in his PhD
   498 Brzozowski who came up with the idea of derivatives in 1964 in his PhD
   496 thesis.
   499 thesis.
   499 \url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)}
   502 \url{https://en.wikipedia.org/wiki/Janusz_Brzozowski_(computer_scientist)}
   500 \end{center}
   503 \end{center}
   501 
   504 
   502 
   505 
   503 If you want to see how badly the regular expression matchers do in
   506 If you want to see how badly the regular expression matchers do in
   504 Java\footnote{Version 8 and below; Version 9 and above does not seem to be as
   507 Java\footnote{Version 8 and below; Version 9 and above does not seem
   505   catastrophic, but still much worse than the regular expression
   508   to be as catastrophic, but still much worse than the regular
   506   matcher based on derivatives.}, JavaScript and Python with the
   509   expression matcher based on derivatives. BTW, Scala uses the regular
   507 ``evil'' regular expression $(a^*)^*\cdot b$, then have a look at the
   510   expression matcher provided by the Java libraries. So is just as bad.}, JavaScript,
   508 graphs below (you can try it out for yourself: have a look at the files
   511 Python Swift and Dart with the ``evil'' regular expression
       
   512 $(a^*)^*\cdot b$, then have a look at the graphs below (you can try it
       
   513 out for yourself: have a look at the files
   509 \texttt{catastrophic9.java}, \texttt{catastrophic.js},
   514 \texttt{catastrophic9.java}, \texttt{catastrophic.js},
   510 \texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you
   515 \texttt{catastrophic.py} etc on KEATS). Compare this with the matcher
   511 have implemented. How long can a string of $a$'s be in your matcher
   516 you have implemented. How long can a string of $a$'s be in your
   512 and still stay within the 30 seconds time limit? It should be muuuch better
   517 matcher and still stay within the 30 seconds time limit? It should be
   513 than your off-the-shelf matcher in your bog-standard language.
   518 mu(uu)$^*$ch better than your off-the-shelf matcher in your
       
   519 bog-standard language.
   514 
   520 
   515 \begin{center}
   521 \begin{center}
   516 \begin{tabular}{@{}cc@{}}
   522 \begin{tabular}{@{}cc@{}}
   517 \multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings 
   523 \multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings 
   518            $\underbrace{a\ldots a}_{n}$}\medskip\\
   524            $\underbrace{a\ldots a}_{n}$}\medskip\\
   572 
   578 
   573 
   579 
   574 \end{document}
   580 \end{document}
   575 
   581 
   576 
   582 
       
   583 
       
   584 For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
       
   585 w.r.t.~the characters $a$, $b$ and $c$ are
       
   586 
       
   587 \begin{center}
       
   588   \begin{tabular}{lcll}
       
   589     $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
       
   590     $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
       
   591     $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
       
   592   \end{tabular}
       
   593 \end{center}
       
   594 
       
   595 Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
       
   596 w.r.t.~the characters $a$, $b$ and $c$ gives
       
   597 
       
   598 \begin{center}
       
   599   \begin{tabular}{lcll}
       
   600     $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
       
   601     $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
       
   602     $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
       
   603   \end{tabular}
       
   604 \end{center}
       
   605 
       
   606 One more example: Let $r''$ stand for the second derivative above,
       
   607 then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
       
   608 and $c$ gives
       
   609 
       
   610 \begin{center}
       
   611   \begin{tabular}{lcll}
       
   612     $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
       
   613     $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
       
   614     $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
       
   615     (is $\textit{nullable}$)                      
       
   616   \end{tabular}
       
   617 \end{center}
       
   618 
       
   619 Note, the last derivative can match the empty string, that is it is \textit{nullable}.
       
   620 
       
   621 
       
   622 
   577 %%% Local Variables: 
   623 %%% Local Variables: 
   578 %%% mode: latex
   624 %%% mode: latex
   579 %%% TeX-master: t
   625 %%% TeX-master: t
   580 %%% End: 
   626 %%% End: