pep-material: changeset 428:cdfa6a293453

--- a/core_templates1/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates1/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Core 1
+# assignment2022scala - Core 1
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw01.pdf)
 * reference jar:
     [collatz.jar](https://nms.kcl.ac.uk/christian.urban/collatz.jar)
\ No newline at end of file

--- a/core_templates1/collatz.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates1/collatz.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -3,36 +3,18 @@
 
 object C1 {
 
-//(1) Complete the collatz function below. It should
-//    recursively calculate the number of steps needed 
-//    until the collatz series reaches the number 1.
-//    If needed, you can use an auxiliary function that
-//    performs the recursion. The function should expect
-//    arguments in the range of 1 to 1 Million.
+// ADD YOUR CODE BELOW
+//======================
 
+
+//(1) 
 def collatz(n: Long) : Long = ???
 
 
-//(2) Complete the collatz_max function below. It should
-//    calculate how many steps are needed for each number 
-//    from 1 up to a bound and then calculate the maximum number of
-//    steps and the corresponding number that needs that many 
-//    steps. Again, you should expect bounds in the range of 1
-//    up to 1 Million. The first component of the pair is
-//    the maximum number of steps and the second is the 
-//    corresponding number.
-
+//(2) 
 def collatz_max(bnd: Long) : (Long, Long) = ???
 
-//(3) Implement a function that calculates the last_odd
-//    number in a collatz series.  For this implement an
-//    is_pow_of_two function which tests whether a number 
-//    is a power of two. The function is_hard calculates 
-//    whether 3n + 1 is a power of two. Again you can
-//    assume the input ranges between 1 and 1 Million,
-//    and also assume that the input of last_odd will not 
-//    be a power of 2.
-
+//(3)
 def is_pow_of_two(n: Long) : Boolean = ???
 
 def is_hard(n: Long) : Boolean = ???

--- a/core_templates2/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates2/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Core 2
+# assignment2022scala - Core 2
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw02.pdf)
 * reference jar:
     [docdiff.jar](https://nms.kcl.ac.uk/christian.urban/docdiff.jar)
\ No newline at end of file

--- a/core_templates2/docdiff.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates2/docdiff.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -4,44 +4,23 @@
 
 object C2 { 
 
+// ADD YOUR CODE BELOW
+//======================
 
-//(1) Complete the clean function below. It should find
-//    all words in a string using the regular expression
-//    \w+  and the library function 
-//
-//         some_regex.findAllIn(some_string)
-//
-//    The words should be Returned as a list of strings.
-
-
+//(1)
 def clean(s: String) : List[String] = ???
   
 
 
-//(2) The function occurrences calculates the number of times  
-//    strings occur in a list of strings. These occurrences should 
-//    be calculated as a Map from strings to integers.
-
-
+//(2)
 def occurrences(xs: List[String]): Map[String, Int] = ???
 
 
-//(3) This functions calculates the dot-product of two documents
-//    (list of strings). For this it calculates the occurrence
-//    maps from (2) and then multiplies the corresponding occurrences. 
-//    If a string does not occur in a document, the product is zero.
-//    The function finally sums up all products. 
-
-
+//(3)
 def prod(lst1: List[String], lst2: List[String]) : Int = ???
 
 
-//(4) Complete the functions overlap and similarity. The overlap of
-//    two documents is calculated by the formula given in the assignment
-//    description. The similarity of two strings is given by the overlap
-//    of the cleaned strings (see (1)).  
-
-
+//(4)
 def overlap(lst1: List[String], lst2: List[String]) : Double = ???
 
 def similarity(s1: String, s2: String) : Double = ???

--- a/core_templates3/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates3/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Core 3
+# assignment2022scala - Core 3
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw03.pdf)
 * reference jar:
     [postfix.jar](https://nms.kcl.ac.uk/christian.urban/postfix.jar),

--- a/core_templates3/postfix.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates3/postfix.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -19,28 +19,14 @@
 // helper function for splitting strings into tokens
 def split(s: String) : Toks = s.split(" ").toList
 
+// ADD YOUR CODE BELOW
+//======================
 
-// (1) Implement below the shunting yard algorithm. The most
-// convenient way to this in Scala is to implement a recursive 
-// function and to heavily use pattern matching. The function syard 
-// takes some input tokens as first argument. The second and third 
-// arguments represent the stack and the output of the shunting yard 
-// algorithm.
-//
-// In the marking, you can assume the function is called only with 
-// an empty stack and an empty output list. You can also assume the
-// input os  only properly formatted (infix) arithmetic expressions
-// (all parentheses will be well-nested, the input only contains 
-// operators and numbers).
 
-// You can implement any additional helper function you need. I found 
-// it helpful to implement two auxiliary functions for the pattern matching:  
-// 
-
+// (1) 
 def is_op(op: String) : Boolean = ???
 def prec(op1: String, op2: String) : Boolean = ???
 
-
 def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
 
 
@@ -58,13 +44,7 @@
 //syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + +
 
  
-// (2) Implement a compute function that evaluates an input list
-// in postfix notation. This function takes a list of tokens
-// and a stack as argumenta. The function should produce the 
-// result as an integer using the stack. You can assume 
-// this function will be only called with proper postfix 
-// expressions.    
-
+// (2) 
 def compute(toks: Toks, st: List[Int] = Nil) : Int = ???

--- a/core_templates3/postfix2.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates3/postfix2.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -35,12 +35,12 @@
 // the operations in the basic version of the algorithm
 val ops = List("+", "-", "*", "/", "^")
 
-// (3) Implement the extended version of the shunting yard algorithm.
-// This version should properly account for the fact that the power 
-// operation is right-associative. Apart from the extension to include
-// the power operation, you can make the same assumptions as in 
-// basic version.
+// ADD YOUR CODE BELOW
+//======================
 
+
+
+// (3) 
 def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
 
 
@@ -48,9 +48,7 @@
 // syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3"))  // 3 4 8 * 5 1 - 2 3 ^ ^ / +
 
 
-// (4) Implement a compute function that produces an Int for an
-// input list of tokens in postfix notation.
-
+// (4)
 def compute(toks: Toks, st: List[Int] = Nil) : Int = ???

Binary file cws/core_cw01.pdf has changed

Binary file cws/core_cw02.pdf has changed

Binary file cws/core_cw03.pdf has changed

--- a/cws/disclaimer.sty	Sat Oct 08 00:30:51 2022 +0100
+++ b/cws/disclaimer.sty	Tue Nov 01 15:03:48 2022 +0000
@@ -24,7 +24,7 @@
   meaning in Scala than in Java. It changes the meaning of your program,
   and you should never use it.
 
-\item Do not use \texttt{var}! This declares a mutable variable. Only
+\item Do not use \textbf{\texttt{var}}! This declares a mutable variable. Only
   use \texttt{val}!
 
 \item Do not use any parallel collections! No \texttt{.par} therefore!
@@ -96,9 +96,11 @@
 
 It should be understood that the work you submit represents
 your \textbf{own} effort! You have implemented the code entirely
-on your own. You have not used any unauthorised aid (e.g.~Google). You have not copied from anyone else. An
-exception is the Scala code I showed during the lectures or
-uploaded to KEATS, which you can freely use.\bigskip
+on your own. You have not copied from anyone else.
+Do not be tempted to ask Github Copilot for help or
+do any other shenanigans like this! An exception is the Scala
+code I showed during the lectures or uploaded to KEATS,
+which you can freely use.\bigskip
 }
 
 \newcommand{\DISCLAIMEREXAM}{%

Binary file cws/main_cw01.pdf has changed

Binary file cws/main_cw02.pdf has changed

--- a/cws/main_cw02.tex	Sat Oct 08 00:30:51 2022 +0100
+++ b/cws/main_cw02.tex	Tue Nov 01 15:03:48 2022 +0000
@@ -4,13 +4,14 @@
 \usepackage{../styles/style}
 \usepackage{disclaimer}
 \usepackage{../styles/langs}
+\usepackage{graphicx}
 
 \begin{document}
 
 
 %% should ask to lower case the words.
 
-\section*{Main Part 2 (Scala, 6 Marks)}
+\section*{Evil Wordle Game (Scala, 7 Marks)}
 
 \mbox{}\hfill\textit{``C makes it easy to shoot yourself in the foot; C++ makes it harder,}\\
 \mbox{}\hfill\textit{ but when you do, it blows your whole leg off.''}\smallskip\\
@@ -20,8 +21,8 @@
 
 
 \noindent
-You are asked to implement a Scala program for recommending movies
-according to a ratings list.\bigskip
+You are asked to implement a Scala program for making the popular Wordle game as difficult
+as possible.\bigskip
 
 \IMPORTANTNONE{}
 
@@ -31,7 +32,6 @@
 
 \DISCLAIMER{}
 
-
 \subsection*{Reference Implementation}
 
 Like the C++ part, the Scala part works like this: you push your files
@@ -51,163 +51,246 @@
 you would type something like:
 
 \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
-$ scala -cp danube.jar
-scala> val ratings_url =
-     | """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
+$ scala -cp wordle.jar
+scala> val secretsURL =
+     | """https://nms.kcl.ac.uk/christian.urban/wordle.txt"""
 
-scala> M2.get_csv_url(ratings_url)
-val res0: List[String] = List(1,1,4 ...)
+scala> M2.get_wordle_list(secretsURL)
+val res0: List[String] = List(aahed, aalii, ...)
 \end{lstlisting}%$
 
 \subsection*{Hints}
 
 \noindent
-Use \texttt{.split(",").toList} for splitting
-strings according to commas (similarly for the newline character \mbox{$\backslash$\texttt{n}}),
+Useful data functions: \texttt{Source.fromURL},
+\texttt{Source.fromFile} for obtaining a webpage and reading a file,
 \texttt{.getOrElse(..,..)} allows to query a Map, but also gives a
 default value if the Map is not defined, a Map can be `updated' by
 using \texttt{+}, \texttt{.contains} and \texttt{.filter} can test whether
 an element is included in a list, and respectively filter out elements in a list,
 \texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
 elements in the pairs---the sorting is done from smallest to highest,
-\texttt{.take(n)} for taking some elements in a list (takes fewer if the list
-contains less than \texttt{n} elements).
+\texttt{.groupBy} orders lists according to same elements
+.
 
 
 \newpage
 
 
-\subsection*{Main Part 2 (6 Marks, file danube.scala)}
-
-You are creating Danube.co.uk which you hope will be the next big thing
-in online movie renting. You know that you can save money by
-anticipating what movies people will rent; you will pass these savings
-on to your users by offering a discount if they rent movies that
-Danube.co.uk recommends.  
+\subsection*{Main Part 2 (6 Marks, file wordle.scala)}
 
-Your task is to generate \emph{two} movie recommendations for every
-movie a user rents. To do this, you calculate what other
-renters, who also watched this movie, suggest by giving positive ratings.
-Of course, some suggestions are more popular than others. You need to find
-the two most-frequently suggested movies. Return fewer recommendations,
-if there are fewer movies suggested.
-
-The calculations will be based on the small datasets which the research lab
-GroupLens provides for education and development purposes.
+You probably know the game of Wordle\footnote{\url{https://en.wikipedia.org/wiki/Wordle}}
+where you are supposed to guess a five-letter word. The feedback for guesses can help
+with the next guess (green letters are correct, orange letters are present, but in the
+wrong place). For example:
 
 \begin{center}
-\url{https://grouplens.org/datasets/movielens/}
-\end{center}
+\includegraphics[scale=0.2]{../pics/w.jpeg}
+\end{center}  
 
 \noindent
-The slightly adapted CSV-files should be downloaded in your Scala
-file from the URLs:
-
+The idea of the program to be implemented here is to make the Wordle game as evil as possible
+by finding words that are the most difficult to guess. A word list of five-letter words is available
+from 
 
 \begin{center}
 \begin{tabular}{ll}  
-  \url{https://nms.kcl.ac.uk/christian.urban/ratings.csv} & (940 KByte)\\
-  \url{https://nms.kcl.ac.uk/christian.urban/movies.csv}  & (280 KByte)\\
+  \url{https://nms.kcl.ac.uk/christian.urban/wordle.txt} & (78 KByte)\\
 \end{tabular}
 \end{center}
 
 \noindent
-The ratings.csv file is organised as userID, 
-movieID, and rating (which is between 0 and 5, with \emph{positive} ratings
-being 4 and 5). The file movie.csv is organised as
-movieID and full movie name. Both files still contain the usual
-CSV-file header (first line). In this part you are asked
-to implement functions that process these files. If bandwidth
-is an issue for you, download the files locally, but in the submitted
-version use \texttt{Source.fromURL} instead of \texttt{Source.fromFile}.
+In your program you need to download this list and implement some
+functions that in the end select the most difficult words (given an
+input from the user).  If bandwidth is an issue for you, download the
+file locally, but in the submitted version use \texttt{Source.fromURL}
+instead of \texttt{Source.fromFile}.
 
 \subsection*{Tasks}
 
 \begin{itemize}
-\item[(1)] Implement the function \pcode{get_csv_url} which takes an
-  URL-string as argument and requests the corresponding file. The two
-  URLs of interest are \pcode{ratings_url} and \pcode{movies_url},
-  which correspond to CSV-files mentioned above.  The function should
-  return the CSV-file appropriately broken up into lines, and the
-  first line should be dropped (that is omit the header of the CSV-file).
-  The result is a list of strings (the lines in the file). In case
-  the url does not produce a file, return the empty list.\\
-  \mbox{}\hfill [1 Mark]
+\item[(1)] Implement the function \pcode{get_wordle_list} which takes an
+  URL-string as argument and requests the corresponding file. The function should
+  return the word list appropriately broken up into lines.
+  The result should be a list of strings (the lines in the file). In case
+  the url does not produce a file, return the empty list.
+  \mbox{}\hfill [0.5 Marks]
 
-\item[(2)] Implement two functions that process the (broken up)
-  CSV-files from (1). The \pcode{process_ratings} function filters out all
-  ratings below 4 and returns a list of (userID, movieID) pairs. The
-  \pcode{process_movies} function returns a list of (movieID, title) pairs.
-  Note the input to these functions will be the output of the function
-  \pcode{get_csv_url}.\\
-  \mbox{}\hfill [1 Mark]
-%\end{itemize}  
-%  
-%
-%\subsection*{Part 3 (4 Marks, file danube.scala)}
-%
-%\subsection*{Tasks}
-%
-%\begin{itemize}
-\item[(3)] Implement a kind of grouping function that calculates a Map
-  containing the userIDs and all the corresponding recommendations for
-  this user (list of movieIDs). This should be implemented in a
-  tail-recursive fashion using a Map as accumulator. This Map is set to
-  \pcode{Map()} at the beginning of the calculation. For example
+\item[(2)] Implement a polymorphic function \pcode{removeN}, which removes $n$ occurrences of an
+  element from a list (if this element is less than $n$ times present, then remove all occurrences).
+  For example
 
 \begin{lstlisting}[numbers=none]
-val lst = List(("1", "a"), ("1", "b"),
-               ("2", "x"), ("3", "a"),
-               ("2", "y"), ("3", "c"))
-groupById(lst, Map())
+removeN(List(1,2,3,2,1), 3, 2)  => List(1, 2, 2, 1)
+removeN(List(1,2,3,2,1), 2, 1)  => List(1, 3, 2, 1)
+removeN(List(1,2,3,2,1), 2, 2)  => List(1, 3, 1)
+removeN(List(1,2,3,2,1), 1, 1)  => List(2, 3, 2, 1)
+removeN(List(1,2,3,2,1), 1, 3)  => List(2, 3, 2)
+removeN(List(1,2,3,2,1), 0, 2)  => List(1, 2, 3, 2, 1)
+\end{lstlisting}
+
+Make sure you only remove at most $n$ occurrences of the element from the list.
+This function should work for lists of integers but also lists of chars, strings etc.\\
+  \mbox{}\hfill [0.5 Marks]
+
+\item[(3)] Implement a function \pcode{score} that calculates the
+  feedback for a word against a secret word using the rules of the
+  Wordle game. The output of \pcode{score} should be a list of 5
+  elements of type \pcode{Tip} representing three outcomes: a letter
+  in the correct position, a letter that is present, but not in the
+  correct position and a letter that is absent. For example given the
+  secret word "chess" the score for the word "caves" is
+
+\begin{lstlisting}[numbers=none]
+List(Correct, Absent, Absent, Present, Correct)
+\end{lstlisting}
+
+  You have to be careful with multiple occurrences of letters. For example
+  the secret "chess" with the guess "swiss" should produce
+
+\begin{lstlisting}[numbers=none]
+List(Absent, Absent, Absent, Correct, Correct)
+\end{lstlisting}
+
+even though the first 's' in "swiss" is present in the secret word, the 's' are already
+`used up' by the two letters that are correct. To implement this you need to
+implement first a function \pcode{pool} which calculates all the letters in
+a secret that are not correct in a word. For example
+
+\begin{lstlisting}[numbers=none]
+  pool("chess", "caves")  => List(h, e, s)
+  pool("chess", "swiss")  => List(c, h, e)
 \end{lstlisting}
 
-returns the ratings map
+  Now the helper function \pcode{aux} can analyse the arguments secret and word recursively letter-by-letter and
+  decide: if the letters are the same, then return \pcode{Correct} for the corresponding position.
+  If they are not the same, but the letter is in the pool, then return \pcode{Present} and also remove
+  this letter from the pool in the next recursive call of \pcode{aux}. Otherwise return \pcode{Absent} for the
+  corresponding position. The function \pcode{score} is a wrapper for the function \pcode{aux}
+  calling \pcode{aux} with the appropriate arguments (recall what is calculated with \pcode{pool}).\mbox{}\hfill [2 Marks]
+
+\item[(4)] Implement a function \pcode{eval} that gives an integer value to each of the
+  \pcode{Tip}s such that
 
-\begin{center}
-  \pcode{Map(1 -> List(b, a), 2 -> List(y, x), 3 -> List(c, a))}.
-\end{center}
+  \begin{center}
+  \begin{tabular}{lcl}  
+    \textit{eval (Correct)} & $\dn$ & $10$\\
+    \textit{eval (Present)} & $\dn$ & $1$\\
+    \textit{eval (Absent)} & $\dn$ & $0$
+  \end{tabular}                                   
+  \end{center}  
+
+  The function \pcode{iscore} then takes an output of \pcode{score} and sums
+  up all corresponding values. For example for 
+
+\begin{lstlisting}[numbers=none]
+  iscore("chess", "caves")  => 21
+  iscore("chess", "swiss")  => 20
+\end{lstlisting}
+  \mbox{}\hfill [0.5 Marks]
 
-\noindent
-In which order the elements of the list are given is unimportant.\\
-\mbox{}\hfill [1 Mark]
+\item[(5)] The function \pcode{evil} takes a list of secrets (the list from Task 1)
+  and a word as arguments, and calculates the list of words with the lowest
+  score (remember we want to make the Wordle game as difficult as possible---therefore
+  when the user gives us a word, we want to find the secrets that produce the lowest
+  score). For this implement a helper function \pcode{lowest} that goes through
+  the secrets one-by-one and calculates the score. The argument \pcode{current} is
+  the score of the ``currently'' found secrets. When the function \pcode{lowest}
+  is called for the first time then this will be set to the maximum integer value
+  \pcode{Int.MaxValue}. The accumulator will be first empty. If a secret is found
+  with the same score as \pcode{current} then this word is added to the accumulator.
+  If the secret has a lower score, then the accumulator will be discarded and this
+  secret will be the new accumulator. If the secret has a higher score, then it can be
+  ignored. For example \pcode{evil} (the wrapper for \pcode{lowest}) generates
+
+\begin{lstlisting}[numbers=none]
+evil(secrets, "stent").length => 1907
+evil(secrets, "hexes").length => 2966
+evil(secrets, "horse").length => 1203
+evil(secrets, "hoise").length => 971
+evil(secrets, "house").length => 1228
+\end{lstlisting}
 
-\item[(4)] Implement a function that takes a ratings map and a movieID
-  as arguments.  The function calculates all suggestions containing the
-  given movie in its recommendations. It returns a list of all these
-  recommendations (each of them is a list and needs to have the given
-  movie deleted, otherwise it might happen we recommend the same movie
-  ``back''). For example for the Map from above and the movie
-  \pcode{"y"} we obtain \pcode{List(List("x"))}, and for the movie
-  \pcode{"a"} we get \pcode{List(List("b"), List("c"))}.\\
+where \pcode{secrets} is the list generated under Task 1.
+In all cases above the iscore of the resulting secrets is 0, but this does not need to be the case
+in general.\\
+  \mbox{}\hfill [1.5 Marks]
+
+\item[(6)] The secrets generated in Task 5 are the ones with the lowest score
+  with respect to the word. You can think of these as the secrets that are furthest ``away'' from the
+  given word. This is already quite evil for a secret word---remember we can choose
+  a secret \emph{after} a user has given a first word. Now we want to make it
+  even more evil by choosing words that have the most obscure letters. For this we
+  calculate the frequency of how many times certain letters occur in our secrets
+  list (see Task 1). The \emph{frequency} of the letter $c$, say, is given by the formula
+
+  \begin{center}
+  $\textit{freq(c)} \dn 1 - \frac{\textit{number of occurrences of c}}{\textit{number of all letters}}$  
+  \end{center}  
+
+  That means that letters that occur fewer times in our secrets have a higher frequency.
+  For example the letter 'y' has the frequency 0.9680234350909651 while the much more
+  often occurring letter 'e' has only 0.897286463151403 (all calculations should be done
+  with Doubles).
+
+  The function \pcode{frequencies} should calculate the frequencies for all lower-case letters
+  by generating a Map from letters (\pcode{Char}) to Doubles (frequencies).\\ 
   \mbox{}\hfill [1 Mark]
 
-\item[(5)] Implement a suggestions function which takes a ratings map
-  and a movieID as arguments. It calculates all the recommended movies
-  sorted according to the most frequently suggested movie(s) sorted
-  first. This function returns \emph{all} suggested movieIDs as a list of
-  strings.\\
-  \mbox{}\hfill [1 Mark]
+\item[(7)] In this task we want to use the output of \pcode{evil}, rank each string in the
+  generated set and then filter out the strings that are ranked highest (the ones with the most obscure letters).
+  This list of strings often contains only a single word, but in general there might be more (see below).
+  First implement a function \pcode{rank} that takes a frequency map (from 6) and a string as arguments and
+  generates a rank by summing up all frequencies of the letters in the string. For example
+
+\begin{lstlisting}[numbers=none]
+rank(frequencies(secrets), "adobe") => 4.673604687018193
+rank(frequencies(secrets), "gaffe") => 4.745205057045945
+rank(frequencies(secrets), "fuzzy") => 4.898735738513722
+\end{lstlisting}
+
+  Finally, implement a function \pcode{ranked_evil} that selects from the output of \pcode{evil}
+  the string(s) which are highest ranked in evilness.
+
+  
+\begin{lstlisting}[numbers=none]
+ranked_evil(secrets, "abbey") => List(whizz)
+ranked_evil(secrets, "afear") => List(buzzy)
+ranked_evil(secrets, "zincy") => List(jugum)
+ranked_evil(secrets, "zippy") => List(chuff)
+\end{lstlisting}
+
+This means if the user types in "abbey" then the most evil word to choose as secret is ``whizz'' (according to
+our calculations). This word has a zero \pcode{iscore} and the most obscure letters.
 
-\item[(6)]  
-  Implement then a recommendation function which generates a maximum
-  of two most-suggested movies (as calculated above). But it returns
-  the actual movie name, not the movieID. If fewer movies are recommended,
-  then return fewer than two movie names.\\
-  \mbox{}\hfill [1 Mark]
+%
+%\color{red}
+%\section*{Correction with \texttt{ranked\_evil}}
+%
+%The testcases above are actually not the maximum, but the minimum! I will make sure
+%that the task will count as solved when either the minimum (as above) or the maximum (as intended)
+%is used. The correct solutions for the above testcases using the maximum are:
+%\color{black}
+%
+%\begin{lstlisting}[numbers=none]
+%ranked_evil(secrets, "beats") => List(fuzzy)
+%ranked_evil(secrets, "vitae") => List(fuzzy)
+%ranked_evil(secrets, "bento") => List(fuzzy)
+%ranked_evil(secrets, "belts") => List(fuzzy)
+%\end{lstlisting}
+%
+%\noindent \textcolor{red}{Some further testcases for the maximum are:}
+%
+%\begin{lstlisting}[numbers=none]
+%ranked_evil(secrets, "abbey") => List(whizz)
+%ranked_evil(secrets, "afear") => List(buzzy)
+%ranked_evil(secrets, "zincy") => List(jugum)
+%ranked_evil(secrets, "zippy") => List(chuff)
+%\end{lstlisting}
+% 
+%
 
-%\item[(7)] Calculate the recommendations for all movies according to
-% what the recommendations function in (6) produces (this
-% can take a few seconds). Put all recommendations into a list 
-% (of strings) and count how often the strings occur in
-% this list. This produces a list of string-int pairs,
-% where the first component is the movie name and the second
-% is the number of how many times the movie was recommended. 
-% Sort all the pairs according to the number
-% of times they were recommended (most recommended movie name 
-% first).\\
-% \mbox{}\hfill [1 Mark]
-  
+\mbox{}\hfill [1 Mark]  
 \end{itemize}
 
 \end{document}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cws/main_cw02.text-old	Tue Nov 01 15:03:48 2022 +0000
@@ -0,0 +1,221 @@
+
+% !TEX program = xelatex
+\documentclass{article}
+\usepackage{../styles/style}
+\usepackage{disclaimer}
+\usepackage{../styles/langs}
+
+\begin{document}
+
+
+%% should ask to lower case the words.
+
+\section*{Main Part 2 (Scala, 6 Marks)}
+
+\mbox{}\hfill\textit{``C makes it easy to shoot yourself in the foot; C++ makes it harder,}\\
+\mbox{}\hfill\textit{ but when you do, it blows your whole leg off.''}\smallskip\\
+\mbox{}\hfill\textit{ --- Bjarne Stroustrup (creator of the C++ language)}\bigskip\bigskip
+
+
+
+
+\noindent
+You are asked to implement a Scala program for recommending movies
+according to a ratings list.\bigskip
+
+\IMPORTANTNONE{}
+
+\noindent
+Also note that the running time of each part will be restricted to a
+maximum of 30 seconds on my laptop.
+
+\DISCLAIMER{}
+
+
+\subsection*{Reference Implementation}
+
+Like the C++ part, the Scala part works like this: you push your files
+to GitHub and receive (after sometimes a long delay) some automated
+feedback. In the end we will take a snapshot of the submitted files
+and apply an automated marking script to them.\medskip
+
+\noindent
+In addition, the Scala part comes with reference
+implementations in form of \texttt{jar}-files. This allows you to run
+any test cases on your own computer. For example you can call Scala on
+the command line with the option \texttt{-cp danube.jar} and then
+query any function from the template file. Say you want to find out
+what the function \texttt{} produces: for this you just need
+to prefix it with the object name \texttt{M2}.  If you want to find out what
+these functions produce for the list \texttt{List("a", "b", "b")},
+you would type something like:
+
+\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+$ scala -cp danube.jar
+scala> val ratings_url =
+     | """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
+
+scala> M2.get_csv_url(ratings_url)
+val res0: List[String] = List(1,1,4 ...)
+\end{lstlisting}%$
+
+\subsection*{Hints}
+
+\noindent
+Use \texttt{.split(",").toList} for splitting
+strings according to commas (similarly for the newline character \mbox{$\backslash$\texttt{n}}),
+\texttt{.getOrElse(..,..)} allows to query a Map, but also gives a
+default value if the Map is not defined, a Map can be `updated' by
+using \texttt{+}, \texttt{.contains} and \texttt{.filter} can test whether
+an element is included in a list, and respectively filter out elements in a list,
+\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
+elements in the pairs---the sorting is done from smallest to highest,
+\texttt{.take(n)} for taking some elements in a list (takes fewer if the list
+contains less than \texttt{n} elements).
+
+
+\newpage
+
+
+\subsection*{Main Part 2 (6 Marks, file danube.scala)}
+
+You are creating Danube.co.uk which you hope will be the next big thing
+in online movie renting. You know that you can save money by
+anticipating what movies people will rent; you will pass these savings
+on to your users by offering a discount if they rent movies that
+Danube.co.uk recommends.  
+
+Your task is to generate \emph{two} movie recommendations for every
+movie a user rents. To do this, you calculate what other
+renters, who also watched this movie, suggest by giving positive ratings.
+Of course, some suggestions are more popular than others. You need to find
+the two most-frequently suggested movies. Return fewer recommendations,
+if there are fewer movies suggested.
+
+The calculations will be based on the small datasets which the research lab
+GroupLens provides for education and development purposes.
+
+\begin{center}
+\url{https://grouplens.org/datasets/movielens/}
+\end{center}
+
+\noindent
+The slightly adapted CSV-files should be downloaded in your Scala
+file from the URLs:
+
+
+\begin{center}
+\begin{tabular}{ll}  
+  \url{https://nms.kcl.ac.uk/christian.urban/ratings.csv} & (940 KByte)\\
+  \url{https://nms.kcl.ac.uk/christian.urban/movies.csv}  & (280 KByte)\\
+\end{tabular}
+\end{center}
+
+\noindent
+The ratings.csv file is organised as userID, 
+movieID, and rating (which is between 0 and 5, with \emph{positive} ratings
+being 4 and 5). The file movie.csv is organised as
+movieID and full movie name. Both files still contain the usual
+CSV-file header (first line). In this part you are asked
+to implement functions that process these files. If bandwidth
+is an issue for you, download the files locally, but in the submitted
+version use \texttt{Source.fromURL} instead of \texttt{Source.fromFile}.
+
+\subsection*{Tasks}
+
+\begin{itemize}
+\item[(1)] Implement the function \pcode{get_csv_url} which takes an
+  URL-string as argument and requests the corresponding file. The two
+  URLs of interest are \pcode{ratings_url} and \pcode{movies_url},
+  which correspond to CSV-files mentioned above.  The function should
+  return the CSV-file appropriately broken up into lines, and the
+  first line should be dropped (that is omit the header of the CSV-file).
+  The result is a list of strings (the lines in the file). In case
+  the url does not produce a file, return the empty list.\\
+  \mbox{}\hfill [1 Mark]
+
+\item[(2)] Implement two functions that process the (broken up)
+  CSV-files from (1). The \pcode{process_ratings} function filters out all
+  ratings below 4 and returns a list of (userID, movieID) pairs. The
+  \pcode{process_movies} function returns a list of (movieID, title) pairs.
+  Note the input to these functions will be the output of the function
+  \pcode{get_csv_url}.\\
+  \mbox{}\hfill [1 Mark]
+%\end{itemize}  
+%  
+%
+%\subsection*{Part 3 (4 Marks, file danube.scala)}
+%
+%\subsection*{Tasks}
+%
+%\begin{itemize}
+\item[(3)] Implement a kind of grouping function that calculates a Map
+  containing the userIDs and all the corresponding recommendations for
+  this user (list of movieIDs). This should be implemented in a
+  tail-recursive fashion using a Map as accumulator. This Map is set to
+  \pcode{Map()} at the beginning of the calculation. For example
+
+\begin{lstlisting}[numbers=none]
+val lst = List(("1", "a"), ("1", "b"),
+               ("2", "x"), ("3", "a"),
+               ("2", "y"), ("3", "c"))
+groupById(lst, Map())
+\end{lstlisting}
+
+returns the ratings map
+
+\begin{center}
+  \pcode{Map(1 -> List(b, a), 2 -> List(y, x), 3 -> List(c, a))}.
+\end{center}
+
+\noindent
+In which order the elements of the list are given is unimportant.\\
+\mbox{}\hfill [1 Mark]
+
+\item[(4)] Implement a function that takes a ratings map and a movieID
+  as arguments.  The function calculates all suggestions containing the
+  given movie in its recommendations. It returns a list of all these
+  recommendations (each of them is a list and needs to have the given
+  movie deleted, otherwise it might happen we recommend the same movie
+  ``back''). For example for the Map from above and the movie
+  \pcode{"y"} we obtain \pcode{List(List("x"))}, and for the movie
+  \pcode{"a"} we get \pcode{List(List("b"), List("c"))}.\\
+  \mbox{}\hfill [1 Mark]
+
+\item[(5)] Implement a suggestions function which takes a ratings map
+  and a movieID as arguments. It calculates all the recommended movies
+  sorted according to the most frequently suggested movie(s) sorted
+  first. This function returns \emph{all} suggested movieIDs as a list of
+  strings.\\
+  \mbox{}\hfill [1 Mark]
+
+\item[(6)]  
+  Implement then a recommendation function which generates a maximum
+  of two most-suggested movies (as calculated above). But it returns
+  the actual movie name, not the movieID. If fewer movies are recommended,
+  then return fewer than two movie names.\\
+  \mbox{}\hfill [1 Mark]
+
+%\item[(7)] Calculate the recommendations for all movies according to
+% what the recommendations function in (6) produces (this
+% can take a few seconds). Put all recommendations into a list 
+% (of strings) and count how often the strings occur in
+% this list. This produces a list of string-int pairs,
+% where the first component is the movie name and the second
+% is the number of how many times the movie was recommended. 
+% Sort all the pairs according to the number
+% of times they were recommended (most recommended movie name 
+% first).\\
+% \mbox{}\hfill [1 Mark]
+  
+\end{itemize}
+
+\end{document} 
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
+
+
+

Binary file cws/main_cw03.pdf has changed

--- a/cws/main_cw03.tex	Sat Oct 08 00:30:51 2022 +0100
+++ b/cws/main_cw03.tex	Tue Nov 01 15:03:48 2022 +0000
@@ -119,7 +119,7 @@
 % BF IDE
 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
   
-\section*{Main Part 3 (Scala, 6 Marks)}
+\section*{Main Part 3 (Scala, 7 Marks)}
 
 \mbox{}\hfill\textit{``Java is the most distressing thing to happen to computing since MS-DOS.''}\smallskip\\
 \mbox{}\hfill\textit{ --- Alan Kay, the inventor of object-oriented programming}\bigskip\medskip
@@ -206,7 +206,7 @@
 {\small
 \begin{itemize}
 \item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019}  
-\item[$\bullet$] \url{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
+\item[$\bullet$] \texttt{\href{https://web.archive.org/web/20160801163029/https://www.stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}}
 \item[$\bullet$] \url{https://vimeo.com/112065252}
 \item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom}  
 \end{itemize}}
@@ -218,9 +218,9 @@
 \subsubsection*{Tasks (file re.scala)}
 
 The file \texttt{re.scala} has already a definition for regular
-expressions and also defines some handy shorthand notation for
-regular expressions. The notation in this document matches up
-with the code in the file as follows:
+expressions and also defines some handy shorthand notation for regular
+expressions. The notation in this coursework description matches up
+with the code as follows:
 
 \begin{center}
   \begin{tabular}{rcl@{\hspace{10mm}}l}
@@ -230,6 +230,7 @@
   $c$     & $\mapsto$ & \texttt{CHAR(c)}\\
   $\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\  
   $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
+  $\prod rs$ & $\mapsto$ & \texttt{SEQs(rs)}\\  
   $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
   $r^*$ & $\mapsto$ &  \texttt{STAR(r)} & \texttt{r.\%}
 \end{tabular}    
@@ -244,13 +245,15 @@
 if it is non-empty, we sometimes write $\sum\,[r_1,..., r_n]$.
 The binary alternative can be seen as an abbreviation,
 that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the
-binary version and only implement the n-ary alternative.
+binary version and only implement the n-ary alternative. Similarly
+the sequence regular expression is only implemented with lists and the
+binary version can be obtained by defining $r_1 \cdot r_2 \dn \prod\,[r_1, r_2]$.
 
 \begin{itemize}
 \item[(1)] Implement a function, called \textit{nullable}, by
   recursion over regular expressions. This function tests whether a
   regular expression can match the empty string. This means given a
-  regular expression it either returns true or false. The function
+  regular expression, it either returns true or false. The function
   \textit{nullable}
   is defined as follows:
 
@@ -260,7 +263,7 @@
 $\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
 $\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
 $\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\
-$\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
+$\textit{nullable}(\prod rs)$ & $\dn$ & $\forall r\in rs.\;\textit{nullable}(r)$\\
 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
 \end{tabular}
 \end{center}~\hfill[0.5 Marks]
@@ -276,130 +279,130 @@
 $\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
 $\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
 $\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\
-$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
-      & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
-      & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
+$\textit{der}\;c\;(\prod\;[])$ & $\dn$ & $\ZERO$\\  
+$\textit{der}\;c\;(\prod\;r\!::\!rs)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r)$\\
+      & & $\textit{then}\;(\prod\;(\textit{der}\;c\;r)\!::\!rs) + (\textit{der}\;c\;(\prod rs))$\\
+      & & $\textit{else}\;(\prod\;(\textit{der}\;c\;r)\!::\! rs)$\\
 $\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
 \end{tabular}
 \end{center}
-
-For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
-w.r.t.~the characters $a$, $b$ and $c$ are
-
-\begin{center}
-  \begin{tabular}{lcll}
-    $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
-    $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
-    $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
-  \end{tabular}
-\end{center}
-
-Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
-w.r.t.~the characters $a$, $b$ and $c$ gives
-
-\begin{center}
-  \begin{tabular}{lcll}
-    $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
-    $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
-    $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
-  \end{tabular}
-\end{center}
-
-One more example: Let $r''$ stand for the second derivative above,
-then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
-and $c$ gives
-
-\begin{center}
-  \begin{tabular}{lcll}
-    $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
-    $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
-    $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
-    (is $\textit{nullable}$)                      
-  \end{tabular}
-\end{center}
-
-Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
 \mbox{}\hfill\mbox{[1 Mark]}
 
 \item[(3)] We next want to simplify regular expressions: essentially
   we want to remove $\ZERO$ in regular expressions like $r + \ZERO$
   and $\ZERO + r$.  However, our n-ary alternative takes a list of
-  regular expressions as argument, we therefore need a more general
-  ``flatten'' function, called \texttt{flts}. This function should
-  analyse a list of regular expressions, say $rs$, as follows:
+  regular expressions as argument, and we therefore need a more general
+  ``denesting'' function, which deletes $\ZERO$s and ``spills out'' nested
+  $\sum$s.  This function, called \texttt{denest}, should analyse a
+  list of regular expressions, say $rs$, as follows:
 
   \begin{center}
     \begin{tabular}{lllll}
       1) &$rs = []$                & $\dn$ & $[]$ & (empty list)\\
-      2) &$rs = \ZERO :: rest$     & $\dn$ & $\textrm{flatten}\;rest$\\
-      3) &$rs = (\sum rs_1) :: rest$ & $\dn$ & $rs_1 ::: \textrm{flatten}\;rest$\\
-      4) &$rs = r :: rest$         & $\dn$ & $r :: \textrm{flatten}\;rest$ & (otherwise)\\
+      2) &$rs = \ZERO :: rest$     & $\dn$ & $\texttt{denest}\;rest$ & (throw away $\ZERO$)\\
+      3) &$rs = (\sum rs) :: rest$ & $\dn$ & $rs ::: \texttt{denest}\;rest$ & (spill out $\sum$)\\
+      4) &$rs = r :: rest$         & $\dn$ & $r :: \texttt{denest}\;rest$ & (otherwise)\\
     \end{tabular}  
   \end{center}  
 
   The first clause states that empty lists cannot be further
-  flattened. The second removes the first $\ZERO$ from the list and recurses.
+  denested. The second removes the first $\ZERO$ from the list and recurses.
   The third is when the first regular expression is an \texttt{ALTs}, then
   the content of this alternative should be spilled out and appended
-  with the flattened rest of the list. The last case is for all other
+  with the denested rest of the list. The last case is for all other
   cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs},
-  then we just keep the head of the list and flatten the rest.
+  then we just keep the head of the list and denest the rest.\\
   \mbox{}\hfill\mbox{[1 Mark]}
 
-\item[(4)] Implement the function \textit{simp}, which recursively
+\item[(4)] Implement the function \texttt{flts} which flattens our
+  n-ary sequence regular expression $\prod$. Like \texttt{denest}, this
+  function is intended to delete $\ONE$s and spill out nested $\prod$s.
+  Unfortunately, there is a special case to do with $\ZERO$: If this function encounters a $\ZERO$, then
+  the whole ``product'' should be $\ZERO$. The problem is that the $\ZERO$ can be anywhere
+  inside the list. The easiest way to implement this function is therefore by using an
+  accumulator, which when called is set to \texttt{Nil}. This means \textit{flts} takes
+  two arguments (which are both lists of regular expressions)
+
+  \[
+  \texttt{flts}\;rs\;acc  
+  \]
+
+  This function analyses the list $rs$ as follows
+
+  \begin{center}
+    \begin{tabular}{@{}lllll@{}}
+      1) &$rs = []$                   & $\dn$ & $acc$ & (empty list)\\
+      2) &$rs = \ZERO :: rest$        & $\dn$ & $[\ZERO]$ & (special case for $\ZERO$)\\
+      3) &$rs = \ONE :: rest$         & $\dn$ & $\texttt{flts}\,rest\,acc$ & (throw away $\ONE$)\\
+      4) &$rs = (\prod rs) :: rest$ & $\dn$ & $\texttt{flts}\;rest\,(acc ::: rs)$ & (spill out $\prod$)\\
+      5) &$rs = r :: rest$            & $\dn$ & $\texttt{flts}\;rest\,(acc ::: [r])$& (otherwise)\\
+    \end{tabular}  
+  \end{center}
+
+  In the first case we just return whatever has accumulated in
+  $acc$. In the fourth case we spill out the $rs$ by appending the
+  $rs$ to the end of the accumulator. Similarly in the last case we
+  append the single regular expression $r$ to the end of the
+  accumulator. I let you think why the ``end'' is needed. \mbox{}\hfill\mbox{[1 Mark]}
+
+\item[(5)] Before we can simplify regular expressions, we need what is often called
+  \emph{smart constructors} for $\sum$ and $\prod$. While the ``normal'' constructors
+  \texttt{ALTs} and \texttt{SEQs} give us alternatives and sequences, respectively, \emph{smart}
+  constructors might return something different depending on what list of regular expressions
+  they are given as argument.
+
+  \begin{center}
+  \begin{tabular}{@{}c@{\hspace{9mm}}c@{}}
+    \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll}
+      & $\sum^{smart}$\smallskip\\
+      1) & $rs = []$  & $\dn$ & $\ZERO$\\ 
+      2) & $rs = [r]$ & $\dn$ & $r$\\ \\
+      3) & otherwise  & $\dn$ & $\sum\,rs$
+    \end{tabular} &
+    \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll}
+        & $\prod^{smart}$\smallskip\\              
+        1) & $rs = []$ & $\dn$ & $\ONE$\\
+        2a) & $rs = [\ZERO]$ & $\dn$ & $\ZERO$\\
+        2b) & $rs = [r]$ & $\dn$ & $r$\\
+        3) & otherwise  & $\dn$ & $\prod\,rs$
+    \end{tabular}              
+  \end{tabular}    
+  \end{center}
+  \mbox{}\hfill\mbox{[0.5 Marks]}
+  
+\item[(6)] Implement the function \textit{simp}, which recursively
   traverses a regular expression, and on the way up simplifies every
   regular expression on the left (see below) to the regular expression
   on the right, except it does not simplify inside ${}^*$-regular
-  expressions.
+  expressions and also does not simplify $\ZERO$, $\ONE$ and characters.
 
   \begin{center}
-\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
-$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ 
-$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
-$r \cdot \ONE$ & $\mapsto$ & $r$\\ 
-$\ONE \cdot r$ & $\mapsto$ & $r$\\ 
-$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum\;(\texttt{(flts + distinct)}[simp(r_1),..,simp(r_n)])$\\  
+    \begin{tabular}{@{}l@{\hspace{3mm}}c@{\hspace{3mm}}ll@{}}
+      LHS: & & RHS:\smallskip\\
+$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum^{smart}\;(\texttt{(denest + distinct)}[simp(r_1),..,simp(r_n)])$\\
+      $\prod\;[r_1,..,r_n]$ & $\mapsto$ & $\prod^{smart}\;(\texttt{(flts)}[simp(r_1),..,simp(r_n)])$\\
+      $r$ & $\mapsto$ & $r$ \quad (all other cases)
 \end{tabular}
 \end{center}
 
-The last case is as follows: first apply $simp$ to all regular
-expressions $r_1,.. ,r_n$; then flatten the resulting list using
-\texttt{flts}; finally remove all duplicates in this list (this can be
+The first case is as follows: first apply $simp$ to all regular
+expressions $r_1,.. ,r_n$; then denest the resulting list using
+\texttt{denest}; after that remove all duplicates in this list (this can be
 done in Scala using the function
-\texttt{\_.distinct}). When you perform these
-  operations, you end up with three cases, namely where the list is
-  empty, contains a single element and ``otherwise''. These cases
-  should be processed as follows
-\begin{center}
-\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
-$\sum\;[]$ & $\mapsto$ & $\ZERO$\\ 
-$\sum\;[r]$ & $\mapsto$ & $r$\\ 
-$\sum\;rs$ & $\mapsto$ & $\sum\;rs$ & ``otherwise''\\ 
-\end{tabular}
-\end{center}
-
+\texttt{\_.distinct}). Finally, you end up with a list of (simplified)
+regular expressions; apply the smart constructor $\sum^{smart}$ to this list.
+Similarly in the $\prod$ case: simplify first all regular
+expressions $r_1,.. ,r_n$; then flatten the resulting list using \texttt{flts} and apply the
+smart constructor $\prod^{smart}$ to the result. In all other cases, just return the
+input $r$ as is.
   
 
   For example the regular expression
   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
 
-  simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
-  seen as trees and there are several methods for traversing
-  trees. One of them corresponds to the inside-out traversal, which is
-  also sometimes called post-order tra\-versal: you traverse inside
-  the tree and on the way up you apply simplification rules.
-  \textbf{Another Hint:} Remember numerical expressions from school
-  times---there you had expressions like
-  $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ and
-  simplification rules that looked very similar to rules above. You
-  would simplify such numerical expressions by replacing for example
-  the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then look whether
-  more rules are applicable. If you regard regular expressions as
-  trees and if you organise the simplification in an inside-out
-  fashion, it is always clear which simplification should be applied
-  next.\mbox{}\hfill\mbox{[1 Mark]}
+  simplifies to just $r_1$. \mbox{}\hfill\mbox{[1 Mark]}
 
-\item[(5)] Implement two functions: The first, called \textit{ders},
+\item[(7)] Implement two functions: The first, called \textit{ders},
   takes a list of characters and a regular expression as arguments, and
   builds the derivative w.r.t.~the list as follows:
 
@@ -407,7 +410,7 @@
 \begin{tabular}{lcl}
 $\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\
   $\textit{ders}\;(c::cs)\;r$  & $\dn$ &
-    $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\
+    $\textit{ders}\;cs\;(\textit{simp}\,(\textit{der}\;c\;r))$\\
 \end{tabular}
 \end{center}
 
@@ -420,9 +423,9 @@
 derivative regular expression can match the empty string (using
 \textit{nullable}).  For example the \textit{matcher} will produce
 true for the regular expression $(a\cdot b)\cdot c$ and the string
-$abc$, but false if you give it the string $ab$. \hfill[1 Mark]
+$abc$, but false if you give it the string $ab$. \hfill[0.5 Mark]
 
-\item[(6)] Implement a function, called \textit{size}, by recursion
+\item[(8)] Implement a function, called \textit{size}, by recursion
   over regular expressions. If a regular expression is seen as a tree,
   then \textit{size} should return the number of nodes in such a
   tree. Therefore this function is defined as follows:
@@ -433,7 +436,7 @@
 $\textit{size}(\ONE)$  & $\dn$ & $1$\\
 $\textit{size}(c)$     & $\dn$ & $1$\\
 $\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
-$\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
+$\textit{size}(\prod\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
 $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
 \end{tabular}
 \end{center}
@@ -444,7 +447,7 @@
 taking the derivative, but simplify the result.  The sizes
 are given in \texttt{re.scala}. \hfill[0.5 Marks]
 
-\item[(7)] You do not have to implement anything specific under this
+\item[(9)] You do not have to implement anything specific under this
   task.  The purpose here is that you will be marked for some ``power''
   test cases. For example can your matcher decide within 30 seconds
   whether the regular expression $(a^*)^*\cdot b$ matches strings of the
@@ -463,7 +466,7 @@
 \subsection*{Background}
 
 Although easily implementable in Scala (ok maybe the \texttt{simp} functions and
-\texttt{ALTs} needs a bit more thinking), the idea behind the
+the constructors \texttt{ALTs}/\texttt{SEQs} needs a bit more thinking), the idea behind the
 derivative function might not so easy to be seen. To understand its
 purpose better, assume a regular expression $r$ can match strings of
 the form $c\!::\!cs$ (that means strings which start with a character
@@ -484,11 +487,11 @@
 $\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain
 a regular expression that can match the empty string. You can test
 whether this is indeed the case using the function nullable, which is
-what your matcher is doing.
+what the matcher you have implemented is doing.
 
 The purpose of the $\textit{simp}$ function is to keep the regular
 expressions small. Normally the derivative function makes the regular
-expression bigger (see the SEQ case and the example in (2)) and the
+expression bigger (see the \texttt{SEQs} case and the example in Task (2)) and the
 algorithm would be slower and slower over time. The $\textit{simp}$
 function counters this increase in size and the result is that the
 algorithm is fast throughout.  By the way, this algorithm is by Janusz
@@ -501,16 +504,19 @@
 
 
 If you want to see how badly the regular expression matchers do in
-Java\footnote{Version 8 and below; Version 9 and above does not seem to be as
-  catastrophic, but still much worse than the regular expression
-  matcher based on derivatives.}, JavaScript and Python with the
-``evil'' regular expression $(a^*)^*\cdot b$, then have a look at the
-graphs below (you can try it out for yourself: have a look at the files
+Java\footnote{Version 8 and below; Version 9 and above does not seem
+  to be as catastrophic, but still much worse than the regular
+  expression matcher based on derivatives. BTW, Scala uses the regular
+  expression matcher provided by the Java libraries. So is just as bad.}, JavaScript,
+Python Swift and Dart with the ``evil'' regular expression
+$(a^*)^*\cdot b$, then have a look at the graphs below (you can try it
+out for yourself: have a look at the files
 \texttt{catastrophic9.java}, \texttt{catastrophic.js},
-\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you
-have implemented. How long can a string of $a$'s be in your matcher
-and still stay within the 30 seconds time limit? It should be muuuch better
-than your off-the-shelf matcher in your bog-standard language.
+\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher
+you have implemented. How long can a string of $a$'s be in your
+matcher and still stay within the 30 seconds time limit? It should be
+mu(uu)$^*$ch better than your off-the-shelf matcher in your
+bog-standard language.
 
 \begin{center}
 \begin{tabular}{@{}cc@{}}
@@ -574,6 +580,46 @@
 \end{document}
 
 
+
+For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
+w.r.t.~the characters $a$, $b$ and $c$ are
+
+\begin{center}
+  \begin{tabular}{lcll}
+    $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
+    $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
+    $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
+  \end{tabular}
+\end{center}
+
+Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
+w.r.t.~the characters $a$, $b$ and $c$ gives
+
+\begin{center}
+  \begin{tabular}{lcll}
+    $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
+    $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
+    $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
+  \end{tabular}
+\end{center}
+
+One more example: Let $r''$ stand for the second derivative above,
+then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
+and $c$ gives
+
+\begin{center}
+  \begin{tabular}{lcll}
+    $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
+    $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
+    $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
+    (is $\textit{nullable}$)                      
+  \end{tabular}
+\end{center}
+
+Note, the last derivative can match the empty string, that is it is \textit{nullable}.
+
+
+
 %%% Local Variables: 
 %%% mode: latex
 %%% TeX-master: t

Binary file cws/main_cw04.pdf has changed

Binary file cws/main_cw05.pdf has changed

--- a/main_solution3/re.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_solution3/re.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -1,5 +1,5 @@
 // Main Part 3 about Regular Expression Matching
-//=============================================
+//==============================================
 
 object M3 {
 
@@ -8,13 +8,15 @@
 case object ZERO extends Rexp
 case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
-case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
-case class STAR(r: Rexp) extends Rexp             // star
+case class ALTs(rs: List[Rexp]) extends Rexp  // alternatives 
+case class SEQs(rs: List[Rexp]) extends Rexp  // sequences
+case class STAR(r: Rexp) extends Rexp         // star
 
 
-//the usual binary choice can be defined in terms of ALTs
+//the usual binary choice and binary sequence can be defined 
+//in terms of ALTs and SEQs
 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
+def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2))
 
 // some convenience for typing in regular expressions
 import scala.language.implicitConversions    
@@ -41,81 +43,78 @@
   def ~ (r: String) = SEQ(s, r)
 }
 
-// (1) Complete the function nullable according to
-// the definition given in the coursework; this 
-// function checks whether a regular expression
-// can match the empty string and Returns a boolean
-// accordingly.
-
+// (1) 
 def nullable (r: Rexp) : Boolean = r match {
   case ZERO => false
   case ONE => true
   case CHAR(_) => false
   case ALTs(rs) => rs.exists(nullable)
-  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+  case SEQs(rs) => rs.forall(nullable)
   case STAR(_) => true
 }
 
-// (2) Complete the function der according to
-// the definition given in the coursework; this
-// function calculates the derivative of a 
-// regular expression w.r.t. a character.
-
-def der (c: Char, r: Rexp) : Rexp = r match {
+// (2) 
+def der(c: Char, r: Rexp) : Rexp = r match {
   case ZERO => ZERO
   case ONE => ZERO
   case CHAR(d) => if (c == d) ONE else ZERO
   case ALTs(rs) => ALTs(rs.map(der(c, _)))
-  case SEQ(r1, r2) => 
-    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
-    else SEQ(der(c, r1), r2)
+  case SEQs(Nil) => ZERO
+  case SEQs(r1::rs) => 
+    if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs)))
+    else SEQs(der(c, r1):: rs)
   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
 }
 
 
-// (3) Implement the flatten function flts. It
-// deletes 0s from a list of regular expressions
-// and also 'spills out', or flattens, nested 
-// ALTernativeS.
+// (3) 
+def denest(rs: List[Rexp]) : List[Rexp] = rs match {
+  case Nil => Nil
+  case ZERO::tl => denest(tl)
+  case ALTs(rs1)::rs2 => rs1 ::: denest(rs2)  
+  case r::rs => r :: denest(rs) 
+}
 
-def flts(rs: List[Rexp]) : List[Rexp] = rs match {
-  case Nil => Nil
-  case ZERO::tl => flts(tl)
-  case ALTs(rs1)::rs2 => rs1 ::: flts(rs2)  
-  case r::rs => r :: flts(rs) 
+// (4)
+def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match {
+  case Nil => acc
+  case ZERO::rs => ZERO::Nil
+  case ONE::rs => flts(rs, acc)
+  case SEQs(rs1)::rs => flts(rs, acc ::: rs1)
+  case r::rs => flts(rs, acc :+ r) 
 }
 
-// (4) Complete the simp function according to
-// the specification given in the coursework; this
-// function simplifies a regular expression from
-// the inside out, like you would simplify arithmetic 
-// expressions; however it does not simplify inside 
-// STAR-regular expressions.
+// (5)
+def ALTs_smart(rs: List[Rexp]) : Rexp = rs match {
+  case Nil => ZERO
+  case r::Nil => r  
+  case rs => ALTs(rs)
+}
 
+def SEQs_smart(rs: List[Rexp]) : Rexp = rs match {
+  case Nil => ONE
+  case ZERO::nil => ZERO
+  case r::Nil => r
+  case rs => SEQs(rs) 
+}
+
+// (6) 
 
 def simp(r: Rexp) : Rexp = r match {
-  case ALTs(rs) => (flts(rs.map(simp)).distinct) match {
-    case Nil => ZERO
-    case r::Nil => r  
-    case rs => ALTs(rs)
-  }
-  case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
-    case (ZERO, _) => ZERO
-    case (_, ZERO) => ZERO
-    case (ONE, r2s) => r2s
-    case (r1s, ONE) => r1s
-    case (r1s, r2s) => SEQ(r1s, r2s)
-  }
+  case ALTs(rs) => 
+    ALTs_smart(denest(rs.map(simp)).distinct)
+  case SEQs(rs) => 
+    SEQs_smart(flts(rs.map(simp)))
   case r => r
 }
 
-simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))
+//println("Simp tests")
+//println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)))
+//println(simp(((CHAR('a') | ZERO) ~ ONE) | 
+//              (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))))
 
-// (5) Complete the two functions below; the first 
-// calculates the derivative w.r.t. a string; the second
-// is the regular expression matcher taking a regular
-// expression and a string and checks whether the
-// string matches the regular expression.
+
+// (7) 
 
 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   case Nil => r
@@ -125,43 +124,40 @@
 // main matcher function
 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r))
 
-// (6) Complete the size function for regular
-// expressions according to the specification 
-// given in the coursework.
-
+// (8) 
 
 def size(r: Rexp): Int = r match {
   case ZERO => 1
   case ONE => 1
   case CHAR(_) => 1
   case ALTs(rs) => 1 + rs.map(size).sum
-  case SEQ(r1, r2) => 1 + size(r1) + size (r2)
+  case SEQs(rs) => 1 + rs.map(size).sum
   case STAR(r1) => 1 + size(r1)
 }
 
 
 
 // some testing data
-
-//matcher(("a" ~ "b") ~ "c", "abc")  // => true
-//matcher(("a" ~ "b") ~ "c", "ab")   // => false
+/*
+println(matcher(("a" ~ "b") ~ "c", "abc"))  // => true
+println(matcher(("a" ~ "b") ~ "c", "ab"))   // => false
 
 // the supposedly 'evil' regular expression (a*)* b
-// val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 
-//println(matcher(EVIL, "a" * 1000 ++ "b"))   // => true
-//println(matcher(EVIL, "a" * 1000))          // => false
+println(matcher(EVIL, "a" * 1000 ++ "b"))   // => true
+println(matcher(EVIL, "a" * 1000))          // => false
 
 // size without simplifications
-//println(size(der('a', der('a', EVIL))))             // => 28
-//println(size(der('a', der('a', der('a', EVIL)))))   // => 58
+println(size(der('a', der('a', EVIL))))             // => 36
+println(size(der('a', der('a', der('a', EVIL)))))   // => 83
 
 // size with simplification
-//println(simp(der('a', der('a', EVIL))))          
-//println(simp(der('a', der('a', der('a', EVIL)))))
+println(simp(der('a', der('a', EVIL))))          
+println(simp(der('a', der('a', der('a', EVIL)))))
 
-//println(size(simp(der('a', der('a', EVIL)))))           // => 8
-//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8
+println(size(simp(der('a', der('a', EVIL)))))           // => 7
+println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7
 
 // Python needs around 30 seconds for matching 28 a's with EVIL. 
 // Java 9 and later increase this to an "astonishing" 40000 a's in
@@ -169,7 +165,7 @@
 //
 // Lets see how long it takes to match strings with 
 // 5 Million a's...it should be in the range of a 
-// couple of seconds.
+// few seconds.
 
 def time_needed[T](i: Int, code: => T) = {
   val start = System.nanoTime()
@@ -178,19 +174,20 @@
   "%.5f".format((end - start)/(i * 1.0e9))
 }
 
-//for (i <- 0 to 5000000 by 500000) {
-//  println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") 
-//}
+for (i <- 0 to 5000000 by 500000) {
+  println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") 
+}
 
 // another "power" test case 
-//simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
+println(simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next()) == ONE)
 
 // the Iterator produces the rexp
 //
 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
 //
-//    where SEQ is nested 50 times.
- 
+//    where SEQ is nested 100 times.
+*/ 
 
 
 }
+

--- a/main_templates1/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates1/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 1
+# assignment2022scala - Main 1
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw01.pdf)
 * reference jar:
       [drumb.jar](https://nms.kcl.ac.uk/christian.urban/drumb.jar)
\ No newline at end of file

--- a/main_templates1/drumb.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates1/drumb.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -9,66 +9,39 @@
 val rstate_portfolio = List("PLD", "PSA", "AMT", "AIV", "AVB", "BXP", "CCI", 
                             "DLR", "EQIX", "EQR", "ESS", "EXR", "FRT", "HCP") 
 
-
-// (1) The function below takes a stock symbol and a year as arguments.
-//     It should read the corresponding CSV-file and then extract the January 
-//     data from the given year. The data should be collected in a list of
-//     strings (one entry for each line in the CSV-file).
-
 import io.Source
 import scala.util._
 
+// ADD YOUR CODE BELOW
+//======================
+
+
+// (1) 
 def get_january_data(symbol: String, year: Int) : List[String] = ???
 
 
-// (2) From the output of the get_january_data function, the next function 
-//     should extract the first line (if it exists) and the corresponding
-//     first trading price in that year with type Option[Double]. If no line 
-//     is generated by get_january_data then the result is None; and Some if 
-//     there is a price.
-
-
+// (2) 
 def get_first_price(symbol: String, year: Int) : Option[Double] = ???
 
 
-// (3) Complete the function below that obtains all first prices
-//     for the stock symbols from a portfolio (list of strings) and 
-//     for the given range of years. The inner lists are for the
-//     stock symbols and the outer list for the years.
-
-
+// (3) 
 def get_prices(portfolio: List[String], years: Range) : List[List[Option[Double]]] = ???
 
 
 
-// (4) The function below calculates the change factor (delta) between
-//     a price in year n and a price in year n + 1. 
-
+// (4) 
 def get_delta(price_old: Option[Double], price_new: Option[Double]) : Option[Double] = ???
 
 
 
-// (5) The next function calculates all change factors for all prices (from a 
-//     portfolio). The input to this function are the nested lists created by 
-//     get_prices above.
-
+// (5) 
 def get_deltas(data: List[List[Option[Double]]]) :  List[List[Option[Double]]] = ???
 
-
-
-// (6) Write a function that given change factors, a starting balance and an index,
-//     calculates the yearly yield, i.e. new balance, according to our dumb investment 
-//     strategy. Index points to a year in the data list.
-
+// (6) 
 def yearly_yield(data: List[List[Option[Double]]], balance: Long, index: Int) : Long = ???
 
 
-// (7) Write a function compound_yield that calculates the overall balance for a 
-//     range of years where in each year the yearly profit is compounded to the new 
-//     balances and then re-invested into our portfolio. For this use the function and 
-//     results generated under (6). The function investment calls compound_yield
-//     with the appropriate deltas and the first index.
-
+// (7) 
 def compound_yield(data: List[List[Option[Double]]], balance: Long, index: Int) : Long = ???
 
 def investment(portfolio: List[String], years: Range, start_balance: Long) : Long = ???

--- a/main_templates2/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates2/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 2
+# assignment2022scala - Main 2
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw02.pdf)
 * reference jar:
-      [danube.jar](https://nms.kcl.ac.uk/christian.urban/danube.jar)
\ No newline at end of file
+      [danube.jar](https://nms.kcl.ac.uk/christian.urban/wordle.jar)
\ No newline at end of file

--- a/main_templates3/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates3/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 3
+# assignment2022scala - Main 3
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw03.pdf)
 * reference jar:
       [re.jar](https://nms.kcl.ac.uk/christian.urban/re.jar)
\ No newline at end of file

--- a/main_templates3/re.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates3/re.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -1,23 +1,24 @@
 // Main Part 3 about Regular Expression Matching
-//=============================================
+//==============================================
 
 object M3 {
 
-// Regular Expressions
 abstract class Rexp
 case object ZERO extends Rexp
 case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
-case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
-case class STAR(r: Rexp) extends Rexp             // star
+case class ALTs(rs: List[Rexp]) extends Rexp  // alternatives 
+case class SEQs(rs: List[Rexp]) extends Rexp  // sequences
+case class STAR(r: Rexp) extends Rexp         // star
+
 
-//the usual binary choice can be defined in terms of ALTs
+//the usual binary choice and binary sequence can be defined 
+//in terms of ALTs and SEQs
 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
+def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2))
 
 
 // some convenience for typing regular expressions
-
 import scala.language.implicitConversions    
 import scala.language.reflectiveCalls 
 
@@ -42,92 +43,77 @@
   def ~ (r: String) = SEQ(s, r)
 }
 
-
+// examples for the implicits:
 // ALT(CHAR('a'), CHAR('b'))
-// val reg : Rexp = "a" | "b" 
-
+// val areg : Rexp = "a" | "b"
 
-// (1) Complete the function nullable according to
-// the definition given in the coursework; this 
-// function checks whether a regular expression
-// can match the empty string and Returns a boolean
-// accordingly.
-
-def nullable (r: Rexp) : Boolean = ???
-
-
-// (2) Complete the function der according to
-// the definition given in the coursework; this
-// function calculates the derivative of a 
-// regular expression w.r.t. a character.
-
-def der (c: Char, r: Rexp) : Rexp = ???
+// SEQ(CHAR('a'), CHAR('b')) 
+// val sreg : Rexp = "a" ~ "b"
 
 
-// (3) Implement the flatten function flts. It
-// deletes 0s from a list of regular expressions
-// and also 'spills out', or flattens, nested 
-// ALTernativeS.
+// ADD YOUR CODE BELOW
+//======================
 
-def flts(rs: List[Rexp]) : List[Rexp] = ???
+// (1)
+def nullable (r: Rexp) : Boolean = ???
 
-
+// (2) 
+def der (c: Char, r: Rexp) : Rexp = ???
 
-// (4) Complete the simp function according to
-// the specification given in the coursework description; 
-// this function simplifies a regular expression from
-// the inside out, like you would simplify arithmetic 
-// expressions; however it does not simplify inside 
-// STAR-regular expressions. Use the _.distinct and 
-// flts functions.
+// (3) 
+def denest(rs: List[Rexp]) : List[Rexp] = ???
+
+// (4)
+def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = ???
 
+// (5)
+def ALTs_smart(rs: List[Rexp]) : Rexp = ???
+def SEQs_smart(rs: List[Rexp]) : Rexp = ???
+
+// (6)
 def simp(r: Rexp) : Rexp = ???
 
-
-// (5) Complete the two functions below; the first 
-// calculates the derivative w.r.t. a string; the second
-// is the regular expression matcher taking a regular
-// expression and a string and checks whether the
-// string matches the regular expression
-
+// (7)
 def ders (s: List[Char], r: Rexp) : Rexp = ???
-
 def matcher(r: Rexp, s: String): Boolean = ???
 
-
-// (6) Complete the size function for regular
-// expressions according to the specification 
-// given in the coursework.
-
+// (8) 
 def size(r: Rexp): Int = ???
 
 
-// some testing data
-
+// Some testing data
+//===================
 /*
+
+simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))   // => ALTs(List(ONE, CHAR(a)))
+simp(((CHAR('a') | ZERO) ~ ONE) | 
+     (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))   // => CHAR(a)
+
+matcher(("a" ~ "b") ~ "c", "ab")   // => false
 matcher(("a" ~ "b") ~ "c", "abc")  // => true
-matcher(("a" ~ "b") ~ "c", "ab")   // => false
+
 
 // the supposedly 'evil' regular expression (a*)* b
 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 
+matcher(EVIL, "a" * 1000)          // => false
 matcher(EVIL, "a" * 1000 ++ "b")   // => true
-matcher(EVIL, "a" * 1000)          // => false
+
 
 // size without simplifications
-size(der('a', der('a', EVIL)))             // => 28
-size(der('a', der('a', der('a', EVIL))))   // => 58
+size(der('a', der('a', EVIL)))             // => 36
+size(der('a', der('a', der('a', EVIL))))   // => 83
 
 // size with simplification
-size(simp(der('a', der('a', EVIL))))           // => 8
-size(simp(der('a', der('a', der('a', EVIL))))) // => 8
+size(simp(der('a', der('a', EVIL))))           // => 7
+size(simp(der('a', der('a', der('a', EVIL))))) // => 7
 
 // Python needs around 30 seconds for matching 28 a's with EVIL. 
 // Java 9 and later increase this to an "astonishing" 40000 a's in
 // 30 seconds.
 //
 // Lets see how long it really takes to match strings with 
-// 5 Million a's...it should be in the range of a couple
+// 5 Million a's...it should be in the range of a few
 // of seconds.
 
 def time_needed[T](i: Int, code: => T) = {

--- a/main_templates4/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 4
+# assignment2022scala - Main 4
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw04.pdf)
 * reference jar:
      [knight1.jar](https://nms.kcl.ac.uk/christian.urban/knight1.jar),

--- a/main_templates4/knight1.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/knight1.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -9,22 +9,19 @@
 // templates below. Also have a look whether the functions
 // at the end of the file are of any help.
 
-
-
 type Pos = (Int, Int)    // a position on a chessboard 
 type Path = List[Pos]    // a path...a list of positions
 
-//(1) Complete the function that tests whether the position x
-//    is inside the board and not yet element in the path.
+// ADD YOUR CODE BELOW
+//======================
 
+
+
+//(1) 
 def is_legal(dim: Int, path: Path, x: Pos) : Boolean = ???
 
 
-
-//(2) Complete the function that calculates for a position x
-//    all legal onward moves that are not already in the path. 
-//    The moves should be ordered in a "clockwise" manner.
- 
+//(2) 
 def legal_moves(dim: Int, path: Path, x: Pos) : List[Pos] = ???
 
 
@@ -38,21 +35,12 @@
 //assert(legal_moves(8, List((6,6)), (7,7)) == List((6,5), (5,6)))
 
 
-//(3) Complete the two recursive functions below. 
-//    They exhaustively search for knight's tours starting from the 
-//    given path. The first function counts all possible tours, 
-//    and the second collects all tours in a list of paths.
-
+// (3) 
 def count_tours(dim: Int, path: Path) : Int = ???
 
 def enum_tours(dim: Int, path: Path) : List[Path] = ???
 
-
-//(4) Implement a first-function that finds the first 
-//    element, say x, in the list xs where f is not None. 
-//    In that case Return f(x), otherwise None. If possible,
-//    calculate f(x) only once.
-
+// (4) 
 def first(xs: List[Pos], f: Pos => Option[Path]) : Option[Path] = ???
 
 
@@ -64,10 +52,7 @@
 //first(List((1, 0),(2, 0),(3, 0)), foo)          // None
 
 
-//(5) Implement a function that uses the first-function from (4) for
-//    trying out onward moves, and searches recursively for a
-//    knight tour on a dim * dim-board.
-
+//(5) 
 def first_tour(dim: Int, path: Path) : Option[Path] = ???

--- a/main_templates4/knight2.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/knight2.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -4,7 +4,6 @@
 
 object M4b {
 
-
 // !!! Copy any function you need from file knight1.scala !!!
 //
 // If you need any auxiliary functions, feel free to 
@@ -14,21 +13,14 @@
 type Pos = (Int, Int)    // a position on a chessboard 
 type Path = List[Pos]    // a path...a list of positions
 
+// ADD YOUR CODE BELOW
+//======================
 
-//(6) Complete the function that calculates a list of onward
-//    moves like in (2) but orders them according to Warnsdorf’s 
-//    rule. That means moves with the fewest legal onward moves 
-//    should come first.
-
-
+//(6) 
 def ordered_moves(dim: Int, path: Path, x: Pos) : List[Pos] = ???
 
 
-//(7) Complete the function that searches for a single *closed* 
-//    tour using the ordered_moves function from (6). This
-//    function will be tested on a 6 x 6 board. 
-
-
+//(7) 
 def first_closed_tour_heuristics(dim: Int, path: Path) : Option[Path] = ???

--- a/main_templates4/knight3.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/knight3.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -14,13 +14,11 @@
 type Pos = (Int, Int)    // a position on a chessboard 
 type Path = List[Pos]    // a path...a list of positions
 
-//(9) Implement a function that searches for a 
-//    you have to be careful to write a tail-recursive version as this 
-//    function will be called with dimensions of up to 70 * 70
-//    and starting field (0, 0). It has to produce a solution within
-//    30 seconds.
+// ADD YOUR CODE BELOW
+//======================
 
 
+//(9) 
 def tour_on_mega_board(dim: Int, path: Path) : Option[Path] = ???
 
 }

--- a/main_templates5/README.md	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates5/README.md	Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 5
+# assignment2022scala - Main 5
 
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
 * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw05.pdf)
 * reference jar:
     [bf.jar](https://nms.kcl.ac.uk/christian.urban/bf.jar),

--- a/main_templates5/bf.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates5/bf.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -5,45 +5,26 @@
 
 object M5a {
 
-
 // representation of BF memory 
 
 type Mem = Map[Int, Int]
 
-
-// (1) Write a function that takes a file name as argument and
-// and requests the corresponding file from disk. It returns the
-// content of the file as a String. If the file does not exists,
-// the function should return the empty string.
-
 import io.Source
 import scala.util._
 
+// ADD YOUR CODE BELOW
+//======================
+
+// (1)
 def load_bff(name: String) : String = ???
 
-
-
-// (2) Complete the functions for safely reading  
-// and writing brainf*** memory. Safely read should
-// Return the value stored in the Map for a given memory
-// pointer, provided it exists; otherwise it Returns 0. The
-// writing function generates a new Map with the
-// same data, except at the given memory pointer the
-// value v is stored.
-
+// (2) 
 
 def sread(mem: Mem, mp: Int) : Int = ???
 
 def write(mem: Mem, mp: Int, v: Int) : Mem = ???
 
-
-
-// (3) Implement the two jumping instructions in the 
-// brainf*** language. In jumpRight, given a program and 
-// a program counter move the program counter to the right 
-// until after the *matching* ]-command. Similarly, 
-// jumpLeft implements the move to the left to just after
-// the *matching* [-command.
+// (3) 
 
 def jumpRight(prog: String, pc: Int, level: Int) : Int = ???
 
@@ -60,21 +41,7 @@
 
 
 
-// (4) Complete the compute function that interprets (runs) a brainf***
-// program: the arguments are a program (represented as a string), a program 
-// counter, a memory counter and a brainf*** memory. It Returns the
-// memory at the stage when the execution of the brainf*** program
-// finishes. The interpretation finishes once the program counter
-// pc is pointing to something outside the program string.
-// If the pc points to a character inside the program, the pc, 
-// memory pointer and memory need to be updated according to 
-// rules of the brainf*** language. Then, recursively, the compute 
-// function continues with the command at the new program
-// counter. 
-//
-// Implement the run function that calls compute with the program
-// counter and memory counter set to 0.
-
+// (4) 
 
 def compute(prog: String, pc: Int, mp: Int, mem: Mem) : Mem = ???

--- a/main_templates5/bfc.scala	Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates5/bfc.scala	Tue Nov 01 15:03:48 2022 +0000
@@ -4,7 +4,6 @@
 
 object M5b {
 
-
 // !!! Copy any function you need from file bf.scala !!!
 //
 // If you need any auxiliary function, feel free to 
@@ -34,43 +33,12 @@
 import io.Source
 import scala.util._
 
-
-// TASKS
-//=======
+// ADD YOUR CODE BELOW
+//======================
 
-// (5) Write a function jtable that precomputes the "jump
-//     table" for a bf-program. This function takes a bf-program 
-//     as an argument and Returns a Map[Int, Int]. The 
-//     purpose of this map is to record the information about
-//     pc positions where '[' or a ']' are stored. The information
-//     is to which pc-position do we need to jump next?
-// 
-//     For example for the program
-//    
-//       "+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]"
-//
-//     we obtain the map
-//
-//       Map(69 -> 61, 5 -> 20, 60 -> 70, 27 -> 44, 43 -> 28, 19 -> 6)
-//  
-//     This states that for the '[' on position 5, we need to
-//     jump to position 20, which is just after the corresponding ']'.
-//     Similarly, for the ']' on position 19, we need to jump to
-//     position 6, which is just after the '[' on position 5, and so
-//     on. The idea is to not calculate this information each time
-//     we hit a bracket, but just look up this information in the 
-//     jtable. You can use the jumpLeft and jumpRight functions
-//     from Part 1 for calculating the jtable.
-//
-//     Then adapt the compute and run functions from Part 1 
-//     in order to take advantage of the information stored in the jtable. 
-//     This means whenever jumpLeft and jumpRight was called previously,
-//     you should immediately look up the jump address in the jtable.
- 
-
+// (5) 
 def jtable(pg: String) : Map[Int, Int] = ???
 
-
 // testcase
 //
 // jtable("""+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]""")
@@ -80,26 +48,13 @@
 def compute2(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = ???
 def run2(pg: String, m: Mem = Map()) = ???
 
-
 // testcases
 // time_needed(1, run2(load_bff("benchmark.bf")))
 // time_needed(1, run2(load_bff("sierpinski.bf")))
 
 
 
-// (6) Write a function optimise which deletes "dead code" (everything
-// that is not a bf-command) and also replaces substrings of the form
-// [-] by a new command 0. The idea is that the loop [-] just resets the
-// memory at the current location to 0. In the compute3 and run3 functions
-// below you implement this command by writing the number 0 to mem(mp), 
-// that is write(mem, mp, 0). 
-//
-// The easiest way to modify a string in this way is to use the regular
-// expression """[^<>+\-.\[\]]""", which recognises everything that is 
-// not a bf-command and replace it by the empty string. Similarly the
-// regular expression """\[-\]""" finds all occurrences of [-] and 
-// by using the Scala method .replaceAll you can replace it with the 
-// string "0" standing for the new bf-command.
+// (6) 
 
 def optimise(s: String) : String = ???
 
@@ -117,37 +72,14 @@
 
 
 
-// (7)  Write a function combine which replaces sequences
-// of repeated increment and decrement commands by appropriate
-// two-character commands. For example for sequences of +
-//
-//              orig bf-cmds  | replacement
-//            ------------------------------
-//              +             | +A 
-//              ++            | +B
-//              +++           | +C
-//                            |
-//              ...           |
-//                            | 
-//              +++....+++    | +Z
-//                (where length = 26)
-//
-//  Similar for the bf-command -, > and <. All other commands should
-//  be unaffected by this change.
-//
-//  Adapt the compute4 and run4 functions such that they can deal
-//  appropriately with such two-character commands.
-
-
+// (7)  
 def combine(s: String) : String = ???
 
 // testcase
 // combine(load_bff("benchmark.bf"))
 
-
 def compute4(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = ???
 
-
 // should call first optimise and then combine on the input string
 //
 def run4(pg: String, m: Mem = Map()) = ???

author	Christian Urban <christian.urban@kcl.ac.uk>
	Tue, 01 Nov 2022 15:03:48 +0000
changeset 428	cdfa6a293453
parent 427	6e93040e3378
child 429	126d0e47ac85

core_templates1/README.md		file \| annotate \| diff \| comparison \| revisions
core_templates1/collatz.scala		file \| annotate \| diff \| comparison \| revisions
core_templates2/README.md		file \| annotate \| diff \| comparison \| revisions
core_templates2/docdiff.scala		file \| annotate \| diff \| comparison \| revisions
core_templates3/README.md		file \| annotate \| diff \| comparison \| revisions
core_templates3/postfix.scala		file \| annotate \| diff \| comparison \| revisions
core_templates3/postfix2.scala		file \| annotate \| diff \| comparison \| revisions
cws/core_cw01.pdf		file \| annotate \| diff \| comparison \| revisions
cws/core_cw02.pdf		file \| annotate \| diff \| comparison \| revisions
cws/core_cw03.pdf		file \| annotate \| diff \| comparison \| revisions
cws/disclaimer.sty		file \| annotate \| diff \| comparison \| revisions
cws/main_cw01.pdf		file \| annotate \| diff \| comparison \| revisions
cws/main_cw02.pdf		file \| annotate \| diff \| comparison \| revisions
cws/main_cw02.tex		file \| annotate \| diff \| comparison \| revisions
cws/main_cw02.text-old		file \| annotate \| diff \| comparison \| revisions
cws/main_cw03.pdf		file \| annotate \| diff \| comparison \| revisions
cws/main_cw03.tex		file \| annotate \| diff \| comparison \| revisions
cws/main_cw04.pdf		file \| annotate \| diff \| comparison \| revisions
cws/main_cw05.pdf		file \| annotate \| diff \| comparison \| revisions
main_solution3/re.scala		file \| annotate \| diff \| comparison \| revisions
main_templates1/README.md		file \| annotate \| diff \| comparison \| revisions
main_templates1/drumb.scala		file \| annotate \| diff \| comparison \| revisions
main_templates2/README.md		file \| annotate \| diff \| comparison \| revisions
main_templates3/README.md		file \| annotate \| diff \| comparison \| revisions
main_templates3/re.scala		file \| annotate \| diff \| comparison \| revisions
main_templates4/README.md		file \| annotate \| diff \| comparison \| revisions
main_templates4/knight1.scala		file \| annotate \| diff \| comparison \| revisions
main_templates4/knight2.scala		file \| annotate \| diff \| comparison \| revisions
main_templates4/knight3.scala		file \| annotate \| diff \| comparison \| revisions
main_templates5/README.md		file \| annotate \| diff \| comparison \| revisions
main_templates5/bf.scala		file \| annotate \| diff \| comparison \| revisions
main_templates5/bfc.scala		file \| annotate \| diff \| comparison \| revisions