--- a/core_templates1/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates1/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Core 1
+# assignment2022scala - Core 1
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw01.pdf)
* reference jar:
[collatz.jar](https://nms.kcl.ac.uk/christian.urban/collatz.jar)
\ No newline at end of file
--- a/core_templates1/collatz.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates1/collatz.scala Tue Nov 01 15:03:48 2022 +0000
@@ -3,36 +3,18 @@
object C1 {
-//(1) Complete the collatz function below. It should
-// recursively calculate the number of steps needed
-// until the collatz series reaches the number 1.
-// If needed, you can use an auxiliary function that
-// performs the recursion. The function should expect
-// arguments in the range of 1 to 1 Million.
+// ADD YOUR CODE BELOW
+//======================
+
+//(1)
def collatz(n: Long) : Long = ???
-//(2) Complete the collatz_max function below. It should
-// calculate how many steps are needed for each number
-// from 1 up to a bound and then calculate the maximum number of
-// steps and the corresponding number that needs that many
-// steps. Again, you should expect bounds in the range of 1
-// up to 1 Million. The first component of the pair is
-// the maximum number of steps and the second is the
-// corresponding number.
-
+//(2)
def collatz_max(bnd: Long) : (Long, Long) = ???
-//(3) Implement a function that calculates the last_odd
-// number in a collatz series. For this implement an
-// is_pow_of_two function which tests whether a number
-// is a power of two. The function is_hard calculates
-// whether 3n + 1 is a power of two. Again you can
-// assume the input ranges between 1 and 1 Million,
-// and also assume that the input of last_odd will not
-// be a power of 2.
-
+//(3)
def is_pow_of_two(n: Long) : Boolean = ???
def is_hard(n: Long) : Boolean = ???
--- a/core_templates2/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates2/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Core 2
+# assignment2022scala - Core 2
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw02.pdf)
* reference jar:
[docdiff.jar](https://nms.kcl.ac.uk/christian.urban/docdiff.jar)
\ No newline at end of file
--- a/core_templates2/docdiff.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates2/docdiff.scala Tue Nov 01 15:03:48 2022 +0000
@@ -4,44 +4,23 @@
object C2 {
+// ADD YOUR CODE BELOW
+//======================
-//(1) Complete the clean function below. It should find
-// all words in a string using the regular expression
-// \w+ and the library function
-//
-// some_regex.findAllIn(some_string)
-//
-// The words should be Returned as a list of strings.
-
-
+//(1)
def clean(s: String) : List[String] = ???
-//(2) The function occurrences calculates the number of times
-// strings occur in a list of strings. These occurrences should
-// be calculated as a Map from strings to integers.
-
-
+//(2)
def occurrences(xs: List[String]): Map[String, Int] = ???
-//(3) This functions calculates the dot-product of two documents
-// (list of strings). For this it calculates the occurrence
-// maps from (2) and then multiplies the corresponding occurrences.
-// If a string does not occur in a document, the product is zero.
-// The function finally sums up all products.
-
-
+//(3)
def prod(lst1: List[String], lst2: List[String]) : Int = ???
-//(4) Complete the functions overlap and similarity. The overlap of
-// two documents is calculated by the formula given in the assignment
-// description. The similarity of two strings is given by the overlap
-// of the cleaned strings (see (1)).
-
-
+//(4)
def overlap(lst1: List[String], lst2: List[String]) : Double = ???
def similarity(s1: String, s2: String) : Double = ???
--- a/core_templates3/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates3/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Core 3
+# assignment2022scala - Core 3
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw03.pdf)
* reference jar:
[postfix.jar](https://nms.kcl.ac.uk/christian.urban/postfix.jar),
--- a/core_templates3/postfix.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates3/postfix.scala Tue Nov 01 15:03:48 2022 +0000
@@ -19,28 +19,14 @@
// helper function for splitting strings into tokens
def split(s: String) : Toks = s.split(" ").toList
+// ADD YOUR CODE BELOW
+//======================
-// (1) Implement below the shunting yard algorithm. The most
-// convenient way to this in Scala is to implement a recursive
-// function and to heavily use pattern matching. The function syard
-// takes some input tokens as first argument. The second and third
-// arguments represent the stack and the output of the shunting yard
-// algorithm.
-//
-// In the marking, you can assume the function is called only with
-// an empty stack and an empty output list. You can also assume the
-// input os only properly formatted (infix) arithmetic expressions
-// (all parentheses will be well-nested, the input only contains
-// operators and numbers).
-// You can implement any additional helper function you need. I found
-// it helpful to implement two auxiliary functions for the pattern matching:
-//
-
+// (1)
def is_op(op: String) : Boolean = ???
def prec(op1: String, op2: String) : Boolean = ???
-
def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
@@ -58,13 +44,7 @@
//syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + +
-// (2) Implement a compute function that evaluates an input list
-// in postfix notation. This function takes a list of tokens
-// and a stack as argumenta. The function should produce the
-// result as an integer using the stack. You can assume
-// this function will be only called with proper postfix
-// expressions.
-
+// (2)
def compute(toks: Toks, st: List[Int] = Nil) : Int = ???
--- a/core_templates3/postfix2.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/core_templates3/postfix2.scala Tue Nov 01 15:03:48 2022 +0000
@@ -35,12 +35,12 @@
// the operations in the basic version of the algorithm
val ops = List("+", "-", "*", "/", "^")
-// (3) Implement the extended version of the shunting yard algorithm.
-// This version should properly account for the fact that the power
-// operation is right-associative. Apart from the extension to include
-// the power operation, you can make the same assumptions as in
-// basic version.
+// ADD YOUR CODE BELOW
+//======================
+
+
+// (3)
def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
@@ -48,9 +48,7 @@
// syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3")) // 3 4 8 * 5 1 - 2 3 ^ ^ / +
-// (4) Implement a compute function that produces an Int for an
-// input list of tokens in postfix notation.
-
+// (4)
def compute(toks: Toks, st: List[Int] = Nil) : Int = ???
Binary file cws/core_cw01.pdf has changed
Binary file cws/core_cw02.pdf has changed
Binary file cws/core_cw03.pdf has changed
--- a/cws/disclaimer.sty Sat Oct 08 00:30:51 2022 +0100
+++ b/cws/disclaimer.sty Tue Nov 01 15:03:48 2022 +0000
@@ -24,7 +24,7 @@
meaning in Scala than in Java. It changes the meaning of your program,
and you should never use it.
-\item Do not use \texttt{var}! This declares a mutable variable. Only
+\item Do not use \textbf{\texttt{var}}! This declares a mutable variable. Only
use \texttt{val}!
\item Do not use any parallel collections! No \texttt{.par} therefore!
@@ -96,9 +96,11 @@
It should be understood that the work you submit represents
your \textbf{own} effort! You have implemented the code entirely
-on your own. You have not used any unauthorised aid (e.g.~Google). You have not copied from anyone else. An
-exception is the Scala code I showed during the lectures or
-uploaded to KEATS, which you can freely use.\bigskip
+on your own. You have not copied from anyone else.
+Do not be tempted to ask Github Copilot for help or
+do any other shenanigans like this! An exception is the Scala
+code I showed during the lectures or uploaded to KEATS,
+which you can freely use.\bigskip
}
\newcommand{\DISCLAIMEREXAM}{%
Binary file cws/main_cw01.pdf has changed
Binary file cws/main_cw02.pdf has changed
--- a/cws/main_cw02.tex Sat Oct 08 00:30:51 2022 +0100
+++ b/cws/main_cw02.tex Tue Nov 01 15:03:48 2022 +0000
@@ -4,13 +4,14 @@
\usepackage{../styles/style}
\usepackage{disclaimer}
\usepackage{../styles/langs}
+\usepackage{graphicx}
\begin{document}
%% should ask to lower case the words.
-\section*{Main Part 2 (Scala, 6 Marks)}
+\section*{Evil Wordle Game (Scala, 7 Marks)}
\mbox{}\hfill\textit{``C makes it easy to shoot yourself in the foot; C++ makes it harder,}\\
\mbox{}\hfill\textit{ but when you do, it blows your whole leg off.''}\smallskip\\
@@ -20,8 +21,8 @@
\noindent
-You are asked to implement a Scala program for recommending movies
-according to a ratings list.\bigskip
+You are asked to implement a Scala program for making the popular Wordle game as difficult
+as possible.\bigskip
\IMPORTANTNONE{}
@@ -31,7 +32,6 @@
\DISCLAIMER{}
-
\subsection*{Reference Implementation}
Like the C++ part, the Scala part works like this: you push your files
@@ -51,163 +51,246 @@
you would type something like:
\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
-$ scala -cp danube.jar
-scala> val ratings_url =
- | """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
+$ scala -cp wordle.jar
+scala> val secretsURL =
+ | """https://nms.kcl.ac.uk/christian.urban/wordle.txt"""
-scala> M2.get_csv_url(ratings_url)
-val res0: List[String] = List(1,1,4 ...)
+scala> M2.get_wordle_list(secretsURL)
+val res0: List[String] = List(aahed, aalii, ...)
\end{lstlisting}%$
\subsection*{Hints}
\noindent
-Use \texttt{.split(",").toList} for splitting
-strings according to commas (similarly for the newline character \mbox{$\backslash$\texttt{n}}),
+Useful data functions: \texttt{Source.fromURL},
+\texttt{Source.fromFile} for obtaining a webpage and reading a file,
\texttt{.getOrElse(..,..)} allows to query a Map, but also gives a
default value if the Map is not defined, a Map can be `updated' by
using \texttt{+}, \texttt{.contains} and \texttt{.filter} can test whether
an element is included in a list, and respectively filter out elements in a list,
\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
elements in the pairs---the sorting is done from smallest to highest,
-\texttt{.take(n)} for taking some elements in a list (takes fewer if the list
-contains less than \texttt{n} elements).
+\texttt{.groupBy} orders lists according to same elements
+.
\newpage
-\subsection*{Main Part 2 (6 Marks, file danube.scala)}
-
-You are creating Danube.co.uk which you hope will be the next big thing
-in online movie renting. You know that you can save money by
-anticipating what movies people will rent; you will pass these savings
-on to your users by offering a discount if they rent movies that
-Danube.co.uk recommends.
+\subsection*{Main Part 2 (6 Marks, file wordle.scala)}
-Your task is to generate \emph{two} movie recommendations for every
-movie a user rents. To do this, you calculate what other
-renters, who also watched this movie, suggest by giving positive ratings.
-Of course, some suggestions are more popular than others. You need to find
-the two most-frequently suggested movies. Return fewer recommendations,
-if there are fewer movies suggested.
-
-The calculations will be based on the small datasets which the research lab
-GroupLens provides for education and development purposes.
+You probably know the game of Wordle\footnote{\url{https://en.wikipedia.org/wiki/Wordle}}
+where you are supposed to guess a five-letter word. The feedback for guesses can help
+with the next guess (green letters are correct, orange letters are present, but in the
+wrong place). For example:
\begin{center}
-\url{https://grouplens.org/datasets/movielens/}
-\end{center}
+\includegraphics[scale=0.2]{../pics/w.jpeg}
+\end{center}
\noindent
-The slightly adapted CSV-files should be downloaded in your Scala
-file from the URLs:
-
+The idea of the program to be implemented here is to make the Wordle game as evil as possible
+by finding words that are the most difficult to guess. A word list of five-letter words is available
+from
\begin{center}
\begin{tabular}{ll}
- \url{https://nms.kcl.ac.uk/christian.urban/ratings.csv} & (940 KByte)\\
- \url{https://nms.kcl.ac.uk/christian.urban/movies.csv} & (280 KByte)\\
+ \url{https://nms.kcl.ac.uk/christian.urban/wordle.txt} & (78 KByte)\\
\end{tabular}
\end{center}
\noindent
-The ratings.csv file is organised as userID,
-movieID, and rating (which is between 0 and 5, with \emph{positive} ratings
-being 4 and 5). The file movie.csv is organised as
-movieID and full movie name. Both files still contain the usual
-CSV-file header (first line). In this part you are asked
-to implement functions that process these files. If bandwidth
-is an issue for you, download the files locally, but in the submitted
-version use \texttt{Source.fromURL} instead of \texttt{Source.fromFile}.
+In your program you need to download this list and implement some
+functions that in the end select the most difficult words (given an
+input from the user). If bandwidth is an issue for you, download the
+file locally, but in the submitted version use \texttt{Source.fromURL}
+instead of \texttt{Source.fromFile}.
\subsection*{Tasks}
\begin{itemize}
-\item[(1)] Implement the function \pcode{get_csv_url} which takes an
- URL-string as argument and requests the corresponding file. The two
- URLs of interest are \pcode{ratings_url} and \pcode{movies_url},
- which correspond to CSV-files mentioned above. The function should
- return the CSV-file appropriately broken up into lines, and the
- first line should be dropped (that is omit the header of the CSV-file).
- The result is a list of strings (the lines in the file). In case
- the url does not produce a file, return the empty list.\\
- \mbox{}\hfill [1 Mark]
+\item[(1)] Implement the function \pcode{get_wordle_list} which takes an
+ URL-string as argument and requests the corresponding file. The function should
+ return the word list appropriately broken up into lines.
+ The result should be a list of strings (the lines in the file). In case
+ the url does not produce a file, return the empty list.
+ \mbox{}\hfill [0.5 Marks]
-\item[(2)] Implement two functions that process the (broken up)
- CSV-files from (1). The \pcode{process_ratings} function filters out all
- ratings below 4 and returns a list of (userID, movieID) pairs. The
- \pcode{process_movies} function returns a list of (movieID, title) pairs.
- Note the input to these functions will be the output of the function
- \pcode{get_csv_url}.\\
- \mbox{}\hfill [1 Mark]
-%\end{itemize}
-%
-%
-%\subsection*{Part 3 (4 Marks, file danube.scala)}
-%
-%\subsection*{Tasks}
-%
-%\begin{itemize}
-\item[(3)] Implement a kind of grouping function that calculates a Map
- containing the userIDs and all the corresponding recommendations for
- this user (list of movieIDs). This should be implemented in a
- tail-recursive fashion using a Map as accumulator. This Map is set to
- \pcode{Map()} at the beginning of the calculation. For example
+\item[(2)] Implement a polymorphic function \pcode{removeN}, which removes $n$ occurrences of an
+ element from a list (if this element is less than $n$ times present, then remove all occurrences).
+ For example
\begin{lstlisting}[numbers=none]
-val lst = List(("1", "a"), ("1", "b"),
- ("2", "x"), ("3", "a"),
- ("2", "y"), ("3", "c"))
-groupById(lst, Map())
+removeN(List(1,2,3,2,1), 3, 2) => List(1, 2, 2, 1)
+removeN(List(1,2,3,2,1), 2, 1) => List(1, 3, 2, 1)
+removeN(List(1,2,3,2,1), 2, 2) => List(1, 3, 1)
+removeN(List(1,2,3,2,1), 1, 1) => List(2, 3, 2, 1)
+removeN(List(1,2,3,2,1), 1, 3) => List(2, 3, 2)
+removeN(List(1,2,3,2,1), 0, 2) => List(1, 2, 3, 2, 1)
+\end{lstlisting}
+
+Make sure you only remove at most $n$ occurrences of the element from the list.
+This function should work for lists of integers but also lists of chars, strings etc.\\
+ \mbox{}\hfill [0.5 Marks]
+
+\item[(3)] Implement a function \pcode{score} that calculates the
+ feedback for a word against a secret word using the rules of the
+ Wordle game. The output of \pcode{score} should be a list of 5
+ elements of type \pcode{Tip} representing three outcomes: a letter
+ in the correct position, a letter that is present, but not in the
+ correct position and a letter that is absent. For example given the
+ secret word "chess" the score for the word "caves" is
+
+\begin{lstlisting}[numbers=none]
+List(Correct, Absent, Absent, Present, Correct)
+\end{lstlisting}
+
+ You have to be careful with multiple occurrences of letters. For example
+ the secret "chess" with the guess "swiss" should produce
+
+\begin{lstlisting}[numbers=none]
+List(Absent, Absent, Absent, Correct, Correct)
+\end{lstlisting}
+
+even though the first 's' in "swiss" is present in the secret word, the 's' are already
+`used up' by the two letters that are correct. To implement this you need to
+implement first a function \pcode{pool} which calculates all the letters in
+a secret that are not correct in a word. For example
+
+\begin{lstlisting}[numbers=none]
+ pool("chess", "caves") => List(h, e, s)
+ pool("chess", "swiss") => List(c, h, e)
\end{lstlisting}
-returns the ratings map
+ Now the helper function \pcode{aux} can analyse the arguments secret and word recursively letter-by-letter and
+ decide: if the letters are the same, then return \pcode{Correct} for the corresponding position.
+ If they are not the same, but the letter is in the pool, then return \pcode{Present} and also remove
+ this letter from the pool in the next recursive call of \pcode{aux}. Otherwise return \pcode{Absent} for the
+ corresponding position. The function \pcode{score} is a wrapper for the function \pcode{aux}
+ calling \pcode{aux} with the appropriate arguments (recall what is calculated with \pcode{pool}).\mbox{}\hfill [2 Marks]
+
+\item[(4)] Implement a function \pcode{eval} that gives an integer value to each of the
+ \pcode{Tip}s such that
-\begin{center}
- \pcode{Map(1 -> List(b, a), 2 -> List(y, x), 3 -> List(c, a))}.
-\end{center}
+ \begin{center}
+ \begin{tabular}{lcl}
+ \textit{eval (Correct)} & $\dn$ & $10$\\
+ \textit{eval (Present)} & $\dn$ & $1$\\
+ \textit{eval (Absent)} & $\dn$ & $0$
+ \end{tabular}
+ \end{center}
+
+ The function \pcode{iscore} then takes an output of \pcode{score} and sums
+ up all corresponding values. For example for
+
+\begin{lstlisting}[numbers=none]
+ iscore("chess", "caves") => 21
+ iscore("chess", "swiss") => 20
+\end{lstlisting}
+ \mbox{}\hfill [0.5 Marks]
-\noindent
-In which order the elements of the list are given is unimportant.\\
-\mbox{}\hfill [1 Mark]
+\item[(5)] The function \pcode{evil} takes a list of secrets (the list from Task 1)
+ and a word as arguments, and calculates the list of words with the lowest
+ score (remember we want to make the Wordle game as difficult as possible---therefore
+ when the user gives us a word, we want to find the secrets that produce the lowest
+ score). For this implement a helper function \pcode{lowest} that goes through
+ the secrets one-by-one and calculates the score. The argument \pcode{current} is
+ the score of the ``currently'' found secrets. When the function \pcode{lowest}
+ is called for the first time then this will be set to the maximum integer value
+ \pcode{Int.MaxValue}. The accumulator will be first empty. If a secret is found
+ with the same score as \pcode{current} then this word is added to the accumulator.
+ If the secret has a lower score, then the accumulator will be discarded and this
+ secret will be the new accumulator. If the secret has a higher score, then it can be
+ ignored. For example \pcode{evil} (the wrapper for \pcode{lowest}) generates
+
+\begin{lstlisting}[numbers=none]
+evil(secrets, "stent").length => 1907
+evil(secrets, "hexes").length => 2966
+evil(secrets, "horse").length => 1203
+evil(secrets, "hoise").length => 971
+evil(secrets, "house").length => 1228
+\end{lstlisting}
-\item[(4)] Implement a function that takes a ratings map and a movieID
- as arguments. The function calculates all suggestions containing the
- given movie in its recommendations. It returns a list of all these
- recommendations (each of them is a list and needs to have the given
- movie deleted, otherwise it might happen we recommend the same movie
- ``back''). For example for the Map from above and the movie
- \pcode{"y"} we obtain \pcode{List(List("x"))}, and for the movie
- \pcode{"a"} we get \pcode{List(List("b"), List("c"))}.\\
+where \pcode{secrets} is the list generated under Task 1.
+In all cases above the iscore of the resulting secrets is 0, but this does not need to be the case
+in general.\\
+ \mbox{}\hfill [1.5 Marks]
+
+\item[(6)] The secrets generated in Task 5 are the ones with the lowest score
+ with respect to the word. You can think of these as the secrets that are furthest ``away'' from the
+ given word. This is already quite evil for a secret word---remember we can choose
+ a secret \emph{after} a user has given a first word. Now we want to make it
+ even more evil by choosing words that have the most obscure letters. For this we
+ calculate the frequency of how many times certain letters occur in our secrets
+ list (see Task 1). The \emph{frequency} of the letter $c$, say, is given by the formula
+
+ \begin{center}
+ $\textit{freq(c)} \dn 1 - \frac{\textit{number of occurrences of c}}{\textit{number of all letters}}$
+ \end{center}
+
+ That means that letters that occur fewer times in our secrets have a higher frequency.
+ For example the letter 'y' has the frequency 0.9680234350909651 while the much more
+ often occurring letter 'e' has only 0.897286463151403 (all calculations should be done
+ with Doubles).
+
+ The function \pcode{frequencies} should calculate the frequencies for all lower-case letters
+ by generating a Map from letters (\pcode{Char}) to Doubles (frequencies).\\
\mbox{}\hfill [1 Mark]
-\item[(5)] Implement a suggestions function which takes a ratings map
- and a movieID as arguments. It calculates all the recommended movies
- sorted according to the most frequently suggested movie(s) sorted
- first. This function returns \emph{all} suggested movieIDs as a list of
- strings.\\
- \mbox{}\hfill [1 Mark]
+\item[(7)] In this task we want to use the output of \pcode{evil}, rank each string in the
+ generated set and then filter out the strings that are ranked highest (the ones with the most obscure letters).
+ This list of strings often contains only a single word, but in general there might be more (see below).
+ First implement a function \pcode{rank} that takes a frequency map (from 6) and a string as arguments and
+ generates a rank by summing up all frequencies of the letters in the string. For example
+
+\begin{lstlisting}[numbers=none]
+rank(frequencies(secrets), "adobe") => 4.673604687018193
+rank(frequencies(secrets), "gaffe") => 4.745205057045945
+rank(frequencies(secrets), "fuzzy") => 4.898735738513722
+\end{lstlisting}
+
+ Finally, implement a function \pcode{ranked_evil} that selects from the output of \pcode{evil}
+ the string(s) which are highest ranked in evilness.
+
+
+\begin{lstlisting}[numbers=none]
+ranked_evil(secrets, "abbey") => List(whizz)
+ranked_evil(secrets, "afear") => List(buzzy)
+ranked_evil(secrets, "zincy") => List(jugum)
+ranked_evil(secrets, "zippy") => List(chuff)
+\end{lstlisting}
+
+This means if the user types in "abbey" then the most evil word to choose as secret is ``whizz'' (according to
+our calculations). This word has a zero \pcode{iscore} and the most obscure letters.
-\item[(6)]
- Implement then a recommendation function which generates a maximum
- of two most-suggested movies (as calculated above). But it returns
- the actual movie name, not the movieID. If fewer movies are recommended,
- then return fewer than two movie names.\\
- \mbox{}\hfill [1 Mark]
+%
+%\color{red}
+%\section*{Correction with \texttt{ranked\_evil}}
+%
+%The testcases above are actually not the maximum, but the minimum! I will make sure
+%that the task will count as solved when either the minimum (as above) or the maximum (as intended)
+%is used. The correct solutions for the above testcases using the maximum are:
+%\color{black}
+%
+%\begin{lstlisting}[numbers=none]
+%ranked_evil(secrets, "beats") => List(fuzzy)
+%ranked_evil(secrets, "vitae") => List(fuzzy)
+%ranked_evil(secrets, "bento") => List(fuzzy)
+%ranked_evil(secrets, "belts") => List(fuzzy)
+%\end{lstlisting}
+%
+%\noindent \textcolor{red}{Some further testcases for the maximum are:}
+%
+%\begin{lstlisting}[numbers=none]
+%ranked_evil(secrets, "abbey") => List(whizz)
+%ranked_evil(secrets, "afear") => List(buzzy)
+%ranked_evil(secrets, "zincy") => List(jugum)
+%ranked_evil(secrets, "zippy") => List(chuff)
+%\end{lstlisting}
+%
+%
-%\item[(7)] Calculate the recommendations for all movies according to
-% what the recommendations function in (6) produces (this
-% can take a few seconds). Put all recommendations into a list
-% (of strings) and count how often the strings occur in
-% this list. This produces a list of string-int pairs,
-% where the first component is the movie name and the second
-% is the number of how many times the movie was recommended.
-% Sort all the pairs according to the number
-% of times they were recommended (most recommended movie name
-% first).\\
-% \mbox{}\hfill [1 Mark]
-
+\mbox{}\hfill [1 Mark]
\end{itemize}
\end{document}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cws/main_cw02.text-old Tue Nov 01 15:03:48 2022 +0000
@@ -0,0 +1,221 @@
+
+% !TEX program = xelatex
+\documentclass{article}
+\usepackage{../styles/style}
+\usepackage{disclaimer}
+\usepackage{../styles/langs}
+
+\begin{document}
+
+
+%% should ask to lower case the words.
+
+\section*{Main Part 2 (Scala, 6 Marks)}
+
+\mbox{}\hfill\textit{``C makes it easy to shoot yourself in the foot; C++ makes it harder,}\\
+\mbox{}\hfill\textit{ but when you do, it blows your whole leg off.''}\smallskip\\
+\mbox{}\hfill\textit{ --- Bjarne Stroustrup (creator of the C++ language)}\bigskip\bigskip
+
+
+
+
+\noindent
+You are asked to implement a Scala program for recommending movies
+according to a ratings list.\bigskip
+
+\IMPORTANTNONE{}
+
+\noindent
+Also note that the running time of each part will be restricted to a
+maximum of 30 seconds on my laptop.
+
+\DISCLAIMER{}
+
+
+\subsection*{Reference Implementation}
+
+Like the C++ part, the Scala part works like this: you push your files
+to GitHub and receive (after sometimes a long delay) some automated
+feedback. In the end we will take a snapshot of the submitted files
+and apply an automated marking script to them.\medskip
+
+\noindent
+In addition, the Scala part comes with reference
+implementations in form of \texttt{jar}-files. This allows you to run
+any test cases on your own computer. For example you can call Scala on
+the command line with the option \texttt{-cp danube.jar} and then
+query any function from the template file. Say you want to find out
+what the function \texttt{} produces: for this you just need
+to prefix it with the object name \texttt{M2}. If you want to find out what
+these functions produce for the list \texttt{List("a", "b", "b")},
+you would type something like:
+
+\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+$ scala -cp danube.jar
+scala> val ratings_url =
+ | """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
+
+scala> M2.get_csv_url(ratings_url)
+val res0: List[String] = List(1,1,4 ...)
+\end{lstlisting}%$
+
+\subsection*{Hints}
+
+\noindent
+Use \texttt{.split(",").toList} for splitting
+strings according to commas (similarly for the newline character \mbox{$\backslash$\texttt{n}}),
+\texttt{.getOrElse(..,..)} allows to query a Map, but also gives a
+default value if the Map is not defined, a Map can be `updated' by
+using \texttt{+}, \texttt{.contains} and \texttt{.filter} can test whether
+an element is included in a list, and respectively filter out elements in a list,
+\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
+elements in the pairs---the sorting is done from smallest to highest,
+\texttt{.take(n)} for taking some elements in a list (takes fewer if the list
+contains less than \texttt{n} elements).
+
+
+\newpage
+
+
+\subsection*{Main Part 2 (6 Marks, file danube.scala)}
+
+You are creating Danube.co.uk which you hope will be the next big thing
+in online movie renting. You know that you can save money by
+anticipating what movies people will rent; you will pass these savings
+on to your users by offering a discount if they rent movies that
+Danube.co.uk recommends.
+
+Your task is to generate \emph{two} movie recommendations for every
+movie a user rents. To do this, you calculate what other
+renters, who also watched this movie, suggest by giving positive ratings.
+Of course, some suggestions are more popular than others. You need to find
+the two most-frequently suggested movies. Return fewer recommendations,
+if there are fewer movies suggested.
+
+The calculations will be based on the small datasets which the research lab
+GroupLens provides for education and development purposes.
+
+\begin{center}
+\url{https://grouplens.org/datasets/movielens/}
+\end{center}
+
+\noindent
+The slightly adapted CSV-files should be downloaded in your Scala
+file from the URLs:
+
+
+\begin{center}
+\begin{tabular}{ll}
+ \url{https://nms.kcl.ac.uk/christian.urban/ratings.csv} & (940 KByte)\\
+ \url{https://nms.kcl.ac.uk/christian.urban/movies.csv} & (280 KByte)\\
+\end{tabular}
+\end{center}
+
+\noindent
+The ratings.csv file is organised as userID,
+movieID, and rating (which is between 0 and 5, with \emph{positive} ratings
+being 4 and 5). The file movie.csv is organised as
+movieID and full movie name. Both files still contain the usual
+CSV-file header (first line). In this part you are asked
+to implement functions that process these files. If bandwidth
+is an issue for you, download the files locally, but in the submitted
+version use \texttt{Source.fromURL} instead of \texttt{Source.fromFile}.
+
+\subsection*{Tasks}
+
+\begin{itemize}
+\item[(1)] Implement the function \pcode{get_csv_url} which takes an
+ URL-string as argument and requests the corresponding file. The two
+ URLs of interest are \pcode{ratings_url} and \pcode{movies_url},
+ which correspond to CSV-files mentioned above. The function should
+ return the CSV-file appropriately broken up into lines, and the
+ first line should be dropped (that is omit the header of the CSV-file).
+ The result is a list of strings (the lines in the file). In case
+ the url does not produce a file, return the empty list.\\
+ \mbox{}\hfill [1 Mark]
+
+\item[(2)] Implement two functions that process the (broken up)
+ CSV-files from (1). The \pcode{process_ratings} function filters out all
+ ratings below 4 and returns a list of (userID, movieID) pairs. The
+ \pcode{process_movies} function returns a list of (movieID, title) pairs.
+ Note the input to these functions will be the output of the function
+ \pcode{get_csv_url}.\\
+ \mbox{}\hfill [1 Mark]
+%\end{itemize}
+%
+%
+%\subsection*{Part 3 (4 Marks, file danube.scala)}
+%
+%\subsection*{Tasks}
+%
+%\begin{itemize}
+\item[(3)] Implement a kind of grouping function that calculates a Map
+ containing the userIDs and all the corresponding recommendations for
+ this user (list of movieIDs). This should be implemented in a
+ tail-recursive fashion using a Map as accumulator. This Map is set to
+ \pcode{Map()} at the beginning of the calculation. For example
+
+\begin{lstlisting}[numbers=none]
+val lst = List(("1", "a"), ("1", "b"),
+ ("2", "x"), ("3", "a"),
+ ("2", "y"), ("3", "c"))
+groupById(lst, Map())
+\end{lstlisting}
+
+returns the ratings map
+
+\begin{center}
+ \pcode{Map(1 -> List(b, a), 2 -> List(y, x), 3 -> List(c, a))}.
+\end{center}
+
+\noindent
+In which order the elements of the list are given is unimportant.\\
+\mbox{}\hfill [1 Mark]
+
+\item[(4)] Implement a function that takes a ratings map and a movieID
+ as arguments. The function calculates all suggestions containing the
+ given movie in its recommendations. It returns a list of all these
+ recommendations (each of them is a list and needs to have the given
+ movie deleted, otherwise it might happen we recommend the same movie
+ ``back''). For example for the Map from above and the movie
+ \pcode{"y"} we obtain \pcode{List(List("x"))}, and for the movie
+ \pcode{"a"} we get \pcode{List(List("b"), List("c"))}.\\
+ \mbox{}\hfill [1 Mark]
+
+\item[(5)] Implement a suggestions function which takes a ratings map
+ and a movieID as arguments. It calculates all the recommended movies
+ sorted according to the most frequently suggested movie(s) sorted
+ first. This function returns \emph{all} suggested movieIDs as a list of
+ strings.\\
+ \mbox{}\hfill [1 Mark]
+
+\item[(6)]
+ Implement then a recommendation function which generates a maximum
+ of two most-suggested movies (as calculated above). But it returns
+ the actual movie name, not the movieID. If fewer movies are recommended,
+ then return fewer than two movie names.\\
+ \mbox{}\hfill [1 Mark]
+
+%\item[(7)] Calculate the recommendations for all movies according to
+% what the recommendations function in (6) produces (this
+% can take a few seconds). Put all recommendations into a list
+% (of strings) and count how often the strings occur in
+% this list. This produces a list of string-int pairs,
+% where the first component is the movie name and the second
+% is the number of how many times the movie was recommended.
+% Sort all the pairs according to the number
+% of times they were recommended (most recommended movie name
+% first).\\
+% \mbox{}\hfill [1 Mark]
+
+\end{itemize}
+
+\end{document}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End:
+
+
+
Binary file cws/main_cw03.pdf has changed
--- a/cws/main_cw03.tex Sat Oct 08 00:30:51 2022 +0100
+++ b/cws/main_cw03.tex Tue Nov 01 15:03:48 2022 +0000
@@ -119,7 +119,7 @@
% BF IDE
% https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
-\section*{Main Part 3 (Scala, 6 Marks)}
+\section*{Main Part 3 (Scala, 7 Marks)}
\mbox{}\hfill\textit{``Java is the most distressing thing to happen to computing since MS-DOS.''}\smallskip\\
\mbox{}\hfill\textit{ --- Alan Kay, the inventor of object-oriented programming}\bigskip\medskip
@@ -206,7 +206,7 @@
{\small
\begin{itemize}
\item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019}
-\item[$\bullet$] \url{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
+\item[$\bullet$] \texttt{\href{https://web.archive.org/web/20160801163029/https://www.stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}}
\item[$\bullet$] \url{https://vimeo.com/112065252}
\item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom}
\end{itemize}}
@@ -218,9 +218,9 @@
\subsubsection*{Tasks (file re.scala)}
The file \texttt{re.scala} has already a definition for regular
-expressions and also defines some handy shorthand notation for
-regular expressions. The notation in this document matches up
-with the code in the file as follows:
+expressions and also defines some handy shorthand notation for regular
+expressions. The notation in this coursework description matches up
+with the code as follows:
\begin{center}
\begin{tabular}{rcl@{\hspace{10mm}}l}
@@ -230,6 +230,7 @@
$c$ & $\mapsto$ & \texttt{CHAR(c)}\\
$\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\
$r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
+ $\prod rs$ & $\mapsto$ & \texttt{SEQs(rs)}\\
$r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
$r^*$ & $\mapsto$ & \texttt{STAR(r)} & \texttt{r.\%}
\end{tabular}
@@ -244,13 +245,15 @@
if it is non-empty, we sometimes write $\sum\,[r_1,..., r_n]$.
The binary alternative can be seen as an abbreviation,
that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the
-binary version and only implement the n-ary alternative.
+binary version and only implement the n-ary alternative. Similarly
+the sequence regular expression is only implemented with lists and the
+binary version can be obtained by defining $r_1 \cdot r_2 \dn \prod\,[r_1, r_2]$.
\begin{itemize}
\item[(1)] Implement a function, called \textit{nullable}, by
recursion over regular expressions. This function tests whether a
regular expression can match the empty string. This means given a
- regular expression it either returns true or false. The function
+ regular expression, it either returns true or false. The function
\textit{nullable}
is defined as follows:
@@ -260,7 +263,7 @@
$\textit{nullable}(\ONE)$ & $\dn$ & $\textit{true}$\\
$\textit{nullable}(c)$ & $\dn$ & $\textit{false}$\\
$\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\
-$\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
+$\textit{nullable}(\prod rs)$ & $\dn$ & $\forall r\in rs.\;\textit{nullable}(r)$\\
$\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
\end{tabular}
\end{center}~\hfill[0.5 Marks]
@@ -276,130 +279,130 @@
$\textit{der}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\
$\textit{der}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
$\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\
-$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
- & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
- & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
+$\textit{der}\;c\;(\prod\;[])$ & $\dn$ & $\ZERO$\\
+$\textit{der}\;c\;(\prod\;r\!::\!rs)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r)$\\
+ & & $\textit{then}\;(\prod\;(\textit{der}\;c\;r)\!::\!rs) + (\textit{der}\;c\;(\prod rs))$\\
+ & & $\textit{else}\;(\prod\;(\textit{der}\;c\;r)\!::\! rs)$\\
$\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
\end{tabular}
\end{center}
-
-For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
-w.r.t.~the characters $a$, $b$ and $c$ are
-
-\begin{center}
- \begin{tabular}{lcll}
- $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
- $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
- $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
- \end{tabular}
-\end{center}
-
-Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
-w.r.t.~the characters $a$, $b$ and $c$ gives
-
-\begin{center}
- \begin{tabular}{lcll}
- $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
- $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
- $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
- \end{tabular}
-\end{center}
-
-One more example: Let $r''$ stand for the second derivative above,
-then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
-and $c$ gives
-
-\begin{center}
- \begin{tabular}{lcll}
- $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
- $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
- $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
- (is $\textit{nullable}$)
- \end{tabular}
-\end{center}
-
-Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
\mbox{}\hfill\mbox{[1 Mark]}
\item[(3)] We next want to simplify regular expressions: essentially
we want to remove $\ZERO$ in regular expressions like $r + \ZERO$
and $\ZERO + r$. However, our n-ary alternative takes a list of
- regular expressions as argument, we therefore need a more general
- ``flatten'' function, called \texttt{flts}. This function should
- analyse a list of regular expressions, say $rs$, as follows:
+ regular expressions as argument, and we therefore need a more general
+ ``denesting'' function, which deletes $\ZERO$s and ``spills out'' nested
+ $\sum$s. This function, called \texttt{denest}, should analyse a
+ list of regular expressions, say $rs$, as follows:
\begin{center}
\begin{tabular}{lllll}
1) &$rs = []$ & $\dn$ & $[]$ & (empty list)\\
- 2) &$rs = \ZERO :: rest$ & $\dn$ & $\textrm{flatten}\;rest$\\
- 3) &$rs = (\sum rs_1) :: rest$ & $\dn$ & $rs_1 ::: \textrm{flatten}\;rest$\\
- 4) &$rs = r :: rest$ & $\dn$ & $r :: \textrm{flatten}\;rest$ & (otherwise)\\
+ 2) &$rs = \ZERO :: rest$ & $\dn$ & $\texttt{denest}\;rest$ & (throw away $\ZERO$)\\
+ 3) &$rs = (\sum rs) :: rest$ & $\dn$ & $rs ::: \texttt{denest}\;rest$ & (spill out $\sum$)\\
+ 4) &$rs = r :: rest$ & $\dn$ & $r :: \texttt{denest}\;rest$ & (otherwise)\\
\end{tabular}
\end{center}
The first clause states that empty lists cannot be further
- flattened. The second removes the first $\ZERO$ from the list and recurses.
+ denested. The second removes the first $\ZERO$ from the list and recurses.
The third is when the first regular expression is an \texttt{ALTs}, then
the content of this alternative should be spilled out and appended
- with the flattened rest of the list. The last case is for all other
+ with the denested rest of the list. The last case is for all other
cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs},
- then we just keep the head of the list and flatten the rest.
+ then we just keep the head of the list and denest the rest.\\
\mbox{}\hfill\mbox{[1 Mark]}
-\item[(4)] Implement the function \textit{simp}, which recursively
+\item[(4)] Implement the function \texttt{flts} which flattens our
+ n-ary sequence regular expression $\prod$. Like \texttt{denest}, this
+ function is intended to delete $\ONE$s and spill out nested $\prod$s.
+ Unfortunately, there is a special case to do with $\ZERO$: If this function encounters a $\ZERO$, then
+ the whole ``product'' should be $\ZERO$. The problem is that the $\ZERO$ can be anywhere
+ inside the list. The easiest way to implement this function is therefore by using an
+ accumulator, which when called is set to \texttt{Nil}. This means \textit{flts} takes
+ two arguments (which are both lists of regular expressions)
+
+ \[
+ \texttt{flts}\;rs\;acc
+ \]
+
+ This function analyses the list $rs$ as follows
+
+ \begin{center}
+ \begin{tabular}{@{}lllll@{}}
+ 1) &$rs = []$ & $\dn$ & $acc$ & (empty list)\\
+ 2) &$rs = \ZERO :: rest$ & $\dn$ & $[\ZERO]$ & (special case for $\ZERO$)\\
+ 3) &$rs = \ONE :: rest$ & $\dn$ & $\texttt{flts}\,rest\,acc$ & (throw away $\ONE$)\\
+ 4) &$rs = (\prod rs) :: rest$ & $\dn$ & $\texttt{flts}\;rest\,(acc ::: rs)$ & (spill out $\prod$)\\
+ 5) &$rs = r :: rest$ & $\dn$ & $\texttt{flts}\;rest\,(acc ::: [r])$& (otherwise)\\
+ \end{tabular}
+ \end{center}
+
+ In the first case we just return whatever has accumulated in
+ $acc$. In the fourth case we spill out the $rs$ by appending the
+ $rs$ to the end of the accumulator. Similarly in the last case we
+ append the single regular expression $r$ to the end of the
+ accumulator. I let you think why the ``end'' is needed. \mbox{}\hfill\mbox{[1 Mark]}
+
+\item[(5)] Before we can simplify regular expressions, we need what is often called
+ \emph{smart constructors} for $\sum$ and $\prod$. While the ``normal'' constructors
+ \texttt{ALTs} and \texttt{SEQs} give us alternatives and sequences, respectively, \emph{smart}
+ constructors might return something different depending on what list of regular expressions
+ they are given as argument.
+
+ \begin{center}
+ \begin{tabular}{@{}c@{\hspace{9mm}}c@{}}
+ \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll}
+ & $\sum^{smart}$\smallskip\\
+ 1) & $rs = []$ & $\dn$ & $\ZERO$\\
+ 2) & $rs = [r]$ & $\dn$ & $r$\\ \\
+ 3) & otherwise & $\dn$ & $\sum\,rs$
+ \end{tabular} &
+ \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll}
+ & $\prod^{smart}$\smallskip\\
+ 1) & $rs = []$ & $\dn$ & $\ONE$\\
+ 2a) & $rs = [\ZERO]$ & $\dn$ & $\ZERO$\\
+ 2b) & $rs = [r]$ & $\dn$ & $r$\\
+ 3) & otherwise & $\dn$ & $\prod\,rs$
+ \end{tabular}
+ \end{tabular}
+ \end{center}
+ \mbox{}\hfill\mbox{[0.5 Marks]}
+
+\item[(6)] Implement the function \textit{simp}, which recursively
traverses a regular expression, and on the way up simplifies every
regular expression on the left (see below) to the regular expression
on the right, except it does not simplify inside ${}^*$-regular
- expressions.
+ expressions and also does not simplify $\ZERO$, $\ONE$ and characters.
\begin{center}
-\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
-$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\
-$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\
-$r \cdot \ONE$ & $\mapsto$ & $r$\\
-$\ONE \cdot r$ & $\mapsto$ & $r$\\
-$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum\;(\texttt{(flts + distinct)}[simp(r_1),..,simp(r_n)])$\\
+ \begin{tabular}{@{}l@{\hspace{3mm}}c@{\hspace{3mm}}ll@{}}
+ LHS: & & RHS:\smallskip\\
+$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum^{smart}\;(\texttt{(denest + distinct)}[simp(r_1),..,simp(r_n)])$\\
+ $\prod\;[r_1,..,r_n]$ & $\mapsto$ & $\prod^{smart}\;(\texttt{(flts)}[simp(r_1),..,simp(r_n)])$\\
+ $r$ & $\mapsto$ & $r$ \quad (all other cases)
\end{tabular}
\end{center}
-The last case is as follows: first apply $simp$ to all regular
-expressions $r_1,.. ,r_n$; then flatten the resulting list using
-\texttt{flts}; finally remove all duplicates in this list (this can be
+The first case is as follows: first apply $simp$ to all regular
+expressions $r_1,.. ,r_n$; then denest the resulting list using
+\texttt{denest}; after that remove all duplicates in this list (this can be
done in Scala using the function
-\texttt{\_.distinct}). When you perform these
- operations, you end up with three cases, namely where the list is
- empty, contains a single element and ``otherwise''. These cases
- should be processed as follows
-\begin{center}
-\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll}
-$\sum\;[]$ & $\mapsto$ & $\ZERO$\\
-$\sum\;[r]$ & $\mapsto$ & $r$\\
-$\sum\;rs$ & $\mapsto$ & $\sum\;rs$ & ``otherwise''\\
-\end{tabular}
-\end{center}
-
+\texttt{\_.distinct}). Finally, you end up with a list of (simplified)
+regular expressions; apply the smart constructor $\sum^{smart}$ to this list.
+Similarly in the $\prod$ case: simplify first all regular
+expressions $r_1,.. ,r_n$; then flatten the resulting list using \texttt{flts} and apply the
+smart constructor $\prod^{smart}$ to the result. In all other cases, just return the
+input $r$ as is.
For example the regular expression
\[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
- simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
- seen as trees and there are several methods for traversing
- trees. One of them corresponds to the inside-out traversal, which is
- also sometimes called post-order tra\-versal: you traverse inside
- the tree and on the way up you apply simplification rules.
- \textbf{Another Hint:} Remember numerical expressions from school
- times---there you had expressions like
- $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ and
- simplification rules that looked very similar to rules above. You
- would simplify such numerical expressions by replacing for example
- the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then look whether
- more rules are applicable. If you regard regular expressions as
- trees and if you organise the simplification in an inside-out
- fashion, it is always clear which simplification should be applied
- next.\mbox{}\hfill\mbox{[1 Mark]}
+ simplifies to just $r_1$. \mbox{}\hfill\mbox{[1 Mark]}
-\item[(5)] Implement two functions: The first, called \textit{ders},
+\item[(7)] Implement two functions: The first, called \textit{ders},
takes a list of characters and a regular expression as arguments, and
builds the derivative w.r.t.~the list as follows:
@@ -407,7 +410,7 @@
\begin{tabular}{lcl}
$\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\
$\textit{ders}\;(c::cs)\;r$ & $\dn$ &
- $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\
+ $\textit{ders}\;cs\;(\textit{simp}\,(\textit{der}\;c\;r))$\\
\end{tabular}
\end{center}
@@ -420,9 +423,9 @@
derivative regular expression can match the empty string (using
\textit{nullable}). For example the \textit{matcher} will produce
true for the regular expression $(a\cdot b)\cdot c$ and the string
-$abc$, but false if you give it the string $ab$. \hfill[1 Mark]
+$abc$, but false if you give it the string $ab$. \hfill[0.5 Mark]
-\item[(6)] Implement a function, called \textit{size}, by recursion
+\item[(8)] Implement a function, called \textit{size}, by recursion
over regular expressions. If a regular expression is seen as a tree,
then \textit{size} should return the number of nodes in such a
tree. Therefore this function is defined as follows:
@@ -433,7 +436,7 @@
$\textit{size}(\ONE)$ & $\dn$ & $1$\\
$\textit{size}(c)$ & $\dn$ & $1$\\
$\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
-$\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
+$\textit{size}(\prod\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
$\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
\end{tabular}
\end{center}
@@ -444,7 +447,7 @@
taking the derivative, but simplify the result. The sizes
are given in \texttt{re.scala}. \hfill[0.5 Marks]
-\item[(7)] You do not have to implement anything specific under this
+\item[(9)] You do not have to implement anything specific under this
task. The purpose here is that you will be marked for some ``power''
test cases. For example can your matcher decide within 30 seconds
whether the regular expression $(a^*)^*\cdot b$ matches strings of the
@@ -463,7 +466,7 @@
\subsection*{Background}
Although easily implementable in Scala (ok maybe the \texttt{simp} functions and
-\texttt{ALTs} needs a bit more thinking), the idea behind the
+the constructors \texttt{ALTs}/\texttt{SEQs} needs a bit more thinking), the idea behind the
derivative function might not so easy to be seen. To understand its
purpose better, assume a regular expression $r$ can match strings of
the form $c\!::\!cs$ (that means strings which start with a character
@@ -484,11 +487,11 @@
$\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain
a regular expression that can match the empty string. You can test
whether this is indeed the case using the function nullable, which is
-what your matcher is doing.
+what the matcher you have implemented is doing.
The purpose of the $\textit{simp}$ function is to keep the regular
expressions small. Normally the derivative function makes the regular
-expression bigger (see the SEQ case and the example in (2)) and the
+expression bigger (see the \texttt{SEQs} case and the example in Task (2)) and the
algorithm would be slower and slower over time. The $\textit{simp}$
function counters this increase in size and the result is that the
algorithm is fast throughout. By the way, this algorithm is by Janusz
@@ -501,16 +504,19 @@
If you want to see how badly the regular expression matchers do in
-Java\footnote{Version 8 and below; Version 9 and above does not seem to be as
- catastrophic, but still much worse than the regular expression
- matcher based on derivatives.}, JavaScript and Python with the
-``evil'' regular expression $(a^*)^*\cdot b$, then have a look at the
-graphs below (you can try it out for yourself: have a look at the files
+Java\footnote{Version 8 and below; Version 9 and above does not seem
+ to be as catastrophic, but still much worse than the regular
+ expression matcher based on derivatives. BTW, Scala uses the regular
+ expression matcher provided by the Java libraries. So is just as bad.}, JavaScript,
+Python Swift and Dart with the ``evil'' regular expression
+$(a^*)^*\cdot b$, then have a look at the graphs below (you can try it
+out for yourself: have a look at the files
\texttt{catastrophic9.java}, \texttt{catastrophic.js},
-\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you
-have implemented. How long can a string of $a$'s be in your matcher
-and still stay within the 30 seconds time limit? It should be muuuch better
-than your off-the-shelf matcher in your bog-standard language.
+\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher
+you have implemented. How long can a string of $a$'s be in your
+matcher and still stay within the 30 seconds time limit? It should be
+mu(uu)$^*$ch better than your off-the-shelf matcher in your
+bog-standard language.
\begin{center}
\begin{tabular}{@{}cc@{}}
@@ -574,6 +580,46 @@
\end{document}
+
+For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives
+w.r.t.~the characters $a$, $b$ and $c$ are
+
+\begin{center}
+ \begin{tabular}{lcll}
+ $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\
+ $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\
+ $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$
+ \end{tabular}
+\end{center}
+
+Let $r'$ stand for the first derivative, then taking the derivatives of $r'$
+w.r.t.~the characters $a$, $b$ and $c$ gives
+
+\begin{center}
+ \begin{tabular}{lcll}
+ $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\
+ $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\
+ $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$
+ \end{tabular}
+\end{center}
+
+One more example: Let $r''$ stand for the second derivative above,
+then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$
+and $c$ gives
+
+\begin{center}
+ \begin{tabular}{lcll}
+ $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\
+ $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\
+ $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ &
+ (is $\textit{nullable}$)
+ \end{tabular}
+\end{center}
+
+Note, the last derivative can match the empty string, that is it is \textit{nullable}.
+
+
+
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
Binary file cws/main_cw04.pdf has changed
Binary file cws/main_cw05.pdf has changed
--- a/main_solution3/re.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_solution3/re.scala Tue Nov 01 15:03:48 2022 +0000
@@ -1,5 +1,5 @@
// Main Part 3 about Regular Expression Matching
-//=============================================
+//==============================================
object M3 {
@@ -8,13 +8,15 @@
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
-case class ALTs(rs: List[Rexp]) extends Rexp // alternatives
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
-case class STAR(r: Rexp) extends Rexp // star
+case class ALTs(rs: List[Rexp]) extends Rexp // alternatives
+case class SEQs(rs: List[Rexp]) extends Rexp // sequences
+case class STAR(r: Rexp) extends Rexp // star
-//the usual binary choice can be defined in terms of ALTs
+//the usual binary choice and binary sequence can be defined
+//in terms of ALTs and SEQs
def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
+def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2))
// some convenience for typing in regular expressions
import scala.language.implicitConversions
@@ -41,81 +43,78 @@
def ~ (r: String) = SEQ(s, r)
}
-// (1) Complete the function nullable according to
-// the definition given in the coursework; this
-// function checks whether a regular expression
-// can match the empty string and Returns a boolean
-// accordingly.
-
+// (1)
def nullable (r: Rexp) : Boolean = r match {
case ZERO => false
case ONE => true
case CHAR(_) => false
case ALTs(rs) => rs.exists(nullable)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case SEQs(rs) => rs.forall(nullable)
case STAR(_) => true
}
-// (2) Complete the function der according to
-// the definition given in the coursework; this
-// function calculates the derivative of a
-// regular expression w.r.t. a character.
-
-def der (c: Char, r: Rexp) : Rexp = r match {
+// (2)
+def der(c: Char, r: Rexp) : Rexp = r match {
case ZERO => ZERO
case ONE => ZERO
case CHAR(d) => if (c == d) ONE else ZERO
case ALTs(rs) => ALTs(rs.map(der(c, _)))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
+ case SEQs(Nil) => ZERO
+ case SEQs(r1::rs) =>
+ if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs)))
+ else SEQs(der(c, r1):: rs)
case STAR(r1) => SEQ(der(c, r1), STAR(r1))
}
-// (3) Implement the flatten function flts. It
-// deletes 0s from a list of regular expressions
-// and also 'spills out', or flattens, nested
-// ALTernativeS.
+// (3)
+def denest(rs: List[Rexp]) : List[Rexp] = rs match {
+ case Nil => Nil
+ case ZERO::tl => denest(tl)
+ case ALTs(rs1)::rs2 => rs1 ::: denest(rs2)
+ case r::rs => r :: denest(rs)
+}
-def flts(rs: List[Rexp]) : List[Rexp] = rs match {
- case Nil => Nil
- case ZERO::tl => flts(tl)
- case ALTs(rs1)::rs2 => rs1 ::: flts(rs2)
- case r::rs => r :: flts(rs)
+// (4)
+def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match {
+ case Nil => acc
+ case ZERO::rs => ZERO::Nil
+ case ONE::rs => flts(rs, acc)
+ case SEQs(rs1)::rs => flts(rs, acc ::: rs1)
+ case r::rs => flts(rs, acc :+ r)
}
-// (4) Complete the simp function according to
-// the specification given in the coursework; this
-// function simplifies a regular expression from
-// the inside out, like you would simplify arithmetic
-// expressions; however it does not simplify inside
-// STAR-regular expressions.
+// (5)
+def ALTs_smart(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => ZERO
+ case r::Nil => r
+ case rs => ALTs(rs)
+}
+def SEQs_smart(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => ONE
+ case ZERO::nil => ZERO
+ case r::Nil => r
+ case rs => SEQs(rs)
+}
+
+// (6)
def simp(r: Rexp) : Rexp = r match {
- case ALTs(rs) => (flts(rs.map(simp)).distinct) match {
- case Nil => ZERO
- case r::Nil => r
- case rs => ALTs(rs)
- }
- case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
+ case ALTs(rs) =>
+ ALTs_smart(denest(rs.map(simp)).distinct)
+ case SEQs(rs) =>
+ SEQs_smart(flts(rs.map(simp)))
case r => r
}
-simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))
+//println("Simp tests")
+//println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)))
+//println(simp(((CHAR('a') | ZERO) ~ ONE) |
+// (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))))
-// (5) Complete the two functions below; the first
-// calculates the derivative w.r.t. a string; the second
-// is the regular expression matcher taking a regular
-// expression and a string and checks whether the
-// string matches the regular expression.
+
+// (7)
def ders (s: List[Char], r: Rexp) : Rexp = s match {
case Nil => r
@@ -125,43 +124,40 @@
// main matcher function
def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r))
-// (6) Complete the size function for regular
-// expressions according to the specification
-// given in the coursework.
-
+// (8)
def size(r: Rexp): Int = r match {
case ZERO => 1
case ONE => 1
case CHAR(_) => 1
case ALTs(rs) => 1 + rs.map(size).sum
- case SEQ(r1, r2) => 1 + size(r1) + size (r2)
+ case SEQs(rs) => 1 + rs.map(size).sum
case STAR(r1) => 1 + size(r1)
}
// some testing data
-
-//matcher(("a" ~ "b") ~ "c", "abc") // => true
-//matcher(("a" ~ "b") ~ "c", "ab") // => false
+/*
+println(matcher(("a" ~ "b") ~ "c", "abc")) // => true
+println(matcher(("a" ~ "b") ~ "c", "ab")) // => false
// the supposedly 'evil' regular expression (a*)* b
-// val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
-//println(matcher(EVIL, "a" * 1000 ++ "b")) // => true
-//println(matcher(EVIL, "a" * 1000)) // => false
+println(matcher(EVIL, "a" * 1000 ++ "b")) // => true
+println(matcher(EVIL, "a" * 1000)) // => false
// size without simplifications
-//println(size(der('a', der('a', EVIL)))) // => 28
-//println(size(der('a', der('a', der('a', EVIL))))) // => 58
+println(size(der('a', der('a', EVIL)))) // => 36
+println(size(der('a', der('a', der('a', EVIL))))) // => 83
// size with simplification
-//println(simp(der('a', der('a', EVIL))))
-//println(simp(der('a', der('a', der('a', EVIL)))))
+println(simp(der('a', der('a', EVIL))))
+println(simp(der('a', der('a', der('a', EVIL)))))
-//println(size(simp(der('a', der('a', EVIL))))) // => 8
-//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8
+println(size(simp(der('a', der('a', EVIL))))) // => 7
+println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7
// Python needs around 30 seconds for matching 28 a's with EVIL.
// Java 9 and later increase this to an "astonishing" 40000 a's in
@@ -169,7 +165,7 @@
//
// Lets see how long it takes to match strings with
// 5 Million a's...it should be in the range of a
-// couple of seconds.
+// few seconds.
def time_needed[T](i: Int, code: => T) = {
val start = System.nanoTime()
@@ -178,19 +174,20 @@
"%.5f".format((end - start)/(i * 1.0e9))
}
-//for (i <- 0 to 5000000 by 500000) {
-// println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.")
-//}
+for (i <- 0 to 5000000 by 500000) {
+ println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.")
+}
// another "power" test case
-//simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
+println(simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next()) == ONE)
// the Iterator produces the rexp
//
// SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
//
-// where SEQ is nested 50 times.
-
+// where SEQ is nested 100 times.
+*/
}
+
--- a/main_templates1/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates1/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 1
+# assignment2022scala - Main 1
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw01.pdf)
* reference jar:
[drumb.jar](https://nms.kcl.ac.uk/christian.urban/drumb.jar)
\ No newline at end of file
--- a/main_templates1/drumb.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates1/drumb.scala Tue Nov 01 15:03:48 2022 +0000
@@ -9,66 +9,39 @@
val rstate_portfolio = List("PLD", "PSA", "AMT", "AIV", "AVB", "BXP", "CCI",
"DLR", "EQIX", "EQR", "ESS", "EXR", "FRT", "HCP")
-
-// (1) The function below takes a stock symbol and a year as arguments.
-// It should read the corresponding CSV-file and then extract the January
-// data from the given year. The data should be collected in a list of
-// strings (one entry for each line in the CSV-file).
-
import io.Source
import scala.util._
+// ADD YOUR CODE BELOW
+//======================
+
+
+// (1)
def get_january_data(symbol: String, year: Int) : List[String] = ???
-// (2) From the output of the get_january_data function, the next function
-// should extract the first line (if it exists) and the corresponding
-// first trading price in that year with type Option[Double]. If no line
-// is generated by get_january_data then the result is None; and Some if
-// there is a price.
-
-
+// (2)
def get_first_price(symbol: String, year: Int) : Option[Double] = ???
-// (3) Complete the function below that obtains all first prices
-// for the stock symbols from a portfolio (list of strings) and
-// for the given range of years. The inner lists are for the
-// stock symbols and the outer list for the years.
-
-
+// (3)
def get_prices(portfolio: List[String], years: Range) : List[List[Option[Double]]] = ???
-// (4) The function below calculates the change factor (delta) between
-// a price in year n and a price in year n + 1.
-
+// (4)
def get_delta(price_old: Option[Double], price_new: Option[Double]) : Option[Double] = ???
-// (5) The next function calculates all change factors for all prices (from a
-// portfolio). The input to this function are the nested lists created by
-// get_prices above.
-
+// (5)
def get_deltas(data: List[List[Option[Double]]]) : List[List[Option[Double]]] = ???
-
-
-// (6) Write a function that given change factors, a starting balance and an index,
-// calculates the yearly yield, i.e. new balance, according to our dumb investment
-// strategy. Index points to a year in the data list.
-
+// (6)
def yearly_yield(data: List[List[Option[Double]]], balance: Long, index: Int) : Long = ???
-// (7) Write a function compound_yield that calculates the overall balance for a
-// range of years where in each year the yearly profit is compounded to the new
-// balances and then re-invested into our portfolio. For this use the function and
-// results generated under (6). The function investment calls compound_yield
-// with the appropriate deltas and the first index.
-
+// (7)
def compound_yield(data: List[List[Option[Double]]], balance: Long, index: Int) : Long = ???
def investment(portfolio: List[String], years: Range, start_balance: Long) : Long = ???
--- a/main_templates2/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates2/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 2
+# assignment2022scala - Main 2
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw02.pdf)
* reference jar:
- [danube.jar](https://nms.kcl.ac.uk/christian.urban/danube.jar)
\ No newline at end of file
+ [danube.jar](https://nms.kcl.ac.uk/christian.urban/wordle.jar)
\ No newline at end of file
--- a/main_templates3/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates3/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 3
+# assignment2022scala - Main 3
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw03.pdf)
* reference jar:
[re.jar](https://nms.kcl.ac.uk/christian.urban/re.jar)
\ No newline at end of file
--- a/main_templates3/re.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates3/re.scala Tue Nov 01 15:03:48 2022 +0000
@@ -1,23 +1,24 @@
// Main Part 3 about Regular Expression Matching
-//=============================================
+//==============================================
object M3 {
-// Regular Expressions
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
-case class ALTs(rs: List[Rexp]) extends Rexp // alternatives
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
-case class STAR(r: Rexp) extends Rexp // star
+case class ALTs(rs: List[Rexp]) extends Rexp // alternatives
+case class SEQs(rs: List[Rexp]) extends Rexp // sequences
+case class STAR(r: Rexp) extends Rexp // star
+
-//the usual binary choice can be defined in terms of ALTs
+//the usual binary choice and binary sequence can be defined
+//in terms of ALTs and SEQs
def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
+def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2))
// some convenience for typing regular expressions
-
import scala.language.implicitConversions
import scala.language.reflectiveCalls
@@ -42,92 +43,77 @@
def ~ (r: String) = SEQ(s, r)
}
-
+// examples for the implicits:
// ALT(CHAR('a'), CHAR('b'))
-// val reg : Rexp = "a" | "b"
-
+// val areg : Rexp = "a" | "b"
-// (1) Complete the function nullable according to
-// the definition given in the coursework; this
-// function checks whether a regular expression
-// can match the empty string and Returns a boolean
-// accordingly.
-
-def nullable (r: Rexp) : Boolean = ???
-
-
-// (2) Complete the function der according to
-// the definition given in the coursework; this
-// function calculates the derivative of a
-// regular expression w.r.t. a character.
-
-def der (c: Char, r: Rexp) : Rexp = ???
+// SEQ(CHAR('a'), CHAR('b'))
+// val sreg : Rexp = "a" ~ "b"
-// (3) Implement the flatten function flts. It
-// deletes 0s from a list of regular expressions
-// and also 'spills out', or flattens, nested
-// ALTernativeS.
+// ADD YOUR CODE BELOW
+//======================
-def flts(rs: List[Rexp]) : List[Rexp] = ???
+// (1)
+def nullable (r: Rexp) : Boolean = ???
-
+// (2)
+def der (c: Char, r: Rexp) : Rexp = ???
-// (4) Complete the simp function according to
-// the specification given in the coursework description;
-// this function simplifies a regular expression from
-// the inside out, like you would simplify arithmetic
-// expressions; however it does not simplify inside
-// STAR-regular expressions. Use the _.distinct and
-// flts functions.
+// (3)
+def denest(rs: List[Rexp]) : List[Rexp] = ???
+
+// (4)
+def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = ???
+// (5)
+def ALTs_smart(rs: List[Rexp]) : Rexp = ???
+def SEQs_smart(rs: List[Rexp]) : Rexp = ???
+
+// (6)
def simp(r: Rexp) : Rexp = ???
-
-// (5) Complete the two functions below; the first
-// calculates the derivative w.r.t. a string; the second
-// is the regular expression matcher taking a regular
-// expression and a string and checks whether the
-// string matches the regular expression
-
+// (7)
def ders (s: List[Char], r: Rexp) : Rexp = ???
-
def matcher(r: Rexp, s: String): Boolean = ???
-
-// (6) Complete the size function for regular
-// expressions according to the specification
-// given in the coursework.
-
+// (8)
def size(r: Rexp): Int = ???
-// some testing data
-
+// Some testing data
+//===================
/*
+
+simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) // => ALTs(List(ONE, CHAR(a)))
+simp(((CHAR('a') | ZERO) ~ ONE) |
+ (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) // => CHAR(a)
+
+matcher(("a" ~ "b") ~ "c", "ab") // => false
matcher(("a" ~ "b") ~ "c", "abc") // => true
-matcher(("a" ~ "b") ~ "c", "ab") // => false
+
// the supposedly 'evil' regular expression (a*)* b
val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+matcher(EVIL, "a" * 1000) // => false
matcher(EVIL, "a" * 1000 ++ "b") // => true
-matcher(EVIL, "a" * 1000) // => false
+
// size without simplifications
-size(der('a', der('a', EVIL))) // => 28
-size(der('a', der('a', der('a', EVIL)))) // => 58
+size(der('a', der('a', EVIL))) // => 36
+size(der('a', der('a', der('a', EVIL)))) // => 83
// size with simplification
-size(simp(der('a', der('a', EVIL)))) // => 8
-size(simp(der('a', der('a', der('a', EVIL))))) // => 8
+size(simp(der('a', der('a', EVIL)))) // => 7
+size(simp(der('a', der('a', der('a', EVIL))))) // => 7
// Python needs around 30 seconds for matching 28 a's with EVIL.
// Java 9 and later increase this to an "astonishing" 40000 a's in
// 30 seconds.
//
// Lets see how long it really takes to match strings with
-// 5 Million a's...it should be in the range of a couple
+// 5 Million a's...it should be in the range of a few
// of seconds.
def time_needed[T](i: Int, code: => T) = {
--- a/main_templates4/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 4
+# assignment2022scala - Main 4
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw04.pdf)
* reference jar:
[knight1.jar](https://nms.kcl.ac.uk/christian.urban/knight1.jar),
--- a/main_templates4/knight1.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/knight1.scala Tue Nov 01 15:03:48 2022 +0000
@@ -9,22 +9,19 @@
// templates below. Also have a look whether the functions
// at the end of the file are of any help.
-
-
type Pos = (Int, Int) // a position on a chessboard
type Path = List[Pos] // a path...a list of positions
-//(1) Complete the function that tests whether the position x
-// is inside the board and not yet element in the path.
+// ADD YOUR CODE BELOW
+//======================
+
+
+//(1)
def is_legal(dim: Int, path: Path, x: Pos) : Boolean = ???
-
-//(2) Complete the function that calculates for a position x
-// all legal onward moves that are not already in the path.
-// The moves should be ordered in a "clockwise" manner.
-
+//(2)
def legal_moves(dim: Int, path: Path, x: Pos) : List[Pos] = ???
@@ -38,21 +35,12 @@
//assert(legal_moves(8, List((6,6)), (7,7)) == List((6,5), (5,6)))
-//(3) Complete the two recursive functions below.
-// They exhaustively search for knight's tours starting from the
-// given path. The first function counts all possible tours,
-// and the second collects all tours in a list of paths.
-
+// (3)
def count_tours(dim: Int, path: Path) : Int = ???
def enum_tours(dim: Int, path: Path) : List[Path] = ???
-
-//(4) Implement a first-function that finds the first
-// element, say x, in the list xs where f is not None.
-// In that case Return f(x), otherwise None. If possible,
-// calculate f(x) only once.
-
+// (4)
def first(xs: List[Pos], f: Pos => Option[Path]) : Option[Path] = ???
@@ -64,10 +52,7 @@
//first(List((1, 0),(2, 0),(3, 0)), foo) // None
-//(5) Implement a function that uses the first-function from (4) for
-// trying out onward moves, and searches recursively for a
-// knight tour on a dim * dim-board.
-
+//(5)
def first_tour(dim: Int, path: Path) : Option[Path] = ???
--- a/main_templates4/knight2.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/knight2.scala Tue Nov 01 15:03:48 2022 +0000
@@ -4,7 +4,6 @@
object M4b {
-
// !!! Copy any function you need from file knight1.scala !!!
//
// If you need any auxiliary functions, feel free to
@@ -14,21 +13,14 @@
type Pos = (Int, Int) // a position on a chessboard
type Path = List[Pos] // a path...a list of positions
+// ADD YOUR CODE BELOW
+//======================
-//(6) Complete the function that calculates a list of onward
-// moves like in (2) but orders them according to Warnsdorf’s
-// rule. That means moves with the fewest legal onward moves
-// should come first.
-
-
+//(6)
def ordered_moves(dim: Int, path: Path, x: Pos) : List[Pos] = ???
-//(7) Complete the function that searches for a single *closed*
-// tour using the ordered_moves function from (6). This
-// function will be tested on a 6 x 6 board.
-
-
+//(7)
def first_closed_tour_heuristics(dim: Int, path: Path) : Option[Path] = ???
--- a/main_templates4/knight3.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates4/knight3.scala Tue Nov 01 15:03:48 2022 +0000
@@ -14,13 +14,11 @@
type Pos = (Int, Int) // a position on a chessboard
type Path = List[Pos] // a path...a list of positions
-//(9) Implement a function that searches for a
-// you have to be careful to write a tail-recursive version as this
-// function will be called with dimensions of up to 70 * 70
-// and starting field (0, 0). It has to produce a solution within
-// 30 seconds.
+// ADD YOUR CODE BELOW
+//======================
+//(9)
def tour_on_mega_board(dim: Int, path: Path) : Option[Path] = ???
}
--- a/main_templates5/README.md Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates5/README.md Tue Nov 01 15:03:48 2022 +0000
@@ -1,6 +1,6 @@
-# assignment2021scala - Main 5
+# assignment2022scala - Main 5
-* deadline: 21 January, 5pm
+* deadline: 19 January, 5pm
* [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw05.pdf)
* reference jar:
[bf.jar](https://nms.kcl.ac.uk/christian.urban/bf.jar),
--- a/main_templates5/bf.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates5/bf.scala Tue Nov 01 15:03:48 2022 +0000
@@ -5,45 +5,26 @@
object M5a {
-
// representation of BF memory
type Mem = Map[Int, Int]
-
-// (1) Write a function that takes a file name as argument and
-// and requests the corresponding file from disk. It returns the
-// content of the file as a String. If the file does not exists,
-// the function should return the empty string.
-
import io.Source
import scala.util._
+// ADD YOUR CODE BELOW
+//======================
+
+// (1)
def load_bff(name: String) : String = ???
-
-
-// (2) Complete the functions for safely reading
-// and writing brainf*** memory. Safely read should
-// Return the value stored in the Map for a given memory
-// pointer, provided it exists; otherwise it Returns 0. The
-// writing function generates a new Map with the
-// same data, except at the given memory pointer the
-// value v is stored.
-
+// (2)
def sread(mem: Mem, mp: Int) : Int = ???
def write(mem: Mem, mp: Int, v: Int) : Mem = ???
-
-
-// (3) Implement the two jumping instructions in the
-// brainf*** language. In jumpRight, given a program and
-// a program counter move the program counter to the right
-// until after the *matching* ]-command. Similarly,
-// jumpLeft implements the move to the left to just after
-// the *matching* [-command.
+// (3)
def jumpRight(prog: String, pc: Int, level: Int) : Int = ???
@@ -60,21 +41,7 @@
-// (4) Complete the compute function that interprets (runs) a brainf***
-// program: the arguments are a program (represented as a string), a program
-// counter, a memory counter and a brainf*** memory. It Returns the
-// memory at the stage when the execution of the brainf*** program
-// finishes. The interpretation finishes once the program counter
-// pc is pointing to something outside the program string.
-// If the pc points to a character inside the program, the pc,
-// memory pointer and memory need to be updated according to
-// rules of the brainf*** language. Then, recursively, the compute
-// function continues with the command at the new program
-// counter.
-//
-// Implement the run function that calls compute with the program
-// counter and memory counter set to 0.
-
+// (4)
def compute(prog: String, pc: Int, mp: Int, mem: Mem) : Mem = ???
--- a/main_templates5/bfc.scala Sat Oct 08 00:30:51 2022 +0100
+++ b/main_templates5/bfc.scala Tue Nov 01 15:03:48 2022 +0000
@@ -4,7 +4,6 @@
object M5b {
-
// !!! Copy any function you need from file bf.scala !!!
//
// If you need any auxiliary function, feel free to
@@ -34,43 +33,12 @@
import io.Source
import scala.util._
-
-// TASKS
-//=======
+// ADD YOUR CODE BELOW
+//======================
-// (5) Write a function jtable that precomputes the "jump
-// table" for a bf-program. This function takes a bf-program
-// as an argument and Returns a Map[Int, Int]. The
-// purpose of this map is to record the information about
-// pc positions where '[' or a ']' are stored. The information
-// is to which pc-position do we need to jump next?
-//
-// For example for the program
-//
-// "+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]"
-//
-// we obtain the map
-//
-// Map(69 -> 61, 5 -> 20, 60 -> 70, 27 -> 44, 43 -> 28, 19 -> 6)
-//
-// This states that for the '[' on position 5, we need to
-// jump to position 20, which is just after the corresponding ']'.
-// Similarly, for the ']' on position 19, we need to jump to
-// position 6, which is just after the '[' on position 5, and so
-// on. The idea is to not calculate this information each time
-// we hit a bracket, but just look up this information in the
-// jtable. You can use the jumpLeft and jumpRight functions
-// from Part 1 for calculating the jtable.
-//
-// Then adapt the compute and run functions from Part 1
-// in order to take advantage of the information stored in the jtable.
-// This means whenever jumpLeft and jumpRight was called previously,
-// you should immediately look up the jump address in the jtable.
-
-
+// (5)
def jtable(pg: String) : Map[Int, Int] = ???
-
// testcase
//
// jtable("""+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]""")
@@ -80,26 +48,13 @@
def compute2(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = ???
def run2(pg: String, m: Mem = Map()) = ???
-
// testcases
// time_needed(1, run2(load_bff("benchmark.bf")))
// time_needed(1, run2(load_bff("sierpinski.bf")))
-// (6) Write a function optimise which deletes "dead code" (everything
-// that is not a bf-command) and also replaces substrings of the form
-// [-] by a new command 0. The idea is that the loop [-] just resets the
-// memory at the current location to 0. In the compute3 and run3 functions
-// below you implement this command by writing the number 0 to mem(mp),
-// that is write(mem, mp, 0).
-//
-// The easiest way to modify a string in this way is to use the regular
-// expression """[^<>+\-.\[\]]""", which recognises everything that is
-// not a bf-command and replace it by the empty string. Similarly the
-// regular expression """\[-\]""" finds all occurrences of [-] and
-// by using the Scala method .replaceAll you can replace it with the
-// string "0" standing for the new bf-command.
+// (6)
def optimise(s: String) : String = ???
@@ -117,37 +72,14 @@
-// (7) Write a function combine which replaces sequences
-// of repeated increment and decrement commands by appropriate
-// two-character commands. For example for sequences of +
-//
-// orig bf-cmds | replacement
-// ------------------------------
-// + | +A
-// ++ | +B
-// +++ | +C
-// |
-// ... |
-// |
-// +++....+++ | +Z
-// (where length = 26)
-//
-// Similar for the bf-command -, > and <. All other commands should
-// be unaffected by this change.
-//
-// Adapt the compute4 and run4 functions such that they can deal
-// appropriately with such two-character commands.
-
-
+// (7)
def combine(s: String) : String = ???
// testcase
// combine(load_bff("benchmark.bf"))
-
def compute4(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = ???
-
// should call first optimise and then combine on the input string
//
def run4(pg: String, m: Mem = Map()) = ???