# HG changeset patch # User Christian Urban # Date 1667315028 0 # Node ID cdfa6a293453edcfea411d9e840a11abc20bc324 # Parent 6e93040e33787a85a1f47c0a3d37ca2d5ff61d6c updated solutions and templates diff -r 6e93040e3378 -r cdfa6a293453 core_templates1/README.md --- a/core_templates1/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates1/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Core 1 +# assignment2022scala - Core 1 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw01.pdf) * reference jar: [collatz.jar](https://nms.kcl.ac.uk/christian.urban/collatz.jar) \ No newline at end of file diff -r 6e93040e3378 -r cdfa6a293453 core_templates1/collatz.scala --- a/core_templates1/collatz.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates1/collatz.scala Tue Nov 01 15:03:48 2022 +0000 @@ -3,36 +3,18 @@ object C1 { -//(1) Complete the collatz function below. It should -// recursively calculate the number of steps needed -// until the collatz series reaches the number 1. -// If needed, you can use an auxiliary function that -// performs the recursion. The function should expect -// arguments in the range of 1 to 1 Million. +// ADD YOUR CODE BELOW +//====================== + +//(1) def collatz(n: Long) : Long = ??? -//(2) Complete the collatz_max function below. It should -// calculate how many steps are needed for each number -// from 1 up to a bound and then calculate the maximum number of -// steps and the corresponding number that needs that many -// steps. Again, you should expect bounds in the range of 1 -// up to 1 Million. The first component of the pair is -// the maximum number of steps and the second is the -// corresponding number. - +//(2) def collatz_max(bnd: Long) : (Long, Long) = ??? -//(3) Implement a function that calculates the last_odd -// number in a collatz series. For this implement an -// is_pow_of_two function which tests whether a number -// is a power of two. The function is_hard calculates -// whether 3n + 1 is a power of two. Again you can -// assume the input ranges between 1 and 1 Million, -// and also assume that the input of last_odd will not -// be a power of 2. - +//(3) def is_pow_of_two(n: Long) : Boolean = ??? def is_hard(n: Long) : Boolean = ??? diff -r 6e93040e3378 -r cdfa6a293453 core_templates2/README.md --- a/core_templates2/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates2/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Core 2 +# assignment2022scala - Core 2 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw02.pdf) * reference jar: [docdiff.jar](https://nms.kcl.ac.uk/christian.urban/docdiff.jar) \ No newline at end of file diff -r 6e93040e3378 -r cdfa6a293453 core_templates2/docdiff.scala --- a/core_templates2/docdiff.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates2/docdiff.scala Tue Nov 01 15:03:48 2022 +0000 @@ -4,44 +4,23 @@ object C2 { +// ADD YOUR CODE BELOW +//====================== -//(1) Complete the clean function below. It should find -// all words in a string using the regular expression -// \w+ and the library function -// -// some_regex.findAllIn(some_string) -// -// The words should be Returned as a list of strings. - - +//(1) def clean(s: String) : List[String] = ??? -//(2) The function occurrences calculates the number of times -// strings occur in a list of strings. These occurrences should -// be calculated as a Map from strings to integers. - - +//(2) def occurrences(xs: List[String]): Map[String, Int] = ??? -//(3) This functions calculates the dot-product of two documents -// (list of strings). For this it calculates the occurrence -// maps from (2) and then multiplies the corresponding occurrences. -// If a string does not occur in a document, the product is zero. -// The function finally sums up all products. - - +//(3) def prod(lst1: List[String], lst2: List[String]) : Int = ??? -//(4) Complete the functions overlap and similarity. The overlap of -// two documents is calculated by the formula given in the assignment -// description. The similarity of two strings is given by the overlap -// of the cleaned strings (see (1)). - - +//(4) def overlap(lst1: List[String], lst2: List[String]) : Double = ??? def similarity(s1: String, s2: String) : Double = ??? diff -r 6e93040e3378 -r cdfa6a293453 core_templates3/README.md --- a/core_templates3/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates3/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Core 3 +# assignment2022scala - Core 3 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/core_cw03.pdf) * reference jar: [postfix.jar](https://nms.kcl.ac.uk/christian.urban/postfix.jar), diff -r 6e93040e3378 -r cdfa6a293453 core_templates3/postfix.scala --- a/core_templates3/postfix.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates3/postfix.scala Tue Nov 01 15:03:48 2022 +0000 @@ -19,28 +19,14 @@ // helper function for splitting strings into tokens def split(s: String) : Toks = s.split(" ").toList +// ADD YOUR CODE BELOW +//====================== -// (1) Implement below the shunting yard algorithm. The most -// convenient way to this in Scala is to implement a recursive -// function and to heavily use pattern matching. The function syard -// takes some input tokens as first argument. The second and third -// arguments represent the stack and the output of the shunting yard -// algorithm. -// -// In the marking, you can assume the function is called only with -// an empty stack and an empty output list. You can also assume the -// input os only properly formatted (infix) arithmetic expressions -// (all parentheses will be well-nested, the input only contains -// operators and numbers). -// You can implement any additional helper function you need. I found -// it helpful to implement two auxiliary functions for the pattern matching: -// - +// (1) def is_op(op: String) : Boolean = ??? def prec(op1: String, op2: String) : Boolean = ??? - def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ??? @@ -58,13 +44,7 @@ //syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + + -// (2) Implement a compute function that evaluates an input list -// in postfix notation. This function takes a list of tokens -// and a stack as argumenta. The function should produce the -// result as an integer using the stack. You can assume -// this function will be only called with proper postfix -// expressions. - +// (2) def compute(toks: Toks, st: List[Int] = Nil) : Int = ??? diff -r 6e93040e3378 -r cdfa6a293453 core_templates3/postfix2.scala --- a/core_templates3/postfix2.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/core_templates3/postfix2.scala Tue Nov 01 15:03:48 2022 +0000 @@ -35,12 +35,12 @@ // the operations in the basic version of the algorithm val ops = List("+", "-", "*", "/", "^") -// (3) Implement the extended version of the shunting yard algorithm. -// This version should properly account for the fact that the power -// operation is right-associative. Apart from the extension to include -// the power operation, you can make the same assumptions as in -// basic version. +// ADD YOUR CODE BELOW +//====================== + + +// (3) def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ??? @@ -48,9 +48,7 @@ // syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3")) // 3 4 8 * 5 1 - 2 3 ^ ^ / + -// (4) Implement a compute function that produces an Int for an -// input list of tokens in postfix notation. - +// (4) def compute(toks: Toks, st: List[Int] = Nil) : Int = ??? diff -r 6e93040e3378 -r cdfa6a293453 cws/core_cw01.pdf Binary file cws/core_cw01.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/core_cw02.pdf Binary file cws/core_cw02.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/core_cw03.pdf Binary file cws/core_cw03.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/disclaimer.sty --- a/cws/disclaimer.sty Sat Oct 08 00:30:51 2022 +0100 +++ b/cws/disclaimer.sty Tue Nov 01 15:03:48 2022 +0000 @@ -24,7 +24,7 @@ meaning in Scala than in Java. It changes the meaning of your program, and you should never use it. -\item Do not use \texttt{var}! This declares a mutable variable. Only +\item Do not use \textbf{\texttt{var}}! This declares a mutable variable. Only use \texttt{val}! \item Do not use any parallel collections! No \texttt{.par} therefore! @@ -96,9 +96,11 @@ It should be understood that the work you submit represents your \textbf{own} effort! You have implemented the code entirely -on your own. You have not used any unauthorised aid (e.g.~Google). You have not copied from anyone else. An -exception is the Scala code I showed during the lectures or -uploaded to KEATS, which you can freely use.\bigskip +on your own. You have not copied from anyone else. +Do not be tempted to ask Github Copilot for help or +do any other shenanigans like this! An exception is the Scala +code I showed during the lectures or uploaded to KEATS, +which you can freely use.\bigskip } \newcommand{\DISCLAIMEREXAM}{% diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw01.pdf Binary file cws/main_cw01.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw02.pdf Binary file cws/main_cw02.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw02.tex --- a/cws/main_cw02.tex Sat Oct 08 00:30:51 2022 +0100 +++ b/cws/main_cw02.tex Tue Nov 01 15:03:48 2022 +0000 @@ -4,13 +4,14 @@ \usepackage{../styles/style} \usepackage{disclaimer} \usepackage{../styles/langs} +\usepackage{graphicx} \begin{document} %% should ask to lower case the words. -\section*{Main Part 2 (Scala, 6 Marks)} +\section*{Evil Wordle Game (Scala, 7 Marks)} \mbox{}\hfill\textit{``C makes it easy to shoot yourself in the foot; C++ makes it harder,}\\ \mbox{}\hfill\textit{ but when you do, it blows your whole leg off.''}\smallskip\\ @@ -20,8 +21,8 @@ \noindent -You are asked to implement a Scala program for recommending movies -according to a ratings list.\bigskip +You are asked to implement a Scala program for making the popular Wordle game as difficult +as possible.\bigskip \IMPORTANTNONE{} @@ -31,7 +32,6 @@ \DISCLAIMER{} - \subsection*{Reference Implementation} Like the C++ part, the Scala part works like this: you push your files @@ -51,163 +51,246 @@ you would type something like: \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] -$ scala -cp danube.jar -scala> val ratings_url = - | """https://nms.kcl.ac.uk/christian.urban/ratings.csv""" +$ scala -cp wordle.jar +scala> val secretsURL = + | """https://nms.kcl.ac.uk/christian.urban/wordle.txt""" -scala> M2.get_csv_url(ratings_url) -val res0: List[String] = List(1,1,4 ...) +scala> M2.get_wordle_list(secretsURL) +val res0: List[String] = List(aahed, aalii, ...) \end{lstlisting}%$ \subsection*{Hints} \noindent -Use \texttt{.split(",").toList} for splitting -strings according to commas (similarly for the newline character \mbox{$\backslash$\texttt{n}}), +Useful data functions: \texttt{Source.fromURL}, +\texttt{Source.fromFile} for obtaining a webpage and reading a file, \texttt{.getOrElse(..,..)} allows to query a Map, but also gives a default value if the Map is not defined, a Map can be `updated' by using \texttt{+}, \texttt{.contains} and \texttt{.filter} can test whether an element is included in a list, and respectively filter out elements in a list, \texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second elements in the pairs---the sorting is done from smallest to highest, -\texttt{.take(n)} for taking some elements in a list (takes fewer if the list -contains less than \texttt{n} elements). +\texttt{.groupBy} orders lists according to same elements +. \newpage -\subsection*{Main Part 2 (6 Marks, file danube.scala)} - -You are creating Danube.co.uk which you hope will be the next big thing -in online movie renting. You know that you can save money by -anticipating what movies people will rent; you will pass these savings -on to your users by offering a discount if they rent movies that -Danube.co.uk recommends. +\subsection*{Main Part 2 (6 Marks, file wordle.scala)} -Your task is to generate \emph{two} movie recommendations for every -movie a user rents. To do this, you calculate what other -renters, who also watched this movie, suggest by giving positive ratings. -Of course, some suggestions are more popular than others. You need to find -the two most-frequently suggested movies. Return fewer recommendations, -if there are fewer movies suggested. - -The calculations will be based on the small datasets which the research lab -GroupLens provides for education and development purposes. +You probably know the game of Wordle\footnote{\url{https://en.wikipedia.org/wiki/Wordle}} +where you are supposed to guess a five-letter word. The feedback for guesses can help +with the next guess (green letters are correct, orange letters are present, but in the +wrong place). For example: \begin{center} -\url{https://grouplens.org/datasets/movielens/} -\end{center} +\includegraphics[scale=0.2]{../pics/w.jpeg} +\end{center} \noindent -The slightly adapted CSV-files should be downloaded in your Scala -file from the URLs: - +The idea of the program to be implemented here is to make the Wordle game as evil as possible +by finding words that are the most difficult to guess. A word list of five-letter words is available +from \begin{center} \begin{tabular}{ll} - \url{https://nms.kcl.ac.uk/christian.urban/ratings.csv} & (940 KByte)\\ - \url{https://nms.kcl.ac.uk/christian.urban/movies.csv} & (280 KByte)\\ + \url{https://nms.kcl.ac.uk/christian.urban/wordle.txt} & (78 KByte)\\ \end{tabular} \end{center} \noindent -The ratings.csv file is organised as userID, -movieID, and rating (which is between 0 and 5, with \emph{positive} ratings -being 4 and 5). The file movie.csv is organised as -movieID and full movie name. Both files still contain the usual -CSV-file header (first line). In this part you are asked -to implement functions that process these files. If bandwidth -is an issue for you, download the files locally, but in the submitted -version use \texttt{Source.fromURL} instead of \texttt{Source.fromFile}. +In your program you need to download this list and implement some +functions that in the end select the most difficult words (given an +input from the user). If bandwidth is an issue for you, download the +file locally, but in the submitted version use \texttt{Source.fromURL} +instead of \texttt{Source.fromFile}. \subsection*{Tasks} \begin{itemize} -\item[(1)] Implement the function \pcode{get_csv_url} which takes an - URL-string as argument and requests the corresponding file. The two - URLs of interest are \pcode{ratings_url} and \pcode{movies_url}, - which correspond to CSV-files mentioned above. The function should - return the CSV-file appropriately broken up into lines, and the - first line should be dropped (that is omit the header of the CSV-file). - The result is a list of strings (the lines in the file). In case - the url does not produce a file, return the empty list.\\ - \mbox{}\hfill [1 Mark] +\item[(1)] Implement the function \pcode{get_wordle_list} which takes an + URL-string as argument and requests the corresponding file. The function should + return the word list appropriately broken up into lines. + The result should be a list of strings (the lines in the file). In case + the url does not produce a file, return the empty list. + \mbox{}\hfill [0.5 Marks] -\item[(2)] Implement two functions that process the (broken up) - CSV-files from (1). The \pcode{process_ratings} function filters out all - ratings below 4 and returns a list of (userID, movieID) pairs. The - \pcode{process_movies} function returns a list of (movieID, title) pairs. - Note the input to these functions will be the output of the function - \pcode{get_csv_url}.\\ - \mbox{}\hfill [1 Mark] -%\end{itemize} -% -% -%\subsection*{Part 3 (4 Marks, file danube.scala)} -% -%\subsection*{Tasks} -% -%\begin{itemize} -\item[(3)] Implement a kind of grouping function that calculates a Map - containing the userIDs and all the corresponding recommendations for - this user (list of movieIDs). This should be implemented in a - tail-recursive fashion using a Map as accumulator. This Map is set to - \pcode{Map()} at the beginning of the calculation. For example +\item[(2)] Implement a polymorphic function \pcode{removeN}, which removes $n$ occurrences of an + element from a list (if this element is less than $n$ times present, then remove all occurrences). + For example \begin{lstlisting}[numbers=none] -val lst = List(("1", "a"), ("1", "b"), - ("2", "x"), ("3", "a"), - ("2", "y"), ("3", "c")) -groupById(lst, Map()) +removeN(List(1,2,3,2,1), 3, 2) => List(1, 2, 2, 1) +removeN(List(1,2,3,2,1), 2, 1) => List(1, 3, 2, 1) +removeN(List(1,2,3,2,1), 2, 2) => List(1, 3, 1) +removeN(List(1,2,3,2,1), 1, 1) => List(2, 3, 2, 1) +removeN(List(1,2,3,2,1), 1, 3) => List(2, 3, 2) +removeN(List(1,2,3,2,1), 0, 2) => List(1, 2, 3, 2, 1) +\end{lstlisting} + +Make sure you only remove at most $n$ occurrences of the element from the list. +This function should work for lists of integers but also lists of chars, strings etc.\\ + \mbox{}\hfill [0.5 Marks] + +\item[(3)] Implement a function \pcode{score} that calculates the + feedback for a word against a secret word using the rules of the + Wordle game. The output of \pcode{score} should be a list of 5 + elements of type \pcode{Tip} representing three outcomes: a letter + in the correct position, a letter that is present, but not in the + correct position and a letter that is absent. For example given the + secret word "chess" the score for the word "caves" is + +\begin{lstlisting}[numbers=none] +List(Correct, Absent, Absent, Present, Correct) +\end{lstlisting} + + You have to be careful with multiple occurrences of letters. For example + the secret "chess" with the guess "swiss" should produce + +\begin{lstlisting}[numbers=none] +List(Absent, Absent, Absent, Correct, Correct) +\end{lstlisting} + +even though the first 's' in "swiss" is present in the secret word, the 's' are already +`used up' by the two letters that are correct. To implement this you need to +implement first a function \pcode{pool} which calculates all the letters in +a secret that are not correct in a word. For example + +\begin{lstlisting}[numbers=none] + pool("chess", "caves") => List(h, e, s) + pool("chess", "swiss") => List(c, h, e) \end{lstlisting} -returns the ratings map + Now the helper function \pcode{aux} can analyse the arguments secret and word recursively letter-by-letter and + decide: if the letters are the same, then return \pcode{Correct} for the corresponding position. + If they are not the same, but the letter is in the pool, then return \pcode{Present} and also remove + this letter from the pool in the next recursive call of \pcode{aux}. Otherwise return \pcode{Absent} for the + corresponding position. The function \pcode{score} is a wrapper for the function \pcode{aux} + calling \pcode{aux} with the appropriate arguments (recall what is calculated with \pcode{pool}).\mbox{}\hfill [2 Marks] + +\item[(4)] Implement a function \pcode{eval} that gives an integer value to each of the + \pcode{Tip}s such that -\begin{center} - \pcode{Map(1 -> List(b, a), 2 -> List(y, x), 3 -> List(c, a))}. -\end{center} + \begin{center} + \begin{tabular}{lcl} + \textit{eval (Correct)} & $\dn$ & $10$\\ + \textit{eval (Present)} & $\dn$ & $1$\\ + \textit{eval (Absent)} & $\dn$ & $0$ + \end{tabular} + \end{center} + + The function \pcode{iscore} then takes an output of \pcode{score} and sums + up all corresponding values. For example for + +\begin{lstlisting}[numbers=none] + iscore("chess", "caves") => 21 + iscore("chess", "swiss") => 20 +\end{lstlisting} + \mbox{}\hfill [0.5 Marks] -\noindent -In which order the elements of the list are given is unimportant.\\ -\mbox{}\hfill [1 Mark] +\item[(5)] The function \pcode{evil} takes a list of secrets (the list from Task 1) + and a word as arguments, and calculates the list of words with the lowest + score (remember we want to make the Wordle game as difficult as possible---therefore + when the user gives us a word, we want to find the secrets that produce the lowest + score). For this implement a helper function \pcode{lowest} that goes through + the secrets one-by-one and calculates the score. The argument \pcode{current} is + the score of the ``currently'' found secrets. When the function \pcode{lowest} + is called for the first time then this will be set to the maximum integer value + \pcode{Int.MaxValue}. The accumulator will be first empty. If a secret is found + with the same score as \pcode{current} then this word is added to the accumulator. + If the secret has a lower score, then the accumulator will be discarded and this + secret will be the new accumulator. If the secret has a higher score, then it can be + ignored. For example \pcode{evil} (the wrapper for \pcode{lowest}) generates + +\begin{lstlisting}[numbers=none] +evil(secrets, "stent").length => 1907 +evil(secrets, "hexes").length => 2966 +evil(secrets, "horse").length => 1203 +evil(secrets, "hoise").length => 971 +evil(secrets, "house").length => 1228 +\end{lstlisting} -\item[(4)] Implement a function that takes a ratings map and a movieID - as arguments. The function calculates all suggestions containing the - given movie in its recommendations. It returns a list of all these - recommendations (each of them is a list and needs to have the given - movie deleted, otherwise it might happen we recommend the same movie - ``back''). For example for the Map from above and the movie - \pcode{"y"} we obtain \pcode{List(List("x"))}, and for the movie - \pcode{"a"} we get \pcode{List(List("b"), List("c"))}.\\ +where \pcode{secrets} is the list generated under Task 1. +In all cases above the iscore of the resulting secrets is 0, but this does not need to be the case +in general.\\ + \mbox{}\hfill [1.5 Marks] + +\item[(6)] The secrets generated in Task 5 are the ones with the lowest score + with respect to the word. You can think of these as the secrets that are furthest ``away'' from the + given word. This is already quite evil for a secret word---remember we can choose + a secret \emph{after} a user has given a first word. Now we want to make it + even more evil by choosing words that have the most obscure letters. For this we + calculate the frequency of how many times certain letters occur in our secrets + list (see Task 1). The \emph{frequency} of the letter $c$, say, is given by the formula + + \begin{center} + $\textit{freq(c)} \dn 1 - \frac{\textit{number of occurrences of c}}{\textit{number of all letters}}$ + \end{center} + + That means that letters that occur fewer times in our secrets have a higher frequency. + For example the letter 'y' has the frequency 0.9680234350909651 while the much more + often occurring letter 'e' has only 0.897286463151403 (all calculations should be done + with Doubles). + + The function \pcode{frequencies} should calculate the frequencies for all lower-case letters + by generating a Map from letters (\pcode{Char}) to Doubles (frequencies).\\ \mbox{}\hfill [1 Mark] -\item[(5)] Implement a suggestions function which takes a ratings map - and a movieID as arguments. It calculates all the recommended movies - sorted according to the most frequently suggested movie(s) sorted - first. This function returns \emph{all} suggested movieIDs as a list of - strings.\\ - \mbox{}\hfill [1 Mark] +\item[(7)] In this task we want to use the output of \pcode{evil}, rank each string in the + generated set and then filter out the strings that are ranked highest (the ones with the most obscure letters). + This list of strings often contains only a single word, but in general there might be more (see below). + First implement a function \pcode{rank} that takes a frequency map (from 6) and a string as arguments and + generates a rank by summing up all frequencies of the letters in the string. For example + +\begin{lstlisting}[numbers=none] +rank(frequencies(secrets), "adobe") => 4.673604687018193 +rank(frequencies(secrets), "gaffe") => 4.745205057045945 +rank(frequencies(secrets), "fuzzy") => 4.898735738513722 +\end{lstlisting} + + Finally, implement a function \pcode{ranked_evil} that selects from the output of \pcode{evil} + the string(s) which are highest ranked in evilness. + + +\begin{lstlisting}[numbers=none] +ranked_evil(secrets, "abbey") => List(whizz) +ranked_evil(secrets, "afear") => List(buzzy) +ranked_evil(secrets, "zincy") => List(jugum) +ranked_evil(secrets, "zippy") => List(chuff) +\end{lstlisting} + +This means if the user types in "abbey" then the most evil word to choose as secret is ``whizz'' (according to +our calculations). This word has a zero \pcode{iscore} and the most obscure letters. -\item[(6)] - Implement then a recommendation function which generates a maximum - of two most-suggested movies (as calculated above). But it returns - the actual movie name, not the movieID. If fewer movies are recommended, - then return fewer than two movie names.\\ - \mbox{}\hfill [1 Mark] +% +%\color{red} +%\section*{Correction with \texttt{ranked\_evil}} +% +%The testcases above are actually not the maximum, but the minimum! I will make sure +%that the task will count as solved when either the minimum (as above) or the maximum (as intended) +%is used. The correct solutions for the above testcases using the maximum are: +%\color{black} +% +%\begin{lstlisting}[numbers=none] +%ranked_evil(secrets, "beats") => List(fuzzy) +%ranked_evil(secrets, "vitae") => List(fuzzy) +%ranked_evil(secrets, "bento") => List(fuzzy) +%ranked_evil(secrets, "belts") => List(fuzzy) +%\end{lstlisting} +% +%\noindent \textcolor{red}{Some further testcases for the maximum are:} +% +%\begin{lstlisting}[numbers=none] +%ranked_evil(secrets, "abbey") => List(whizz) +%ranked_evil(secrets, "afear") => List(buzzy) +%ranked_evil(secrets, "zincy") => List(jugum) +%ranked_evil(secrets, "zippy") => List(chuff) +%\end{lstlisting} +% +% -%\item[(7)] Calculate the recommendations for all movies according to -% what the recommendations function in (6) produces (this -% can take a few seconds). Put all recommendations into a list -% (of strings) and count how often the strings occur in -% this list. This produces a list of string-int pairs, -% where the first component is the movie name and the second -% is the number of how many times the movie was recommended. -% Sort all the pairs according to the number -% of times they were recommended (most recommended movie name -% first).\\ -% \mbox{}\hfill [1 Mark] - +\mbox{}\hfill [1 Mark] \end{itemize} \end{document} diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw02.text-old --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/main_cw02.text-old Tue Nov 01 15:03:48 2022 +0000 @@ -0,0 +1,221 @@ + +% !TEX program = xelatex +\documentclass{article} +\usepackage{../styles/style} +\usepackage{disclaimer} +\usepackage{../styles/langs} + +\begin{document} + + +%% should ask to lower case the words. + +\section*{Main Part 2 (Scala, 6 Marks)} + +\mbox{}\hfill\textit{``C makes it easy to shoot yourself in the foot; C++ makes it harder,}\\ +\mbox{}\hfill\textit{ but when you do, it blows your whole leg off.''}\smallskip\\ +\mbox{}\hfill\textit{ --- Bjarne Stroustrup (creator of the C++ language)}\bigskip\bigskip + + + + +\noindent +You are asked to implement a Scala program for recommending movies +according to a ratings list.\bigskip + +\IMPORTANTNONE{} + +\noindent +Also note that the running time of each part will be restricted to a +maximum of 30 seconds on my laptop. + +\DISCLAIMER{} + + +\subsection*{Reference Implementation} + +Like the C++ part, the Scala part works like this: you push your files +to GitHub and receive (after sometimes a long delay) some automated +feedback. In the end we will take a snapshot of the submitted files +and apply an automated marking script to them.\medskip + +\noindent +In addition, the Scala part comes with reference +implementations in form of \texttt{jar}-files. This allows you to run +any test cases on your own computer. For example you can call Scala on +the command line with the option \texttt{-cp danube.jar} and then +query any function from the template file. Say you want to find out +what the function \texttt{} produces: for this you just need +to prefix it with the object name \texttt{M2}. If you want to find out what +these functions produce for the list \texttt{List("a", "b", "b")}, +you would type something like: + +\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] +$ scala -cp danube.jar +scala> val ratings_url = + | """https://nms.kcl.ac.uk/christian.urban/ratings.csv""" + +scala> M2.get_csv_url(ratings_url) +val res0: List[String] = List(1,1,4 ...) +\end{lstlisting}%$ + +\subsection*{Hints} + +\noindent +Use \texttt{.split(",").toList} for splitting +strings according to commas (similarly for the newline character \mbox{$\backslash$\texttt{n}}), +\texttt{.getOrElse(..,..)} allows to query a Map, but also gives a +default value if the Map is not defined, a Map can be `updated' by +using \texttt{+}, \texttt{.contains} and \texttt{.filter} can test whether +an element is included in a list, and respectively filter out elements in a list, +\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second +elements in the pairs---the sorting is done from smallest to highest, +\texttt{.take(n)} for taking some elements in a list (takes fewer if the list +contains less than \texttt{n} elements). + + +\newpage + + +\subsection*{Main Part 2 (6 Marks, file danube.scala)} + +You are creating Danube.co.uk which you hope will be the next big thing +in online movie renting. You know that you can save money by +anticipating what movies people will rent; you will pass these savings +on to your users by offering a discount if they rent movies that +Danube.co.uk recommends. + +Your task is to generate \emph{two} movie recommendations for every +movie a user rents. To do this, you calculate what other +renters, who also watched this movie, suggest by giving positive ratings. +Of course, some suggestions are more popular than others. You need to find +the two most-frequently suggested movies. Return fewer recommendations, +if there are fewer movies suggested. + +The calculations will be based on the small datasets which the research lab +GroupLens provides for education and development purposes. + +\begin{center} +\url{https://grouplens.org/datasets/movielens/} +\end{center} + +\noindent +The slightly adapted CSV-files should be downloaded in your Scala +file from the URLs: + + +\begin{center} +\begin{tabular}{ll} + \url{https://nms.kcl.ac.uk/christian.urban/ratings.csv} & (940 KByte)\\ + \url{https://nms.kcl.ac.uk/christian.urban/movies.csv} & (280 KByte)\\ +\end{tabular} +\end{center} + +\noindent +The ratings.csv file is organised as userID, +movieID, and rating (which is between 0 and 5, with \emph{positive} ratings +being 4 and 5). The file movie.csv is organised as +movieID and full movie name. Both files still contain the usual +CSV-file header (first line). In this part you are asked +to implement functions that process these files. If bandwidth +is an issue for you, download the files locally, but in the submitted +version use \texttt{Source.fromURL} instead of \texttt{Source.fromFile}. + +\subsection*{Tasks} + +\begin{itemize} +\item[(1)] Implement the function \pcode{get_csv_url} which takes an + URL-string as argument and requests the corresponding file. The two + URLs of interest are \pcode{ratings_url} and \pcode{movies_url}, + which correspond to CSV-files mentioned above. The function should + return the CSV-file appropriately broken up into lines, and the + first line should be dropped (that is omit the header of the CSV-file). + The result is a list of strings (the lines in the file). In case + the url does not produce a file, return the empty list.\\ + \mbox{}\hfill [1 Mark] + +\item[(2)] Implement two functions that process the (broken up) + CSV-files from (1). The \pcode{process_ratings} function filters out all + ratings below 4 and returns a list of (userID, movieID) pairs. The + \pcode{process_movies} function returns a list of (movieID, title) pairs. + Note the input to these functions will be the output of the function + \pcode{get_csv_url}.\\ + \mbox{}\hfill [1 Mark] +%\end{itemize} +% +% +%\subsection*{Part 3 (4 Marks, file danube.scala)} +% +%\subsection*{Tasks} +% +%\begin{itemize} +\item[(3)] Implement a kind of grouping function that calculates a Map + containing the userIDs and all the corresponding recommendations for + this user (list of movieIDs). This should be implemented in a + tail-recursive fashion using a Map as accumulator. This Map is set to + \pcode{Map()} at the beginning of the calculation. For example + +\begin{lstlisting}[numbers=none] +val lst = List(("1", "a"), ("1", "b"), + ("2", "x"), ("3", "a"), + ("2", "y"), ("3", "c")) +groupById(lst, Map()) +\end{lstlisting} + +returns the ratings map + +\begin{center} + \pcode{Map(1 -> List(b, a), 2 -> List(y, x), 3 -> List(c, a))}. +\end{center} + +\noindent +In which order the elements of the list are given is unimportant.\\ +\mbox{}\hfill [1 Mark] + +\item[(4)] Implement a function that takes a ratings map and a movieID + as arguments. The function calculates all suggestions containing the + given movie in its recommendations. It returns a list of all these + recommendations (each of them is a list and needs to have the given + movie deleted, otherwise it might happen we recommend the same movie + ``back''). For example for the Map from above and the movie + \pcode{"y"} we obtain \pcode{List(List("x"))}, and for the movie + \pcode{"a"} we get \pcode{List(List("b"), List("c"))}.\\ + \mbox{}\hfill [1 Mark] + +\item[(5)] Implement a suggestions function which takes a ratings map + and a movieID as arguments. It calculates all the recommended movies + sorted according to the most frequently suggested movie(s) sorted + first. This function returns \emph{all} suggested movieIDs as a list of + strings.\\ + \mbox{}\hfill [1 Mark] + +\item[(6)] + Implement then a recommendation function which generates a maximum + of two most-suggested movies (as calculated above). But it returns + the actual movie name, not the movieID. If fewer movies are recommended, + then return fewer than two movie names.\\ + \mbox{}\hfill [1 Mark] + +%\item[(7)] Calculate the recommendations for all movies according to +% what the recommendations function in (6) produces (this +% can take a few seconds). Put all recommendations into a list +% (of strings) and count how often the strings occur in +% this list. This produces a list of string-int pairs, +% where the first component is the movie name and the second +% is the number of how many times the movie was recommended. +% Sort all the pairs according to the number +% of times they were recommended (most recommended movie name +% first).\\ +% \mbox{}\hfill [1 Mark] + +\end{itemize} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + + + diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw03.pdf Binary file cws/main_cw03.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw03.tex --- a/cws/main_cw03.tex Sat Oct 08 00:30:51 2022 +0100 +++ b/cws/main_cw03.tex Tue Nov 01 15:03:48 2022 +0000 @@ -119,7 +119,7 @@ % BF IDE % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5 -\section*{Main Part 3 (Scala, 6 Marks)} +\section*{Main Part 3 (Scala, 7 Marks)} \mbox{}\hfill\textit{``Java is the most distressing thing to happen to computing since MS-DOS.''}\smallskip\\ \mbox{}\hfill\textit{ --- Alan Kay, the inventor of object-oriented programming}\bigskip\medskip @@ -206,7 +206,7 @@ {\small \begin{itemize} \item[$\bullet$] \url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019} -\item[$\bullet$] \url{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016} +\item[$\bullet$] \texttt{\href{https://web.archive.org/web/20160801163029/https://www.stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}} \item[$\bullet$] \url{https://vimeo.com/112065252} \item[$\bullet$] \url{https://davidvgalbraith.com/how-i-fixed-atom} \end{itemize}} @@ -218,9 +218,9 @@ \subsubsection*{Tasks (file re.scala)} The file \texttt{re.scala} has already a definition for regular -expressions and also defines some handy shorthand notation for -regular expressions. The notation in this document matches up -with the code in the file as follows: +expressions and also defines some handy shorthand notation for regular +expressions. The notation in this coursework description matches up +with the code as follows: \begin{center} \begin{tabular}{rcl@{\hspace{10mm}}l} @@ -230,6 +230,7 @@ $c$ & $\mapsto$ & \texttt{CHAR(c)}\\ $\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\ $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\ + $\prod rs$ & $\mapsto$ & \texttt{SEQs(rs)}\\ $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\ $r^*$ & $\mapsto$ & \texttt{STAR(r)} & \texttt{r.\%} \end{tabular} @@ -244,13 +245,15 @@ if it is non-empty, we sometimes write $\sum\,[r_1,..., r_n]$. The binary alternative can be seen as an abbreviation, that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the -binary version and only implement the n-ary alternative. +binary version and only implement the n-ary alternative. Similarly +the sequence regular expression is only implemented with lists and the +binary version can be obtained by defining $r_1 \cdot r_2 \dn \prod\,[r_1, r_2]$. \begin{itemize} \item[(1)] Implement a function, called \textit{nullable}, by recursion over regular expressions. This function tests whether a regular expression can match the empty string. This means given a - regular expression it either returns true or false. The function + regular expression, it either returns true or false. The function \textit{nullable} is defined as follows: @@ -260,7 +263,7 @@ $\textit{nullable}(\ONE)$ & $\dn$ & $\textit{true}$\\ $\textit{nullable}(c)$ & $\dn$ & $\textit{false}$\\ $\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\ -$\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\ +$\textit{nullable}(\prod rs)$ & $\dn$ & $\forall r\in rs.\;\textit{nullable}(r)$\\ $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\ \end{tabular} \end{center}~\hfill[0.5 Marks] @@ -276,130 +279,130 @@ $\textit{der}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\ $\textit{der}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\ $\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\ -$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ - & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\ - & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\ +$\textit{der}\;c\;(\prod\;[])$ & $\dn$ & $\ZERO$\\ +$\textit{der}\;c\;(\prod\;r\!::\!rs)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r)$\\ + & & $\textit{then}\;(\prod\;(\textit{der}\;c\;r)\!::\!rs) + (\textit{der}\;c\;(\prod rs))$\\ + & & $\textit{else}\;(\prod\;(\textit{der}\;c\;r)\!::\! rs)$\\ $\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\ \end{tabular} \end{center} - -For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives -w.r.t.~the characters $a$, $b$ and $c$ are - -\begin{center} - \begin{tabular}{lcll} - $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\ - $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\ - $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$ - \end{tabular} -\end{center} - -Let $r'$ stand for the first derivative, then taking the derivatives of $r'$ -w.r.t.~the characters $a$, $b$ and $c$ gives - -\begin{center} - \begin{tabular}{lcll} - $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\ - $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\ - $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ - \end{tabular} -\end{center} - -One more example: Let $r''$ stand for the second derivative above, -then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$ -and $c$ gives - -\begin{center} - \begin{tabular}{lcll} - $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\ - $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\ - $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ & - (is $\textit{nullable}$) - \end{tabular} -\end{center} - -Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\ \mbox{}\hfill\mbox{[1 Mark]} \item[(3)] We next want to simplify regular expressions: essentially we want to remove $\ZERO$ in regular expressions like $r + \ZERO$ and $\ZERO + r$. However, our n-ary alternative takes a list of - regular expressions as argument, we therefore need a more general - ``flatten'' function, called \texttt{flts}. This function should - analyse a list of regular expressions, say $rs$, as follows: + regular expressions as argument, and we therefore need a more general + ``denesting'' function, which deletes $\ZERO$s and ``spills out'' nested + $\sum$s. This function, called \texttt{denest}, should analyse a + list of regular expressions, say $rs$, as follows: \begin{center} \begin{tabular}{lllll} 1) &$rs = []$ & $\dn$ & $[]$ & (empty list)\\ - 2) &$rs = \ZERO :: rest$ & $\dn$ & $\textrm{flatten}\;rest$\\ - 3) &$rs = (\sum rs_1) :: rest$ & $\dn$ & $rs_1 ::: \textrm{flatten}\;rest$\\ - 4) &$rs = r :: rest$ & $\dn$ & $r :: \textrm{flatten}\;rest$ & (otherwise)\\ + 2) &$rs = \ZERO :: rest$ & $\dn$ & $\texttt{denest}\;rest$ & (throw away $\ZERO$)\\ + 3) &$rs = (\sum rs) :: rest$ & $\dn$ & $rs ::: \texttt{denest}\;rest$ & (spill out $\sum$)\\ + 4) &$rs = r :: rest$ & $\dn$ & $r :: \texttt{denest}\;rest$ & (otherwise)\\ \end{tabular} \end{center} The first clause states that empty lists cannot be further - flattened. The second removes the first $\ZERO$ from the list and recurses. + denested. The second removes the first $\ZERO$ from the list and recurses. The third is when the first regular expression is an \texttt{ALTs}, then the content of this alternative should be spilled out and appended - with the flattened rest of the list. The last case is for all other + with the denested rest of the list. The last case is for all other cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs}, - then we just keep the head of the list and flatten the rest. + then we just keep the head of the list and denest the rest.\\ \mbox{}\hfill\mbox{[1 Mark]} -\item[(4)] Implement the function \textit{simp}, which recursively +\item[(4)] Implement the function \texttt{flts} which flattens our + n-ary sequence regular expression $\prod$. Like \texttt{denest}, this + function is intended to delete $\ONE$s and spill out nested $\prod$s. + Unfortunately, there is a special case to do with $\ZERO$: If this function encounters a $\ZERO$, then + the whole ``product'' should be $\ZERO$. The problem is that the $\ZERO$ can be anywhere + inside the list. The easiest way to implement this function is therefore by using an + accumulator, which when called is set to \texttt{Nil}. This means \textit{flts} takes + two arguments (which are both lists of regular expressions) + + \[ + \texttt{flts}\;rs\;acc + \] + + This function analyses the list $rs$ as follows + + \begin{center} + \begin{tabular}{@{}lllll@{}} + 1) &$rs = []$ & $\dn$ & $acc$ & (empty list)\\ + 2) &$rs = \ZERO :: rest$ & $\dn$ & $[\ZERO]$ & (special case for $\ZERO$)\\ + 3) &$rs = \ONE :: rest$ & $\dn$ & $\texttt{flts}\,rest\,acc$ & (throw away $\ONE$)\\ + 4) &$rs = (\prod rs) :: rest$ & $\dn$ & $\texttt{flts}\;rest\,(acc ::: rs)$ & (spill out $\prod$)\\ + 5) &$rs = r :: rest$ & $\dn$ & $\texttt{flts}\;rest\,(acc ::: [r])$& (otherwise)\\ + \end{tabular} + \end{center} + + In the first case we just return whatever has accumulated in + $acc$. In the fourth case we spill out the $rs$ by appending the + $rs$ to the end of the accumulator. Similarly in the last case we + append the single regular expression $r$ to the end of the + accumulator. I let you think why the ``end'' is needed. \mbox{}\hfill\mbox{[1 Mark]} + +\item[(5)] Before we can simplify regular expressions, we need what is often called + \emph{smart constructors} for $\sum$ and $\prod$. While the ``normal'' constructors + \texttt{ALTs} and \texttt{SEQs} give us alternatives and sequences, respectively, \emph{smart} + constructors might return something different depending on what list of regular expressions + they are given as argument. + + \begin{center} + \begin{tabular}{@{}c@{\hspace{9mm}}c@{}} + \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll} + & $\sum^{smart}$\smallskip\\ + 1) & $rs = []$ & $\dn$ & $\ZERO$\\ + 2) & $rs = [r]$ & $\dn$ & $r$\\ \\ + 3) & otherwise & $\dn$ & $\sum\,rs$ + \end{tabular} & + \begin{tabular}{l@{\hspace{2mm}}l@{\hspace{1mm}}ll} + & $\prod^{smart}$\smallskip\\ + 1) & $rs = []$ & $\dn$ & $\ONE$\\ + 2a) & $rs = [\ZERO]$ & $\dn$ & $\ZERO$\\ + 2b) & $rs = [r]$ & $\dn$ & $r$\\ + 3) & otherwise & $\dn$ & $\prod\,rs$ + \end{tabular} + \end{tabular} + \end{center} + \mbox{}\hfill\mbox{[0.5 Marks]} + +\item[(6)] Implement the function \textit{simp}, which recursively traverses a regular expression, and on the way up simplifies every regular expression on the left (see below) to the regular expression on the right, except it does not simplify inside ${}^*$-regular - expressions. + expressions and also does not simplify $\ZERO$, $\ONE$ and characters. \begin{center} -\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll} -$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ -$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ -$r \cdot \ONE$ & $\mapsto$ & $r$\\ -$\ONE \cdot r$ & $\mapsto$ & $r$\\ -$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum\;(\texttt{(flts + distinct)}[simp(r_1),..,simp(r_n)])$\\ + \begin{tabular}{@{}l@{\hspace{3mm}}c@{\hspace{3mm}}ll@{}} + LHS: & & RHS:\smallskip\\ +$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum^{smart}\;(\texttt{(denest + distinct)}[simp(r_1),..,simp(r_n)])$\\ + $\prod\;[r_1,..,r_n]$ & $\mapsto$ & $\prod^{smart}\;(\texttt{(flts)}[simp(r_1),..,simp(r_n)])$\\ + $r$ & $\mapsto$ & $r$ \quad (all other cases) \end{tabular} \end{center} -The last case is as follows: first apply $simp$ to all regular -expressions $r_1,.. ,r_n$; then flatten the resulting list using -\texttt{flts}; finally remove all duplicates in this list (this can be +The first case is as follows: first apply $simp$ to all regular +expressions $r_1,.. ,r_n$; then denest the resulting list using +\texttt{denest}; after that remove all duplicates in this list (this can be done in Scala using the function -\texttt{\_.distinct}). When you perform these - operations, you end up with three cases, namely where the list is - empty, contains a single element and ``otherwise''. These cases - should be processed as follows -\begin{center} -\begin{tabular}{l@{\hspace{4mm}}c@{\hspace{4mm}}ll} -$\sum\;[]$ & $\mapsto$ & $\ZERO$\\ -$\sum\;[r]$ & $\mapsto$ & $r$\\ -$\sum\;rs$ & $\mapsto$ & $\sum\;rs$ & ``otherwise''\\ -\end{tabular} -\end{center} - +\texttt{\_.distinct}). Finally, you end up with a list of (simplified) +regular expressions; apply the smart constructor $\sum^{smart}$ to this list. +Similarly in the $\prod$ case: simplify first all regular +expressions $r_1,.. ,r_n$; then flatten the resulting list using \texttt{flts} and apply the +smart constructor $\prod^{smart}$ to the result. In all other cases, just return the +input $r$ as is. For example the regular expression \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\] - simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be - seen as trees and there are several methods for traversing - trees. One of them corresponds to the inside-out traversal, which is - also sometimes called post-order tra\-versal: you traverse inside - the tree and on the way up you apply simplification rules. - \textbf{Another Hint:} Remember numerical expressions from school - times---there you had expressions like - $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ and - simplification rules that looked very similar to rules above. You - would simplify such numerical expressions by replacing for example - the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then look whether - more rules are applicable. If you regard regular expressions as - trees and if you organise the simplification in an inside-out - fashion, it is always clear which simplification should be applied - next.\mbox{}\hfill\mbox{[1 Mark]} + simplifies to just $r_1$. \mbox{}\hfill\mbox{[1 Mark]} -\item[(5)] Implement two functions: The first, called \textit{ders}, +\item[(7)] Implement two functions: The first, called \textit{ders}, takes a list of characters and a regular expression as arguments, and builds the derivative w.r.t.~the list as follows: @@ -407,7 +410,7 @@ \begin{tabular}{lcl} $\textit{ders}\;(Nil)\;r$ & $\dn$ & $r$\\ $\textit{ders}\;(c::cs)\;r$ & $\dn$ & - $\textit{ders}\;cs\;(\textit{simp}(\textit{der}\;c\;r))$\\ + $\textit{ders}\;cs\;(\textit{simp}\,(\textit{der}\;c\;r))$\\ \end{tabular} \end{center} @@ -420,9 +423,9 @@ derivative regular expression can match the empty string (using \textit{nullable}). For example the \textit{matcher} will produce true for the regular expression $(a\cdot b)\cdot c$ and the string -$abc$, but false if you give it the string $ab$. \hfill[1 Mark] +$abc$, but false if you give it the string $ab$. \hfill[0.5 Mark] -\item[(6)] Implement a function, called \textit{size}, by recursion +\item[(8)] Implement a function, called \textit{size}, by recursion over regular expressions. If a regular expression is seen as a tree, then \textit{size} should return the number of nodes in such a tree. Therefore this function is defined as follows: @@ -433,7 +436,7 @@ $\textit{size}(\ONE)$ & $\dn$ & $1$\\ $\textit{size}(c)$ & $\dn$ & $1$\\ $\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\ -$\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\ +$\textit{size}(\prod\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\ $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\ \end{tabular} \end{center} @@ -444,7 +447,7 @@ taking the derivative, but simplify the result. The sizes are given in \texttt{re.scala}. \hfill[0.5 Marks] -\item[(7)] You do not have to implement anything specific under this +\item[(9)] You do not have to implement anything specific under this task. The purpose here is that you will be marked for some ``power'' test cases. For example can your matcher decide within 30 seconds whether the regular expression $(a^*)^*\cdot b$ matches strings of the @@ -463,7 +466,7 @@ \subsection*{Background} Although easily implementable in Scala (ok maybe the \texttt{simp} functions and -\texttt{ALTs} needs a bit more thinking), the idea behind the +the constructors \texttt{ALTs}/\texttt{SEQs} needs a bit more thinking), the idea behind the derivative function might not so easy to be seen. To understand its purpose better, assume a regular expression $r$ can match strings of the form $c\!::\!cs$ (that means strings which start with a character @@ -484,11 +487,11 @@ $\textit{der}\;c\;(\textit{der}\;b\;(\textit{der}\;a\;r))$, you obtain a regular expression that can match the empty string. You can test whether this is indeed the case using the function nullable, which is -what your matcher is doing. +what the matcher you have implemented is doing. The purpose of the $\textit{simp}$ function is to keep the regular expressions small. Normally the derivative function makes the regular -expression bigger (see the SEQ case and the example in (2)) and the +expression bigger (see the \texttt{SEQs} case and the example in Task (2)) and the algorithm would be slower and slower over time. The $\textit{simp}$ function counters this increase in size and the result is that the algorithm is fast throughout. By the way, this algorithm is by Janusz @@ -501,16 +504,19 @@ If you want to see how badly the regular expression matchers do in -Java\footnote{Version 8 and below; Version 9 and above does not seem to be as - catastrophic, but still much worse than the regular expression - matcher based on derivatives.}, JavaScript and Python with the -``evil'' regular expression $(a^*)^*\cdot b$, then have a look at the -graphs below (you can try it out for yourself: have a look at the files +Java\footnote{Version 8 and below; Version 9 and above does not seem + to be as catastrophic, but still much worse than the regular + expression matcher based on derivatives. BTW, Scala uses the regular + expression matcher provided by the Java libraries. So is just as bad.}, JavaScript, +Python Swift and Dart with the ``evil'' regular expression +$(a^*)^*\cdot b$, then have a look at the graphs below (you can try it +out for yourself: have a look at the files \texttt{catastrophic9.java}, \texttt{catastrophic.js}, -\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you -have implemented. How long can a string of $a$'s be in your matcher -and still stay within the 30 seconds time limit? It should be muuuch better -than your off-the-shelf matcher in your bog-standard language. +\texttt{catastrophic.py} etc on KEATS). Compare this with the matcher +you have implemented. How long can a string of $a$'s be in your +matcher and still stay within the 30 seconds time limit? It should be +mu(uu)$^*$ch better than your off-the-shelf matcher in your +bog-standard language. \begin{center} \begin{tabular}{@{}cc@{}} @@ -574,6 +580,46 @@ \end{document} + +For example given the regular expression $r = (a \cdot b) \cdot c$, the derivatives +w.r.t.~the characters $a$, $b$ and $c$ are + +\begin{center} + \begin{tabular}{lcll} + $\textit{der}\;a\;r$ & $=$ & $(\ONE \cdot b)\cdot c$ & \quad($= r'$)\\ + $\textit{der}\;b\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$\\ + $\textit{der}\;c\;r$ & $=$ & $(\ZERO \cdot b)\cdot c$ + \end{tabular} +\end{center} + +Let $r'$ stand for the first derivative, then taking the derivatives of $r'$ +w.r.t.~the characters $a$, $b$ and $c$ gives + +\begin{center} + \begin{tabular}{lcll} + $\textit{der}\;a\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ \\ + $\textit{der}\;b\;r'$ & $=$ & $((\ZERO \cdot b) + \ONE)\cdot c$ & \quad($= r''$)\\ + $\textit{der}\;c\;r'$ & $=$ & $((\ZERO \cdot b) + \ZERO)\cdot c$ + \end{tabular} +\end{center} + +One more example: Let $r''$ stand for the second derivative above, +then taking the derivatives of $r''$ w.r.t.~the characters $a$, $b$ +and $c$ gives + +\begin{center} + \begin{tabular}{lcll} + $\textit{der}\;a\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$ \\ + $\textit{der}\;b\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ZERO$\\ + $\textit{der}\;c\;r''$ & $=$ & $((\ZERO \cdot b) + \ZERO) \cdot c + \ONE$ & + (is $\textit{nullable}$) + \end{tabular} +\end{center} + +Note, the last derivative can match the empty string, that is it is \textit{nullable}. + + + %%% Local Variables: %%% mode: latex %%% TeX-master: t diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw04.pdf Binary file cws/main_cw04.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 cws/main_cw05.pdf Binary file cws/main_cw05.pdf has changed diff -r 6e93040e3378 -r cdfa6a293453 main_solution3/re.scala --- a/main_solution3/re.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_solution3/re.scala Tue Nov 01 15:03:48 2022 +0000 @@ -1,5 +1,5 @@ // Main Part 3 about Regular Expression Matching -//============================================= +//============================================== object M3 { @@ -8,13 +8,15 @@ case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALTs(rs: List[Rexp]) extends Rexp // alternatives -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star +case class ALTs(rs: List[Rexp]) extends Rexp // alternatives +case class SEQs(rs: List[Rexp]) extends Rexp // sequences +case class STAR(r: Rexp) extends Rexp // star -//the usual binary choice can be defined in terms of ALTs +//the usual binary choice and binary sequence can be defined +//in terms of ALTs and SEQs def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) +def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2)) // some convenience for typing in regular expressions import scala.language.implicitConversions @@ -41,81 +43,78 @@ def ~ (r: String) = SEQ(s, r) } -// (1) Complete the function nullable according to -// the definition given in the coursework; this -// function checks whether a regular expression -// can match the empty string and Returns a boolean -// accordingly. - +// (1) def nullable (r: Rexp) : Boolean = r match { case ZERO => false case ONE => true case CHAR(_) => false case ALTs(rs) => rs.exists(nullable) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case SEQs(rs) => rs.forall(nullable) case STAR(_) => true } -// (2) Complete the function der according to -// the definition given in the coursework; this -// function calculates the derivative of a -// regular expression w.r.t. a character. - -def der (c: Char, r: Rexp) : Rexp = r match { +// (2) +def der(c: Char, r: Rexp) : Rexp = r match { case ZERO => ZERO case ONE => ZERO case CHAR(d) => if (c == d) ONE else ZERO case ALTs(rs) => ALTs(rs.map(der(c, _))) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) + case SEQs(Nil) => ZERO + case SEQs(r1::rs) => + if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs))) + else SEQs(der(c, r1):: rs) case STAR(r1) => SEQ(der(c, r1), STAR(r1)) } -// (3) Implement the flatten function flts. It -// deletes 0s from a list of regular expressions -// and also 'spills out', or flattens, nested -// ALTernativeS. +// (3) +def denest(rs: List[Rexp]) : List[Rexp] = rs match { + case Nil => Nil + case ZERO::tl => denest(tl) + case ALTs(rs1)::rs2 => rs1 ::: denest(rs2) + case r::rs => r :: denest(rs) +} -def flts(rs: List[Rexp]) : List[Rexp] = rs match { - case Nil => Nil - case ZERO::tl => flts(tl) - case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) - case r::rs => r :: flts(rs) +// (4) +def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match { + case Nil => acc + case ZERO::rs => ZERO::Nil + case ONE::rs => flts(rs, acc) + case SEQs(rs1)::rs => flts(rs, acc ::: rs1) + case r::rs => flts(rs, acc :+ r) } -// (4) Complete the simp function according to -// the specification given in the coursework; this -// function simplifies a regular expression from -// the inside out, like you would simplify arithmetic -// expressions; however it does not simplify inside -// STAR-regular expressions. +// (5) +def ALTs_smart(rs: List[Rexp]) : Rexp = rs match { + case Nil => ZERO + case r::Nil => r + case rs => ALTs(rs) +} +def SEQs_smart(rs: List[Rexp]) : Rexp = rs match { + case Nil => ONE + case ZERO::nil => ZERO + case r::Nil => r + case rs => SEQs(rs) +} + +// (6) def simp(r: Rexp) : Rexp = r match { - case ALTs(rs) => (flts(rs.map(simp)).distinct) match { - case Nil => ZERO - case r::Nil => r - case rs => ALTs(rs) - } - case SEQ(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } + case ALTs(rs) => + ALTs_smart(denest(rs.map(simp)).distinct) + case SEQs(rs) => + SEQs_smart(flts(rs.map(simp))) case r => r } -simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) +//println("Simp tests") +//println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))) +//println(simp(((CHAR('a') | ZERO) ~ ONE) | +// (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))) -// (5) Complete the two functions below; the first -// calculates the derivative w.r.t. a string; the second -// is the regular expression matcher taking a regular -// expression and a string and checks whether the -// string matches the regular expression. + +// (7) def ders (s: List[Char], r: Rexp) : Rexp = s match { case Nil => r @@ -125,43 +124,40 @@ // main matcher function def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) -// (6) Complete the size function for regular -// expressions according to the specification -// given in the coursework. - +// (8) def size(r: Rexp): Int = r match { case ZERO => 1 case ONE => 1 case CHAR(_) => 1 case ALTs(rs) => 1 + rs.map(size).sum - case SEQ(r1, r2) => 1 + size(r1) + size (r2) + case SEQs(rs) => 1 + rs.map(size).sum case STAR(r1) => 1 + size(r1) } // some testing data - -//matcher(("a" ~ "b") ~ "c", "abc") // => true -//matcher(("a" ~ "b") ~ "c", "ab") // => false +/* +println(matcher(("a" ~ "b") ~ "c", "abc")) // => true +println(matcher(("a" ~ "b") ~ "c", "ab")) // => false // the supposedly 'evil' regular expression (a*)* b -// val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -//println(matcher(EVIL, "a" * 1000 ++ "b")) // => true -//println(matcher(EVIL, "a" * 1000)) // => false +println(matcher(EVIL, "a" * 1000 ++ "b")) // => true +println(matcher(EVIL, "a" * 1000)) // => false // size without simplifications -//println(size(der('a', der('a', EVIL)))) // => 28 -//println(size(der('a', der('a', der('a', EVIL))))) // => 58 +println(size(der('a', der('a', EVIL)))) // => 36 +println(size(der('a', der('a', der('a', EVIL))))) // => 83 // size with simplification -//println(simp(der('a', der('a', EVIL)))) -//println(simp(der('a', der('a', der('a', EVIL))))) +println(simp(der('a', der('a', EVIL)))) +println(simp(der('a', der('a', der('a', EVIL))))) -//println(size(simp(der('a', der('a', EVIL))))) // => 8 -//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 +println(size(simp(der('a', der('a', EVIL))))) // => 7 +println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in @@ -169,7 +165,7 @@ // // Lets see how long it takes to match strings with // 5 Million a's...it should be in the range of a -// couple of seconds. +// few seconds. def time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() @@ -178,19 +174,20 @@ "%.5f".format((end - start)/(i * 1.0e9)) } -//for (i <- 0 to 5000000 by 500000) { -// println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") -//} +for (i <- 0 to 5000000 by 500000) { + println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") +} // another "power" test case -//simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE +println(simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next()) == ONE) // the Iterator produces the rexp // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 50 times. - +// where SEQ is nested 100 times. +*/ } + diff -r 6e93040e3378 -r cdfa6a293453 main_templates1/README.md --- a/main_templates1/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates1/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Main 1 +# assignment2022scala - Main 1 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw01.pdf) * reference jar: [drumb.jar](https://nms.kcl.ac.uk/christian.urban/drumb.jar) \ No newline at end of file diff -r 6e93040e3378 -r cdfa6a293453 main_templates1/drumb.scala --- a/main_templates1/drumb.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates1/drumb.scala Tue Nov 01 15:03:48 2022 +0000 @@ -9,66 +9,39 @@ val rstate_portfolio = List("PLD", "PSA", "AMT", "AIV", "AVB", "BXP", "CCI", "DLR", "EQIX", "EQR", "ESS", "EXR", "FRT", "HCP") - -// (1) The function below takes a stock symbol and a year as arguments. -// It should read the corresponding CSV-file and then extract the January -// data from the given year. The data should be collected in a list of -// strings (one entry for each line in the CSV-file). - import io.Source import scala.util._ +// ADD YOUR CODE BELOW +//====================== + + +// (1) def get_january_data(symbol: String, year: Int) : List[String] = ??? -// (2) From the output of the get_january_data function, the next function -// should extract the first line (if it exists) and the corresponding -// first trading price in that year with type Option[Double]. If no line -// is generated by get_january_data then the result is None; and Some if -// there is a price. - - +// (2) def get_first_price(symbol: String, year: Int) : Option[Double] = ??? -// (3) Complete the function below that obtains all first prices -// for the stock symbols from a portfolio (list of strings) and -// for the given range of years. The inner lists are for the -// stock symbols and the outer list for the years. - - +// (3) def get_prices(portfolio: List[String], years: Range) : List[List[Option[Double]]] = ??? -// (4) The function below calculates the change factor (delta) between -// a price in year n and a price in year n + 1. - +// (4) def get_delta(price_old: Option[Double], price_new: Option[Double]) : Option[Double] = ??? -// (5) The next function calculates all change factors for all prices (from a -// portfolio). The input to this function are the nested lists created by -// get_prices above. - +// (5) def get_deltas(data: List[List[Option[Double]]]) : List[List[Option[Double]]] = ??? - - -// (6) Write a function that given change factors, a starting balance and an index, -// calculates the yearly yield, i.e. new balance, according to our dumb investment -// strategy. Index points to a year in the data list. - +// (6) def yearly_yield(data: List[List[Option[Double]]], balance: Long, index: Int) : Long = ??? -// (7) Write a function compound_yield that calculates the overall balance for a -// range of years where in each year the yearly profit is compounded to the new -// balances and then re-invested into our portfolio. For this use the function and -// results generated under (6). The function investment calls compound_yield -// with the appropriate deltas and the first index. - +// (7) def compound_yield(data: List[List[Option[Double]]], balance: Long, index: Int) : Long = ??? def investment(portfolio: List[String], years: Range, start_balance: Long) : Long = ??? diff -r 6e93040e3378 -r cdfa6a293453 main_templates2/README.md --- a/main_templates2/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates2/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Main 2 +# assignment2022scala - Main 2 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw02.pdf) * reference jar: - [danube.jar](https://nms.kcl.ac.uk/christian.urban/danube.jar) \ No newline at end of file + [danube.jar](https://nms.kcl.ac.uk/christian.urban/wordle.jar) \ No newline at end of file diff -r 6e93040e3378 -r cdfa6a293453 main_templates3/README.md --- a/main_templates3/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates3/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Main 3 +# assignment2022scala - Main 3 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw03.pdf) * reference jar: [re.jar](https://nms.kcl.ac.uk/christian.urban/re.jar) \ No newline at end of file diff -r 6e93040e3378 -r cdfa6a293453 main_templates3/re.scala --- a/main_templates3/re.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates3/re.scala Tue Nov 01 15:03:48 2022 +0000 @@ -1,23 +1,24 @@ // Main Part 3 about Regular Expression Matching -//============================================= +//============================================== object M3 { -// Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALTs(rs: List[Rexp]) extends Rexp // alternatives -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star +case class ALTs(rs: List[Rexp]) extends Rexp // alternatives +case class SEQs(rs: List[Rexp]) extends Rexp // sequences +case class STAR(r: Rexp) extends Rexp // star + -//the usual binary choice can be defined in terms of ALTs +//the usual binary choice and binary sequence can be defined +//in terms of ALTs and SEQs def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) +def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2)) // some convenience for typing regular expressions - import scala.language.implicitConversions import scala.language.reflectiveCalls @@ -42,92 +43,77 @@ def ~ (r: String) = SEQ(s, r) } - +// examples for the implicits: // ALT(CHAR('a'), CHAR('b')) -// val reg : Rexp = "a" | "b" - +// val areg : Rexp = "a" | "b" -// (1) Complete the function nullable according to -// the definition given in the coursework; this -// function checks whether a regular expression -// can match the empty string and Returns a boolean -// accordingly. - -def nullable (r: Rexp) : Boolean = ??? - - -// (2) Complete the function der according to -// the definition given in the coursework; this -// function calculates the derivative of a -// regular expression w.r.t. a character. - -def der (c: Char, r: Rexp) : Rexp = ??? +// SEQ(CHAR('a'), CHAR('b')) +// val sreg : Rexp = "a" ~ "b" -// (3) Implement the flatten function flts. It -// deletes 0s from a list of regular expressions -// and also 'spills out', or flattens, nested -// ALTernativeS. +// ADD YOUR CODE BELOW +//====================== -def flts(rs: List[Rexp]) : List[Rexp] = ??? +// (1) +def nullable (r: Rexp) : Boolean = ??? - +// (2) +def der (c: Char, r: Rexp) : Rexp = ??? -// (4) Complete the simp function according to -// the specification given in the coursework description; -// this function simplifies a regular expression from -// the inside out, like you would simplify arithmetic -// expressions; however it does not simplify inside -// STAR-regular expressions. Use the _.distinct and -// flts functions. +// (3) +def denest(rs: List[Rexp]) : List[Rexp] = ??? + +// (4) +def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = ??? +// (5) +def ALTs_smart(rs: List[Rexp]) : Rexp = ??? +def SEQs_smart(rs: List[Rexp]) : Rexp = ??? + +// (6) def simp(r: Rexp) : Rexp = ??? - -// (5) Complete the two functions below; the first -// calculates the derivative w.r.t. a string; the second -// is the regular expression matcher taking a regular -// expression and a string and checks whether the -// string matches the regular expression - +// (7) def ders (s: List[Char], r: Rexp) : Rexp = ??? - def matcher(r: Rexp, s: String): Boolean = ??? - -// (6) Complete the size function for regular -// expressions according to the specification -// given in the coursework. - +// (8) def size(r: Rexp): Int = ??? -// some testing data - +// Some testing data +//=================== /* + +simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) // => ALTs(List(ONE, CHAR(a))) +simp(((CHAR('a') | ZERO) ~ ONE) | + (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) // => CHAR(a) + +matcher(("a" ~ "b") ~ "c", "ab") // => false matcher(("a" ~ "b") ~ "c", "abc") // => true -matcher(("a" ~ "b") ~ "c", "ab") // => false + // the supposedly 'evil' regular expression (a*)* b val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +matcher(EVIL, "a" * 1000) // => false matcher(EVIL, "a" * 1000 ++ "b") // => true -matcher(EVIL, "a" * 1000) // => false + // size without simplifications -size(der('a', der('a', EVIL))) // => 28 -size(der('a', der('a', der('a', EVIL)))) // => 58 +size(der('a', der('a', EVIL))) // => 36 +size(der('a', der('a', der('a', EVIL)))) // => 83 // size with simplification -size(simp(der('a', der('a', EVIL)))) // => 8 -size(simp(der('a', der('a', der('a', EVIL))))) // => 8 +size(simp(der('a', der('a', EVIL)))) // => 7 +size(simp(der('a', der('a', der('a', EVIL))))) // => 7 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in // 30 seconds. // // Lets see how long it really takes to match strings with -// 5 Million a's...it should be in the range of a couple +// 5 Million a's...it should be in the range of a few // of seconds. def time_needed[T](i: Int, code: => T) = { diff -r 6e93040e3378 -r cdfa6a293453 main_templates4/README.md --- a/main_templates4/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates4/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Main 4 +# assignment2022scala - Main 4 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw04.pdf) * reference jar: [knight1.jar](https://nms.kcl.ac.uk/christian.urban/knight1.jar), diff -r 6e93040e3378 -r cdfa6a293453 main_templates4/knight1.scala --- a/main_templates4/knight1.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates4/knight1.scala Tue Nov 01 15:03:48 2022 +0000 @@ -9,22 +9,19 @@ // templates below. Also have a look whether the functions // at the end of the file are of any help. - - type Pos = (Int, Int) // a position on a chessboard type Path = List[Pos] // a path...a list of positions -//(1) Complete the function that tests whether the position x -// is inside the board and not yet element in the path. +// ADD YOUR CODE BELOW +//====================== + + +//(1) def is_legal(dim: Int, path: Path, x: Pos) : Boolean = ??? - -//(2) Complete the function that calculates for a position x -// all legal onward moves that are not already in the path. -// The moves should be ordered in a "clockwise" manner. - +//(2) def legal_moves(dim: Int, path: Path, x: Pos) : List[Pos] = ??? @@ -38,21 +35,12 @@ //assert(legal_moves(8, List((6,6)), (7,7)) == List((6,5), (5,6))) -//(3) Complete the two recursive functions below. -// They exhaustively search for knight's tours starting from the -// given path. The first function counts all possible tours, -// and the second collects all tours in a list of paths. - +// (3) def count_tours(dim: Int, path: Path) : Int = ??? def enum_tours(dim: Int, path: Path) : List[Path] = ??? - -//(4) Implement a first-function that finds the first -// element, say x, in the list xs where f is not None. -// In that case Return f(x), otherwise None. If possible, -// calculate f(x) only once. - +// (4) def first(xs: List[Pos], f: Pos => Option[Path]) : Option[Path] = ??? @@ -64,10 +52,7 @@ //first(List((1, 0),(2, 0),(3, 0)), foo) // None -//(5) Implement a function that uses the first-function from (4) for -// trying out onward moves, and searches recursively for a -// knight tour on a dim * dim-board. - +//(5) def first_tour(dim: Int, path: Path) : Option[Path] = ??? diff -r 6e93040e3378 -r cdfa6a293453 main_templates4/knight2.scala --- a/main_templates4/knight2.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates4/knight2.scala Tue Nov 01 15:03:48 2022 +0000 @@ -4,7 +4,6 @@ object M4b { - // !!! Copy any function you need from file knight1.scala !!! // // If you need any auxiliary functions, feel free to @@ -14,21 +13,14 @@ type Pos = (Int, Int) // a position on a chessboard type Path = List[Pos] // a path...a list of positions +// ADD YOUR CODE BELOW +//====================== -//(6) Complete the function that calculates a list of onward -// moves like in (2) but orders them according to Warnsdorf’s -// rule. That means moves with the fewest legal onward moves -// should come first. - - +//(6) def ordered_moves(dim: Int, path: Path, x: Pos) : List[Pos] = ??? -//(7) Complete the function that searches for a single *closed* -// tour using the ordered_moves function from (6). This -// function will be tested on a 6 x 6 board. - - +//(7) def first_closed_tour_heuristics(dim: Int, path: Path) : Option[Path] = ??? diff -r 6e93040e3378 -r cdfa6a293453 main_templates4/knight3.scala --- a/main_templates4/knight3.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates4/knight3.scala Tue Nov 01 15:03:48 2022 +0000 @@ -14,13 +14,11 @@ type Pos = (Int, Int) // a position on a chessboard type Path = List[Pos] // a path...a list of positions -//(9) Implement a function that searches for a -// you have to be careful to write a tail-recursive version as this -// function will be called with dimensions of up to 70 * 70 -// and starting field (0, 0). It has to produce a solution within -// 30 seconds. +// ADD YOUR CODE BELOW +//====================== +//(9) def tour_on_mega_board(dim: Int, path: Path) : Option[Path] = ??? } diff -r 6e93040e3378 -r cdfa6a293453 main_templates5/README.md --- a/main_templates5/README.md Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates5/README.md Tue Nov 01 15:03:48 2022 +0000 @@ -1,6 +1,6 @@ -# assignment2021scala - Main 5 +# assignment2022scala - Main 5 -* deadline: 21 January, 5pm +* deadline: 19 January, 5pm * [coursework description](https://nms.kcl.ac.uk/christian.urban/main_cw05.pdf) * reference jar: [bf.jar](https://nms.kcl.ac.uk/christian.urban/bf.jar), diff -r 6e93040e3378 -r cdfa6a293453 main_templates5/bf.scala --- a/main_templates5/bf.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates5/bf.scala Tue Nov 01 15:03:48 2022 +0000 @@ -5,45 +5,26 @@ object M5a { - // representation of BF memory type Mem = Map[Int, Int] - -// (1) Write a function that takes a file name as argument and -// and requests the corresponding file from disk. It returns the -// content of the file as a String. If the file does not exists, -// the function should return the empty string. - import io.Source import scala.util._ +// ADD YOUR CODE BELOW +//====================== + +// (1) def load_bff(name: String) : String = ??? - - -// (2) Complete the functions for safely reading -// and writing brainf*** memory. Safely read should -// Return the value stored in the Map for a given memory -// pointer, provided it exists; otherwise it Returns 0. The -// writing function generates a new Map with the -// same data, except at the given memory pointer the -// value v is stored. - +// (2) def sread(mem: Mem, mp: Int) : Int = ??? def write(mem: Mem, mp: Int, v: Int) : Mem = ??? - - -// (3) Implement the two jumping instructions in the -// brainf*** language. In jumpRight, given a program and -// a program counter move the program counter to the right -// until after the *matching* ]-command. Similarly, -// jumpLeft implements the move to the left to just after -// the *matching* [-command. +// (3) def jumpRight(prog: String, pc: Int, level: Int) : Int = ??? @@ -60,21 +41,7 @@ -// (4) Complete the compute function that interprets (runs) a brainf*** -// program: the arguments are a program (represented as a string), a program -// counter, a memory counter and a brainf*** memory. It Returns the -// memory at the stage when the execution of the brainf*** program -// finishes. The interpretation finishes once the program counter -// pc is pointing to something outside the program string. -// If the pc points to a character inside the program, the pc, -// memory pointer and memory need to be updated according to -// rules of the brainf*** language. Then, recursively, the compute -// function continues with the command at the new program -// counter. -// -// Implement the run function that calls compute with the program -// counter and memory counter set to 0. - +// (4) def compute(prog: String, pc: Int, mp: Int, mem: Mem) : Mem = ??? diff -r 6e93040e3378 -r cdfa6a293453 main_templates5/bfc.scala --- a/main_templates5/bfc.scala Sat Oct 08 00:30:51 2022 +0100 +++ b/main_templates5/bfc.scala Tue Nov 01 15:03:48 2022 +0000 @@ -4,7 +4,6 @@ object M5b { - // !!! Copy any function you need from file bf.scala !!! // // If you need any auxiliary function, feel free to @@ -34,43 +33,12 @@ import io.Source import scala.util._ - -// TASKS -//======= +// ADD YOUR CODE BELOW +//====================== -// (5) Write a function jtable that precomputes the "jump -// table" for a bf-program. This function takes a bf-program -// as an argument and Returns a Map[Int, Int]. The -// purpose of this map is to record the information about -// pc positions where '[' or a ']' are stored. The information -// is to which pc-position do we need to jump next? -// -// For example for the program -// -// "+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]" -// -// we obtain the map -// -// Map(69 -> 61, 5 -> 20, 60 -> 70, 27 -> 44, 43 -> 28, 19 -> 6) -// -// This states that for the '[' on position 5, we need to -// jump to position 20, which is just after the corresponding ']'. -// Similarly, for the ']' on position 19, we need to jump to -// position 6, which is just after the '[' on position 5, and so -// on. The idea is to not calculate this information each time -// we hit a bracket, but just look up this information in the -// jtable. You can use the jumpLeft and jumpRight functions -// from Part 1 for calculating the jtable. -// -// Then adapt the compute and run functions from Part 1 -// in order to take advantage of the information stored in the jtable. -// This means whenever jumpLeft and jumpRight was called previously, -// you should immediately look up the jump address in the jtable. - - +// (5) def jtable(pg: String) : Map[Int, Int] = ??? - // testcase // // jtable("""+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]""") @@ -80,26 +48,13 @@ def compute2(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = ??? def run2(pg: String, m: Mem = Map()) = ??? - // testcases // time_needed(1, run2(load_bff("benchmark.bf"))) // time_needed(1, run2(load_bff("sierpinski.bf"))) -// (6) Write a function optimise which deletes "dead code" (everything -// that is not a bf-command) and also replaces substrings of the form -// [-] by a new command 0. The idea is that the loop [-] just resets the -// memory at the current location to 0. In the compute3 and run3 functions -// below you implement this command by writing the number 0 to mem(mp), -// that is write(mem, mp, 0). -// -// The easiest way to modify a string in this way is to use the regular -// expression """[^<>+\-.\[\]]""", which recognises everything that is -// not a bf-command and replace it by the empty string. Similarly the -// regular expression """\[-\]""" finds all occurrences of [-] and -// by using the Scala method .replaceAll you can replace it with the -// string "0" standing for the new bf-command. +// (6) def optimise(s: String) : String = ??? @@ -117,37 +72,14 @@ -// (7) Write a function combine which replaces sequences -// of repeated increment and decrement commands by appropriate -// two-character commands. For example for sequences of + -// -// orig bf-cmds | replacement -// ------------------------------ -// + | +A -// ++ | +B -// +++ | +C -// | -// ... | -// | -// +++....+++ | +Z -// (where length = 26) -// -// Similar for the bf-command -, > and <. All other commands should -// be unaffected by this change. -// -// Adapt the compute4 and run4 functions such that they can deal -// appropriately with such two-character commands. - - +// (7) def combine(s: String) : String = ??? // testcase // combine(load_bff("benchmark.bf")) - def compute4(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = ??? - // should call first optimise and then combine on the input string // def run4(pg: String, m: Mem = Map()) = ???