updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Fri, 05 Nov 2021 16:47:55 +0000
changeset 396 3ffe978a5664
parent 395 017f621f5835
child 397 085fefce672e
updated
README
core_templates1/collatz.scala
core_templates2/docdiff.jar
core_templates2/docdiff.scala
core_templates3/postfix.jar
core_templates3/postfix.scala
core_templates3/postfix2.jar
core_templates3/postfix2.scala
cws/core_cw01.pdf
cws/core_cw01.tex
cws/core_cw02.pdf
cws/core_cw02.tex
cws/core_cw03.pdf
cws/core_cw03.tex
cws/main_cw01.pdf
cws/main_cw01.tex
cws/main_cw02.tex
cws/main_cw03.tex
cws/pre_cw01.pdf
cws/pre_cw01.tex
cws/pre_cw02.tex
cws/pre_cw03.pdf
cws/pre_cw03.tex
main_solution1/drumb.scala
main_solution3/re.scala
main_templates1/drumb.jar
main_templates1/drumb.scala
main_templates2/danube.jar
main_templates2/danube.scala
main_templates3/re.jar
main_templates3/re.scala
pre_templates1/collatz.jar
pre_templates1/collatz.scala
pre_templates2/docdiff.jar
pre_templates2/docdiff.scala
pre_templates3/postfix.jar
pre_templates3/postfix.scala
pre_templates3/postfix2.jar
pre_templates3/postfix2.scala
--- a/README	Thu Nov 04 12:20:12 2021 +0000
+++ b/README	Fri Nov 05 16:47:55 2021 +0000
@@ -1,3 +1,10 @@
+TA meeting
+
+https://web.microsoftstream.com/video/cec0d34d-77f5-4940-9bad-628416d4c521
+
+=============
+
+
 Wartremover
 
 ./wartremover -traverser org.wartremover.warts.Return -traverser org.wartremover.warts.Var -traverser org.wartremover.warts.MutableDataStructures $i
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core_templates1/collatz.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,45 @@
+// Core Part 1 about the 3n+1 conjecture
+//============================================
+
+object C1 {
+
+//(1) Complete the collatz function below. It should
+//    recursively calculate the number of steps needed 
+//    until the collatz series reaches the number 1.
+//    If needed, you can use an auxiliary function that
+//    performs the recursion. The function should expect
+//    arguments in the range of 1 to 1 Million.
+
+def collatz(n: Long) : Long = ???
+
+
+//(2) Complete the collatz_max function below. It should
+//    calculate how many steps are needed for each number 
+//    from 1 up to a bound and then calculate the maximum number of
+//    steps and the corresponding number that needs that many 
+//    steps. Again, you should expect bounds in the range of 1
+//    up to 1 Million. The first component of the pair is
+//    the maximum number of steps and the second is the 
+//    corresponding number.
+
+def collatz_max(bnd: Long) : (Long, Long) = ???
+
+//(3) Implement a function that calculates the last_odd
+//    number in a collatz series.  For this implement an
+//    is_pow_of_two function which tests whether a number 
+//    is a power of two. The function is_hard calculates 
+//    whether 3n + 1 is a power of two. Again you can
+//    assume the input ranges between 1 and 1 Million,
+//    and also assume that the input of last_odd will not 
+//    be a power of 2.
+
+def is_pow_of_two(n: Long) : Boolean = ???
+
+def is_hard(n: Long) : Boolean = ???
+
+def last_odd(n: Long) : Long = ???
+
+}
+
+
+
Binary file core_templates2/docdiff.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core_templates2/docdiff.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,115 @@
+// Core Part 2 about Code Similarity
+//===================================
+
+
+object C2 { 
+
+
+//(1) Complete the clean function below. It should find
+//    all words in a string using the regular expression
+//    \w+  and the library function 
+//
+//         some_regex.findAllIn(some_string)
+//
+//    The words should be Returned as a list of strings.
+
+
+def clean(s: String) : List[String] = ???
+  
+
+
+//(2) The function occurrences calculates the number of times  
+//    strings occur in a list of strings. These occurrences should 
+//    be calculated as a Map from strings to integers.
+
+
+def occurrences(xs: List[String]): Map[String, Int] = ???
+
+
+//(3) This functions calculates the dot-product of two documents
+//    (list of strings). For this it calculates the occurrence
+//    maps from (2) and then multiplies the corresponding occurrences. 
+//    If a string does not occur in a document, the product is zero.
+//    The function finally sums up all products. 
+
+
+def prod(lst1: List[String], lst2: List[String]) : Int = ???
+
+
+//(4) Complete the functions overlap and similarity. The overlap of
+//    two documents is calculated by the formula given in the assignment
+//    description. The similarity of two strings is given by the overlap
+//    of the cleaned strings (see (1)).  
+
+
+def overlap(lst1: List[String], lst2: List[String]) : Double = ???
+
+def similarity(s1: String, s2: String) : Double = ???
+
+
+
+/* Test cases
+
+
+val list1 = List("a", "b", "b", "c", "d") 
+val list2 = List("d", "b", "d", "b", "d")
+
+occurrences(List("a", "b", "b", "c", "d"))   // Map(a -> 1, b -> 2, c -> 1, d -> 1)
+occurrences(List("d", "b", "d", "b", "d"))   // Map(d -> 3, b -> 2)
+
+prod(list1,list2) // 7 
+
+overlap(list1, list2)   // 0.5384615384615384
+overlap(list2, list1)   // 0.5384615384615384
+overlap(list1, list1)   // 1.0
+overlap(list2, list2)   // 1.0
+
+// Plagiarism examples from 
+// https://desales.libguides.com/avoidingplagiarism/examples
+
+val orig1 = """There is a strong market demand for eco-tourism in
+Australia. Its rich and diverse natural heritage ensures Australia's
+capacity to attract international ecotourists and gives Australia a
+comparative advantage in the highly competitive tourism industry."""
+
+val plag1 = """There is a high market demand for eco-tourism in
+Australia. Australia has a comparative advantage in the highly
+competitive tourism industry due to its rich and varied natural
+heritage which ensures Australia's capacity to attract international
+ecotourists."""
+
+similarity(orig1, plag1) // 0.8679245283018868
+
+
+// Plagiarism examples from 
+// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php
+
+val orig2 = """No oil spill is entirely benign. Depending on timing and
+location, even a relatively minor spill can cause significant harm to
+individual organisms and entire populations. Oil spills can cause
+impacts over a range of time scales, from days to years, or even
+decades for certain spills. Impacts are typically divided into acute
+(short-term) and chronic (long-term) effects. Both types are part of a
+complicated and often controversial equation that is addressed after
+an oil spill: ecosystem recovery."""
+
+val plag2 = """There is no such thing as a "good" oil spill. If the
+time and place are just right, even a small oil spill can cause damage
+to sensitive ecosystems. Further, spills can cause harm days, months,
+years, or even decades after they occur. Because of this, spills are
+usually broken into short-term (acute) and long-term (chronic)
+effects. Both of these types of harm must be addressed in ecosystem
+recovery: a controversial tactic that is often implemented immediately
+following an oil spill."""
+
+overlap(clean(orig2), clean(plag2))  // 0.728
+similarity(orig2, plag2)             // 0.728
+
+
+ 
+// The punchline: everything above 0.6 looks suspicious and 
+// should be investigated by staff.
+
+*/
+
+}
Binary file core_templates3/postfix.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core_templates3/postfix.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,81 @@
+// Shunting Yard Algorithm
+// by Edsger Dijkstra
+// ========================
+
+object C3a {
+
+// type of tokens
+type Toks = List[String]
+
+// the operations in the basic version of the algorithm
+val ops = List("+", "-", "*", "/")
+
+// the precedences of the operators
+val precs = Map("+" -> 1,
+		"-" -> 1,
+		"*" -> 2,
+		"/" -> 2)
+
+// helper function for splitting strings into tokens
+def split(s: String) : Toks = s.split(" ").toList
+
+
+// (1) Implement below the shunting yard algorithm. The most
+// convenient way to this in Scala is to implement a recursive 
+// function and to heavily use pattern matching. The function syard 
+// takes some input tokens as first argument. The second and third 
+// arguments represent the stack and the output of the shunting yard 
+// algorithm.
+//
+// In the marking, you can assume the function is called only with 
+// an empty stack and an empty output list. You can also assume the
+// input os  only properly formatted (infix) arithmetic expressions
+// (all parentheses will be well-nested, the input only contains 
+// operators and numbers).
+
+// You can implement any additional helper function you need. I found 
+// it helpful to implement two auxiliary functions for the pattern matching:  
+// 
+
+def is_op(op: String) : Boolean = ???
+def prec(op1: String, op2: String) : Boolean = ???
+
+
+def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
+
+
+// test cases
+//syard(split("3 + 4 * ( 2 - 1 )"))  // 3 4 2 1 - * +
+//syard(split("10 + 12 * 33"))       // 10 12 33 * +
+//syard(split("( 5 + 7 ) * 2"))      // 5 7 + 2 *
+//syard(split("5 + 7 / 2"))          // 5 7 2 / +
+//syard(split("5 * 7 / 2"))          // 5 7 * 2 /
+//syard(split("9 + 24 / ( 7 - 3 )")) // 9 24 7 3 - / +
+
+//syard(split("3 + 4 + 5"))           // 3 4 + 5 +
+//syard(split("( ( 3 + 4 ) + 5 )"))    // 3 4 + 5 +
+//syard(split("( 3 + ( 4 + 5 ) )"))    // 3 4 5 + +
+//syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + +
+
+ 
+// (2) Implement a compute function that evaluates an input list
+// in postfix notation. This function takes a list of tokens
+// and a stack as argumenta. The function should produce the 
+// result as an integer using the stack. You can assume 
+// this function will be only called with proper postfix 
+// expressions.    
+
+def compute(toks: Toks, st: List[Int] = Nil) : Int = ???
+
+
+// test cases
+// compute(syard(split("3 + 4 * ( 2 - 1 )")))  // 7
+// compute(syard(split("10 + 12 * 33")))       // 406
+// compute(syard(split("( 5 + 7 ) * 2")))      // 24
+// compute(syard(split("5 + 7 / 2")))          // 8
+// compute(syard(split("5 * 7 / 2")))          // 17
+// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15
+
+}
+
+
Binary file core_templates3/postfix2.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core_templates3/postfix2.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,69 @@
+// Shunting Yard Algorithm 
+// including Associativity for Operators 
+// =====================================
+
+object C3b {
+
+
+// type of tokens
+type Toks = List[String]
+
+// helper function for splitting strings into tokens
+def split(s: String) : Toks = s.split(" ").toList
+
+// left- and right-associativity
+abstract class Assoc
+case object LA extends Assoc
+case object RA extends Assoc
+
+
+// power is right-associative,
+// everything else is left-associative
+def assoc(s: String) : Assoc = s match {
+  case "^" => RA
+  case _ => LA
+}
+
+
+// the precedences of the operators
+val precs = Map("+" -> 1,
+  		"-" -> 1,
+		"*" -> 2,
+		"/" -> 2,
+                "^" -> 4)
+
+// the operations in the basic version of the algorithm
+val ops = List("+", "-", "*", "/", "^")
+
+// (3) Implement the extended version of the shunting yard algorithm.
+// This version should properly account for the fact that the power 
+// operation is right-associative. Apart from the extension to include
+// the power operation, you can make the same assumptions as in 
+// basic version.
+
+def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
+
+
+// test cases
+// syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3"))  // 3 4 8 * 5 1 - 2 3 ^ ^ / +
+
+
+// (4) Implement a compute function that produces an Int for an
+// input list of tokens in postfix notation.
+
+def compute(toks: Toks, st: List[Int] = Nil) : Int = ???
+
+
+// test cases
+// compute(syard(split("3 + 4 * ( 2 - 1 )")))   // 7
+// compute(syard(split("10 + 12 * 33")))       // 406
+// compute(syard(split("( 5 + 7 ) * 2")))      // 24
+// compute(syard(split("5 + 7 / 2")))          // 8
+// compute(syard(split("5 * 7 / 2")))          // 17
+// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15
+// compute(syard(split("4 ^ 3 ^ 2")))      // 262144
+// compute(syard(split("4 ^ ( 3 ^ 2 )")))  // 262144
+// compute(syard(split("( 4 ^ 3 ) ^ 2")))  // 4096
+// compute(syard(split("( 3 + 1 ) ^ 2 ^ 3")))   // 65536
+
+}
Binary file cws/core_cw01.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cws/core_cw01.tex	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,299 @@
+% !TEX program = xelatex
+\documentclass{article}
+\usepackage{../style}
+\usepackage{disclaimer}
+\usepackage{../langs}
+
+
+
+\begin{document}
+
+\section*{Core Part 1 (Scala, 3 Marks)}
+
+\mbox{}\hfill\textit{``The most effective debugging tool is still careful thought,}\\
+\mbox{}\hfill\textit{coupled with judiciously placed print statements.''}\smallskip\\
+\mbox{}\hfill\textit{ --- Brian W. Kernighan, in Unix for Beginners (1979)}\bigskip
+
+ 
+\IMPORTANT{This part is about Scala. It is due on \cwSIX{} at 5pm and worth 3\%.
+Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.}
+
+\noindent
+Also note that the running time of each part will be restricted to a
+maximum of 30 seconds on my laptop.
+
+\DISCLAIMER{}
+
+\subsection*{Reference Implementation}
+
+Like the C++ assignments, the Scala assignments will work like this: you
+push your files to GitHub and receive (after sometimes a long delay) some
+automated feedback. In the end we take a snapshot of the submitted files and
+apply an automated marking script to them.\medskip
+
+\noindent
+In addition, the Scala coursework comes with a reference implementation
+in form of \texttt{jar}-files. This allows you to run any test cases on
+your own computer. For example you can call Scala on the command line
+with the option \texttt{-cp collatz.jar} and then query any function
+from the template file. Say you want to find out what the functions
+\texttt{collatz} and \texttt{collatz\_max} produce: for this you just
+need to prefix them with the object name \texttt{C1}. If you want to
+find out what these functions produce for the argument \texttt{6}, you
+would type something like:
+
+\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+$ scala -cp collatz.jar
+  
+scala> C1.collatz(6)
+...
+scala> C1.collatz_max(6)
+...
+\end{lstlisting}%$
+
+\subsection*{Hints}
+
+\noindent
+\textbf{For the Core Part 1:} useful math operators: \texttt{\%} for modulo, \texttt{\&} for bit-wise and; useful
+functions: \mbox{\texttt{(1\,to\,10)}} for ranges, \texttt{.toInt},
+\texttt{.toList} for conversions, you can use \texttt{List(...).max} for the
+maximum of a list, \texttt{List(...).indexOf(...)} for the first index of
+a value in a list.\bigskip
+
+
+
+\newpage
+\subsection*{Core Part 1 (3 Marks, file collatz.scala)}
+
+This part is about function definitions and recursion. You are asked
+to implement a Scala program that tests examples of the
+\emph{$3n + 1$-conjecture}, also called \emph{Collatz
+  conjecture}.\video{https://www.youtube.com./watch?v=LqKpkdRRLZw}
+This conjecture can be described as follows: Start with any positive
+number $n$ greater than $0$:
+
+\begin{itemize}
+\item If $n$ is even, divide it by $2$ to obtain $n / 2$.
+\item If $n$ is odd, multiply it by $3$ and add $1$ to obtain $3n +
+  1$.
+\item Repeat this process and you will always end up with $1$.
+\end{itemize}
+
+\noindent
+For example if you start with, say, $6$ and $9$, you obtain the
+two \emph{Collatz series}
+%
+\[
+\begin{array}{@{}l@{\hspace{5mm}}l@{}}
+6, 3, 10, 5, 16, 8, 4, 2, 1 & \text{(= 8 steps)}\\
+9, 28, 14, 7, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1  & \text{(= 19 steps)}\\
+\end{array}
+\]
+
+\noindent
+As you can see, the numbers go up and down like a roller-coaster, but
+curiously they seem to always terminate in $1$. Nobody knows why. The
+conjecture is that this will \emph{always} happen for every number
+greater than 0.\footnote{While it is relatively easy to test this
+conjecture with particular numbers, it is an interesting open problem to
+\emph{prove} that the conjecture is true for \emph{all} numbers ($> 0$).
+Paul Erd\"o{}s, a famous mathematician you might have heard about, said
+about this conjecture: ``Mathematics may not [yet] be ready for such
+problems.'' and also offered a \$500 cash prize for its solution.
+Jeffrey Lagarias, another mathematician, claimed that based only on
+known information about this problem, ``this is an extraordinarily
+difficult problem, completely out of reach of present day mathematics.''
+There is also a \href{https://xkcd.com/710/}{xkcd} cartoon about this
+conjecture\here{https://xkcd.com/710/}). If you are able to solve this
+conjecture, you will definitely get famous.}\bigskip
+
+\noindent
+\textbf{Tasks}
+
+\begin{itemize}
+\item[(1)] You are asked to implement a recursive function that
+  calculates the number of steps needed until a series ends
+  with $1$. In case of starting with $6$, it takes $8$ steps and in
+  case of starting with $9$, it takes $19$ (see above). We assume it 
+  takes $0$ steps, if we start with $1$. In order to
+  try out this function with large numbers, you should use
+  \texttt{Long} as argument type, instead of \texttt{Int}.  You can
+  assume this function will be called with numbers between $1$ and
+  $1$ Million. \hfill[1 Mark]
+
+\item[(2)] Write a second function that takes an upper bound as
+  an argument and calculates the steps for all numbers in the range from
+  1 up to this bound (the bound including). It returns the maximum number of
+  steps and the corresponding number that needs that many steps.  More
+  precisely it returns a pair where the first component is the number
+  of steps and the second is the corresponding number. \hfill\mbox{[1
+    Mark]}
+
+\item[(3)] Write a function that calculates \emph{hard
+    numbers} \here{https://medium.com/cantors-paradise/the-collatz-conjecture-some-shocking-results-from-180-000-iterations-7fea130d0377}
+  in the Collatz series---these are the last odd numbers just before a
+  power of two is reached.  For this, implement an
+  \textit{is-power-of-two} function which tests whether a number is a
+  power of two. The easiest way to implement this is by using the
+  bit-operator $\&$ of Scala. For a power of two, say $n$ with $n > 0$, it
+  holds that $n \;\&\; (n - 1)$ is equal to zero. I let you think why
+  this is the case.
+
+  The function \textit{is-hard} calculates whether
+  $3n + 1$ is a power of two.  Finally the \textit{last-odd} function
+  calculates the last odd number before a power of 2 in the Collatz
+  series. This means for example when starting with 9,
+  we receive 5 as the last odd number.  Surprisingly a lot of numbers
+  have 5 as last-odd number. But for example for 113 we obtain 85,
+  because of the series
+  %
+  \[113, 340, 170, \,\fbox{85}\,, 256, 128, 64, 32, 16, 8, 4, 2, 1\]
+
+  The \textit{last-odd} function will only be called with numbers that are not
+  powers of 2 themselves.\hfill\mbox{[1 Mark]}
+\end{itemize}
+
+\noindent
+\textbf{Test Data:} Some test ranges and cases are:
+
+\begin{itemize}
+\item 1 to 10 where $9$ takes 19 steps 
+\item 1 to 100 where $97$ takes 118 steps,
+\item 1 to 1,000 where $871$ takes 178 steps,
+\item 1 to 10,000 where $6,171$ takes 261 steps,
+\item 1 to 100,000 where $77,031$ takes 350 steps, 
+\item 1 to 1 Million where $837,799$ takes 524 steps
+  %% runs out of stack space
+  %% \item[$\bullet$] $1 - 10$ million where $8,400,511$ takes 685 steps
+\item 21 is the last odd number for 84
+\item 341 is the last odd number for 201, 604, 605 and 8600
+  
+\end{itemize}
+  
+
+
+
+
+
+\end{document}
+
+
+%%%%%%% Historical Stuff
+\newpage
+
+This part is about web-scraping and list-processing in Scala. It uses
+online data about the per-capita alcohol consumption for each country
+(per year?), and a file containing the data about the population size of
+each country.  From this data you are supposed to estimate how many
+litres of pure alcohol are consumed worldwide.\bigskip
+
+\noindent
+\textbf{Tasks (file alcohol.scala):}
+
+\begin{itemize}
+\item[(1)] Write a function that given an URL requests a
+  comma-separated value (CSV) list.  We are interested in the list
+  from the following URL
+
+\begin{center}
+  \url{https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv}
+\end{center}
+
+\noindent Your function should take a string (the URL) as input, and
+produce a list of strings as output, where each string is one line in
+the corresponding CSV-list.  This list from the URL above should
+contain 194 lines.\medskip
+
+\noindent
+Write another function that can read the file \texttt{population.csv}
+from disk (the file is distributed with the assignment). This
+function should take a string as argument, the file name, and again
+return a list of strings corresponding to each entry in the
+CSV-list. For \texttt{population.csv}, this list should contain 216
+lines.\hfill[1 Mark]
+
+
+\item[(2)] Unfortunately, the CSV-lists contain a lot of ``junk'' and we
+  need to extract the data that interests us.  From the header of the
+  alcohol list, you can see there are 5 columns
+  
+  \begin{center}
+    \begin{tabular}{l}
+      \texttt{country (name),}\\
+      \texttt{beer\_servings,}\\
+      \texttt{spirit\_servings,}\\
+      \texttt{wine\_servings,}\\
+      \texttt{total\_litres\_of\_pure\_alcohol}
+    \end{tabular}  
+  \end{center}
+
+  \noindent
+  Write a function that extracts the data from the first column,
+  the country name, and the data from the fifth column (converted into
+  a \texttt{Double}). For this go through each line of the CSV-list
+  (except the first line), use the \texttt{split(",")} function to
+  divide each line into an array of 5 elements. Keep the data from the
+  first and fifth element in these arrays.\medskip
+
+  \noindent
+  Write another function that processes the population size list. This
+  is already of the form country name and population size.\footnote{Your
+    friendly lecturer already did the messy processing for you from the
+  Worldbank database, see \url{https://github.com/datasets/population/tree/master/data} for the original.} Again, split the
+  strings according to the commas. However, this time generate a
+  \texttt{Map} from country names to population sizes.\hfill[1 Mark]
+
+\item[(3)] In (2) you generated the data about the alcohol consumption
+  per-capita for each country, and also the population size for each
+  country. From this generate next a sorted(!) list of the overall
+  alcohol consumption for each country. The list should be sorted from
+  highest alcohol consumption to lowest. The difficulty is that the
+  data is scraped off from ``random'' sources on the Internet and
+  annoyingly the spelling of some country names does not always agree in both
+  lists. For example the alcohol list contains
+  \texttt{Bosnia-Herzegovina}, while the population writes this country as
+  \texttt{Bosnia and Herzegovina}. In your sorted
+  overall list include only countries from the alcohol list, whose
+  exact country name is also in the population size list. This means
+  you can ignore countries like Bosnia-Herzegovina from the overall
+  alcohol consumption. There are 177 countries where the names
+  agree. The UK is ranked 10th on this list by
+  consuming 671,976,864 Litres of pure alcohol each year.\medskip
+  
+  \noindent
+  Finally, write another function that takes an integer, say
+  \texttt{n}, as argument. You can assume this integer is between 0
+  and 177 (the number of countries in the sorted list above).  The
+  function should return a triple, where the first component is the
+  sum of the alcohol consumption in all countries (on the list); the
+  second component is the sum of the \texttt{n}-highest alcohol
+  consumers on the list; and the third component is the percentage the
+  \texttt{n}-highest alcohol consumers drink with respect to the
+  the world consumption. You will see that according to our data, 164
+  countries (out of 177) gobble up 100\% of the World alcohol
+  consumption.\hfill\mbox{[1 Mark]}
+\end{itemize}
+
+\noindent
+\textbf{Hints:} useful list functions: \texttt{.drop(n)},
+\texttt{.take(n)} for dropping or taking some elements in a list,
+\texttt{.getLines} for separating lines in a string;
+\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
+elements in the pairs---the sorting is done from smallest to highest;
+useful \texttt{Map} functions: \texttt{.toMap} converts a list of
+pairs into a \texttt{Map}, \texttt{.isDefinedAt(k)} tests whether the
+map is defined at that key, that is would produce a result when
+called with this key; useful data functions: \texttt{Source.fromURL},
+\texttt{Source.fromFile} for obtaining a webpage and reading a file.
+
+\newpage
+
+
+
+
+
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
Binary file cws/core_cw02.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cws/core_cw02.tex	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,161 @@
+% !TEX program = xelatex
+\documentclass{article}
+\usepackage{../style}
+\usepackage{disclaimer}
+\usepackage{../langs}
+
+\begin{document}
+
+
+%% should ask to lower case the words.
+
+\section*{Core Part 2 (Scala, 3 Marks)}
+
+\mbox{}\hfill\textit{``What one programmer can do in one month,}\\
+\mbox{}\hfill\textit{two programmers can do in two months.''}\smallskip\\
+\mbox{}\hfill\textit{ --- Frederick P.~Brooks (author of The Mythical Man-Month)}\bigskip\medskip
+
+\IMPORTANT{You are asked to implement a Scala program for measuring similarity in
+  texts. The preliminary part is due on \cwSEVEN{} at 5pm and worth 3\%.
+  Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.}
+
+\noindent
+Also note that the running time of each part will be restricted to a
+maximum of 30 seconds on my laptop.
+
+\DISCLAIMER{}
+
+
+\subsection*{Reference Implementation}
+
+Like the C++ part, the Scala part works like this: you
+push your files to GitHub and receive (after sometimes a long delay) some
+automated feedback. In the end we will take a snapshot of the submitted files and
+apply an automated marking script to them.\medskip
+
+\noindent
+In addition, the Scala part comes with reference
+implementations in form of \texttt{jar}-files. This allows you to run
+any test cases on your own computer. For example you can call Scala on
+the command line with the option \texttt{-cp docdiff.jar} and then
+query any function from the template file. Say you want to find out
+what the function \texttt{occurrences} produces: for this you just need
+to prefix it with the object name \texttt{C2}.  If you want to find out what
+these functions produce for the list \texttt{List("a", "b", "b")},
+you would type something like:
+
+\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+$ scala -cp docdiff.jar
+  
+scala> C2.occurrences(List("a", "b", "b"))
+...
+\end{lstlisting}%$
+
+\subsection*{Hints}
+
+\noindent
+\textbf{For the Core Part 2:} useful operations involving regular
+expressions:
+\[\texttt{reg.findAllIn(s).toList}\]
+\noindent finds all
+substrings in \texttt{s} according to a regular regular expression
+\texttt{reg}; useful list operations: \texttt{.distinct}
+removing duplicates from a list, \texttt{.count} counts the number of
+elements in a list that satisfy some condition, \texttt{.toMap}
+transfers a list of pairs into a Map, \texttt{.sum} adds up a list of
+integers, \texttt{.max} calculates the maximum of a list.\bigskip
+
+
+
+\newpage
+\subsection*{Core Part 2 (3 Marks, file docdiff.scala)}
+
+It seems plagiarism---stealing and submitting someone
+else's code---is a serious problem at other
+universities.\footnote{Surely, King's students, after all their
+  instructions and warnings, would never commit such an offence. Yes?}
+Detecting such plagiarism is time-consuming and disheartening for
+lecturers at those universities. To aid these poor souls, let's
+implement in this part a program that determines the similarity
+between two documents (be they source code or texts in English). A
+document will be represented as a list of strings.
+
+
+\subsection*{Tasks}
+
+\begin{itemize}
+\item[(1)] Implement a function that `cleans' a string by finding all
+  (proper) words in the string. For this use the regular expression
+  \texttt{\textbackslash{}w+} for recognising words and the library function
+  \texttt{findAllIn}. The function should return a document (a list of
+  strings).
+  \mbox{}\hfill\mbox{[0.5 Marks]}
+
+\item[(2)] In order to compute the overlap between two documents, we
+  associate each document with a \texttt{Map}. This Map represents the
+  strings in a document and how many times these strings occur in the
+  document. A simple (though slightly inefficient) method for counting
+  the number of string-occurrences in a document is as follows: remove
+  all duplicates from the document; for each of these (unique)
+  strings, count how many times they occur in the original document.
+  Return a Map associating strings with occurrences. For example
+
+  \begin{center}
+  \pcode{occurrences(List("a", "b", "b", "c", "d"))}
+  \end{center}
+
+  produces \pcode{Map(a -> 1, b -> 2, c -> 1, d -> 1)} and
+
+  \begin{center}
+  \pcode{occurrences(List("d", "b", "d", "b", "d"))}
+  \end{center}
+
+  produces \pcode{Map(d -> 3, b -> 2)}.\hfill[1 Mark]
+
+\item[(3)] You can think of the Maps calculated under (2) as memory-efficient
+  representations of sparse ``vectors''. In this subtask you need to
+  implement the \emph{product} of two such vectors, sometimes also called
+  \emph{dot product} of two vectors.\footnote{\url{https://en.wikipedia.org/wiki/Dot_product}}
+
+  For this dot product, implement a function that takes two documents
+  (\texttt{List[String]}) as arguments. The function first calculates
+  the (unique) strings in both. For each string, it multiplies the
+  corresponding occurrences in each document. If a string does not
+  occur in one of the documents, then the product for this string is zero. At the end
+  you need to add up all products. For the two documents in (2) the dot
+  product is 7, because
+
+  \[
+    \underbrace{1 * 0}_{"a"} \;\;+\;\;
+    \underbrace{2 * 2}_{"b"} \;\;+\;\;
+    \underbrace{1 * 0}_{"c"} \;\;+\;\;
+    \underbrace{1 * 3}_{"d"} \qquad = 7
+  \]  
+  
+  \hfill\mbox{[1 Mark]}
+
+\item[(4)] Implement first a function that calculates the overlap
+  between two documents, say $d_1$ and $d_2$, according to the formula
+
+  \[
+  \texttt{overlap}(d_1, d_2) = \frac{d_1 \cdot d_2}{max(d_1^2, d_2^2)}  
+  \]
+
+  where $d_1^2$ means $d_1 \cdot d_1$ and so on.
+  You can expect this function to return a \texttt{Double} between 0 and 1. The
+  overlap between the lists in (2) is $0.5384615384615384$.
+
+  Second, implement a function that calculates the similarity of
+  two strings, by first extracting the substrings using the clean
+  function from (1)
+  and then calculating the overlap of the resulting documents.\\
+  \mbox{}\hfill\mbox{[0.5 Marks]}
+\end{itemize}
+
+
+\end{document} 
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
Binary file cws/core_cw03.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cws/core_cw03.tex	Fri Nov 05 16:47:55 2021 +0000
@@ -0,0 +1,181 @@
+% !TEX program = xelatex
+\documentclass{article}
+\usepackage{../style}
+\usepackage{../langs}
+\usepackage{disclaimer}
+\usepackage{tikz}
+\usepackage{pgf}
+\usepackage{pgfplots}
+\usepackage{stackengine}
+%% \usepackage{accents}
+\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}}
+\usepackage{ulem}
+
+\begin{document}
+
+% BF IDE
+% https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
+  
+\section*{Core Part 3 (Scala, 3 Marks)}
+
+\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\
+\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\
+\mbox{}\hfill\textit{other functional languages.''}\smallskip\\
+\mbox{}\hfill\textit{ --- Dean and Ghemawat, who designed this concept at Google}
+\bigskip
+
+\IMPORTANT{This part is about the shunting yard algorithm by Dijkstra.
+  The preliminary part is due on \sout{\cwEIGHT{}} \textcolor{red}{11 December}
+  at 5pm and worth 3\%.
+  Any 1\% you achieve in the preliminary part counts as your
+  ``weekly engagement''.}
+
+\noindent
+Also note that the running time of each part will be restricted to a
+maximum of 30 seconds on my laptop.  
+
+\DISCLAIMER{}
+
+\subsection*{Reference Implementation}
+
+This Scala assignment comes with two reference implementations in
+form of \texttt{jar}-files. This allows
+you to run any test cases on your own computer. For example you can
+call Scala on the command line with the option \texttt{-cp
+  postfix.jar} and then query any function from the
+\texttt{postfix.scala} file (similarly for file \texttt{postfix2.scala}). As
+usual you have to prefix the calls with \texttt{C3a} and
+\texttt{C3b}, respectively.
+
+
+\begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small]
+$ scala -cp postfix.jar
+
+scala> C3a.syard(C3a.split("( 5 + 7 ) * 2"))
+val res0: C3a.Toks = List(5, 7, +, 2, *)
+\end{lstlisting}%$
+
+
+\subsection*{Hints}
+
+\noindent
+\textbf{For Core Part 3:} useful operations for determining
+whether a string is a number are \texttt{.forall} and \texttt{.isDigit}.
+One way to calculate the the power operation is to use \texttt{.pow}
+on \texttt{BigInt}s, like \texttt{BigInt(n).pow(m).toInt}.
+\bigskip
+
+
+\subsection*{Core Part (3 Marks, files postfix.scala, postfix2.scala)}
+
+The \emph{Shunting Yard Algorithm} has been developed by Edsger Dijkstra,
+an influential computer scientist who developed many well-known
+algorithms. This algorithm transforms the usual infix notation of
+arithmetic expressions into the postfix notation, sometimes also
+called reverse Polish notation.
+
+Why on Earth do people use the postfix notation? It is much more
+convenient to work with the usual infix notation for arithmetic
+expressions. Most modern pocket calculators (as opposed to the ones used 20
+years ago) understand infix notation. So why on Earth? \ldots{}Well,
+many computers under the hood, even nowadays, use postfix notation
+extensively. For example if you give to the Java compiler the
+expression $1 + ((2 * 3) + (4 - 3))$, it will generate the Java Byte
+code
+
+\begin{lstlisting}[language=JVMIS,numbers=none]
+ldc 1 
+ldc 2 
+ldc 3 
+imul 
+ldc 4 
+ldc 3 
+isub 
+iadd 
+iadd
+\end{lstlisting}
+
+\noindent
+where the command \texttt{ldc} loads a constant onto the stack, and \texttt{imul},
+\texttt{isub} and \texttt{iadd} are commands acting on the stack. Clearly this
+is the arithmetic expression in postfix notation.\bigskip
+
+\noindent
+The shunting yard algorithm processes an input token list using an
+operator stack and an output list. The input consists of numbers,
+operators ($+$, $-$, $*$, $/$) and parentheses, and for the purpose of
+the assignment we assume the input is always a well-formed expression
+in infix notation.  The calculation in the shunting yard algorithm uses
+information about the
+precedences of the operators (given in the template file). The
+algorithm processes the input token list as follows:
+
+\begin{itemize}
+\item If there is a number as input token, then this token is
+  transferred directly to the output list. Then the rest of the input is
+  processed.
+\item If there is an operator as input token, then you need to check
+  what is on top of the operator stack. If there are operators with
+  a higher or equal precedence, these operators are first popped off
+  from the stack and moved to the output list. Then the operator from the input
+  is pushed onto the stack and the rest of the input is processed.
+\item If the input is a left-parenthesis, you push it on to the stack
+  and continue processing the input.
+\item If the input is a right-parenthesis, then you pop off all operators
+  from the stack to the output list until you reach the left-parenthesis.
+  Then you discharge the $($ and $)$ from the input and stack, and continue
+  processing the input list.
+\item If the input is empty, then you move all remaining operators
+  from the stack to the output list.  
+\end{itemize}  
+
+\subsubsection*{Tasks (file postfix.scala)}
+
+\begin{itemize}
+\item[(1)] Implement the shunting yard algorithm described above. The
+  function, called \texttt{syard}, takes a list of tokens as first
+  argument. The second and third arguments are the stack and output
+  list represented as Scala lists. The most convenient way to
+  implement this algorithm is to analyse what the input list, stack
+  and output list look like in each step using pattern-matching. The
+  algorithm transforms for example the input
+
+  \[
+  \texttt{List(3, +, 4, *, (, 2, -, 1, ))}
+  \]
+
+  into the postfix output
+
+  \[
+  \texttt{List(3, 4, 2, 1, -, *, +)}
+  \]  
+  
+  You can assume the input list is always a  list representing
+  a well-formed infix arithmetic expression.\hfill[1 Mark]
+
+\item[(2)] Implement a compute function that takes a postfix expression
+  as argument and evaluates it generating an integer as result. It uses a
+  stack to evaluate the postfix expression. The operators $+$, $-$, $*$
+  are as usual; $/$ is division on integers, for example $7 / 3 = 2$.
+  \hfill[1 Mark]
+\end{itemize}
+
+\subsubsection*{Task (file postfix2.scala)}
+
+\begin{itemize}
+\item[(3/4)] Extend the code in (1) and (2) to include the power
+  operator.  This requires proper account of associativity of
+  the operators. The power operator is right-associative, whereas the
+  other operators are left-associative.  Left-associative operators
+  are popped off if the precedence is bigger or equal, while
+  right-associative operators are only popped off if the precedence is
+  bigger.\hfill[1 Marks]
+\end{itemize}
+
+\end{document}
+
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
Binary file cws/main_cw01.pdf has changed
--- a/cws/main_cw01.tex	Thu Nov 04 12:20:12 2021 +0000
+++ b/cws/main_cw01.tex	Fri Nov 05 16:47:55 2021 +0000
@@ -33,19 +33,19 @@
 function from the template file. Say you want to find out what
 the function \code{get_january_data}
 produces: for this you just need to prefix them with the object name
-\texttt{CW6b} and call them with some arguments: 
+\texttt{M1} and call them with some arguments: 
 
 \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
 $ scala -cp drumb.jar
   
-scala> CW6b.get_january_data("FB", 2014)
+scala> M1.get_january_data("FB", 2014)
 val res2: List[String] = List(2014-01-02,54.709999,....)
 \end{lstlisting}%$
 
 \subsection*{Hints}
 
 \noindent
-\textbf{For the Main Part:} useful string functions:
+\textbf{For Main Part 1:} useful string functions:
 \texttt{.startsWith(...)} for checking whether a string has a given
 prefix, \texttt{\_ ++ \_} for concatenating two strings; useful option
 functions: \texttt{.flatten} flattens a list of options such that it
@@ -58,13 +58,13 @@
 \texttt{.split(",").toList} for splitting strings according to a
 comma.\bigskip
 
-\noindent
+\noindent\alert
 \textbf{Note!} Fortunately Scala supports operator overloading. But
 make sure you understand the difference between \texttt{100 / 3} and
 \texttt{100.0 / 3}!
 
 \newpage
-\subsection*{Main Part (7 Marks, file drumb.scala)}
+\subsection*{Main Part 1 (7 Marks, file drumb.scala)}
 
 A purely fictional character named Mr T.~Drumb inherited in 1978
 approximately 200 Million Dollar from his father. Mr Drumb prides
@@ -102,7 +102,13 @@
 severely rate-limited.  Therefore this part comes with a number
 of files containing CSV-lists with the historical stock prices for the
 companies in our portfolios. Use these files for the following
-tasks.\bigskip
+tasks.\medskip
+
+\noindent\alert
+\textbf{Note:} Do not hardcode the path to the CSV-files. The testing
+framework will assume that these files are in the same directory as the
+drumb.scala file.
+\bigskip
 
 \newpage
 \noindent
@@ -231,7 +237,9 @@
 of companies that went bust or were de-listed over the years.
 So where does this leave our fictional character Mr T.~Drumb? Well, given
 his inheritance, a really dumb investment strategy would have done
-equally well, if not much better.\medskip
+equally well, if not much better. And one would assume this guy is
+by now locked up in prison and the key thrown away, but alas he
+is still around annoying commonsense people.\medskip
 
 \end{document}
 
--- a/cws/main_cw02.tex	Thu Nov 04 12:20:12 2021 +0000
+++ b/cws/main_cw02.tex	Fri Nov 05 16:47:55 2021 +0000
@@ -9,7 +9,7 @@
 
 %% should ask to lower case the words.
 
-\section*{Main Part 2 (Scala, 7 Marks)}
+\section*{Main Part 2 (Scala, 6 Marks)}
 
 
 \noindent
@@ -39,7 +39,7 @@
 the command line with the option \texttt{-cp danube.jar} and then
 query any function from the template file. Say you want to find out
 what the function \texttt{} produces: for this you just need
-to prefix it with the object name \texttt{CW7b}.  If you want to find out what
+to prefix it with the object name \texttt{M2}.  If you want to find out what
 these functions produce for the list \texttt{List("a", "b", "b")},
 you would type something like:
 
@@ -48,7 +48,7 @@
 scala> val ratings_url =
      | """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
 
-scala> CW7b.get_csv_url(ratings_url)
+scala> M2.get_csv_url(ratings_url)
 val res0: List[String] = List(1,1,4 ...)
 \end{lstlisting}%$
 
@@ -70,7 +70,7 @@
 \newpage
 
 
-\subsection*{Main Part 2 (7 Marks, file danube.scala)}
+\subsection*{Main Part 2 (6 Marks, file danube.scala)}
 
 You are creating Danube.co.uk which you hope will be the next big thing
 in online movie renting. You know that you can save money by
@@ -189,17 +189,17 @@
   then return fewer than two movie names.\\
   \mbox{}\hfill [1 Mark]
 
-\item[(7)] Calculate the recommendations for all movies according to
- what the recommendations function in (6) produces (this
- can take a few seconds). Put all recommendations into a list 
- (of strings) and count how often the strings occur in
- this list. This produces a list of string-int pairs,
- where the first component is the movie name and the second
- is the number of how many times the movie was recommended. 
- Sort all the pairs according to the number
- of times they were recommended (most recommended movie name 
- first).\\
- \mbox{}\hfill [1 Mark]
+%\item[(7)] Calculate the recommendations for all movies according to
+% what the recommendations function in (6) produces (this
+% can take a few seconds). Put all recommendations into a list 
+% (of strings) and count how often the strings occur in
+% this list. This produces a list of string-int pairs,
+% where the first component is the movie name and the second
+% is the number of how many times the movie was recommended. 
+% Sort all the pairs according to the number
+% of times they were recommended (most recommended movie name 
+% first).\\
+% \mbox{}\hfill [1 Mark]
   
 \end{itemize}
 
--- a/cws/main_cw03.tex	Thu Nov 04 12:20:12 2021 +0000
+++ b/cws/main_cw03.tex	Fri Nov 05 16:47:55 2021 +0000
@@ -119,7 +119,7 @@
 % BF IDE
 % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
   
-\section*{Main Part 3 (Scala, 7 Marks)}
+\section*{Main Part 3 (Scala, 6 Marks)}
 
 %\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\
 %\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\
@@ -150,7 +150,7 @@
 computer. For example you can call Scala on the command line with the
 option \texttt{-cp re.jar} and then query any function from the
 \texttt{re.scala} template file. As usual you have to prefix the calls
-with \texttt{CW8c} or import this object.  Since some tasks
+with \texttt{M3} or import this object.  Since some tasks
 are time sensitive, you can check the reference implementation as
 follows: if you want to know, for example, how long it takes to match
 strings of $a$'s using the regular expression $(a^*)^*\cdot b$ you can
@@ -159,7 +159,7 @@
 
 \begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small]
 $ scala -cp re.jar
-scala> import CW8c._  
+scala> import M3._  
 scala> for (i <- 0 to 5000000 by 500000) {
   | println(f"$i: ${time_needed(2, matcher(EVIL, "a" * i))}%.5f secs.")
   | }
@@ -230,12 +230,21 @@
   $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
   $\ONE$  & $\mapsto$ & \texttt{ONE}\\
   $c$     & $\mapsto$ & \texttt{CHAR(c)}\\
+  $\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\  
   $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\
   $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\
   $r^*$ & $\mapsto$ &  \texttt{STAR(r)} & \texttt{r.\%}
 \end{tabular}    
 \end{center}  
 
+\noindent
+The alternative regular expressions comes in two versions: one is
+binary (+ / \texttt{ALT}) and the other is n-ary ($\sum$ /
+\texttt{ALTs}). The latter takes a list of regular expressions as
+arguments.  In what follows we shall use $rs$ to stand for lists of
+regular expressions.  The binary alternative can be seen as an abbreviation,
+that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the
+binary version and only implement the n-ary alternative.
 
 \begin{itemize}
 \item[(1)] Implement a function, called \textit{nullable}, by
@@ -250,11 +259,11 @@
 $\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\
 $\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
 $\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
-$\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\
+$\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\
 $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
 $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
 \end{tabular}
-\end{center}~\hfill[1 Mark]
+\end{center}~\hfill[0.5 Marks]
 
 \item[(2)] Implement a function, called \textit{der}, by recursion over
   regular expressions. It takes a character and a regular expression
@@ -266,7 +275,7 @@
 $\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\
 $\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
 $\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
-$\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\
+$\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\
 $\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
       & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
       & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
@@ -312,7 +321,32 @@
 Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\
 \mbox{}\hfill\mbox{[1 Mark]}
 
-\item[(3)] Implement the function \textit{simp}, which recursively
+\item[(3)] We next want to simplify regular expressions: essentially
+  we want to remove $\ZERO$ in regular expressions like $r + \ZERO$
+  and $\ZERO + r$.  However, our n-ary alternative takes a list of
+  regular expressions as argument, we therefore need a more general
+  ``flatten'' function, called \texttt{flts}. This function should
+  analyse a list of regular expressions, say $rs$, as follows:
+
+  \begin{center}
+    \begin{tabular}{lllll}
+      1) &$rs = []$                & $\dn$ & $[]$ & (empty list)\\
+      2) &$rs = \ZERO :: rest$     & $\dn$ & $\textrm{flatten}\;rest$\\
+      3) &$rs = (\sum rs_1) :: rest$ & $\dn$ & $rs_1 ::: \textrm{flatten}\;rest$\\
+      4) &$rs = r :: rest$         & $\dn$ & $r :: \textrm{flatten}\;rest$ & (otherwise)\\
+    \end{tabular}  
+  \end{center}  
+
+  The first clause just states that empty lists cannot be further
+  flattened. The second removes all $\ZERO$s from the list.
+  The third is when the first regular expression is an \texttt{ALTs}, then
+  the content of this alternative should be spilled out and appended
+  with the flattened rest of the list. The last case is for all other
+  cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs},
+  then we just keep the head of the list and flatten the rest.
+  \mbox{}\hfill\mbox{[1 Mark]}
+
+\item[(4)] Implement the function \textit{simp}, which recursively
   traverses a regular expression, and on the way up simplifies every
   regular expression on the left (see below) to the regular expression
   on the right, except it does not simplify inside ${}^*$-regular
@@ -324,31 +358,35 @@
 $\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
 $r \cdot \ONE$ & $\mapsto$ & $r$\\ 
 $\ONE \cdot r$ & $\mapsto$ & $r$\\ 
-$r + \ZERO$ & $\mapsto$ & $r$\\ 
-$\ZERO + r$ & $\mapsto$ & $r$\\ 
-$r + r$ & $\mapsto$ & $r$\\ 
+$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum\;(\texttt{(flts + distinct)}[simp(r_1),..,simp(r_n)])$\\  
 \end{tabular}
-  \end{center}
+\end{center}
+
+  The last case is as follows: first apply $simp$ to all regular expressions
+  $r_1,.. ,r_n$; then flatten the resulting list using \texttt{flts};
+  finally remove all duplicates in this list (this can be done in Scala
+  using the function \texttt{\_.distinct}).
 
   For example the regular expression
   \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\]
 
   simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be
   seen as trees and there are several methods for traversing
-  trees. One of them corresponds to the inside-out traversal, which is also
-  sometimes called post-order tra\-versal: you traverse inside the
-  tree and on the way up you apply simplification rules.
-  \textbf{Another Hint:}
-  Remember numerical expressions from school times---there you had expressions
-  like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$
-  and simplification rules that looked very similar to rules
-  above. You would simplify such numerical expressions by replacing
-  for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then
-  look whether more rules are applicable. If you organise the
-  simplification in an inside-out fashion, it is always clear which
-  simplification should be applied next.\hfill[1 Mark]
+  trees. One of them corresponds to the inside-out traversal, which is
+  also sometimes called post-order tra\-versal: you traverse inside
+  the tree and on the way up you apply simplification rules.
+  \textbf{Another Hint:} Remember numerical expressions from school
+  times---there you had expressions like
+  $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ and
+  simplification rules that looked very similar to rules above. You
+  would simplify such numerical expressions by replacing for example
+  the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then look whether
+  more rules are applicable. If you regard regular expressions as
+  trees and if you organise the simplification in an inside-out
+  fashion, it is always clear which simplification should be applied
+  next.\mbox{}\hfill\mbox{[1 Mark]}
 
-\item[(4)] Implement two functions: The first, called \textit{ders},
+\item[(5)] Implement two functions: The first, called \textit{ders},
   takes a list of characters and a regular expression as arguments, and
   builds the derivative w.r.t.~the list as follows:
 
@@ -371,7 +409,7 @@
 true for the regular expression $(a\cdot b)\cdot c$ and the string
 $abc$, but false if you give it the string $ab$. \hfill[1 Mark]
 
-\item[(5)] Implement a function, called \textit{size}, by recursion
+\item[(6)] Implement a function, called \textit{size}, by recursion
   over regular expressions. If a regular expression is seen as a tree,
   then \textit{size} should return the number of nodes in such a
   tree. Therefore this function is defined as follows:
@@ -381,7 +419,7 @@
 $\textit{size}(\ZERO)$ & $\dn$ & $1$\\
 $\textit{size}(\ONE)$  & $\dn$ & $1$\\
 $\textit{size}(c)$     & $\dn$ & $1$\\
-$\textit{size}(r_1 + r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
+$\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\
 $\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\
 $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\
 \end{tabular}
@@ -391,9 +429,9 @@
 expression $(a^*)^* \cdot b$ grows when taking successive derivatives
 according the letter $a$ without simplification and then compare it to
 taking the derivative, but simplify the result.  The sizes
-are given in \texttt{re.scala}. \hfill[1 Mark]
+are given in \texttt{re.scala}. \hfill[0.5 Marks]
 
-\item[(6)] You do not have to implement anything specific under this
+\item[(7)] You do not have to implement anything specific under this
   task.  The purpose here is that you will be marked for some ``power''
   test cases. For example can your matcher decide within 30 seconds
   whether the regular expression $(a^*)^*\cdot b$ matches strings of the
@@ -406,20 +444,22 @@
 
   \noindent correctly to just \texttt{ONE}, where \texttt{SEQ} is nested
   50 or more times?\\
-  \mbox{}\hfill[2 Marks]
+  \mbox{}\hfill[1 Mark]
 \end{itemize}
 
 \subsection*{Background}
 
-Although easily implementable in Scala, the idea behind the derivative
-function might not so easy to be seen. To understand its purpose
-better, assume a regular expression $r$ can match strings of the form
-$c\!::\!cs$ (that means strings which start with a character $c$ and have
-some rest, or tail, $cs$). If you take the derivative of $r$ with
-respect to the character $c$, then you obtain a regular expression
-that can match all the strings $cs$.  In other words, the regular
-expression $\textit{der}\;c\;r$ can match the same strings $c\!::\!cs$
-that can be matched by $r$, except that the $c$ is chopped off.
+Although easily implementable in Scala (ok maybe the \texttt{simp} functions and
+\texttt{ALTs} needs a bit more thinking), the idea behind the
+derivative function might not so easy to be seen. To understand its
+purpose better, assume a regular expression $r$ can match strings of
+the form $c\!::\!cs$ (that means strings which start with a character
+$c$ and have some rest, or tail, $cs$). If you take the derivative of
+$r$ with respect to the character $c$, then you obtain a regular
+expression that can match all the strings $cs$.  In other words, the
+regular expression $\textit{der}\;c\;r$ can match the same strings
+$c\!::\!cs$ that can be matched by $r$, except that the $c$ is chopped
+off.
 
 Assume now $r$ can match the string $abc$. If you take the derivative
 according to $a$ then you obtain a regular expression that can match
@@ -451,12 +491,13 @@
 Java\footnote{Version 8 and below; Version 9 and above does not seem to be as
   catastrophic, but still much worse than the regular expression
   matcher based on derivatives.}, JavaScript and Python with the
-`evil' regular expression $(a^*)^*\cdot b$, then have a look at the
+``evil'' regular expression $(a^*)^*\cdot b$, then have a look at the
 graphs below (you can try it out for yourself: have a look at the files
 \texttt{catastrophic9.java}, \texttt{catastrophic.js},
 \texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you
-have implemented. How long can the string of $a$'s be in your matcher
-and still stay within the 30 seconds time limit?
+have implemented. How long can a string of $a$'s be in your matcher
+and still stay within the 30 seconds time limit? It should be muuuch better
+than your off-the-shelf matcher in your bog-standard language.
 
 \begin{center}
 \begin{tabular}{@{}cc@{}}
@@ -477,7 +518,7 @@
     scaled ticks=false,
     axis lines=left,
     width=6cm,
-    height=5.5cm, 
+    height=5.0cm, 
     legend entries={Python, Java 8, JavaScript, Swift, Dart},  
     legend pos=north west,
     legend cell align=left]
@@ -503,7 +544,7 @@
     scaled ticks=false,
     axis lines=left,
     width=6cm,
-    height=5.5cm, 
+    height=5.0cm, 
     legend entries={Java 9},  
     legend pos=north west]
 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
Binary file cws/pre_cw01.pdf has changed
--- a/cws/pre_cw01.tex	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,299 +0,0 @@
-% !TEX program = xelatex
-\documentclass{article}
-\usepackage{../style}
-\usepackage{disclaimer}
-\usepackage{../langs}
-
-
-
-\begin{document}
-
-\section*{Preliminary Part 1 (Scala, 3 Marks)}
-
-\mbox{}\hfill\textit{``The most effective debugging tool is still careful thought,}\\
-\mbox{}\hfill\textit{coupled with judiciously placed print statements.''}\smallskip\\
-\mbox{}\hfill\textit{ --- Brian W. Kernighan, in Unix for Beginners (1979)}\bigskip
-
- 
-\IMPORTANT{This part is about Scala. It is due on \cwSIX{} at 5pm and worth 3\%.
-Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.}
-
-\noindent
-Also note that the running time of each part will be restricted to a
-maximum of 30 seconds on my laptop.
-
-\DISCLAIMER{}
-
-\subsection*{Reference Implementation}
-
-Like the C++ assignments, the Scala assignments will work like this: you
-push your files to GitHub and receive (after sometimes a long delay) some
-automated feedback. In the end we take a snapshot of the submitted files and
-apply an automated marking script to them.\medskip
-
-\noindent
-In addition, the Scala coursework comes with a reference implementation
-in form of \texttt{jar}-files. This allows you to run any test cases on
-your own computer. For example you can call Scala on the command line
-with the option \texttt{-cp collatz.jar} and then query any function
-from the template file. Say you want to find out what the functions
-\texttt{collatz} and \texttt{collatz\_max} produce: for this you just
-need to prefix them with the object name \texttt{CW6a}. If you want to
-find out what these functions produce for the argument \texttt{6}, you
-would type something like:
-
-\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
-$ scala -cp collatz.jar
-  
-scala> CW6a.collatz(6)
-...
-scala> CW6a.collatz_max(6)
-...
-\end{lstlisting}%$
-
-\subsection*{Hints}
-
-\noindent
-\textbf{For the Preliminary Part:} useful math operators: \texttt{\%} for modulo, \texttt{\&} for bit-wise and; useful
-functions: \mbox{\texttt{(1\,to\,10)}} for ranges, \texttt{.toInt},
-\texttt{.toList} for conversions, you can use \texttt{List(...).max} for the
-maximum of a list, \texttt{List(...).indexOf(...)} for the first index of
-a value in a list.\bigskip
-
-
-
-\newpage
-\subsection*{Preliminary Part (3 Marks, file collatz.scala)}
-
-This part is about function definitions and recursion. You are asked
-to implement a Scala program that tests examples of the
-\emph{$3n + 1$-conjecture}, also called \emph{Collatz
-  conjecture}.\video{https://www.youtube.com./watch?v=LqKpkdRRLZw}
-This conjecture can be described as follows: Start with any positive
-number $n$ greater than $0$:
-
-\begin{itemize}
-\item If $n$ is even, divide it by $2$ to obtain $n / 2$.
-\item If $n$ is odd, multiply it by $3$ and add $1$ to obtain $3n +
-  1$.
-\item Repeat this process and you will always end up with $1$.
-\end{itemize}
-
-\noindent
-For example if you start with, say, $6$ and $9$, you obtain the
-two \emph{Collatz series}
-%
-\[
-\begin{array}{@{}l@{\hspace{5mm}}l@{}}
-6, 3, 10, 5, 16, 8, 4, 2, 1 & \text{(= 8 steps)}\\
-9, 28, 14, 7, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1  & \text{(= 19 steps)}\\
-\end{array}
-\]
-
-\noindent
-As you can see, the numbers go up and down like a roller-coaster, but
-curiously they seem to always terminate in $1$. Nobody knows why. The
-conjecture is that this will \emph{always} happen for every number
-greater than 0.\footnote{While it is relatively easy to test this
-conjecture with particular numbers, it is an interesting open problem to
-\emph{prove} that the conjecture is true for \emph{all} numbers ($> 0$).
-Paul Erd\"o{}s, a famous mathematician you might have heard about, said
-about this conjecture: ``Mathematics may not [yet] be ready for such
-problems.'' and also offered a \$500 cash prize for its solution.
-Jeffrey Lagarias, another mathematician, claimed that based only on
-known information about this problem, ``this is an extraordinarily
-difficult problem, completely out of reach of present day mathematics.''
-There is also a \href{https://xkcd.com/710/}{xkcd} cartoon about this
-conjecture\here{https://xkcd.com/710/}). If you are able to solve this
-conjecture, you will definitely get famous.}\bigskip
-
-\noindent
-\textbf{Tasks}
-
-\begin{itemize}
-\item[(1)] You are asked to implement a recursive function that
-  calculates the number of steps needed until a series ends
-  with $1$. In case of starting with $6$, it takes $8$ steps and in
-  case of starting with $9$, it takes $19$ (see above). We assume it 
-  takes $0$ steps, if we start with $1$. In order to
-  try out this function with large numbers, you should use
-  \texttt{Long} as argument type, instead of \texttt{Int}.  You can
-  assume this function will be called with numbers between $1$ and
-  $1$ Million. \hfill[1 Mark]
-
-\item[(2)] Write a second function that takes an upper bound as
-  an argument and calculates the steps for all numbers in the range from
-  1 up to this bound (the bound including). It returns the maximum number of
-  steps and the corresponding number that needs that many steps.  More
-  precisely it returns a pair where the first component is the number
-  of steps and the second is the corresponding number. \hfill\mbox{[1
-    Mark]}
-
-\item[(3)] Write a function that calculates \emph{hard
-    numbers} \here{https://medium.com/cantors-paradise/the-collatz-conjecture-some-shocking-results-from-180-000-iterations-7fea130d0377}
-  in the Collatz series---these are the last odd numbers just before a
-  power of two is reached.  For this, implement an
-  \textit{is-power-of-two} function which tests whether a number is a
-  power of two. The easiest way to implement this is by using the
-  bit-operator $\&$ of Scala. For a power of two, say $n$ with $n > 0$, it
-  holds that $n \;\&\; (n - 1)$ is equal to zero. I let you think why
-  this is the case.
-
-  The function \textit{is-hard} calculates whether
-  $3n + 1$ is a power of two.  Finally the \textit{last-odd} function
-  calculates the last odd number before a power of 2 in the Collatz
-  series. This means for example when starting with 9,
-  we receive 5 as the last odd number.  Surprisingly a lot of numbers
-  have 5 as last-odd number. But for example for 113 we obtain 85,
-  because of the series
-  %
-  \[113, 340, 170, \,\fbox{85}\,, 256, 128, 64, 32, 16, 8, 4, 2, 1\]
-
-  The \textit{last-odd} function will only be called with numbers that are not
-  powers of 2 themselves.
-\end{itemize}
-
-\noindent
-\textbf{Test Data:} Some test ranges and cases are:
-
-\begin{itemize}
-\item 1 to 10 where $9$ takes 19 steps 
-\item 1 to 100 where $97$ takes 118 steps,
-\item 1 to 1,000 where $871$ takes 178 steps,
-\item 1 to 10,000 where $6,171$ takes 261 steps,
-\item 1 to 100,000 where $77,031$ takes 350 steps, 
-\item 1 to 1 Million where $837,799$ takes 524 steps
-  %% runs out of stack space
-  %% \item[$\bullet$] $1 - 10$ million where $8,400,511$ takes 685 steps
-\item 21 is the last odd number for 84
-\item 341 is the last odd number for 201, 604, 605 and 8600
-  
-\end{itemize}
-  
-
-
-
-
-
-\end{document}
-
-
-%%%%%%% Historical Stuff
-\newpage
-
-This part is about web-scraping and list-processing in Scala. It uses
-online data about the per-capita alcohol consumption for each country
-(per year?), and a file containing the data about the population size of
-each country.  From this data you are supposed to estimate how many
-litres of pure alcohol are consumed worldwide.\bigskip
-
-\noindent
-\textbf{Tasks (file alcohol.scala):}
-
-\begin{itemize}
-\item[(1)] Write a function that given an URL requests a
-  comma-separated value (CSV) list.  We are interested in the list
-  from the following URL
-
-\begin{center}
-  \url{https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv}
-\end{center}
-
-\noindent Your function should take a string (the URL) as input, and
-produce a list of strings as output, where each string is one line in
-the corresponding CSV-list.  This list from the URL above should
-contain 194 lines.\medskip
-
-\noindent
-Write another function that can read the file \texttt{population.csv}
-from disk (the file is distributed with the assignment). This
-function should take a string as argument, the file name, and again
-return a list of strings corresponding to each entry in the
-CSV-list. For \texttt{population.csv}, this list should contain 216
-lines.\hfill[1 Mark]
-
-
-\item[(2)] Unfortunately, the CSV-lists contain a lot of ``junk'' and we
-  need to extract the data that interests us.  From the header of the
-  alcohol list, you can see there are 5 columns
-  
-  \begin{center}
-    \begin{tabular}{l}
-      \texttt{country (name),}\\
-      \texttt{beer\_servings,}\\
-      \texttt{spirit\_servings,}\\
-      \texttt{wine\_servings,}\\
-      \texttt{total\_litres\_of\_pure\_alcohol}
-    \end{tabular}  
-  \end{center}
-
-  \noindent
-  Write a function that extracts the data from the first column,
-  the country name, and the data from the fifth column (converted into
-  a \texttt{Double}). For this go through each line of the CSV-list
-  (except the first line), use the \texttt{split(",")} function to
-  divide each line into an array of 5 elements. Keep the data from the
-  first and fifth element in these arrays.\medskip
-
-  \noindent
-  Write another function that processes the population size list. This
-  is already of the form country name and population size.\footnote{Your
-    friendly lecturer already did the messy processing for you from the
-  Worldbank database, see \url{https://github.com/datasets/population/tree/master/data} for the original.} Again, split the
-  strings according to the commas. However, this time generate a
-  \texttt{Map} from country names to population sizes.\hfill[1 Mark]
-
-\item[(3)] In (2) you generated the data about the alcohol consumption
-  per-capita for each country, and also the population size for each
-  country. From this generate next a sorted(!) list of the overall
-  alcohol consumption for each country. The list should be sorted from
-  highest alcohol consumption to lowest. The difficulty is that the
-  data is scraped off from ``random'' sources on the Internet and
-  annoyingly the spelling of some country names does not always agree in both
-  lists. For example the alcohol list contains
-  \texttt{Bosnia-Herzegovina}, while the population writes this country as
-  \texttt{Bosnia and Herzegovina}. In your sorted
-  overall list include only countries from the alcohol list, whose
-  exact country name is also in the population size list. This means
-  you can ignore countries like Bosnia-Herzegovina from the overall
-  alcohol consumption. There are 177 countries where the names
-  agree. The UK is ranked 10th on this list by
-  consuming 671,976,864 Litres of pure alcohol each year.\medskip
-  
-  \noindent
-  Finally, write another function that takes an integer, say
-  \texttt{n}, as argument. You can assume this integer is between 0
-  and 177 (the number of countries in the sorted list above).  The
-  function should return a triple, where the first component is the
-  sum of the alcohol consumption in all countries (on the list); the
-  second component is the sum of the \texttt{n}-highest alcohol
-  consumers on the list; and the third component is the percentage the
-  \texttt{n}-highest alcohol consumers drink with respect to the
-  the world consumption. You will see that according to our data, 164
-  countries (out of 177) gobble up 100\% of the World alcohol
-  consumption.\hfill\mbox{[1 Mark]}
-\end{itemize}
-
-\noindent
-\textbf{Hints:} useful list functions: \texttt{.drop(n)},
-\texttt{.take(n)} for dropping or taking some elements in a list,
-\texttt{.getLines} for separating lines in a string;
-\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
-elements in the pairs---the sorting is done from smallest to highest;
-useful \texttt{Map} functions: \texttt{.toMap} converts a list of
-pairs into a \texttt{Map}, \texttt{.isDefinedAt(k)} tests whether the
-map is defined at that key, that is would produce a result when
-called with this key; useful data functions: \texttt{Source.fromURL},
-\texttt{Source.fromFile} for obtaining a webpage and reading a file.
-
-\newpage
-
-
-
-
-
-
-%%% Local Variables: 
-%%% mode: latex
-%%% TeX-master: t
-%%% End: 
--- a/cws/pre_cw02.tex	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-% !TEX program = xelatex
-\documentclass{article}
-\usepackage{../style}
-\usepackage{disclaimer}
-\usepackage{../langs}
-
-\begin{document}
-
-
-%% should ask to lower case the words.
-
-\section*{Preliminary Part 2 (Scala, 3 Marks)}
-
-\mbox{}\hfill\textit{``What one programmer can do in one month,}\\
-\mbox{}\hfill\textit{two programmers can do in two months.''}\smallskip\\
-\mbox{}\hfill\textit{ --- Frederick P.~Brooks (author of The Mythical Man-Month)}\bigskip\medskip
-
-\IMPORTANT{You are asked to implement a Scala program for measuring similarity in
-  texts. The preliminary part is due on \cwSEVEN{} at 5pm and worth 3\%.
-  Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.}
-
-\noindent
-Also note that the running time of each part will be restricted to a
-maximum of 30 seconds on my laptop.
-
-\DISCLAIMER{}
-
-
-\subsection*{Reference Implementation}
-
-Like the C++ part, the Scala part works like this: you
-push your files to GitHub and receive (after sometimes a long delay) some
-automated feedback. In the end we will take a snapshot of the submitted files and
-apply an automated marking script to them.\medskip
-
-\noindent
-In addition, the Scala part comes with reference
-implementations in form of \texttt{jar}-files. This allows you to run
-any test cases on your own computer. For example you can call Scala on
-the command line with the option \texttt{-cp docdiff.jar} and then
-query any function from the template file. Say you want to find out
-what the function \texttt{occurrences} produces: for this you just need
-to prefix it with the object name \texttt{CW7a}.  If you want to find out what
-these functions produce for the list \texttt{List("a", "b", "b")},
-you would type something like:
-
-\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
-$ scala -cp docdiff.jar
-  
-scala> CW7a.occurrences(List("a", "b", "b"))
-...
-\end{lstlisting}%$
-
-\subsection*{Hints}
-
-\noindent
-\textbf{For the Preliminary Part:} useful operations involving regular
-expressions:
-\[\texttt{reg.findAllIn(s).toList}\]
-\noindent finds all
-substrings in \texttt{s} according to a regular regular expression
-\texttt{reg}; useful list operations: \texttt{.distinct}
-removing duplicates from a list, \texttt{.count} counts the number of
-elements in a list that satisfy some condition, \texttt{.toMap}
-transfers a list of pairs into a Map, \texttt{.sum} adds up a list of
-integers, \texttt{.max} calculates the maximum of a list.\bigskip
-
-
-
-\newpage
-\subsection*{Preliminary Part (3 Marks, file docdiff.scala)}
-
-It seems source code plagiarism---stealing and submitting someone
-else's code---is a serious problem at other
-universities.\footnote{Surely, King's students, after all their
-  instructions and warnings, would never commit such an offence. Yes?}
-Detecting such plagiarism is time-consuming and disheartening for
-lecturers at those universities. To aid these poor souls, let's
-implement in this part a program that determines the similarity
-between two documents (be they source code or texts in English). A
-document will be represented as a list of strings.
-
-
-\subsection*{Tasks}
-
-\begin{itemize}
-\item[(1)] Implement a function that `cleans' a string by finding all
-  (proper) words in this string. For this use the regular expression
-  \texttt{\textbackslash{}w+} for recognising words and the library function
-  \texttt{findAllIn}. The function should return a document (a list of
-  strings).\\
-  \mbox{}\hfill [0.5 Marks]
-
-\item[(2)] In order to compute the overlap between two documents, we
-  associate each document with a \texttt{Map}. This Map represents the
-  strings in a document and how many times these strings occur in the
-  document. A simple (though slightly inefficient) method for counting
-  the number of string-occurrences in a document is as follows: remove
-  all duplicates from the document; for each of these (unique)
-  strings, count how many times they occur in the original document.
-  Return a Map associating strings with occurrences. For example
-
-  \begin{center}
-  \pcode{occurrences(List("a", "b", "b", "c", "d"))}
-  \end{center}
-
-  produces \pcode{Map(a -> 1, b -> 2, c -> 1, d -> 1)} and
-
-  \begin{center}
-  \pcode{occurrences(List("d", "b", "d", "b", "d"))}
-  \end{center}
-
-  produces \pcode{Map(d -> 3, b -> 2)}.\hfill[1 Mark]
-
-\item[(3)] You can think of the Maps calculated under (2) as memory-efficient
-  representations of sparse ``vectors''. In this subtask you need to
-  implement the \emph{product} of two such vectors, sometimes also called
-  \emph{dot product} of two vectors.\footnote{\url{https://en.wikipedia.org/wiki/Dot_product}}
-
-  For this dot product, implement a function that takes two documents
-  (\texttt{List[String]}) as arguments. The function first calculates
-  the (unique) strings in both. For each string, it multiplies the
-  corresponding occurrences in each document. If a string does not
-  occur in one of the documents, then the product for this string is zero. At the end
-  you need to add up all products. For the two documents in (2) the dot
-  product is 7, because
-
-  \[
-    \underbrace{1 * 0}_{"a"} \;\;+\;\;
-    \underbrace{2 * 2}_{"b"} \;\;+\;\;
-    \underbrace{1 * 0}_{"c"} \;\;+\;\;
-    \underbrace{1 * 3}_{"d"} \qquad = 7
-  \]  
-  
-  \hfill\mbox{[1 Mark]}
-
-\item[(4)] Implement first a function that calculates the overlap
-  between two documents, say $d_1$ and $d_2$, according to the formula
-
-  \[
-  \texttt{overlap}(d_1, d_2) = \frac{d_1 \cdot d_2}{max(d_1^2, d_2^2)}  
-  \]
-
-  where $d_1^2$ means $d_1 \cdot d_1$ and so on.
-  You can expect this function to return a \texttt{Double} between 0 and 1. The
-  overlap between the lists in (2) is $0.5384615384615384$.
-
-  Second, implement a function that calculates the similarity of
-  two strings, by first extracting the substrings using the clean
-  function from (1)
-  and then calculating the overlap of the resulting documents.\\
-  \mbox{}\hfill\mbox{[0.5 Marks]}
-\end{itemize}
-
-
-\end{document} 
-
-%%% Local Variables: 
-%%% mode: latex
-%%% TeX-master: t
-%%% End: 
Binary file cws/pre_cw03.pdf has changed
--- a/cws/pre_cw03.tex	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-% !TEX program = xelatex
-\documentclass{article}
-\usepackage{../style}
-\usepackage{../langs}
-\usepackage{disclaimer}
-\usepackage{tikz}
-\usepackage{pgf}
-\usepackage{pgfplots}
-\usepackage{stackengine}
-%% \usepackage{accents}
-\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}}
-\usepackage{ulem}
-
-\begin{document}
-
-% BF IDE
-% https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5
-  
-\section*{Preliminary Part 3 (Scala, 3 Marks)}
-
-\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\
-\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\
-\mbox{}\hfill\textit{other functional languages.''}\smallskip\\
-\mbox{}\hfill\textit{ --- Dean and Ghemawat, who designed this concept at Google}
-\bigskip
-
-\IMPORTANT{This part is about the shunting yard algorithm by Dijkstra.
-  The preliminary part is due on \sout{\cwEIGHT{}} \textcolor{red}{11 December}
-  at 5pm and worth 3\%.
-  Any 1\% you achieve in the preliminary part counts as your
-  ``weekly engagement''.}
-
-\noindent
-Also note that the running time of each part will be restricted to a
-maximum of 30 seconds on my laptop.  
-
-\DISCLAIMER{}
-
-\subsection*{Reference Implementation}
-
-This Scala assignment comes with two reference implementations in
-form of \texttt{jar}-files. This allows
-you to run any test cases on your own computer. For example you can
-call Scala on the command line with the option \texttt{-cp
-  postfix.jar} and then query any function from the
-\texttt{postfix.scala} file (similarly for file \texttt{postfix2.scala}). As
-usual you have to prefix the calls with \texttt{CW8a} and
-\texttt{CW8b}, respectively.
-
-
-\begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small]
-$ scala -cp postfix.jar
-
-scala> CW8a.syard(CW8a.split("( 5 + 7 ) * 2"))
-val res0: CW8a.Toks = List(5, 7, +, 2, *)
-\end{lstlisting}%$
-
-
-\subsection*{Hints}
-
-\noindent
-\textbf{For the Preliminary Part:} useful operations for determining
-whether a string is a number are \texttt{.forall} and \texttt{.isDigit}.
-One way to calculate the the power operation is to use \texttt{.pow}
-on \texttt{BigInt}s, like \texttt{BigInt(n).pow(m).toInt}.
-\bigskip
-
-
-\subsection*{Preliminary Part (3 Marks, files postfix.scala, postfix2.scala)}
-
-The \emph{Shunting Yard Algorithm} has been developed by Edsger Dijkstra,
-an influential computer scientist who developed many well-known
-algorithms. This algorithm transforms the usual infix notation of
-arithmetic expressions into the postfix notation, sometimes also
-called reverse Polish notation.
-
-Why on Earth do people use the postfix notation? It is much more
-convenient to work with the usual infix notation for arithmetic
-expressions. Most modern pocket calculators (as opposed to the ones used 20
-years ago) understand infix notation. So why on Earth? \ldots{}Well,
-many computers under the hood, even nowadays, use postfix notation
-extensively. For example if you give to the Java compiler the
-expression $1 + ((2 * 3) + (4 - 3))$, it will generate the Java Byte
-code
-
-\begin{lstlisting}[language=JVMIS,numbers=none]
-ldc 1 
-ldc 2 
-ldc 3 
-imul 
-ldc 4 
-ldc 3 
-isub 
-iadd 
-iadd
-\end{lstlisting}
-
-\noindent
-where the command \texttt{ldc} loads a constant onto the stack, and \texttt{imul},
-\texttt{isub} and \texttt{iadd} are commands acting on the stack. Clearly this
-is the arithmetic expression in postfix notation.\bigskip
-
-\noindent
-The shunting yard algorithm processes an input token list using an
-operator stack and an output list. The input consists of numbers,
-operators ($+$, $-$, $*$, $/$) and parentheses, and for the purpose of
-the assignment we assume the input is always a well-formed expression
-in infix notation.  The calculation in the shunting yard algorithm uses
-information about the
-precedences of the operators (given in the template file). The
-algorithm processes the input token list as follows:
-
-\begin{itemize}
-\item If there is a number as input token, then this token is
-  transferred directly to the output list. Then the rest of the input is
-  processed.
-\item If there is an operator as input token, then you need to check
-  what is on top of the operator stack. If there are operators with
-  a higher or equal precedence, these operators are first popped off
-  from the stack and moved to the output list. Then the operator from the input
-  is pushed onto the stack and the rest of the input is processed.
-\item If the input is a left-parenthesis, you push it on to the stack
-  and continue processing the input.
-\item If the input is a right-parenthesis, then you pop off all operators
-  from the stack to the output list until you reach the left-parenthesis.
-  Then you discharge the $($ and $)$ from the input and stack, and continue
-  processing the input list.
-\item If the input is empty, then you move all remaining operators
-  from the stack to the output list.  
-\end{itemize}  
-
-\subsubsection*{Tasks (file postfix.scala)}
-
-\begin{itemize}
-\item[(1)] Implement the shunting yard algorithm described above. The
-  function, called \texttt{syard}, takes a list of tokens as first
-  argument. The second and third arguments are the stack and output
-  list represented as Scala lists. The most convenient way to
-  implement this algorithm is to analyse what the input list, stack
-  and output list look like in each step using pattern-matching. The
-  algorithm transforms for example the input
-
-  \[
-  \texttt{List(3, +, 4, *, (, 2, -, 1, ))}
-  \]
-
-  into the postfix output
-
-  \[
-  \texttt{List(3, 4, 2, 1, -, *, +)}
-  \]  
-  
-  You can assume the input list is always a  list representing
-  a well-formed infix arithmetic expression.\hfill[1 Mark]
-
-\item[(2)] Implement a compute function that takes a postfix expression
-  as argument and evaluates it generating an integer as result. It uses a
-  stack to evaluate the postfix expression. The operators $+$, $-$, $*$
-  are as usual; $/$ is division on integers, for example $7 / 3 = 2$.
-  \hfill[1 Mark]
-\end{itemize}
-
-\subsubsection*{Task (file postfix2.scala)}
-
-\begin{itemize}
-\item[(3/4)] Extend the code in (1) and (2) to include the power
-  operator.  This requires proper account of associativity of
-  the operators. The power operator is right-associative, whereas the
-  other operators are left-associative.  Left-associative operators
-  are popped off if the precedence is bigger or equal, while
-  right-associative operators are only popped off if the precedence is
-  bigger.\hfill[1 Marks]
-\end{itemize}
-
-\end{document}
-
-
-%%% Local Variables: 
-%%% mode: latex
-%%% TeX-master: t
-%%% End: 
--- a/main_solution1/drumb.scala	Thu Nov 04 12:20:12 2021 +0000
+++ b/main_solution1/drumb.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -24,7 +24,7 @@
 //     strings for each line in the CSV-file.
 
 def get_january_data(symbol: String, year: Int) : List[String] = 
-  Source.fromFile(symbol ++ ".csv")("ISO-8859-1").getLines.toList.filter(_.startsWith(year.toString))
+  Source.fromFile(symbol ++ ".csv")("ISO-8859-1").getLines().toList.filter(_.startsWith(year.toString))
 
 
 //test cases
@@ -169,10 +169,32 @@
 
 //test cases for the two portfolios given above
 
-//println("Real data: " + investment(rstate_portfolio, 1978 to 2019, 100))
-//println("Blue data: " + investment(blchip_portfolio, 1978 to 2019, 100))
+  println("Real data: " + investment(rstate_portfolio, 1978 to 2019, 100))
+  println("Blue data: " + investment(blchip_portfolio, 1978 to 2019, 100))
+
 
 }
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+import CW6b._ 
+investment(rstate_portfolio, 1978 to 2019, 100)
--- a/main_solution3/re.scala	Thu Nov 04 12:20:12 2021 +0000
+++ b/main_solution3/re.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -1,19 +1,24 @@
 // Core Part about Regular Expression Matching
 //=============================================
 
-object CW8c {
+object M3 {
 
 // Regular Expressions
 abstract class Rexp
 case object ZERO extends Rexp
 case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
-case class STAR(r: Rexp) extends Rexp 
+case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives 
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
+case class STAR(r: Rexp) extends Rexp             // star
 
 // some convenience for typing in regular expressions
 
+
+//the usual binary choice can be defined in terms of ALTs
+def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
+
+
 import scala.language.implicitConversions    
 import scala.language.reflectiveCalls 
 
@@ -49,7 +54,7 @@
   case ZERO => false
   case ONE => true
   case CHAR(_) => false
-  case ALT(r1, r2) => nullable(r1) || nullable(r2)
+  case ALTs(rs) => rs.exists(nullable)
   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   case STAR(_) => true
 }
@@ -63,13 +68,22 @@
   case ZERO => ZERO
   case ONE => ZERO
   case CHAR(d) => if (c == d) ONE else ZERO
-  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+  case ALTs(rs) => ALTs(rs.map(der(c, _)))
   case SEQ(r1, r2) => 
     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
     else SEQ(der(c, r1), r2)
   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
 }
 
+
+
+def flts(rs: List[Rexp]) : List[Rexp] = rs match {
+  case Nil => Nil
+  case ZERO::tl => flts(tl)
+  case ALTs(rs1)::rs2 => rs1 ::: flts(rs2)  
+  case r::rs => r :: flts(rs) 
+}
+
 // (3) Complete the simp function according to
 // the specification given in the coursework; this
 // function simplifies a regular expression from
@@ -77,11 +91,12 @@
 // expressions; however it does not simplify inside 
 // STAR-regular expressions.
 
+
 def simp(r: Rexp) : Rexp = r match {
-  case ALT(r1, r2) => (simp(r1), simp(r2)) match {
-    case (ZERO, r2s) => r2s
-    case (r1s, ZERO) => r1s
-    case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+  case ALTs(rs) => (flts(rs.map(simp)).distinct) match {
+    case Nil => ZERO
+    case r::Nil => r  
+    case rs => ALTs(rs)
   }
   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
     case (ZERO, _) => ZERO
@@ -117,7 +132,7 @@
   case ZERO => 1
   case ONE => 1
   case CHAR(_) => 1
-  case ALT(r1, r2) => 1 + size(r1) + size (r2)
+  case ALTs(rs) => 1 + rs.map(size).sum
   case SEQ(r1, r2) => 1 + size(r1) + size (r2)
   case STAR(r1) => 1 + size(r1)
 }
@@ -132,16 +147,19 @@
 // the supposedly 'evil' regular expression (a*)* b
 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 
-//matcher(EVIL, "a" * 1000 ++ "b")   // => true
-//matcher(EVIL, "a" * 1000)          // => false
+//println(matcher(EVIL, "a" * 1000 ++ "b"))   // => true
+//println(matcher(EVIL, "a" * 1000))          // => false
 
 // size without simplifications
-//size(der('a', der('a', EVIL)))             // => 28
-//size(der('a', der('a', der('a', EVIL))))   // => 58
+//println(size(der('a', der('a', EVIL))))             // => 28
+//println(size(der('a', der('a', der('a', EVIL)))))   // => 58
 
 // size with simplification
-//size(simp(der('a', der('a', EVIL))))           // => 8
-//size(simp(der('a', der('a', der('a', EVIL))))) // => 8
+//println(simp(der('a', der('a', EVIL))))          // => 8
+//println(simp(der('a', der('a', der('a', EVIL)))))// => 8
+
+//println(size(simp(der('a', der('a', EVIL)))))           // => 8
+//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8
 
 // Python needs around 30 seconds for matching 28 a's with EVIL. 
 // Java 9 and later increase this to an "astonishing" 40000 a's in
Binary file main_templates1/drumb.jar has changed
--- a/main_templates1/drumb.scala	Thu Nov 04 12:20:12 2021 +0000
+++ b/main_templates1/drumb.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -1,7 +1,7 @@
-// Core Part about a really dumb investment strategy
+// Main Part 1 about a really dumb investment strategy
 //===================================================
 
-object CW6b {
+object M1 {
 
 //two test portfolios
 
Binary file main_templates2/danube.jar has changed
--- a/main_templates2/danube.scala	Thu Nov 04 12:20:12 2021 +0000
+++ b/main_templates2/danube.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -1,8 +1,8 @@
-// Core Part about Movie Recommendations 
+// Main Part 2 about Movie Recommendations 
 // at Danube.co.uk
 //===========================================
 
-object CW7b {
+object M2 {
 
 import io.Source
 import scala.util._
@@ -158,29 +158,4 @@
 
 
 
-// (7) Calculate the recommendations for all movies according to
-// what the recommendations function in (6) produces (this
-// can take a few seconds). Put all recommendations into a list 
-// (of strings) and count how often the strings occur in
-// this list. This produces a list of string-int pairs,
-// where the first component is the movie name and the second
-// is the number of how many times the movie was recommended. 
-// Sort all the pairs according to the number
-// of times they were recommended (most recommended movie name 
-// first).
-
-def most_recommended(recs: Map[String, List[String]],
-                     movs: Map[String, String]) : List[(String, Int)] = ???
-
-
-// testcase
-//
-//most_recommended(ratings_map, movies_map).take(3)
-// =>
-// List((Matrix,698), 
-//      (Star Wars: Episode IV - A New Hope (1977),402), 
-//      (Jerry Maguire (1996),382))
-
-
-
 }
Binary file main_templates3/re.jar has changed
--- a/main_templates3/re.scala	Thu Nov 04 12:20:12 2021 +0000
+++ b/main_templates3/re.scala	Fri Nov 05 16:47:55 2021 +0000
@@ -1,20 +1,24 @@
-// Core Part about Regular Expression Matching
+// Main Part 3 about Regular Expression Matching
 //=============================================
 
-object CW8c {
+object M3 {
 
 // Regular Expressions
 abstract class Rexp
 case object ZERO extends Rexp
 case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative 
+case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives 
 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
 case class STAR(r: Rexp) extends Rexp             // star
 
 
 // some convenience for typing regular expressions
 
+//the usual binary choice can be defined in terms of ALTs
+def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
+
+
 import scala.language.implicitConversions    
 import scala.language.reflectiveCalls 
 
@@ -56,17 +60,27 @@
 def der (c: Char, r: Rexp) : Rexp = ???
 
 
-// (3) Complete the simp function according to
-// the specification given in the coursework; this
-// function simplifies a regular expression from
+// (3) Implement the flatten function flts. It
+// deletes 0s from a list of regular expressions
+// and also 'spills out', or flattens, nested 
+// ALTernativeS.
+
+def flts(rs: List[Rexp]) : List[Rexp] = ???
+
+
+
+// (4) Complete the simp function according to
+// the specification given in the coursework description; 
+// this function simplifies a regular expression from
 // the inside out, like you would simplify arithmetic 
 // expressions; however it does not simplify inside 
-// STAR-regular expressions.
+// STAR-regular expressions. Use the _.distinct and 
+// flts functions.
 
 def simp(r: Rexp) : Rexp = ???
 
 
-// (4) Complete the two functions below; the first 
+// (5) Complete the two functions below; the first 
 // calculates the derivative w.r.t. a string; the second
 // is the regular expression matcher taking a regular
 // expression and a string and checks whether the
@@ -77,7 +91,7 @@
 def matcher(r: Rexp, s: String): Boolean = ???
 
 
-// (5) Complete the size function for regular
+// (6) Complete the size function for regular
 // expressions according to the specification 
 // given in the coursework.
 
Binary file pre_templates1/collatz.jar has changed
--- a/pre_templates1/collatz.scala	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-// Preliminary Part about the 3n+1 conjecture
-//============================================
-
-object CW6a {
-
-//(1) Complete the collatz function below. It should
-//    recursively calculate the number of steps needed 
-//    until the collatz series reaches the number 1.
-//    If needed, you can use an auxiliary function that
-//    performs the recursion. The function should expect
-//    arguments in the range of 1 to 1 Million.
-
-def collatz(n: Long) : Long = ???
-
-
-//(2) Complete the collatz_max function below. It should
-//    calculate how many steps are needed for each number 
-//    from 1 up to a bound and then calculate the maximum number of
-//    steps and the corresponding number that needs that many 
-//    steps. Again, you should expect bounds in the range of 1
-//    up to 1 Million. The first component of the pair is
-//    the maximum number of steps and the second is the 
-//    corresponding number.
-
-def collatz_max(bnd: Long) : (Long, Long) = ???
-
-//(3) Implement a function that calculates the last_odd
-//    number in a collatz series.  For this implement an
-//    is_pow_of_two function which tests whether a number 
-//    is a power of two. The function is_hard calculates 
-//    whether 3n + 1 is a power of two. Again you can
-//    assume the input ranges between 1 and 1 Million,
-//    and also assume that the input of last_odd will not 
-//    be a power of 2.
-
-def is_pow_of_two(n: Long) : Boolean = ???
-
-def is_hard(n: Long) : Boolean = ???
-
-def last_odd(n: Long) : Long = ???
-
-}
-
Binary file pre_templates2/docdiff.jar has changed
--- a/pre_templates2/docdiff.scala	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-// Preliminary Part about Code Similarity
-//========================================
-
-
-object CW7a { 
-
-
-//(1) Complete the clean function below. It should find
-//    all words in a string using the regular expression
-//    \w+  and the library function 
-//
-//         some_regex.findAllIn(some_string)
-//
-//    The words should be Returned as a list of strings.
-
-
-def clean(s: String) : List[String] = ???
-  
-
-
-//(2) The function occurrences calculates the number of times  
-//    strings occur in a list of strings. These occurrences should 
-//    be calculated as a Map from strings to integers.
-
-
-def occurrences(xs: List[String]): Map[String, Int] = ???
-
-
-//(3) This functions calculates the dot-product of two documents
-//    (list of strings). For this it calculates the occurrence
-//    maps from (2) and then multiplies the corresponding occurrences. 
-//    If a string does not occur in a document, the product is zero.
-//    The function finally sums up all products. 
-
-
-def prod(lst1: List[String], lst2: List[String]) : Int = ???
-
-
-//(4) Complete the functions overlap and similarity. The overlap of
-//    two documents is calculated by the formula given in the assignment
-//    description. The similarity of two strings is given by the overlap
-//    of the cleaned strings (see (1)).  
-
-
-def overlap(lst1: List[String], lst2: List[String]) : Double = ???
-
-def similarity(s1: String, s2: String) : Double = ???
-
-
-
-/* Test cases
-
-
-val list1 = List("a", "b", "b", "c", "d") 
-val list2 = List("d", "b", "d", "b", "d")
-
-occurrences(List("a", "b", "b", "c", "d"))   // Map(a -> 1, b -> 2, c -> 1, d -> 1)
-occurrences(List("d", "b", "d", "b", "d"))   // Map(d -> 3, b -> 2)
-
-prod(list1,list2) // 7 
-
-overlap(list1, list2)   // 0.5384615384615384
-overlap(list2, list1)   // 0.5384615384615384
-overlap(list1, list1)   // 1.0
-overlap(list2, list2)   // 1.0
-
-// Plagiarism examples from 
-// https://desales.libguides.com/avoidingplagiarism/examples
-
-val orig1 = """There is a strong market demand for eco-tourism in
-Australia. Its rich and diverse natural heritage ensures Australia's
-capacity to attract international ecotourists and gives Australia a
-comparative advantage in the highly competitive tourism industry."""
-
-val plag1 = """There is a high market demand for eco-tourism in
-Australia. Australia has a comparative advantage in the highly
-competitive tourism industry due to its rich and varied natural
-heritage which ensures Australia's capacity to attract international
-ecotourists."""
-
-similarity(orig1, plag1) // 0.8679245283018868
-
-
-// Plagiarism examples from 
-// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php
-
-val orig2 = """No oil spill is entirely benign. Depending on timing and
-location, even a relatively minor spill can cause significant harm to
-individual organisms and entire populations. Oil spills can cause
-impacts over a range of time scales, from days to years, or even
-decades for certain spills. Impacts are typically divided into acute
-(short-term) and chronic (long-term) effects. Both types are part of a
-complicated and often controversial equation that is addressed after
-an oil spill: ecosystem recovery."""
-
-val plag2 = """There is no such thing as a "good" oil spill. If the
-time and place are just right, even a small oil spill can cause damage
-to sensitive ecosystems. Further, spills can cause harm days, months,
-years, or even decades after they occur. Because of this, spills are
-usually broken into short-term (acute) and long-term (chronic)
-effects. Both of these types of harm must be addressed in ecosystem
-recovery: a controversial tactic that is often implemented immediately
-following an oil spill."""
-
-overlap(clean(orig2), clean(plag2))  // 0.728
-similarity(orig2, plag2)             // 0.728
-
-
- 
-// The punchline: everything above 0.6 looks suspicious and 
-// should be investigated by staff.
-
-*/
-
-}
Binary file pre_templates3/postfix.jar has changed
--- a/pre_templates3/postfix.scala	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-// Shunting Yard Algorithm
-// by Edsger Dijkstra
-// ========================
-
-object CW8a {
-
-// type of tokens
-type Toks = List[String]
-
-// the operations in the basic version of the algorithm
-val ops = List("+", "-", "*", "/")
-
-// the precedences of the operators
-val precs = Map("+" -> 1,
-		"-" -> 1,
-		"*" -> 2,
-		"/" -> 2)
-
-// helper function for splitting strings into tokens
-def split(s: String) : Toks = s.split(" ").toList
-
-
-// (1) Implement below the shunting yard algorithm. The most
-// convenient way to this in Scala is to implement a recursive 
-// function and to heavily use pattern matching. The function syard 
-// takes some input tokens as first argument. The second and third 
-// arguments represent the stack and the output of the shunting yard 
-// algorithm.
-//
-// In the marking, you can assume the function is called only with 
-// an empty stack and an empty output list. You can also assume the
-// input os  only properly formatted (infix) arithmetic expressions
-// (all parentheses will be well-nested, the input only contains 
-// operators and numbers).
-
-// You can implement any additional helper function you need. I found 
-// it helpful to implement two auxiliary functions for the pattern matching:  
-// 
-
-def is_op(op: String) : Boolean = ???
-def prec(op1: String, op2: String) : Boolean = ???
-
-
-def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
-
-
-// test cases
-//syard(split("3 + 4 * ( 2 - 1 )"))  // 3 4 2 1 - * +
-//syard(split("10 + 12 * 33"))       // 10 12 33 * +
-//syard(split("( 5 + 7 ) * 2"))      // 5 7 + 2 *
-//syard(split("5 + 7 / 2"))          // 5 7 2 / +
-//syard(split("5 * 7 / 2"))          // 5 7 * 2 /
-//syard(split("9 + 24 / ( 7 - 3 )")) // 9 24 7 3 - / +
-
-//syard(split("3 + 4 + 5"))           // 3 4 + 5 +
-//syard(split("( ( 3 + 4 ) + 5 )"))    // 3 4 + 5 +
-//syard(split("( 3 + ( 4 + 5 ) )"))    // 3 4 5 + +
-//syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + +
-
- 
-// (2) Implement a compute function that evaluates an input list
-// in postfix notation. This function takes a list of tokens
-// and a stack as argumenta. The function should produce the 
-// result as an integer using the stack. You can assume 
-// this function will be only called with proper postfix 
-// expressions.    
-
-def compute(toks: Toks, st: List[Int] = Nil) : Int = ???
-
-
-// test cases
-// compute(syard(split("3 + 4 * ( 2 - 1 )")))  // 7
-// compute(syard(split("10 + 12 * 33")))       // 406
-// compute(syard(split("( 5 + 7 ) * 2")))      // 24
-// compute(syard(split("5 + 7 / 2")))          // 8
-// compute(syard(split("5 * 7 / 2")))          // 17
-// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15
-
-}
-
-
Binary file pre_templates3/postfix2.jar has changed
--- a/pre_templates3/postfix2.scala	Thu Nov 04 12:20:12 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-// Shunting Yard Algorithm 
-// including Associativity for Operators 
-// =====================================
-
-object CW8b {
-
-
-// type of tokens
-type Toks = List[String]
-
-// helper function for splitting strings into tokens
-def split(s: String) : Toks = s.split(" ").toList
-
-// left- and right-associativity
-abstract class Assoc
-case object LA extends Assoc
-case object RA extends Assoc
-
-
-// power is right-associative,
-// everything else is left-associative
-def assoc(s: String) : Assoc = s match {
-  case "^" => RA
-  case _ => LA
-}
-
-
-// the precedences of the operators
-val precs = Map("+" -> 1,
-  		"-" -> 1,
-		"*" -> 2,
-		"/" -> 2,
-                "^" -> 4)
-
-// the operations in the basic version of the algorithm
-val ops = List("+", "-", "*", "/", "^")
-
-// (3) Implement the extended version of the shunting yard algorithm.
-// This version should properly account for the fact that the power 
-// operation is right-associative. Apart from the extension to include
-// the power operation, you can make the same assumptions as in 
-// basic version.
-
-def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ???
-
-
-// test cases
-// syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3"))  // 3 4 8 * 5 1 - 2 3 ^ ^ / +
-
-
-// (4) Implement a compute function that produces an Int for an
-// input list of tokens in postfix notation.
-
-def compute(toks: Toks, st: List[Int] = Nil) : Int = ???
-
-
-// test cases
-// compute(syard(split("3 + 4 * ( 2 - 1 )")))   // 7
-// compute(syard(split("10 + 12 * 33")))       // 406
-// compute(syard(split("( 5 + 7 ) * 2")))      // 24
-// compute(syard(split("5 + 7 / 2")))          // 8
-// compute(syard(split("5 * 7 / 2")))          // 17
-// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15
-// compute(syard(split("4 ^ 3 ^ 2")))      // 262144
-// compute(syard(split("4 ^ ( 3 ^ 2 )")))  // 262144
-// compute(syard(split("( 4 ^ 3 ) ^ 2")))  // 4096
-// compute(syard(split("( 3 + 1 ) ^ 2 ^ 3")))   // 65536
-
-}