updated
author Christian Urban <christian.urban@kcl.ac.uk>
Mon, 08 Nov 2021 00:17:50 +0000
changeset 400 e48ea8300b2d
parent 399 b17a98b0c52f
child 401 9471c3b7ea02
updated
core_solution2/docdiff.jar
core_solution2/docdiff.scala
cws/main_cw03.pdf
cws/main_cw04.pdf
cws/main_cw04.tex
cws/main_cw05.pdf
cws/main_cw05.tex
cws/upload
handouts/pep-ho.pdf
handouts/pep-ho.tex
main_solution1/drumb.scala
main_solution2/danube.jar
main_solution2/danube.scala
main_solution3/re.jar
main_solution3/re.scala
main_solution4/knight1.scala
main_solution4/knight2.scala
main_solution4/knight3.scala
main_solution5/bf.jar
main_solution5/bf.scala
main_solution5/bfc.jar
main_templates3/re.scala
main_templates4/knight1.scala
main_templates4/knight2.scala
main_templates5/bf.scala
pre_solution2/docdiff.jar
pre_solution2/docdiff.scala
Binary file core_solution2/docdiff.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core_solution2/docdiff.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -0,0 +1,117 @@
+// Preliminary Part about Code Similarity
+//========================================
+
+
+object CW7a { 
+
+//(1) Complete the clean function below. It should find
+//    all words in a string using the regular expression
+//    \w+  and the library function 
+//
+//         some_regex.findAllIn(some_string)
+//
+//    The words should be returned as a list of strings.
+
+def clean(s: String) : List[String] = 
+  ("""\w+""".r).findAllIn(s).toList
+
+
+//(2) The function occurrences calculates the number of times  
+//    strings occur in a list of strings. These occurrences should 
+//    be calculated as a Map from strings to integers.
+
+def occurrences(xs: List[String]): Map[String, Int] =
+  (for (x <- xs.distinct) yield (x, xs.count(_ == x))).toMap
+
+//(3) This function calculates the dot-product of two documents
+//    (lists of strings). For this it calculates the occurrence
+//    maps from (2) and then multiplies the corresponding occurrences. 
+//    If a string does not occur in a document, the product is zero.
+//    The function finally sums up all products. 
+
+def prod(lst1: List[String], lst2: List[String]) : Int = {
+    val words = (lst1 ::: lst2).distinct
+    val occs1 = occurrences(lst1)
+    val occs2 = occurrences(lst2)
+    words.map{ w => occs1.getOrElse(w, 0) * occs2.getOrElse(w, 0) }.sum
+}          
+
+//(4) Complete the functions overlap and similarity. The overlap of
+//    two documents is calculated by the formula given in the assignment
+//    description. The similarity of two strings is given by the overlap
+//    of the cleaned (see (1)) strings.  
+
+def overlap(lst1: List[String], lst2: List[String]) : Double = {
+    val m1 = prod(lst1, lst1)
+    val m2 = prod(lst2, lst2) 
+    prod(lst1, lst2).toDouble / (List(m1, m2).max)
+}
+
+def similarity(s1: String, s2: String) : Double =
+  overlap(clean(s1), clean(s2))
+
+
+/*
+
+
+val list1 = List("a", "b", "b", "c", "d") 
+val list2 = List("d", "b", "d", "b", "d")
+
+occurrences(List("a", "b", "b", "c", "d"))   // Map(a -> 1, b -> 2, c -> 1, d -> 1)
+occurrences(List("d", "b", "d", "b", "d"))   // Map(d -> 3, b -> 2)
+
+prod(list1,list2) // 7 
+
+overlap(list1, list2)   // 0.5384615384615384
+overlap(list2, list1)   // 0.5384615384615384
+overlap(list1, list1)   // 1.0
+overlap(list2, list2)   // 1.0
+
+// Plagiarism examples from 
+// https://desales.libguides.com/avoidingplagiarism/examples
+
+val orig1 = """There is a strong market demand for eco-tourism in
+Australia. Its rich and diverse natural heritage ensures Australia's
+capacity to attract international ecotourists and gives Australia a
+comparative advantage in the highly competitive tourism industry."""
+
+val plag1 = """There is a high market demand for eco-tourism in
+Australia. Australia has a comparative advantage in the highly
+competitive tourism industry due to its rich and varied natural
+heritage which ensures Australia's capacity to attract international
+ecotourists."""
+
+similarity(orig1, plag1)
+
+
+// Plagiarism examples from 
+// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php
+
+val orig2 = """No oil spill is entirely benign. Depending on timing and
+location, even a relatively minor spill can cause significant harm to
+individual organisms and entire populations. Oil spills can cause
+impacts over a range of time scales, from days to years, or even
+decades for certain spills. Impacts are typically divided into acute
+(short-term) and chronic (long-term) effects. Both types are part of a
+complicated and often controversial equation that is addressed after
+an oil spill: ecosystem recovery."""
+
+val plag2 = """There is no such thing as a "good" oil spill. If the
+time and place are just right, even a small oil spill can cause damage
+to sensitive ecosystems. Further, spills can cause harm days, months,
+years, or even decades after they occur. Because of this, spills are
+usually broken into short-term (acute) and long-term (chronic)
+effects. Both of these types of harm must be addressed in ecosystem
+recovery: a controversial tactic that is often implemented immediately
+following an oil spill."""
+
+overlap(clean(orig2), clean(plag2))
+similarity(orig2, plag2)
+
+// The punchline: everything above 0.6 looks suspicious and 
+// should be looked at by staff.
+
+*/
+
+
+}
Binary file cws/main_cw03.pdf has changed
Binary file cws/main_cw04.pdf has changed
--- a/cws/main_cw04.tex	Sat Nov 06 00:06:39 2021 +0000
+++ b/cws/main_cw04.tex	Mon Nov 08 00:17:50 2021 +0000
@@ -34,7 +34,7 @@
 % \textcolor{red}{16 December} at 5pm; the core part (6\%)
 % is due on \cwNINEa{} at 5pm. Any 1\% you achieve in the
 % preliminary part counts as your ``weekly engagement''.
-\bigskip 
+\medskip 
 
 % Note the core, more advanced, part might include material you have not
 %yet seen in the first three lectures. \bigskip
@@ -218,7 +218,7 @@
 made available by importing \texttt{scala.annotation.tailrec}.\medskip
 
 
-
+\newpage
 
 \subsection*{Tasks}
 
Binary file cws/main_cw05.pdf has changed
--- a/cws/main_cw05.tex	Sat Nov 06 00:06:39 2021 +0000
+++ b/cws/main_cw05.tex	Mon Nov 08 00:17:50 2021 +0000
@@ -98,8 +98,7 @@
 more esoteric languages out there. One is called \emph{brainf***}. 
 \here{https://esolangs.org/wiki/Brainfuck}
 You
-are asked in this part to implement an interpreter for
-a slight extension of this language.
+are asked in this part to implement an interpreter for this language.
 
 Urban M\"uller developed the original version of brainf*** in 1993.  A close
 relative of this language was already introduced in 1964 by Corado
--- a/cws/upload	Sat Nov 06 00:06:39 2021 +0000
+++ b/cws/upload	Mon Nov 08 00:17:50 2021 +0000
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -euo pipefail
 
-fls=${1:-"pre_cw01.pdf pre_cw02.pdf pre_cw03.pdf main_cw01.pdf main_cw02.pdf main_cw03.pdf main_cw04.pdf main_cw05.pdf"} 
+fls=${1:-"core_cw01.pdf core_cw02.pdf core_cw03.pdf main_cw01.pdf main_cw02.pdf main_cw03.pdf main_cw04.pdf main_cw05.pdf"} 
 
 for f in $fls; do
     echo -e "uploading $f"
Binary file handouts/pep-ho.pdf has changed
--- a/handouts/pep-ho.tex	Sat Nov 06 00:06:39 2021 +0000
+++ b/handouts/pep-ho.tex	Mon Nov 08 00:17:50 2021 +0000
@@ -194,13 +194,13 @@
 \includegraphics[scale=0.15]{../pics/vscode.png}\\[-10mm]\mbox{}
 \end{center}
 \caption{My installation of VS Code includes the following
-  packages from Marketplace: \textbf{Scala Syntax (official)} 0.3.4,
-  \textbf{Code Runner} 0.9.13, \textbf{Code Spell Checker} 1.7.17,
-  \textbf{Rewrap} 1.9.1 and \textbf{Subtle Match
+  packages from Marketplace: \textbf{Scala Syntax (official)} 0.5.4,
+  \textbf{Code Runner} 0.11.6, \textbf{Code Spell Checker} 2.0.12,
+  \textbf{Rewrap} 1.14.0 and \textbf{Subtle Match
   Brackets} 3.0.0. I have also bound the keys \keys{Ctrl} \keys{Ret} to the
   action ``Run-Selected-Text-In-Active-Terminal'' in order to quickly
   evaluate small code snippets in the Scala REPL. I use the internal
-  terminal to run Scala 2.13.1.\label{vscode}}
+  terminal to run Scala 2.13.6.\label{vscode}}
 \end{boxedminipage}
 \end{figure}  
 
@@ -380,8 +380,8 @@
 \end{lstlisting}}   
 & 
 {\footnotesize\begin{lstlisting}[xleftmargin=0mm]
-for (y <- (0 until H)/*@\keys{\texttt{.par}}@*/) {
-  for (x <- (0 until W)/*@\keys{\texttt{.par}}@*/) {
+for (y <- (0 until H).par) {
+  for (x <- (0 until W).par) {
       
     val c = start + 
       (x * d_x + y * d_y * i)
@@ -461,7 +461,7 @@
 
 \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
 $ scala
-Welcome to Scala 2.13.1 (Java HotSpot(TM) 64-Bit Server VM, Java 9).
+Welcome to Scala 2.13.6 (OpenJDK 64-Bit Server VM, Java 17).
 Type in expressions for evaluation. Or try :help.
 
 scala>
@@ -1717,8 +1717,8 @@
 about Scala and of course lots of help online. For example
 
 \begin{itemize}
-\item \url{http://www.scala-lang.org/docu/files/ScalaByExample.pdf}
-\item \url{http://www.scala-lang.org/docu/files/ScalaTutorial.pdf}
+%%\item \url{http://www.scala-lang.org/docu/files/ScalaByExample.pdf}
+%%\item \url{http://www.scala-lang.org/docu/files/ScalaTutorial.pdf}
 \item \url{https://www.youtube.com/user/ShadowofCatron}
 \item \url{http://docs.scala-lang.org/tutorials}
 \item \url{https://www.scala-exercises.org}
--- a/main_solution1/drumb.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution1/drumb.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -1,4 +1,4 @@
-// Core Part 6 about a really dumb investment strategy
+// Main Part 1 about a really dumb investment strategy
 //=====================================================
 
 
@@ -6,7 +6,7 @@
 //   > scala -d drumb.jar  drumb.scala
 
 
-object CW6b { 
+object M1 { 
 
 
 //two test portfolios
Binary file main_solution2/danube.jar has changed
--- a/main_solution2/danube.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution2/danube.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -3,7 +3,7 @@
 //========================================
 
 
-object CW7b { // for purposes of generating a jar
+object M2 { // for purposes of generating a jar
 
 import io.Source
 import scala.util._
@@ -57,11 +57,6 @@
 //good_ratings.length   //48580
 //movie_names.length    // 9742
 
-//==============================================
-// Do not change anything below, unless you want 
-// to submit the file for the advanced part 3!
-//==============================================
-
 
 // (3) Implement a grouping function that calulates a map
 //     containing the userIds and all the corresponding recommendations 
@@ -162,35 +157,5 @@
 // recommendations(ratings_map, movies_map, "4")
 //   => Nil  (there are three ratings for this movie in ratings.csv but they are not positive)     
 
-// (7) Calculate the recommendations for all movies according to
-// what the recommendations function in (6) produces (this
-// can take a few seconds). Put all recommendations into a list 
-// (of strings) and count how often the strings occur in
-// this list. This produces a list of string-int pairs,
-// where the first component is the movie name and the second
-// is the number of how many times they were recommended. 
-// Sort all the pairs according to the number
-// of times they were recommended (most recommended movie name 
-// first).
-
-def occurrences(xs: List[String]): List[(String, Int)] =
-  for (x <- xs.distinct) yield (x, xs.count(_ == x))
-
-def most_recommended(recs: Map[String, List[String]],
-                     movs: Map[String, String]) : List[(String, Int)] = {
-   val all =  (for (name <- movs.toList.map(_._1)) yield {
-     recommendations(recs, movs, name)                     
-   }).flatten
-   val occs = occurrences(all)
-   occs.sortBy(_._2).reverse
-}
-
-
-//most_recommended(ratings_map, movies_map).take(3)
-// =>
-// List((Matrix,698), 
-//      (Star Wars: Episode IV - A New Hope (1977),402), 
-//      (Jerry Maguire (1996),382))
-
 
 }
Binary file main_solution3/re.jar has changed
--- a/main_solution3/re.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution3/re.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -155,8 +155,8 @@
 //println(size(der('a', der('a', der('a', EVIL)))))   // => 58
 
 // size with simplification
-//println(simp(der('a', der('a', EVIL))))          // => 8
-//println(simp(der('a', der('a', der('a', EVIL)))))// => 8
+//println(simp(der('a', der('a', EVIL))))          
+//println(simp(der('a', der('a', der('a', EVIL)))))
 
 //println(size(simp(der('a', der('a', EVIL)))))           // => 8
 //println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8
@@ -173,11 +173,11 @@
   val start = System.nanoTime()
   for (j <- 1 to i) code
   val end = System.nanoTime()
-  (end - start)/(i * 1.0e9)
+  "%.5f".format((end - start)/(i * 1.0e9))
 }
 
 //for (i <- 0 to 5000000 by 500000) {
-//  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") 
+//  println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") 
 //}
 
 // another "power" test case 
@@ -187,7 +187,7 @@
 //
 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
 //
-//    where SEQ is nested 100 times.
+//    where SEQ is nested 50 times.
  
 
 
--- a/main_solution4/knight1.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution4/knight1.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -1,7 +1,7 @@
 // Part 1 about finding and counting Knight's tours
 //==================================================
 
-object CW9a {   // for preparing the jar
+object M4a {   // for preparing the jar
 
 type Pos = (Int, Int)    // a position on a chessboard 
 type Path = List[Pos]    // a path...a list of positions
@@ -43,8 +43,6 @@
   List(( 1,  2),( 2,  1),( 2, -1),( 1, -2),
        (-1, -2),(-2, -1),(-2,  1),(-1,  2)).map(add_pair(x, _))
 
-// 1 mark
-
 def legal_moves(dim: Int, path: Path, x: Pos): List[Pos] = 
   moves(x).filter(is_legal(dim, path, _))
 
@@ -62,7 +60,6 @@
 //assert(legal_moves(2, Nil, (0,0)) == List())
 //assert(legal_moves(3, Nil, (0,0)) == List((1,2), (2,1)))
 
-// 2 marks
 
 def tcount_tours(dim: Int, path: Path): Int = {
   if (path.length == dim * dim) 1
@@ -119,7 +116,6 @@
 }
 */
 
-// 1 mark
 
 def first(xs: List[Pos], f: Pos => Option[Path]): Option[Path] = xs match {
   case Nil => None
@@ -136,8 +132,6 @@
 //first(List((1, 0),(2, 0),(3, 0)), foo)
 
 
-// 1 mark
-
 def tfirst_tour(dim: Int, path: Path): Option[Path] = {
   if (path.length == dim * dim) Some(path)
   else
@@ -157,7 +151,7 @@
 
 // 15 secs for 8 x 8
 //val ts1 = time_needed(0,first_tour(8, List((0, 0))).get)
-//val ts1 = time_needed(0,first_tour(8, List((1, 1))).get)
+//??val ts1 = time_needed(0,first_tour(8, List((1, 1))).get)
 
 // no result for 4 x 4
 //val ts2 = time_needed(0, first_tour(4, List((0, 0))))
--- a/main_solution4/knight2.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution4/knight2.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -1,7 +1,7 @@
-// Part 4 about finding a single tour using the Warnsdorf Rule
+// Part 2 about finding a single tour using the Warnsdorf Rule
 //=============================================================
 
-object CW9b { // for preparing the jar
+object M4b { // for preparing the jar
 
 type Pos = (Int, Int)
 type Path = List[Pos]
--- a/main_solution4/knight3.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution4/knight3.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -1,7 +1,7 @@
 // Part 3 about finding a single tour using the Warnsdorf Rule
 //=============================================================
 
-object CW9c { // for preparing the jar
+object M4c { // for preparing the jar
 
 type Pos = (Int, Int)
 type Path = List[Pos]
@@ -61,6 +61,7 @@
   time_needed(ttour_on_mega_board(dim: Int, path: Path))
 
 
-
+// testcases
+//print_board(70, tour_on_mega_board(70, List((0, 0))).get)
 
 }
Binary file main_solution5/bf.jar has changed
--- a/main_solution5/bf.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_solution5/bf.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -2,7 +2,7 @@
 // the Brainf***++ language
 //==============================================
 
-object CW10a {  
+object M5a {  
 
 
 // representation of Bf memory 
@@ -96,18 +96,11 @@
       case '+' => (pc + 1, mp, write(mem, mp, sread(mem, mp) + 1))
       case '-' => (pc + 1, mp, write(mem, mp, sread(mem, mp) - 1))
       case '.' => { print(sread(mem, mp).toChar); (pc + 1, mp, mem) }
-      //case ',' => (pc + 1, mp, write(mem, mp, Console.in.read().toByte))
-      //case ',' => (pc + 1, mp, write(mem, mp, scala.io.StdIn.readByte()))
       case '['  => 
 	      if (sread(mem, mp) == 0) (jumpRight(prog, pc + 1, 0), mp, mem) else (pc + 1, mp, mem) 
       case ']'  => 
 	      if (sread(mem, mp) != 0) (jumpLeft(prog, pc - 1, 0), mp, mem) else (pc + 1, mp, mem) 
  
-      // new commands
-      case '@' => (pc + 1, mp, write(mem, sread(mem, mp), sread(mem, mp - 1)))
-      case '*' => (pc + 1, mp, write(mem, mp, sread(mem, mp) * sread(mem, mp -1)))
-      case '#' => { println(s"${sread(mem, mp)}"); (pc + 1, mp, mem) }
-      
       case _ => (pc + 1, mp, mem)
     }		     
     compute(prog, new_pc, new_mp, new_mem)	
@@ -142,15 +135,6 @@
 // prints out numbers 0 to 9
 //run("""+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]""")
 
-// bf++ program calculating the cube-function, 10 * 10 * 10 = 1000
-//run("""++++++++++#>+***#""")           // Map(0 -> 10, 1 -> 1000)
-
-
-// bf++ program copies 3 from 0-cell to to cells 1, 4, 5, 6 and 7
-// (note that because of how the program wprks cell 1 will contain 7) 
-//run("""+++>+@+@+@+@+@""")   // Map(0 -> 3, 1 -> 7, 4 -> 3, 5 -> 3, 6 -> 3, 7 -> 3)
-
-
 
 // some more "useful" programs
 //-----------------------------
Binary file main_solution5/bfc.jar has changed
--- a/main_templates3/re.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_templates3/re.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -130,15 +130,15 @@
   val start = System.nanoTime()
   for (j <- 1 to i) code
   val end = System.nanoTime()
-  (end - start)/(i * 1.0e9)
+  "%.5f".format((end - start)/(i * 1.0e9))
 }
 
 for (i <- 0 to 5000000 by 500000) {
-  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
+  println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") 
 }
 
 // another "power" test case 
-simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE
+simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE
 
 // the Iterator produces the rexp
 //
--- a/main_templates4/knight1.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_templates4/knight1.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -4,10 +4,10 @@
 
 object M4a {
 
-// If you need any auxiliary function, feel free to 
-// implement it, but do not make any changes to the
+// If you need any auxiliary functions, feel free to 
+// implement them, but do not make any changes to the
 // templates below. Also have a look whether the functions
-// at the end are of any help.
+// at the end of the file are of any help.
 
 
 
@@ -85,7 +85,9 @@
 }
 
 // can be called for example with
+//
 //     time_needed(count_tours(dim, List((0, 0))))
+//
 // in order to print out the time that is needed for 
 // running count_tours
 
--- a/main_templates4/knight2.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_templates4/knight2.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -7,8 +7,8 @@
 
 // !!! Copy any function you need from file knight1.scala !!!
 //
-// If you need any auxiliary function, feel free to 
-// implement it, but do not make any changes to the
+// If you need any auxiliary functions, feel free to 
+// implement them, but do not make any changes to the
 // templates below.
 
 type Pos = (Int, Int)    // a position on a chessboard 
--- a/main_templates5/bf.scala	Sat Nov 06 00:06:39 2021 +0000
+++ b/main_templates5/bf.scala	Mon Nov 08 00:17:50 2021 +0000
@@ -12,9 +12,9 @@
 
 
 // (1) Write a function that takes a file name as argument and
+// requests the corresponding file from disk. It returns the
+// and requests the corresponding file from disk. It returns the
 // content of the file as a String. If the file does not exists,
-// the function should Return the empty string.
+// the function should return the empty string.
 
 import io.Source
 import scala.util._
Binary file pre_solution2/docdiff.jar has changed
--- a/pre_solution2/docdiff.scala	Sat Nov 06 00:06:39 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-// Preliminary Part about Code Similarity
-//========================================
-
-
-object CW7a { 
-
-//(1) Complete the clean function below. It should find
-//    all words in a string using the regular expression
-//    \w+  and the library function 
-//
-//         some_regex.findAllIn(some_string)
-//
-//    The words should be Returned as a list of strings.
-
-def clean(s: String) : List[String] = 
-  ("""\w+""".r).findAllIn(s).toList
-
-
-//(2) The function occurrences calculates the number of times  
-//    strings occur in a list of strings. These occurrences should 
-//    be calculated as a Map from strings to integers.
-
-def occurrences(xs: List[String]): Map[String, Int] =
-  (for (x <- xs.distinct) yield (x, xs.count(_ == x))).toMap
-
-//(3) This functions calculates the dot-product of two documents
-//    (list of strings). For this it calculates the occurrence
-//    maps from (2) and then multiplies the corresponding occurrences. 
-//    If a string does not occur in a document, the product is zero.
-//    The function finally sums up all products. 
-
-def prod(lst1: List[String], lst2: List[String]) : Int = {
-    val words = (lst1 ::: lst2).distinct
-    val occs1 = occurrences(lst1)
-    val occs2 = occurrences(lst2)
-    words.map{ w => occs1.getOrElse(w, 0) * occs2.getOrElse(w, 0) }.sum
-}          
-
-//(4) Complete the functions overlap and similarity. The overlap of
-//    two documents is calculated by the formula given in the assignment
-//    description. The similarity of two strings is given by the overlap
-//    of the cleaned (see (1)) strings.  
-
-def overlap(lst1: List[String], lst2: List[String]) : Double = {
-    val m1 = prod(lst1, lst1)
-    val m2 = prod(lst2, lst2) 
-    prod(lst1, lst2).toDouble / (List(m1, m2).max)
-}
-
-def similarity(s1: String, s2: String) : Double =
-  overlap(clean(s1), clean(s2))
-
-
-/*
-
-
-val list1 = List("a", "b", "b", "c", "d") 
-val list2 = List("d", "b", "d", "b", "d")
-
-occurrences(List("a", "b", "b", "c", "d"))   // Map(a -> 1, b -> 2, c -> 1, d -> 1)
-occurrences(List("d", "b", "d", "b", "d"))   // Map(d -> 3, b -> 2)
-
-prod(list1,list2) // 7 
-
-overlap(list1, list2)   // 0.5384615384615384
-overlap(list2, list1)   // 0.5384615384615384
-overlap(list1, list1)   // 1.0
-overlap(list2, list2)   // 1.0
-
-// Plagiarism examples from 
-// https://desales.libguides.com/avoidingplagiarism/examples
-
-val orig1 = """There is a strong market demand for eco-tourism in
-Australia. Its rich and diverse natural heritage ensures Australia's
-capacity to attract international ecotourists and gives Australia a
-comparative advantage in the highly competitive tourism industry."""
-
-val plag1 = """There is a high market demand for eco-tourism in
-Australia. Australia has a comparative advantage in the highly
-competitive tourism industry due to its rich and varied natural
-heritage which ensures Australia's capacity to attract international
-ecotourists."""
-
-similarity(orig1, plag1)
-
-
-// Plagiarism examples from 
-// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php
-
-val orig2 = """No oil spill is entirely benign. Depending on timing and
-location, even a relatively minor spill can cause significant harm to
-individual organisms and entire populations. Oil spills can cause
-impacts over a range of time scales, from days to years, or even
-decades for certain spills. Impacts are typically divided into acute
-(short-term) and chronic (long-term) effects. Both types are part of a
-complicated and often controversial equation that is addressed after
-an oil spill: ecosystem recovery."""
-
-val plag2 = """There is no such thing as a "good" oil spill. If the
-time and place are just right, even a small oil spill can cause damage
-to sensitive ecosystems. Further, spills can cause harm days, months,
-years, or even decades after they occur. Because of this, spills are
-usually broken into short-term (acute) and long-term (chronic)
-effects. Both of these types of harm must be addressed in ecosystem
-recovery: a controversial tactic that is often implemented immediately
-following an oil spill."""
-
-overlap(clean(orig2), clean(plag2))
-similarity(orig2, plag2)
-
-// The punchline: everything above 0.6 looks suspicious and 
-// should be looked at by staff.
-
-*/
-
-
-}