# HG changeset patch
# User Christian Urban <christian.urban@kcl.ac.uk>
# Date 1606694775 0
# Node ID 5616b45d656fed3ef4709b4b94e96687b4d1e78c
# Parent  7a5ad01a85b5a0da95c57352c8947d91a704b40e
Extend cw03/cw04 preliminary deadlines; add most_recommended task (7) and test to danube

diff -r 7a5ad01a85b5 -r 5616b45d656f cws/main_cw04.tex
--- a/cws/main_cw04.tex	Sat Nov 28 15:58:36 2020 +0000
+++ b/cws/main_cw04.tex	Mon Nov 30 00:06:15 2020 +0000
@@ -6,6 +6,7 @@
 \usepackage{../style}
 \usepackage{../langs}
 \usepackage{disclaimer}
+\usepackage{ulem}
 
 \begin{document}
 
@@ -29,7 +30,7 @@
 This part is about searching and backtracking. You are asked to
 implement Scala programs that solve various versions of the
 \textit{Knight's Tour Problem} on a chessboard. The preliminary part (4\%) is
-due on  \cwNINE{} at 5pm; the core part (6\%) is due on \cwNINEa{} at 5pm.
+due on  \sout{\cwNINE{}} \textcolor{red}{16 December} at 5pm; the core part (6\%) is due on \cwNINEa{} at 5pm.
 Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.
 \bigskip 
 %Note the core, more advanced, part might include material you have not
diff -r 7a5ad01a85b5 -r 5616b45d656f cws/pre_cw03.pdf
Binary file cws/pre_cw03.pdf has changed
diff -r 7a5ad01a85b5 -r 5616b45d656f cws/pre_cw03.tex
--- a/cws/pre_cw03.tex	Sat Nov 28 15:58:36 2020 +0000
+++ b/cws/pre_cw03.tex	Mon Nov 30 00:06:15 2020 +0000
@@ -9,7 +9,7 @@
 \usepackage{stackengine}
 %% \usepackage{accents}
 \newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}}
-
+\usepackage{ulem}
 
 \begin{document}
 
@@ -25,7 +25,8 @@
 \bigskip
 
 \IMPORTANT{This part is about the shunting yard algorithm by Dijkstra.
-  The preliminary part is due on \cwEIGHT{} at 5pm and worth 3\%.
+  The preliminary part is due on \sout{\cwEIGHT{}} \textcolor{red}{11 December}
+  at 5pm and worth 3\%.
   Any 1\% you achieve in the preliminary part counts as your
   ``weekly engagement''.}
 
diff -r 7a5ad01a85b5 -r 5616b45d656f main_solution4/knight3.scala
--- a/main_solution4/knight3.scala	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_solution4/knight3.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -60,4 +60,7 @@
 def tour_on_mega_board(dim: Int, path: Path) =
   time_needed(ttour_on_mega_board(dim: Int, path: Path))
 
+
+
+
 }
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube.scala
--- a/main_testing2/danube.scala	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_testing2/danube.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -1,17 +1,19 @@
 // Core Part about Movie Recommendations 
 // at Danube.co.uk
-//===========================================
+//========================================
+
+
+object CW7b { // for purposes of generating a jar
 
 import io.Source
 import scala.util._
 
-object CW7b { // for purposes of generating a jar
 
 // (1) Implement the function get_csv_url which takes an url-string
 //     as argument and requests the corresponding file. The two urls
 //     of interest are ratings_url and movies_url, which correspond 
 //     to CSV-files.
-//     The function should ReTurn the CSV file appropriately broken
+//     The function should return the CSV file appropriately broken
 //     up into lines, and the first line should be dropped (that is without
 //     the header of the CSV file). The result is a list of strings (lines
 //     in the file).
@@ -33,14 +35,19 @@
 //movies.length   // 9742
 
 // (2) Implement two functions that process the CSV files. The ratings
-//     function filters out all ratings below 4 and ReTurns a list of 
-//     (userID, movieID) pairs. The movies function just ReTurns a list 
+//     function filters out all ratings below 4 and returns a list of 
+//     (userID, movieID) pairs. The movies function just returns a list 
 //     of (movieId, title) pairs.
 
 
+//def process_ratings(lines: List[String]) : List[(String, String)] = {
+//  for (cols <- lines.map(_.split(",").toList); 
+//       if (cols(2).toFloat >= 4)) yield (cols(0), cols(1))  
+//}
+
 def process_ratings(lines: List[String]) : List[(String, String)] = {
   for (cols <- lines.map(_.split(",").toList); 
-       if (cols(2).toFloat >= 4)) yield (cols(0), cols(1))  
+       if (cols(2).toInt >= 4)) yield (cols(0), cols(1))  
 }
 
 def process_movies(lines: List[String]) : List[(String, String)] = {
@@ -77,18 +84,8 @@
   }
 }
 
-//
-//val ls = List(("1", "a"), ("2", "a"), ("1", "c"), ("2", "a"), ("1", "c"))
-//
-//val m = groupById(ls, Map())
-//
-//m.getOrElse("1", Nil).count(_ == "c") // => 2
-//m.getOrElse("1", Nil).count(_ == "a") // => 1
-
 // test cases
 //val ratings_map = groupById(good_ratings, Map())
-//groupById(good_ratings, Map()).get("214")
-//groupById(good_ratings, Map()).toList.minBy(_._2.length)
 //val movies_map = movie_names.toMap
 
 //ratings_map.get("414").get.map(movies_map.get(_)) // most prolific recommender with 1227 positive ratings
@@ -96,10 +93,9 @@
 //ratings_map.get("214").get.map(movies_map.get(_)) // least prolific recommender with only 1 positive rating
 
 
-
 //(4) Implement a function that takes a ratings map and a movie_name as argument.
 // The function calculates all suggestions containing
-// the movie mov in its recommendations. It ReTurns a list of all these
+// the movie mov in its recommendations. It returns a list of all these
 // recommendations (each of them is a list and needs to have mov deleted, 
 // otherwise it might happen we recommend the same movie).
 
@@ -124,23 +120,14 @@
 // (5) Implement a suggestions function which takes a rating
 // map and a movie_name as arguments. It calculates all the recommended
 // movies sorted according to the most frequently suggested movie(s) first.
-
-// needed in Scala 2.13.
- 
-def mapValues[S, T, R](m: Map[S, T], f: T => R) =
-  m.map { case (x, y) => (x, f(y)) }
-
 def suggestions(recs: Map[String, List[String]], 
                     mov_name: String) : List[String] = {
   val favs = favourites(recs, mov_name).flatten
-  val favs_counted = mapValues(favs.groupBy(identity), (v:List[String]) => v.size).toList
+  val favs_counted = favs.groupBy(identity).view.mapValues(_.size).toList
   val favs_sorted = favs_counted.sortBy(_._2).reverse
   favs_sorted.map(_._1)
 }
 
-// check
-// groupMap is equivalent to groupBy(key).mapValues(_.map(f))
-
 // test cases
 
 //suggestions(ratings_map, "912")
@@ -163,7 +150,7 @@
 // recommendations(ratings_map, movies_map, "912")
 //   => List(Godfather, Star Wars: Episode IV - A NewHope (1977))
 
-//recommendations(ratings_map, movies_map, "260")
+// recommendations(ratings_map, movies_map, "260")
 //   => List(Star Wars: Episode V - The Empire Strikes Back (1980), 
 //           Star Wars: Episode VI - Return of the Jedi (1983))
 
@@ -177,20 +164,53 @@
 //   => List(Shawshank Redemption, Forrest Gump (1994))
 
 // recommendations(ratings_map, movies_map, "4")
-//   => Nil  (there are three ratings fro this movie in ratings.csv but they are not positive)     
+//   => Nil  (there are three ratings for this movie in ratings.csv but they are not positive)     
+
+// (7) Calculate the recommendations for all movies according to
+// what the recommendations function in (6) produces (this
+// can take a few seconds). Put all recommendations into a list 
+// (of strings) and count how often the strings occur in
+// this list. This produces a list of string-int pairs,
+// where the first component is the movie name and the second
+// is the number of times that movie was recommended.
+// Sort all the pairs according to the number
+// of times they were recommended (most recommended movie name 
+// first).
+
+def occurrences(xs: List[String]): List[(String, Int)] =
+  for (x <- xs.distinct) yield (x, xs.count(_ == x))
+
+def most_recommended(recs: Map[String, List[String]],
+                     movs: Map[String, String]) : List[(String, Int)] = {
+   val all =  (for (name <- movs.toList.map(_._1)) yield {
+     recommendations(recs, movs, name)                     
+   }).flatten
+   val occs = occurrences(all)
+   occs.sortBy(_._2).reverse
+}
 
 
-// If you want to calculate the recomendations for all movies.
-// Will take a few seconds calculation time.
-
-//val all = for (name <- movie_names.map(_._1)) yield {
-//  recommendations(ratings_map, movies_map, name)
-//}
-
-// helper functions
-//List().take(2)
-//List(1).take(2)
-//List(1,2).take(2)
-//List(1,2,3).take(2)
+//most_recommended(ratings_map, movies_map).take(3)
+// =>
+// List((Matrix,698), 
+//      (Star Wars: Episode IV - A New Hope (1977),402), 
+//      (Jerry Maguire (1996),382))
 
 }
+
+//val ratings_url = """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
+//val movies_url = """https://nms.kcl.ac.uk/christian.urban/movies.csv"""
+
+/*
+val ratings = get_csv_url(ratings_url)
+val movies = get_csv_url(movies_url)
+
+val good_ratings = process_ratings(ratings)
+val movie_names = process_movies(movies)
+
+val ratings_map = groupById(good_ratings, Map())
+val movies_map = movie_names.toMap
+
+
+println(most_recommended(ratings_map, movies_map).take(3))
+*/
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test.sh
--- a/main_testing2/danube_test.sh	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_testing2/danube_test.sh	Mon Nov 30 00:06:15 2020 +0000
@@ -173,3 +173,21 @@
   fi
 fi
 
+### danube most_recommended
+
+if [ $tsts -eq 0 ]
+then
+  echo -e  "  val ratings_map2 =  for ((k, v) <- ratings_map) yield (k, v.take(2)) " >> $out
+  echo -e  "  most_recommended(ratings_map2, movies_map).take(3) == " >> $out
+  echo -e  "      List((\"M*A*S*H (a.k.a. MASH) (1970)\",15), " >> $out 
+  echo -e  "           (\"Star Trek: First Contact (1996)\",10), " >> $out
+  echo -e  "           (\"Inception (2010)\",9)) " >> $out
+
+  if (scala_assert "danube.scala" "danube_test7.scala") 
+  then
+    echo -e  "  --> success" >> $out
+  else
+    echo -e  "  --> ONE OF THE TESTS FAILED\n" >>  $out
+  fi
+fi
+
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test2.scala
--- a/main_testing2/danube_test2.scala	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_testing2/danube_test2.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -5,7 +5,7 @@
 import CW7b._
 
 def urban_get_csv_file(name: String) : List[String] = {
-  val csv = Source.fromFile(name)
+  val csv = Source.fromFile(name)("ISO-8859-1")
   csv.mkString.split("\n").toList.drop(1)
 }
 
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test4.scala
--- a/main_testing2/danube_test4.scala	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_testing2/danube_test4.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -9,17 +9,17 @@
 def urban_get_csv_file(name: String) : List[String] = {
   import io.Source
   import scala.util._
-  val csv = Source.fromFile(name)
+  val csv = Source.fromFile(name)("ISO-8859-1")
   csv.mkString.split("\n").toList.drop(1)
 }
 
 def urban_process_ratings(lines: List[String]) : List[(String, String)] = {
   for (cols <- lines.map(_.split(",").toList); 
-       if (cols(2).toFloat >= 4)) yield (cols(0), cols(1))  
+       if (cols(2).toInt >= 4)) yield (cols(0), cols(1))  
 }
 
 
-val urban_good_ratings = process_ratings(urban_get_csv_file("ratings.csv"))
+val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
 
 val urban_ratings_map = urban_groupById(urban_good_ratings)
 
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test5.scala
--- a/main_testing2/danube_test5.scala	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_testing2/danube_test5.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -3,25 +3,35 @@
 
 import CW7b._
 
-def urban_groupById(ratings: List[(String, String)]) = 
-  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
+def urban_groupById(ratings: List[(String, String)], 
+              m: Map[String, List[String]]) : Map[String, List[String]] = ratings match {
+  case Nil => m
+  case (id, mov) :: rest => {
+    val old_ratings = m.getOrElse (id, Nil)
+    val new_ratings = m + (id -> (mov :: old_ratings))
+    urban_groupById(rest, new_ratings)
+  }
+}
+
+//def urban_groupById(ratings: List[(String, String)]) = 
+//  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
 
 def urban_get_csv_file(name: String) : List[String] = {
   import io.Source
   import scala.util._
-  val csv = Source.fromFile(name)
+  val csv = Source.fromFile(name)("ISO-8859-1")
   csv.mkString.split("\n").toList.drop(1)
 }
 
 def urban_process_ratings(lines: List[String]) : List[(String, String)] = {
   for (cols <- lines.map(_.split(",").toList); 
-       if (cols(2).toFloat >= 4)) yield (cols(0), cols(1))  
+       if (cols(2).toInt >= 4)) yield (cols(0), cols(1))  
 }
 
 
-val urban_good_ratings = process_ratings(urban_get_csv_file("ratings.csv"))
+val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
 
-val urban_ratings_map = urban_groupById(urban_good_ratings)
+val urban_ratings_map = urban_groupById(urban_good_ratings, Map())
 
 assert(suggestions(urban_ratings_map, "912").length  == 4110)
 assert(suggestions(urban_ratings_map, "858").length  == 4883)
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test6.scala
--- a/main_testing2/danube_test6.scala	Sat Nov 28 15:58:36 2020 +0000
+++ b/main_testing2/danube_test6.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -3,20 +3,29 @@
 
 // first test 
 
+def urban_groupById(ratings: List[(String, String)], 
+              m: Map[String, List[String]]) : Map[String, List[String]] = ratings match {
+  case Nil => m
+  case (id, mov) :: rest => {
+    val old_ratings = m.getOrElse (id, Nil)
+    val new_ratings = m + (id -> (mov :: old_ratings))
+    urban_groupById(rest, new_ratings)
+  }
+}
 
-def urban_groupById(ratings: List[(String, String)]) = 
-  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
+//def urban_groupById(ratings: List[(String, String)]) = 
+//  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
 
 def urban_get_csv_file(name: String) : List[String] = {
   import io.Source
   import scala.util._
-  val csv = Source.fromFile(name)
+  val csv = Source.fromFile(name)("ISO-8859-1")
   csv.mkString.split("\n").toList.drop(1)
 }
 
 def urban_process_ratings(lines: List[String]) : List[(String, String)] = {
   for (cols <- lines.map(_.split(",").toList); 
-       if (cols(2).toFloat >= 4)) yield (cols(0), cols(1))  
+       if (cols(2).toInt >= 4)) yield (cols(0), cols(1))  
 }
 
 def urban_process_movies(lines: List[String]) : List[(String, String)] = {
@@ -24,10 +33,10 @@
 }
 
 
-val urban_good_ratings = process_ratings(urban_get_csv_file("ratings.csv"))
-val urban_movie_names = process_movies(urban_get_csv_file("movies.csv")).toMap
+val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
+val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv")).toMap
 
-val urban_ratings_map = urban_groupById(urban_good_ratings)
+val urban_ratings_map = urban_groupById(urban_good_ratings, Map())
 
 assert((for (n <- List("1", "2", "3", "4", "5")) yield {
   recommendations(urban_ratings_map, urban_movie_names, n).length
diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test7.scala
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main_testing2/danube_test7.scala	Mon Nov 30 00:06:15 2020 +0000
@@ -0,0 +1,48 @@
+
+import CW7b._
+
+// test for most_recommended
+
+def urban_groupById(ratings: List[(String, String)], 
+              m: Map[String, List[String]]) : Map[String, List[String]] = ratings match {
+  case Nil => m
+  case (id, mov) :: rest => {
+    val old_ratings = m.getOrElse (id, Nil)
+    val new_ratings = m + (id -> (mov :: old_ratings))
+    urban_groupById(rest, new_ratings)
+  }
+}
+//def urban_groupById(ratings: List[(String, String)]) = 
+//  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
+
+def urban_get_csv_file(name: String) : List[String] = {
+  import io.Source
+  import scala.util._
+  val csv = Source.fromFile(name)("ISO-8859-1")
+  csv.mkString.split("\n").toList.drop(1)
+}
+
+def urban_process_ratings(lines: List[String]) : List[(String, String)] = {
+  for (cols <- lines.map(_.split(",").toList); 
+       if (cols(2).toInt >= 4)) yield (cols(0), cols(1))  
+}
+
+def urban_process_movies(lines: List[String]) : List[(String, String)] = {
+  for (cols <- lines.map(_.split(",").toList)) yield (cols(0), cols(1))  
+}
+
+
+val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
+val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv"))
+
+val urban_movie_names_map = urban_movie_names.toMap
+val urban_ratings_map = urban_groupById(urban_good_ratings, Map())
+//val urban_ratings_map = groupById(urban_good_ratings, Map())
+
+val urban_ratings_map2 =  for ((k, v) <- urban_ratings_map) yield (k, v.take(2))
+
+assert(most_recommended(urban_ratings_map2, urban_movie_names_map).take(3) == 
+       List(("M*A*S*H (a.k.a. MASH) (1970)",15), 
+            ("Star Trek: First Contact (1996)",10), 
+            ("Inception (2010)",9)))
+
diff -r 7a5ad01a85b5 -r 5616b45d656f slides/slides04.pdf
Binary file slides/slides04.pdf has changed
diff -r 7a5ad01a85b5 -r 5616b45d656f slides/slides04.tex
--- a/slides/slides04.tex	Sat Nov 28 15:58:36 2020 +0000
+++ b/slides/slides04.tex	Mon Nov 30 00:06:15 2020 +0000
@@ -461,45 +461,6 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
-\begin{frame}[c]
-
-\begin{center}
-  \includegraphics[angle=90,scale=0.35]{/Users/cu/vote.pdf}
-\end{center}
- 
- \only<2>{%
-\begin{textblock}{13}(10,10)
-\includegraphics[scale=0.2]{/Users/cu/goodvote.png}
-\end{textblock}}
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
-\begin{frame}[c]
-
-\begin{center}
-  \includegraphics[scale=0.25]{/Users/cu/dresden.png}
-\end{center}
-
-\begin{textblock}{13}(2,12)
-\includegraphics[scale=0.08]{/Users/cu/kiss.jpg}
-\end{textblock}
-
-\begin{textblock}{13}(6.8,12)
-\includegraphics[scale=0.079]{/Users/cu/pioniere.jpg}
-\end{textblock}
-
-\begin{textblock}{13}(11,12)
-\includegraphics[scale=0.20]{/Users/cu/iron.jpg}
-\end{textblock}
-
-\DOWNarrow{1}{11}{8.6}
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
-