# HG changeset patch # User Christian Urban # Date 1606694775 0 # Node ID 5616b45d656fed3ef4709b4b94e96687b4d1e78c # Parent 7a5ad01a85b5a0da95c57352c8947d91a704b40e updated diff -r 7a5ad01a85b5 -r 5616b45d656f cws/main_cw04.tex --- a/cws/main_cw04.tex Sat Nov 28 15:58:36 2020 +0000 +++ b/cws/main_cw04.tex Mon Nov 30 00:06:15 2020 +0000 @@ -6,6 +6,7 @@ \usepackage{../style} \usepackage{../langs} \usepackage{disclaimer} +\usepackage{ulem} \begin{document} @@ -29,7 +30,7 @@ This part is about searching and backtracking. You are asked to implement Scala programs that solve various versions of the \textit{Knight's Tour Problem} on a chessboard. The preliminary part (4\%) is -due on \cwNINE{} at 5pm; the core part (6\%) is due on \cwNINEa{} at 5pm. +due on \sout{\cwNINE{}} \textcolor{red}{16 December} at 5pm; the core part (6\%) is due on \cwNINEa{} at 5pm. Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''. \bigskip %Note the core, more advanced, part might include material you have not diff -r 7a5ad01a85b5 -r 5616b45d656f cws/pre_cw03.pdf Binary file cws/pre_cw03.pdf has changed diff -r 7a5ad01a85b5 -r 5616b45d656f cws/pre_cw03.tex --- a/cws/pre_cw03.tex Sat Nov 28 15:58:36 2020 +0000 +++ b/cws/pre_cw03.tex Mon Nov 30 00:06:15 2020 +0000 @@ -9,7 +9,7 @@ \usepackage{stackengine} %% \usepackage{accents} \newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} - +\usepackage{ulem} \begin{document} @@ -25,7 +25,8 @@ \bigskip \IMPORTANT{This part is about the shunting yard algorithm by Dijkstra. - The preliminary part is due on \cwEIGHT{} at 5pm and worth 3\%. + The preliminary part is due on \sout{\cwEIGHT{}} \textcolor{red}{11 December} + at 5pm and worth 3\%. Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.} diff -r 7a5ad01a85b5 -r 5616b45d656f main_solution4/knight3.scala --- a/main_solution4/knight3.scala Sat Nov 28 15:58:36 2020 +0000 +++ b/main_solution4/knight3.scala Mon Nov 30 00:06:15 2020 +0000 @@ -60,4 +60,7 @@ def tour_on_mega_board(dim: Int, path: Path) = time_needed(ttour_on_mega_board(dim: Int, path: Path)) + + + } diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube.scala --- a/main_testing2/danube.scala Sat Nov 28 15:58:36 2020 +0000 +++ b/main_testing2/danube.scala Mon Nov 30 00:06:15 2020 +0000 @@ -1,17 +1,19 @@ // Core Part about Movie Recommendations // at Danube.co.uk -//=========================================== +//======================================== + + +object CW7b { // for purposes of generating a jar import io.Source import scala.util._ -object CW7b { // for purposes of generating a jar // (1) Implement the function get_csv_url which takes an url-string // as argument and requests the corresponding file. The two urls // of interest are ratings_url and movies_url, which correspond // to CSV-files. -// The function should ReTurn the CSV file appropriately broken +// The function should return the CSV file appropriately broken // up into lines, and the first line should be dropped (that is without // the header of the CSV file). The result is a list of strings (lines // in the file). @@ -33,14 +35,19 @@ //movies.length // 9742 // (2) Implement two functions that process the CSV files. The ratings -// function filters out all ratings below 4 and ReTurns a list of -// (userID, movieID) pairs. The movies function just ReTurns a list +// function filters out all ratings below 4 and returns a list of +// (userID, movieID) pairs. The movies function just returns a list // of (movieId, title) pairs. +//def process_ratings(lines: List[String]) : List[(String, String)] = { +// for (cols <- lines.map(_.split(",").toList); +// if (cols(2).toFloat >= 4)) yield (cols(0), cols(1)) +//} + def process_ratings(lines: List[String]) : List[(String, String)] = { for (cols <- lines.map(_.split(",").toList); - if (cols(2).toFloat >= 4)) yield (cols(0), cols(1)) + if (cols(2).toInt >= 4)) yield (cols(0), cols(1)) } def process_movies(lines: List[String]) : List[(String, String)] = { @@ -77,18 +84,8 @@ } } -// -//val ls = List(("1", "a"), ("2", "a"), ("1", "c"), ("2", "a"), ("1", "c")) -// -//val m = groupById(ls, Map()) -// -//m.getOrElse("1", Nil).count(_ == "c") // => 2 -//m.getOrElse("1", Nil).count(_ == "a") // => 1 - // test cases //val ratings_map = groupById(good_ratings, Map()) -//groupById(good_ratings, Map()).get("214") -//groupById(good_ratings, Map()).toList.minBy(_._2.length) //val movies_map = movie_names.toMap //ratings_map.get("414").get.map(movies_map.get(_)) // most prolific recommender with 1227 positive ratings @@ -96,10 +93,9 @@ //ratings_map.get("214").get.map(movies_map.get(_)) // least prolific recommender with only 1 positive rating - //(4) Implement a function that takes a ratings map and a movie_name as argument. // The function calculates all suggestions containing -// the movie mov in its recommendations. It ReTurns a list of all these +// the movie mov in its recommendations. It returns a list of all these // recommendations (each of them is a list and needs to have mov deleted, // otherwise it might happen we recommend the same movie). @@ -124,23 +120,14 @@ // (5) Implement a suggestions function which takes a rating // map and a movie_name as arguments. It calculates all the recommended // movies sorted according to the most frequently suggested movie(s) first. - -// needed in Scala 2.13. - -def mapValues[S, T, R](m: Map[S, T], f: T => R) = - m.map { case (x, y) => (x, f(y)) } - def suggestions(recs: Map[String, List[String]], mov_name: String) : List[String] = { val favs = favourites(recs, mov_name).flatten - val favs_counted = mapValues(favs.groupBy(identity), (v:List[String]) => v.size).toList + val favs_counted = favs.groupBy(identity).view.mapValues(_.size).toList val favs_sorted = favs_counted.sortBy(_._2).reverse favs_sorted.map(_._1) } -// check -// groupMap is equivalent to groupBy(key).mapValues(_.map(f)) - // test cases //suggestions(ratings_map, "912") @@ -163,7 +150,7 @@ // recommendations(ratings_map, movies_map, "912") // => List(Godfather, Star Wars: Episode IV - A NewHope (1977)) -//recommendations(ratings_map, movies_map, "260") +// recommendations(ratings_map, movies_map, "260") // => List(Star Wars: Episode V - The Empire Strikes Back (1980), // Star Wars: Episode VI - Return of the Jedi (1983)) @@ -177,20 +164,53 @@ // => List(Shawshank Redemption, Forrest Gump (1994)) // recommendations(ratings_map, movies_map, "4") -// => Nil (there are three ratings fro this movie in ratings.csv but they are not positive) +// => Nil (there are three ratings for this movie in ratings.csv but they are not positive) + +// (7) Calculate the recommendations for all movies according to +// what the recommendations function in (6) produces (this +// can take a few seconds). Put all recommendations into a list +// (of strings) and count how often the strings occur in +// this list. This produces a list of string-int pairs, +// where the first component is the movie name and the second +// is the number of how many times they were recommended. +// Sort all the pairs according to the number +// of times they were recommended (most recommended movie name +// first). + +def occurrences(xs: List[String]): List[(String, Int)] = + for (x <- xs.distinct) yield (x, xs.count(_ == x)) + +def most_recommended(recs: Map[String, List[String]], + movs: Map[String, String]) : List[(String, Int)] = { + val all = (for (name <- movs.toList.map(_._1)) yield { + recommendations(recs, movs, name) + }).flatten + val occs = occurrences(all) + occs.sortBy(_._2).reverse +} -// If you want to calculate the recomendations for all movies. -// Will take a few seconds calculation time. - -//val all = for (name <- movie_names.map(_._1)) yield { -// recommendations(ratings_map, movies_map, name) -//} - -// helper functions -//List().take(2) -//List(1).take(2) -//List(1,2).take(2) -//List(1,2,3).take(2) +//most_recommended(ratings_map, movies_map).take(3) +// => +// List((Matrix,698), +// (Star Wars: Episode IV - A New Hope (1977),402), +// (Jerry Maguire (1996),382)) } + +//val ratings_url = """https://nms.kcl.ac.uk/christian.urban/ratings.csv""" +//val movies_url = """https://nms.kcl.ac.uk/christian.urban/movies.csv""" + +/* +val ratings = get_csv_url(ratings_url) +val movies = get_csv_url(movies_url) + +val good_ratings = process_ratings(ratings) +val movie_names = process_movies(movies) + +val ratings_map = groupById(good_ratings, Map()) +val movies_map = movie_names.toMap + + +println(most_recommended(ratings_map, movies_map).take(3)) +*/ diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test.sh --- a/main_testing2/danube_test.sh Sat Nov 28 15:58:36 2020 +0000 +++ b/main_testing2/danube_test.sh Mon Nov 30 00:06:15 2020 +0000 @@ -173,3 +173,21 @@ fi fi +### danube most_recommended + +if [ $tsts -eq 0 ] +then + echo -e " val ratings_map2 = for ((k, v) <- ratings_map) yield (k, v.take(2)) " >> $out + echo -e " most_recommended(ratings_map2, movies_map).take(3) == " >> $out + echo -e " List((\"M*A*S*H (a.k.a. MASH) (1970)\",15), " >> $out + echo -e " (\"Star Trek: First Contact (1996)\",10), " >> $out + echo -e " (\"Inception (2010)\",9))) " >> $out + + if (scala_assert "danube.scala" "danube_test7.scala") + then + echo -e " --> success" >> $out + else + echo -e " --> ONE OF THE TESTS FAILED\n" >> $out + fi +fi + diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test2.scala --- a/main_testing2/danube_test2.scala Sat Nov 28 15:58:36 2020 +0000 +++ b/main_testing2/danube_test2.scala Mon Nov 30 00:06:15 2020 +0000 @@ -5,7 +5,7 @@ import CW7b._ def urban_get_csv_file(name: String) : List[String] = { - val csv = Source.fromFile(name) + val csv = Source.fromFile(name)("ISO-8859-1") csv.mkString.split("\n").toList.drop(1) } diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test4.scala --- a/main_testing2/danube_test4.scala Sat Nov 28 15:58:36 2020 +0000 +++ b/main_testing2/danube_test4.scala Mon Nov 30 00:06:15 2020 +0000 @@ -9,17 +9,17 @@ def urban_get_csv_file(name: String) : List[String] = { import io.Source import scala.util._ - val csv = Source.fromFile(name) + val csv = Source.fromFile(name)("ISO-8859-1") csv.mkString.split("\n").toList.drop(1) } def urban_process_ratings(lines: List[String]) : List[(String, String)] = { for (cols <- lines.map(_.split(",").toList); - if (cols(2).toFloat >= 4)) yield (cols(0), cols(1)) + if (cols(2).toInt >= 4)) yield (cols(0), cols(1)) } -val urban_good_ratings = process_ratings(urban_get_csv_file("ratings.csv")) +val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv")) val urban_ratings_map = urban_groupById(urban_good_ratings) diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test5.scala --- a/main_testing2/danube_test5.scala Sat Nov 28 15:58:36 2020 +0000 +++ b/main_testing2/danube_test5.scala Mon Nov 30 00:06:15 2020 +0000 @@ -3,25 +3,35 @@ import CW7b._ -def urban_groupById(ratings: List[(String, String)]) = - ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap +def urban_groupById(ratings: List[(String, String)], + m: Map[String, List[String]]) : Map[String, List[String]] = ratings match { + case Nil => m + case (id, mov) :: rest => { + val old_ratings = m.getOrElse (id, Nil) + val new_ratings = m + (id -> (mov :: old_ratings)) + urban_groupById(rest, new_ratings) + } +} + +//def urban_groupById(ratings: List[(String, String)]) = +// ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap def urban_get_csv_file(name: String) : List[String] = { import io.Source import scala.util._ - val csv = Source.fromFile(name) + val csv = Source.fromFile(name)("ISO-8859-1") csv.mkString.split("\n").toList.drop(1) } def urban_process_ratings(lines: List[String]) : List[(String, String)] = { for (cols <- lines.map(_.split(",").toList); - if (cols(2).toFloat >= 4)) yield (cols(0), cols(1)) + if (cols(2).toInt >= 4)) yield (cols(0), cols(1)) } -val urban_good_ratings = process_ratings(urban_get_csv_file("ratings.csv")) +val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv")) -val urban_ratings_map = urban_groupById(urban_good_ratings) +val urban_ratings_map = urban_groupById(urban_good_ratings, Map()) assert(suggestions(urban_ratings_map, "912").length == 4110) assert(suggestions(urban_ratings_map, "858").length == 4883) diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test6.scala --- a/main_testing2/danube_test6.scala Sat Nov 28 15:58:36 2020 +0000 +++ b/main_testing2/danube_test6.scala Mon Nov 30 00:06:15 2020 +0000 @@ -3,20 +3,29 @@ // first test +def urban_groupById(ratings: List[(String, String)], + m: Map[String, List[String]]) : Map[String, List[String]] = ratings match { + case Nil => m + case (id, mov) :: rest => { + val old_ratings = m.getOrElse (id, Nil) + val new_ratings = m + (id -> (mov :: old_ratings)) + urban_groupById(rest, new_ratings) + } +} -def urban_groupById(ratings: List[(String, String)]) = - ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap +//def urban_groupById(ratings: List[(String, String)]) = +// ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap def urban_get_csv_file(name: String) : List[String] = { import io.Source import scala.util._ - val csv = Source.fromFile(name) + val csv = Source.fromFile(name)("ISO-8859-1") csv.mkString.split("\n").toList.drop(1) } def urban_process_ratings(lines: List[String]) : List[(String, String)] = { for (cols <- lines.map(_.split(",").toList); - if (cols(2).toFloat >= 4)) yield (cols(0), cols(1)) + if (cols(2).toInt >= 4)) yield (cols(0), cols(1)) } def urban_process_movies(lines: List[String]) : List[(String, String)] = { @@ -24,10 +33,10 @@ } -val urban_good_ratings = process_ratings(urban_get_csv_file("ratings.csv")) -val urban_movie_names = process_movies(urban_get_csv_file("movies.csv")).toMap +val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv")) +val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv")).toMap -val urban_ratings_map = urban_groupById(urban_good_ratings) +val urban_ratings_map = urban_groupById(urban_good_ratings, Map()) assert((for (n <- List("1", "2", "3", "4", "5")) yield { recommendations(urban_ratings_map, urban_movie_names, n).length diff -r 7a5ad01a85b5 -r 5616b45d656f main_testing2/danube_test7.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main_testing2/danube_test7.scala Mon Nov 30 00:06:15 2020 +0000 @@ -0,0 +1,48 @@ + +import CW7b._ + +// first test + +def urban_groupById(ratings: List[(String, String)], + m: Map[String, List[String]]) : Map[String, List[String]] = ratings match { + case Nil => m + case (id, mov) :: rest => { + val old_ratings = m.getOrElse (id, Nil) + val new_ratings = m + (id -> (mov :: old_ratings)) + urban_groupById(rest, new_ratings) + } +} +//def urban_groupById(ratings: List[(String, String)]) = +// ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap + +def urban_get_csv_file(name: String) : List[String] = { + import io.Source + import scala.util._ + val csv = Source.fromFile(name)("ISO-8859-1") + csv.mkString.split("\n").toList.drop(1) +} + +def urban_process_ratings(lines: List[String]) : List[(String, String)] = { + for (cols <- lines.map(_.split(",").toList); + if (cols(2).toInt >= 4)) yield (cols(0), cols(1)) +} + +def urban_process_movies(lines: List[String]) : List[(String, String)] = { + for (cols <- lines.map(_.split(",").toList)) yield (cols(0), cols(1)) +} + + +val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv")) +val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv")) + +val urban_movie_names_map = urban_movie_names.toMap +val urban_ratings_map = urban_groupById(urban_good_ratings, Map()) +//val urban_ratings_map = groupById(urban_good_ratings, Map()) + +val urban_ratings_map2 = for ((k, v) <- urban_ratings_map) yield (k, v.take(2)) + +assert(most_recommended(urban_ratings_map2, urban_movie_names_map).take(3) == + List(("M*A*S*H (a.k.a. MASH) (1970)",15), + ("Star Trek: First Contact (1996)",10), + ("Inception (2010)",9))) + diff -r 7a5ad01a85b5 -r 5616b45d656f slides/slides04.pdf Binary file slides/slides04.pdf has changed diff -r 7a5ad01a85b5 -r 5616b45d656f slides/slides04.tex --- a/slides/slides04.tex Sat Nov 28 15:58:36 2020 +0000 +++ b/slides/slides04.tex Mon Nov 30 00:06:15 2020 +0000 @@ -461,45 +461,6 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -\begin{center} - \includegraphics[angle=90,scale=0.35]{/Users/cu/vote.pdf} -\end{center} - - \only<2>{% -\begin{textblock}{13}(10,10) -\includegraphics[scale=0.2]{/Users/cu/goodvote.png} -\end{textblock}} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -\begin{center} - \includegraphics[scale=0.25]{/Users/cu/dresden.png} -\end{center} - -\begin{textblock}{13}(2,12) -\includegraphics[scale=0.08]{/Users/cu/kiss.jpg} -\end{textblock} - -\begin{textblock}{13}(6.8,12) -\includegraphics[scale=0.079]{/Users/cu/pioniere.jpg} -\end{textblock} - -\begin{textblock}{13}(11,12) -\includegraphics[scale=0.20]{/Users/cu/iron.jpg} -\end{textblock} - -\DOWNarrow{1}{11}{8.6} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -