|         |      1 // Core Part about Movie Recommendations  | 
|         |      2 // at Danube.co.uk | 
|         |      3 //=========================================== | 
|         |      4  | 
|         |      5 import io.Source | 
|         |      6 import scala.util._ | 
|         |      7  | 
|         |      8 object CW7b { // for purposes of generating a jar | 
|         |      9  | 
|         |     10 // (1) Implement the function get_csv_url which takes an url-string | 
|         |     11 //     as argument and requests the corresponding file. The two urls | 
|         |     12 //     of interest are ratings_url and movies_url, which correspond  | 
|         |     13 //     to CSV-files. | 
|         |     14 //     The function should ReTurn the CSV file appropriately broken | 
|         |     15 //     up into lines, and the first line should be dropped (that is without | 
|         |     16 //     the header of the CSV file). The result is a list of strings (lines | 
|         |     17 //     in the file). | 
|         |     18  | 
// Fetches the CSV file at `url` (decoded as ISO-8859-1), splits its content
// on "\n" and drops the first line (the CSV header).
//
//   url    - location of the CSV file (any URL scheme the JVM can open)
//   result - the remaining lines of the file, in file order
//
// The Source is closed in a finally-clause so that the underlying
// stream/connection is released even if reading fails.
def get_csv_url(url: String) : List[String] = {
  val csv = Source.fromURL(url)("ISO-8859-1")
  try csv.mkString.split("\n").toList.drop(1)
  finally csv.close()
}
|         |     23  | 
// Locations of the two CSV files (ratings and movies) used below.
val ratings_url: String = "https://nms.kcl.ac.uk/christian.urban/ratings.csv"
val movies_url: String = "https://nms.kcl.ac.uk/christian.urban/movies.csv"
|         |     26  | 
|         |     27 // test cases | 
|         |     28  | 
|         |     29 //val ratings = get_csv_url(ratings_url) | 
|         |     30 //val movies = get_csv_url(movies_url) | 
|         |     31  | 
|         |     32 //ratings.length  // 87313 | 
|         |     33 //movies.length   // 9742 | 
|         |     34  | 
|         |     35 // (2) Implement two functions that process the CSV files. The ratings | 
|         |     36 //     function filters out all ratings below 4 and ReTurns a list of  | 
|         |     37 //     (userID, movieID) pairs. The movies function just ReTurns a list  | 
|         |     38 //     of (movieId, title) pairs. | 
|         |     39  | 
|         |     40  | 
// Converts the lines of the ratings CSV into (userID, movieID) pairs.
// Each line is split on ","; only lines whose third column, read as a
// Float, is at least 4 are kept. The first two columns form the pair.
def process_ratings(lines: List[String]) : List[(String, String)] =
  lines
    .map(line => line.split(",").toList)
    .filter(fields => fields(2).toFloat >= 4)
    .map(fields => (fields(0), fields(1)))
|         |     45  | 
// Converts the lines of the movies CSV into (movieId, title) pairs.
// Each line is split on "," and the first two columns are used, so a
// title that itself contains a comma is cut at its first comma.
def process_movies(lines: List[String]) : List[(String, String)] =
  lines.map { line =>
    val fields = line.split(",").toList
    (fields(0), fields(1))
  }
|         |     49  | 
|         |     50 // test cases | 
|         |     51  | 
|         |     52 //val good_ratings = process_ratings(ratings) | 
|         |     53 //val movie_names = process_movies(movies) | 
|         |     54  | 
|         |     55 //good_ratings.length   //48580 | 
|         |     56 //movie_names.length    // 9742 | 
|         |     57  | 
|         |     58 //============================================== | 
|         |     59 // Do not change anything below, unless you want  | 
|         |     60 // to submit the file for the advanced part 3! | 
|         |     61 //============================================== | 
|         |     62  | 
|         |     63  | 
|         |     64 // (3) Implement a grouping function that calculates a map | 
|         |     65 //     containing the userIds and all the corresponding recommendations  | 
|         |     66 //     (list of movieIds). This should be implemented in a tail | 
|         |     67 //     recursive fashion, using a map m as accumulator. This map | 
|         |     68 //     is set to Map() at the beginning of the calculation. | 
|         |     69  | 
// Groups (userID, movieID) pairs by userID, starting from the accumulator m.
//
//   ratings - the pairs still to be added
//   m       - the map built so far (Map() for a fresh calculation)
//   result  - m extended with every pair in ratings; each movieID is
//             prepended to its user's list, so a list holds the movieIDs in
//             reverse order of occurrence (duplicates are kept)
//
// The recursion lives in a local helper annotated with @tailrec, so the
// compiler rejects any future change that breaks tail recursion.
def groupById(ratings: List[(String, String)], 
              m: Map[String, List[String]]) : Map[String, List[String]] = {
  @scala.annotation.tailrec
  def loop(remaining: List[(String, String)],
           acc: Map[String, List[String]]) : Map[String, List[String]] = remaining match {
    case Nil => acc
    case (id, mov) :: rest =>
      loop(rest, acc + (id -> (mov :: acc.getOrElse(id, Nil))))
  }
  loop(ratings, m)
}
|         |     79  | 
|         |     80 // | 
|         |     81 //val ls = List(("1", "a"), ("2", "a"), ("1", "c"), ("2", "a"), ("1", "c")) | 
|         |     82 // | 
|         |     83 //val m = groupById(ls, Map()) | 
|         |     84 // | 
|         |     85 //m.getOrElse("1", Nil).count(_ == "c") // => 2 | 
|         |     86 //m.getOrElse("1", Nil).count(_ == "a") // => 1 | 
|         |     87  | 
|         |     88 // test cases | 
|         |     89 //val ratings_map = groupById(good_ratings, Map()) | 
|         |     90 //groupById(good_ratings, Map()).get("214") | 
|         |     91 //groupById(good_ratings, Map()).toList.minBy(_._2.length) | 
|         |     92 //val movies_map = movie_names.toMap | 
|         |     93  | 
|         |     94 //ratings_map.get("414").get.map(movies_map.get(_)) // most prolific recommender with 1227 positive ratings | 
|         |     95 //ratings_map.get("474").get.map(movies_map.get(_)) // second-most prolific recommender with 787 positive ratings | 
|         |     96 //ratings_map.get("214").get.map(movies_map.get(_)) // least prolific recommender with only 1 positive rating | 
|         |     97  | 
|         |     98  | 
|         |     99  | 
|         |    100 //(4) Implement a function that takes a ratings map and a movie_name as argument. | 
|         |    101 // The function calculates all suggestions containing | 
|         |    102 // the movie mov in its recommendations. It ReTurns a list of all these | 
|         |    103 // recommendations (each of them is a list and needs to have mov deleted,  | 
|         |    104 // otherwise it might happen we recommend the same movie). | 
|         |    105  | 
// Collects the recommendation lists of all users that recommend mov.
//
//   m      - map from userID to that user's recommended movieIDs
//   mov    - the movieID to look for
//   result - one list per user whose recommendations contain mov, with
//            every occurrence of mov removed from that list
def favourites(m: Map[String, List[String]], mov: String) : List[List[String]] =
  m.keys.toList
    .map(id => m(id))
    .filter(movies => movies.contains(mov))
    .map(movies => movies.filter(_ != mov))
|         |    109  | 
|         |    110  | 
|         |    111  | 
|         |    112 // test cases | 
|         |    113 // movie ID "912" -> Casablanca (1942) | 
|         |    114 //          "858" -> Godfather | 
|         |    115 //          "260" -> Star Wars: Episode IV - A New Hope (1977) | 
|         |    116  | 
|         |    117 //favourites(ratings_map, "912").length  // => 80 | 
|         |    118  | 
|         |    119 // That means there are 80 users that recommend the movie with ID 912. | 
|         |    120 // Of these 80  users, 55 gave a good rating to movie 858 and | 
|         |    121 // 52 a good rating to movies 260, 318, 593. | 
|         |    122  | 
|         |    123  | 
|         |    124 // (5) Implement a suggestions function which takes a rating | 
|         |    125 // map and a movie_name as arguments. It calculates all the recommended | 
|         |    126 // movies sorted according to the most frequently suggested movie(s) first. | 
|         |    127  | 
|         |    128 // needed in Scala 2.13. | 
|         |    129   | 
// Applies f to every value of m, leaving the keys untouched, and builds
// a new (strict) map from the results.
def mapValues[S, T, R](m: Map[S, T], f: T => R): Map[S, R] =
  m.map(entry => (entry._1, f(entry._2)))
|         |    132  | 
// Calculates the movieIDs suggested for mov_name, most frequent first.
//
//   recs     - map from userID to that user's recommended movieIDs
//   mov_name - the movieID for which suggestions are wanted
//   result   - every movieID that occurs in the lists produced by
//              favourites(recs, mov_name), ordered by descending number
//              of occurrences
def suggestions(recs: Map[String, List[String]], 
                    mov_name: String) : List[String] = {
  // all co-recommended movieIDs, one entry per occurrence
  val candidates = favourites(recs, mov_name).flatten
  // movieID -> number of occurrences
  val tally = mapValues(candidates.groupBy(identity),
                        (occurrences: List[String]) => occurrences.size)
  // ascending stable sort followed by reverse, exactly as before
  tally.toList.sortBy(_._2).reverse.map { case (movieId, _) => movieId }
}
|         |    140  | 
|         |    141 // check | 
|         |    142 // groupMap is equivalent to groupBy(key).mapValues(_.map(f)) | 
|         |    143  | 
|         |    144 // test cases | 
|         |    145  | 
|         |    146 //suggestions(ratings_map, "912") | 
|         |    147 //suggestions(ratings_map, "912").length   | 
|         |    148 // => 4110 suggestions with List(858, 260, 318, 593, ...) | 
|         |    149 //    being the most frequently suggested movies | 
|         |    150  | 
|         |    151 // (6) Implement recommendations functions which generates at most | 
|         |    152 // *two* of the most frequently suggested movies. It Returns the  | 
|         |    153 // actual movie names, not the movieIDs. | 
|         |    154  | 
// Produces at most two titles of the most frequently suggested movies.
//
//   recs     - map from userID to that user's recommended movieIDs
//   movs     - map from movieID to movie title
//   mov_name - the movieID for which recommendations are wanted
//   result   - the titles of the first two suggestions that have an entry
//              in movs (fewer if there are not enough suggestions)
//
// Suggested ids without a title in movs are skipped rather than raising
// NoSuchElementException; when every id has a title the result is the
// same as looking up the first two suggestions directly.
def recommendations(recs: Map[String, List[String]],
                   movs: Map[String, String],
                   mov_name: String) : List[String] =
  suggestions(recs, mov_name).flatMap(id => movs.get(id)).take(2)
|         |    159  | 
|         |    160  | 
|         |    161 // testcases | 
|         |    162  | 
|         |    163 // recommendations(ratings_map, movies_map, "912") | 
|         |    164 //   => List(Godfather, Star Wars: Episode IV - A NewHope (1977)) | 
|         |    165  | 
|         |    166 //recommendations(ratings_map, movies_map, "260") | 
|         |    167 //   => List(Star Wars: Episode V - The Empire Strikes Back (1980),  | 
|         |    168 //           Star Wars: Episode VI - Return of the Jedi (1983)) | 
|         |    169  | 
|         |    170 // recommendations(ratings_map, movies_map, "2") | 
|         |    171 //   => List(Lion King, Jurassic Park (1993)) | 
|         |    172  | 
|         |    173 // recommendations(ratings_map, movies_map, "0") | 
|         |    174 //   => Nil | 
|         |    175  | 
|         |    176 // recommendations(ratings_map, movies_map, "1") | 
|         |    177 //   => List(Shawshank Redemption, Forrest Gump (1994)) | 
|         |    178  | 
|         |    179 // recommendations(ratings_map, movies_map, "4") | 
|         |    180 //   => Nil  (there are three ratings for this movie in ratings.csv but they are not positive)      | 
|         |    181  | 
|         |    182  | 
|         |    183 // If you want to calculate the recommendations for all movies. | 
|         |    184 // Will take a few seconds calculation time. | 
|         |    185  | 
|         |    186 //val all = for (name <- movie_names.map(_._1)) yield { | 
|         |    187 //  recommendations(ratings_map, movies_map, name) | 
|         |    188 //} | 
|         |    189  | 
|         |    190 // helper functions | 
|         |    191 //List().take(2) | 
|         |    192 //List(1).take(2) | 
|         |    193 //List(1,2).take(2) | 
|         |    194 //List(1,2,3).take(2) | 
|         |    195  | 
|         |    196 } |