54 //val good_ratings = process_ratings(ratings) |
54 //val good_ratings = process_ratings(ratings) |
55 //val movie_names = process_movies(movies) |
55 //val movie_names = process_movies(movies) |
56 |
56 |
57 //good_ratings.length //48580 |
57 //good_ratings.length //48580 |
58 //movie_names.length // 9742 |
58 //movie_names.length // 9742 |
59 |
|
60 //============================================== |
|
61 // Do not change anything below, unless you want |
|
62 // to submit the file for the advanced part 3! |
|
63 //============================================== |
|
64 |
59 |
65 |
60 |
66 // (3) Implement a grouping function that calculates a map |
61 // (3) Implement a grouping function that calculates a map |
67 // containing the userIds and all the corresponding recommendations |
62 // containing the userIds and all the corresponding recommendations |
68 // (list of movieIds). This should be implemented in a tail |
63 // (list of movieIds). This should be implemented in a tail |
160 // => List(Shawshank Redemption, Forrest Gump (1994)) |
155 // => List(Shawshank Redemption, Forrest Gump (1994)) |
161 |
156 |
162 // recommendations(ratings_map, movies_map, "4") |
157 // recommendations(ratings_map, movies_map, "4") |
163 // => Nil (there are three ratings for this movie in ratings.csv but they are not positive) |
158 // => Nil (there are three ratings for this movie in ratings.csv but they are not positive) |
164 |
159 |
165 // (7) Calculate the recommendations for all movies according to |
|
166 // what the recommendations function in (6) produces (this |
|
167 // can take a few seconds). Put all recommendations into a list |
|
168 // (of strings) and count how often the strings occur in |
|
169 // this list. This produces a list of string-int pairs, |
|
170 // where the first component is the movie name and the second |
|
171 // is the number of how many times they were recommended. |
|
172 // Sort all the pairs according to the number |
|
173 // of times they were recommended (most recommended movie name |
|
174 // first). |
|
175 |
|
// Counts how often each string occurs in xs.
// Returns one (string, count) pair per distinct string, in the order in
// which the strings first appear in xs. An empty list yields Nil.
def occurrences(xs: List[String]): List[(String, Int)] = {
  // Tally every string in a single grouping pass instead of rescanning
  // the whole list once per distinct element.
  val counts = xs.groupBy(identity).map { case (s, group) => (s, group.length) }
  // distinct keeps first-occurrence order, so the result order is the same
  // as counting each distinct element in turn.
  xs.distinct.map(s => (s, counts(s)))
}
|
178 |
|
// Collects the recommendations produced for every key of movs, counts how
// often each recommended string occurs, and returns the (string, count)
// pairs ordered from the highest count to the lowest.
//   recs: passed through unchanged to recommendations
//   movs: its keys are the arguments fed to recommendations; the map itself
//         is also passed through
def most_recommended(recs: Map[String, List[String]],
                     movs: Map[String, String]) : List[(String, Int)] = {
  // One recommendations call per entry of movs, concatenated in the map's
  // iteration order.
  val suggested = movs.toList.flatMap { case (key, _) =>
    recommendations(recs, movs, key)
  }
  val tallied = occurrences(suggested)
  // Ascending stable sort followed by reverse gives descending counts.
  tallied.sortBy { case (_, count) => count }.reverse
}
|
187 |
|
188 |
|
189 //most_recommended(ratings_map, movies_map).take(3) |
|
190 // => |
|
191 // List((Matrix,698), |
|
192 // (Star Wars: Episode IV - A New Hope (1977),402), |
|
193 // (Jerry Maguire (1996),382)) |
|
194 |
|
195 |
160 |
196 } |
161 } |