379
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
2 |
import CW7b._
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
3 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
4 |
// first test
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
5 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
6 |
/** Groups (id, movie) pairs by id, starting from the accumulator map `m`.
  *
  * Each movie is prepended to the list already stored under its id, so
  * within one id the movies appear in reverse order of their occurrence
  * in `ratings`. Entries already present in `m` are kept and extended.
  *
  * @param ratings the (id, movie) pairs to group
  * @param m       the map to start from (pass an empty map for a fresh grouping)
  * @return `m` extended with every pair from `ratings`
  */
def urban_groupById(ratings: List[(String, String)],
                    m: Map[String, List[String]]) : Map[String, List[String]] =
  ratings.foldLeft(m) { case (grouped, (id, mov)) =>
    // prepend the movie to whatever has been collected for this id so far
    grouped.updated(id, mov :: grouped.getOrElse(id, Nil))
  }
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
15 |
//def urban_groupById(ratings: List[(String, String)]) =
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
16 |
// ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
17 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
18 |
/** Reads the file `name` (decoded as ISO-8859-1) and returns its lines
  * without the first one (presumably the CSV header row).
  *
  * The content is split on "\n" only, so a file with Windows line endings
  * keeps a trailing "\r" on each line.
  *
  * @param name path of the file to read
  * @return all lines after the first, in file order
  */
def urban_get_csv_file(name: String) : List[String] = {
  import scala.io.Source
  val csv = Source.fromFile(name)("ISO-8859-1")
  // mkString reads the whole file eagerly, so the result is fully built
  // before the finally-clause releases the file handle.
  try csv.mkString.split("\n").toList.drop(1)
  finally csv.close()
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
24 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
25 |
/** Extracts (first column, second column) from every comma-separated line
  * whose third column, parsed as an Int, is at least 4.
  *
  * Lines with fewer than three columns or a non-numeric third column make
  * the call throw (IndexOutOfBoundsException / NumberFormatException).
  *
  * @param lines comma-separated records
  * @return the first two columns of each qualifying line, in input order
  */
def urban_process_ratings(lines: List[String]) : List[(String, String)] = {
  lines.flatMap { line =>
    val fields = line.split(",").toList
    // keep only records whose third column is 4 or higher
    if (fields(2).toInt >= 4) List((fields(0), fields(1))) else Nil
  }
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
29 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
30 |
/** Extracts (first column, second column) from every comma-separated line.
  *
  * Columns are obtained with a plain split on ",", so any further columns
  * are ignored; a line with fewer than two columns makes the call throw
  * an IndexOutOfBoundsException.
  *
  * @param lines comma-separated records
  * @return the first two columns of each line, in input order
  */
def urban_process_movies(lines: List[String]) : List[(String, String)] = {
  lines.map { line =>
    val fields = line.split(",").toList
    (fields(0), fields(1))
  }
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
33 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
34 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
35 |
// Load both data files: the ratings that pass urban_process_ratings'
// filter, and the first two columns of each movies.csv record.
val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv"))

// Lookup tables built from the two lists above.
val urban_movie_names_map = urban_movie_names.toMap
val urban_ratings_map = urban_groupById(urban_good_ratings, Map())
//val urban_ratings_map = groupById(urban_good_ratings, Map())

// Same keys as urban_ratings_map, but at most two entries per key.
val urban_ratings_map2 = urban_ratings_map.map { case (k, v) => (k, v.take(2)) }

// The first three results of most_recommended must match exactly.
assert(
  most_recommended(urban_ratings_map2, urban_movie_names_map).take(3) ==
    List(
      ("M*A*S*H (a.k.a. MASH) (1970)",15),
      ("Star Trek: First Contact (1996)",10),
      ("Inception (2010)",9)))
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
48 |
|