379
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     1  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     2  | 
import CW7b._
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     3  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     4  | 
// first test 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     5  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     6  | 
def urban_groupById(ratings: List[(String, String)], 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     7  | 
              m: Map[String, List[String]]) : Map[String, List[String]] = ratings match {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     8  | 
  case Nil => m
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     9  | 
  case (id, mov) :: rest => {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    10  | 
    val old_ratings = m.getOrElse (id, Nil)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    11  | 
    val new_ratings = m + (id -> (mov :: old_ratings))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    12  | 
    urban_groupById(rest, new_ratings)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    13  | 
  }
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    14  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    15  | 
//def urban_groupById(ratings: List[(String, String)]) = 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    16  | 
//  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    17  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    18  | 
def urban_get_csv_file(name: String) : List[String] = {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    19  | 
  import io.Source
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    20  | 
  import scala.util._
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    21  | 
  val csv = Source.fromFile(name)("ISO-8859-1")
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    22  | 
  csv.mkString.split("\n").toList.drop(1)
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    23  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    24  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    25  | 
def urban_process_ratings(lines: List[String]) : List[(String, String)] = {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    26  | 
  for (cols <- lines.map(_.split(",").toList); 
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    27  | 
       if (cols(2).toInt >= 4)) yield (cols(0), cols(1))  
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    28  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    29  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    30  | 
def urban_process_movies(lines: List[String]) : List[(String, String)] = {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    31  | 
  for (cols <- lines.map(_.split(",").toList)) yield (cols(0), cols(1))  
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    32  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    33  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    34  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    35  | 
val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    36  | 
val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv"))
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    37  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    38  | 
val urban_movie_names_map = urban_movie_names.toMap
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    39  | 
val urban_ratings_map = urban_groupById(urban_good_ratings, Map())
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    40  | 
//val urban_ratings_map = groupById(urban_good_ratings, Map())
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    41  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    42  | 
val urban_ratings_map2 =  for ((k, v) <- urban_ratings_map) yield (k, v.take(2))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    43  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    44  | 
assert(most_recommended(urban_ratings_map2, urban_movie_names_map).take(3) == 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    45  | 
       List(("M*A*S*H (a.k.a. MASH) (1970)",15), 
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    46  | 
            ("Star Trek: First Contact (1996)",10), 
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    47  | 
            ("Inception (2010)",9)))
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    48  | 
  |