main_solution2/danube.scala
changeset 400 e48ea8300b2d
parent 384 6e1237691307
equal deleted inserted replaced
399:b17a98b0c52f 400:e48ea8300b2d
     1 // Core Part about Movie Recommendations 
     1 // Core Part about Movie Recommendations 
     2 // at Danube.co.uk
     2 // at Danube.co.uk
     3 //========================================
     3 //========================================
     4 
     4 
     5 
     5 
     6 object CW7b { // for purposes of generating a jar
     6 object M2 { // for purposes of generating a jar
     7 
     7 
     8 import io.Source
     8 import io.Source
     9 import scala.util._
     9 import scala.util._
    10 
    10 
    11 
    11 
    54 //val good_ratings = process_ratings(ratings)
    54 //val good_ratings = process_ratings(ratings)
    55 //val movie_names = process_movies(movies)
    55 //val movie_names = process_movies(movies)
    56 
    56 
    57 //good_ratings.length   //48580
    57 //good_ratings.length   //48580
    58 //movie_names.length    // 9742
    58 //movie_names.length    // 9742
    59 
       
    60 //==============================================
       
    61 // Do not change anything below, unless you want 
       
    62 // to submit the file for the advanced part 3!
       
    63 //==============================================
       
    64 
    59 
    65 
    60 
    66 // (3) Implement a grouping function that calculates a map
    61 // (3) Implement a grouping function that calculates a map
    67 //     containing the userIds and all the corresponding recommendations 
    62 //     containing the userIds and all the corresponding recommendations 
    68 //     (list of movieIds). This  should be implemented in a tail
    63 //     (list of movieIds). This  should be implemented in a tail
   160 //   => List(Shawshank Redemption, Forrest Gump (1994))
   155 //   => List(Shawshank Redemption, Forrest Gump (1994))
   161 
   156 
   162 // recommendations(ratings_map, movies_map, "4")
   157 // recommendations(ratings_map, movies_map, "4")
   163 //   => Nil  (there are three ratings for this movie in ratings.csv but they are not positive)     
   158 //   => Nil  (there are three ratings for this movie in ratings.csv but they are not positive)     
   164 
   159 
   165 // (7) Calculate the recommendations for all movies according to
       
   166 // what the recommendations function in (6) produces (this
       
   167 // can take a few seconds). Put all recommendations into a list 
       
   168 // (of strings) and count how often the strings occur in
       
   169 // this list. This produces a list of string-int pairs,
       
   170 // where the first component is the movie name and the second
       
   171 // is the number of how many times they were recommended. 
       
   172 // Sort all the pairs according to the number
       
   173 // of times they were recommended (most recommended movie name 
       
   174 // first).
       
   175 
       
   176 def occurrences(xs: List[String]): List[(String, Int)] =
       
   177   for (x <- xs.distinct) yield (x, xs.count(_ == x))
       
   178 
       
   179 def most_recommended(recs: Map[String, List[String]],
       
   180                      movs: Map[String, String]) : List[(String, Int)] = {
       
   181    val all =  (for (name <- movs.toList.map(_._1)) yield {
       
   182      recommendations(recs, movs, name)                     
       
   183    }).flatten
       
   184    val occs = occurrences(all)
       
   185    occs.sortBy(_._2).reverse
       
   186 }
       
   187 
       
   188 
       
   189 //most_recommended(ratings_map, movies_map).take(3)
       
   190 // =>
       
   191 // List((Matrix,698), 
       
   192 //      (Star Wars: Episode IV - A New Hope (1977),402), 
       
   193 //      (Jerry Maguire (1996),382))
       
   194 
       
   195 
   160 
   196 }
   161 }