main_testing2/danube_test7.scala
author Christian Urban <christian.urban@kcl.ac.uk>
Mon, 30 Nov 2020 12:58:37 +0000
changeset 381 6c2792a3e00d
parent 379 73ad2e018516
child 384 627a944c744b
permissions -rw-r--r--
updated duration class
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
379
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     1
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     2
import CW7b._
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     3
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     4
// first test 
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     5
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     6
/** Groups (id, movie) pairs by id, accumulating on top of the map `m`.
  *
  * Every movie is prepended to the list already stored under its id, so the
  * movies of one id end up in reverse order of their occurrence in `ratings`.
  *
  * @param ratings the (id, movie) pairs to add
  * @param m       the map the grouping starts from
  * @return `m` extended with every pair from `ratings`
  */
def urban_groupById(ratings: List[(String, String)], 
              m: Map[String, List[String]]) : Map[String, List[String]] =
  ratings.foldLeft(m) { case (grouped, (id, mov)) =>
    grouped.updated(id, mov :: grouped.getOrElse(id, Nil))
  }
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    15
//def urban_groupById(ratings: List[(String, String)]) = 
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    16
//  ratings.groupBy(_._1).view.mapValues(_.map(_._2)).toMap 
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    17
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    18
/** Reads the file `name`, decoded as ISO-8859-1, and returns its lines
  * without the first one (presumably the CSV header row).
  *
  * The content is split on "\n" only; any other line-ending characters
  * stay part of the returned strings.
  *
  * @param name path of the file to read
  * @return all lines of the file except the first
  */
def urban_get_csv_file(name: String) : List[String] = {
  import scala.io.Source
  val csv = Source.fromFile(name)("ISO-8859-1")
  // Release the underlying file handle even when reading fails.
  try csv.mkString.split("\n").toList.drop(1)
  finally csv.close()
}
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    24
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    25
/** Extracts the first two comma-separated fields of every line whose third
  * field, read as an integer, is at least 4.
  *
  * Lines with fewer than three fields, or with a third field that is not an
  * integer, make the call fail with an exception.
  *
  * @param lines comma-separated rows
  * @return the (first field, second field) pairs of the rows that qualify
  */
def urban_process_ratings(lines: List[String]) : List[(String, String)] =
  lines.flatMap { line =>
    val fields = line.split(",").toList
    if (fields(2).toInt >= 4) Some((fields(0), fields(1))) else None
  }
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    29
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    30
/** Extracts the first two comma-separated fields of every line.
  *
  * Splitting is done on every comma, so a second field that itself contains
  * a comma is cut at that comma. Lines with fewer than two fields make the
  * call fail with an exception.
  *
  * @param lines comma-separated rows
  * @return the (first field, second field) pair of each row, in input order
  */
def urban_process_movies(lines: List[String]) : List[(String, String)] =
  lines.map { line =>
    val fields = line.split(",").toList
    (fields(0), fields(1))
  }
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    33
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    34
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    35
// Reference data: rows of ratings.csv that pass urban_process_ratings,
// and the (first field, second field) pairs of movies.csv.
val urban_good_ratings = urban_process_ratings(urban_get_csv_file("ratings.csv"))
val urban_movie_names = urban_process_movies(urban_get_csv_file("movies.csv"))

// Lookup structures built with the reference implementation above.
val urban_movie_names_map = urban_movie_names.toMap
val urban_ratings_map = urban_groupById(urban_good_ratings, Map())
//val urban_ratings_map = groupById(urban_good_ratings, Map())

// Keep at most two entries per key to make the test input smaller.
val urban_ratings_map2 = urban_ratings_map.map { case (k, v) => (k, v.take(2)) }

// most_recommended is not defined in this file (presumably provided by the
// CW7b import); its first three results must match the expected list.
assert(most_recommended(urban_ratings_map2, urban_movie_names_map).take(3) == 
       List(("M*A*S*H (a.k.a. MASH) (1970)",15), 
            ("Star Trek: First Contact (1996)",10), 
            ("Inception (2010)",9)))
73ad2e018516 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    48