testing2/danube.scala
author Christian Urban <urbanc@in.tum.de>
Tue, 03 Dec 2019 01:22:16 +0000
changeset 326 e5453add7df6
parent 284 9a04eb6a2291
child 329 8a34b2ebc8cc
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
284
9a04eb6a2291 updated
Christian Urban <urbanc@in.tum.de>
parents: 266
diff changeset
     1
// Core Part about Movie Recommendations 
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     2
// at Danube.co.uk
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     3
//===========================================
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     4
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
     5
object CW7b {
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
     6
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
import io.Source
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
import scala.util._
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     9
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    10
// (1) Implement the function get_csv_url which takes an url-string
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    11
//     as argument and requests the corresponding file. The two urls
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    12
//     of interest are ratings_url and movies_url, which correspond 
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    13
//     to CSV-files.
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    14
//
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    15
//     The function should ReTurn the CSV-file appropriately broken
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    16
//     up into lines, and the first line should be dropped (that is without
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    17
//     the header of the CSV-file). The result is a list of strings (lines
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    18
//     in the file).
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    19
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    20
/** Requests the CSV file at `url` and returns its lines without the header.
  *
  * The content is decoded as UTF-8, split on "\n", and the first line (the
  * CSV header) is dropped. The source is opened through `Using`, so it is
  * closed once the content has been read, also when reading fails.
  *
  * @param url location of the CSV file
  * @return the lines following the header; an empty list if opening or
  *         reading the URL fails with a non-fatal exception
  */
def get_csv_url(url: String) : List[String] = {
    Using(Source.fromURL(url)("UTF-8")) { src =>
        // drop(1) instead of tail: a content without any line yields List()
        src.mkString.split("\n").toList.drop(1)
    }.getOrElse(List())
}
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    23
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    24
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    25
val ratings_url = """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    26
val movies_url = """https://nms.kcl.ac.uk/christian.urban/movies.csv"""
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    27
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    28
// testcases
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    29
//-----------
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    30
// val ratings = get_csv_url(ratings_url)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    31
// val movies = get_csv_url(movies_url)
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    32
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    33
//ratings.length  // 87313
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    34
//movies.length   // 9742
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    35
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    36
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    37
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    38
// (2) Implement two functions that process the CSV-files from (1). The ratings
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    39
//     function filters out all ratings below 4 and ReTurns a list of 
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    40
//     (userID, movieID) pairs. The movies function just ReTurns a list 
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    41
//     of (movieID, title) pairs.
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    42
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    43
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    44
/** Extracts the (userID, movieID) pairs of all ratings that are 4 or higher.
  *
  * Each line is split on "," exactly once; the first three fields are read
  * as userID, movieID and rating. Lines with fewer than three fields, or
  * whose rating is not an integer, are skipped instead of throwing.
  *
  * @param lines CSV lines without the header
  * @return (userID, movieID) pairs in input order
  */
def process_ratings(lines: List[String]) : List[(String, String)] = {
    lines.map(_.split(",")).collect {
        case Array(user, movie, rating, _*)
            if rating.trim.toIntOption.exists(_ >= 4) => (user, movie)
    }
}
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    48
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    49
/** Extracts the (movieID, title) pairs from the movie lines.
  *
  * Each line is split on "," exactly once; the first field is taken as the
  * movieID and the second as the title. Lines with fewer than two fields
  * are skipped instead of throwing.
  *
  * @param lines CSV lines without the header
  * @return (movieID, title) pairs in input order
  */
def process_movies(lines: List[String]) : List[(String, String)] = {
    lines.map(_.split(",")).collect {
        case Array(id, title, _*) => (id, title)
    }
}
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    52
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    53
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    54
// testcases
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    55
//-----------
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    56
// val good_ratings = process_ratings(ratings)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    57
// val movie_names = process_movies(movies)
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    58
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    59
//good_ratings.length   //48580
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    60
//movie_names.length    // 9742
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    61
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    62
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    63
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    64
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    65
// (3) Implement a grouping function that calculates a Map
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    66
//     containing the userIDs and all the corresponding recommendations 
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    67
//     (list of movieIDs). This  should be implemented in a tail
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    68
//     recursive fashion, using a Map m as accumulator. This Map m
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    69
//     is set to Map() at the beginning of the calculation.
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    70
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    71
/** Groups (userID, movieID) pairs into a map from userID to movieIDs.
  *
  * Tail-recursive: every step takes the user of the first pair, moves all
  * of that user's pairs (in their original order) into the accumulator and
  * recurses on the remaining pairs.
  *
  * @param ratings (userID, movieID) pairs still to be grouped
  * @param m       accumulator; pass Map() to start. An entry already present
  *                for a user occurring in `ratings` is replaced.
  * @return `m` extended with one entry per user occurring in `ratings`
  */
@scala.annotation.tailrec
def groupById(ratings: List[(String, String)], 
              m: Map[String, List[String]]) : Map[String, List[String]] = ratings match {
    case Nil => m
    case (user, _) :: _ =>
        // one pass separates this user's pairs from everybody else's
        val (own, others) = ratings.partition(_._1 == user)
        groupById(others, m + (user -> own.map(_._2)))
}
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    82
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    83
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    84
// testcases
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    85
//-----------
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    86
//val ratings_map = groupById(good_ratings, Map())
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    87
//val movies_map = movie_names.toMap
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    88
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    89
//ratings_map.get("414").get.map(movies_map.get(_)) 
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    90
//    => most prolific recommender with 1227 positive ratings
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    91
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    92
//ratings_map.get("474").get.map(movies_map.get(_)) 
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    93
//    => second-most prolific recommender with 787 positive ratings
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    94
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    95
//ratings_map.get("214").get.map(movies_map.get(_)) 
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
    96
//    => least prolific recommender with only 1 positive rating
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    97
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    98
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    99
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   100
// (4) Implement a function that takes a ratings map and a movie_name as argument.
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   101
//     The function calculates all suggestions containing
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   102
//     the movie in its recommendations. It ReTurns a list of all these
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   103
//     recommendations (each of them is a list and needs to have the movie deleted, 
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   104
//     otherwise it might happen we recommend the same movie).
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   105
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   106
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   107
/** Collects the recommendation lists that contain the movie `mov`.
  *
  * @param m   map from userID to the list of movieIDs that user recommends
  * @param mov movieID to look for
  * @return every list in `m` containing `mov`, with all occurrences of
  *         `mov` removed so that the movie itself is never suggested
  */
def favourites(m: Map[String, List[String]], mov: String) : List[List[String]] = {
    m.values.toList.collect {
        case recommended if recommended.contains(mov) =>
            recommended.filterNot(_ == mov)
    }
}
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   113
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   114
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   115
// testcases
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   116
//-----------
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   117
// movie ID "912" -> Casablanca (1942)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   118
//          "858" -> Godfather
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   119
//          "260" -> Star Wars: Episode IV - A New Hope (1977)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   120
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   121
//favourites(ratings_map, "912").length  // => 80
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   122
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   123
// That means there are 80 users that recommend the movie with ID 912.
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   124
// Of these 80 users, 55 gave a good rating to movie 858 and
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   125
// 52 a good rating to movies 260, 318, 593.
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   126
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   127
266
ca48ac1d3c3e updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 249
diff changeset
   128
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   129
// (5) Implement a suggestions function which takes a rating
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   130
//     map and a movie_name as arguments. It calculates all the recommended
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   131
//     movies sorted according to the most frequently suggested movie(s) first.
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   132
266
ca48ac1d3c3e updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 249
diff changeset
   133
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   134
/** Lists all movies recommended together with `mov_name`, most frequent first.
  *
  * The lists returned by `favourites` are flattened and every movieID is
  * counted once via `groupMapReduce`. The result is ordered by descending
  * count; movies with the same count are ordered by ascending movieID
  * (string comparison).
  *
  * @param recs     map from userID to the list of movieIDs that user recommends
  * @param mov_name movieID for which suggestions are computed
  * @return the distinct suggested movieIDs, most frequently suggested first
  */
def suggestions(recs: Map[String, List[String]], 
                mov_name: String) : List[String] = {
    val counts = favourites(recs, mov_name).flatten
        .groupMapReduce(identity)(_ => 1)(_ + _)
    counts.toList
        .sortBy { case (movie, count) => (-count, movie) }
        .map(_._1)
}
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   145
266
ca48ac1d3c3e updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 249
diff changeset
   146
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   147
// testcases
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   148
//-----------
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   149
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   150
//suggestions(ratings_map, "912")
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   151
//suggestions(ratings_map, "912").length  
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   152
// => 4110 suggestions with List(858, 260, 318, 593, ...)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   153
//    being the most frequently suggested movies
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   154
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   155
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   156
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   157
// (6) Implement a recommendations function which generates at most
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   158
//     *two* of the most frequently suggested movies. It ReTurns the 
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   159
//     actual movie names, not the movieIDs.
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   160
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   161
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   162
/** Returns the titles of at most two of the most frequently suggested movies.
  *
  * The movieIDs produced by `suggestions` are looked up in `movs` in order.
  * IDs without an entry in `movs` are skipped instead of raising a
  * NoSuchElementException, and the first two titles found are returned.
  *
  * @param recs     map from userID to the list of movieIDs that user recommends
  * @param movs     map from movieID to movie title
  * @param mov_name movieID for which recommendations are computed
  * @return up to two movie titles; Nil if there are no suggestions
  */
def recommendations(recs: Map[String, List[String]],
                    movs: Map[String, String],
                    mov_name: String) : List[String] = {
    suggestions(recs, mov_name).iterator
        .flatMap(id => movs.get(id))
        .take(2)
        .toList
}
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   170
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   171
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   172
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   173
// testcases
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   174
//-----------
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   175
// recommendations(ratings_map, movies_map, "912")
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   176
//   => List(Godfather, Star Wars: Episode IV - A New Hope (1977))
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   177
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   178
//recommendations(ratings_map, movies_map, "260")
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   179
//   => List(Star Wars: Episode V - The Empire Strikes Back (1980), 
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   180
//           Star Wars: Episode VI - Return of the Jedi (1983))
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   181
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   182
// recommendations(ratings_map, movies_map, "2")
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   183
//   => List(Lion King, Jurassic Park (1993))
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   184
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   185
// recommendations(ratings_map, movies_map, "0")
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   186
//   => Nil
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   187
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   188
// recommendations(ratings_map, movies_map, "1")
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   189
//   => List(Shawshank Redemption, Forrest Gump (1994))
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   190
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   191
// recommendations(ratings_map, movies_map, "4")
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   192
//   => Nil  (there are three ratings for this movie in ratings.csv but they are not positive)     
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   193
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   194
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   195
// If you want to calculate the recommendations for all movies,
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   196
// then use this code (it will take a few seconds of calculation time).
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   197
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   198
//val all = for (name <- movie_names.map(_._1)) yield {
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   199
//  recommendations(ratings_map, movies_map, name)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   200
//}
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   201
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   202
// helper functions
284
9a04eb6a2291 updated
Christian Urban <urbanc@in.tum.de>
parents: 266
diff changeset
   203
//List().take(2)
211
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   204
//List(1).take(2)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   205
//List(1,2).take(2)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   206
//List(1,2,3).take(2)
092e0879a5ae updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   207
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   208
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   209
284
9a04eb6a2291 updated
Christian Urban <urbanc@in.tum.de>
parents: 266
diff changeset
   210
}
326
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   211
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   212
// val ratings_url = """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   213
// val movies_url = """https://nms.kcl.ac.uk/christian.urban/movies.csv"""
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   214
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   215
// val ratings = CW7b.get_csv_url(ratings_url)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   216
// val movies = CW7b.get_csv_url(movies_url)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   217
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   218
// println(movies.length)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   219
// val good_ratings = CW7b.process_ratings(ratings)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   220
// val movie_names = CW7b.process_movies(movies)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   221
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   222
// val ratings_map = CW7b.groupById(good_ratings, Map())
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   223
// val movies_map = movie_names.toMap
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   224
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   225
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   226
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   227
//println(CW7b.recommendations(ratings_map, movies_map, "912"))
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   228
/*
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   229
val ratings_url = """https://nms.kcl.ac.uk/christian.urban/ratings.csv"""
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   230
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   231
val ratings = CW7b.get_csv_url(ratings_url)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   232
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   233
val good_ratings = CW7b.process_ratings(ratings)
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   234
val ratings_map = CW7b.groupById(good_ratings, Map())
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   235
e5453add7df6 updated
Christian Urban <urbanc@in.tum.de>
parents: 284
diff changeset
   236
println(CW7b.suggestions(ratings_map, "912").length)*/