testing1/alcohol.scala
author Christian Urban <urbanc@in.tum.de>
Sun, 03 Dec 2017 21:11:49 +0000
changeset 161 6ea450e999e2
parent 152 114a89518aea
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     1
// Part 2 about Alcohol-Consumption Worldwide
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     2
//============================================
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     3
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     4
object CW6b {
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
import io.Source
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
import scala.util._
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
     9
// Web address of the CSV with the per-country alcohol statistics.
val url_alcohol: String =
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv"

// Name of the CSV file with the population sizes, read from the local file system.
val file_population: String =
  "population.csv"
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    14
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    15
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    16
//(1) Complete the get_csv_page function below. It takes a URL-string
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    17
//    as argument and generates a list of strings corresponding to each
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    18
//    line in the downloaded csv-list. The URL url_alcohol above is one
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    19
//    possible argument.
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    20
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    21
//def get_csv_page(url: String) : List[String] = ...
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    22
/** Downloads the CSV document at `url` and returns it as a list of lines.
  *
  * The complete response is read into memory and then split at line breaks.
  * The underlying stream is closed again in every case, also when reading
  * fails. Both LF and CRLF line endings are recognised, so the returned
  * lines never end in a carriage return. Empty lines at the very end of the
  * document are dropped.
  *
  * @param url address of the CSV document, for example `url_alcohol`
  * @return one string per line, the header line included
  */
def get_csv_page(url: String) : List[String] = {
  val csv = Source.fromURL(url)
  // A Source keeps its input stream open until close() is called explicitly.
  try csv.mkString.split("\r?\n").toList
  finally csv.close()
}
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    27
//    Complete the get_csv_file function below. It takes a file name
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    28
//    as argument and reads the content of the given file. Like above,
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    29
//    it should generate a list of strings corresponding to each
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    30
//    line in the csv-list. The filename file_population is one possible
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    31
//    argument.
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    32
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    33
//def get_csv_file(file: String) : List[String] = ...
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    34
/** Reads the CSV file `file` and returns its content as a list of lines.
  *
  * The complete file is read into memory and then split at line breaks.
  * The file handle is closed again in every case, also when reading fails.
  * Both LF and CRLF line endings are recognised, so the returned lines never
  * end in a carriage return. Empty lines at the very end of the file are
  * dropped.
  *
  * @param file name of the file to read, for example `file_population`
  * @return one string per line, the header line included
  */
def get_csv_file(file: String) : List[String] = {
  val csv = Source.fromFile(file)
  // A Source keeps the file handle open until close() is called explicitly.
  try csv.mkString.split("\r?\n").toList
  finally csv.close()
}
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    39
//(2) Complete the functions that process the csv-lists. For
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    40
//    process_alcs extract the country name (as String) and the
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    41
//    pure alcohol consumption (as Double). For process_pops
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    42
//    generate a Map of Strings (country names) to Long numbers
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    43
//    (population sizes).
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    44
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    45
//def process_alcs(lines: List[String]) : List[(String, Double)] = ...
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    46
/** Extracts (country, pure alcohol consumption) pairs from the lines of the
  * alcohol CSV.
  *
  * The first line is treated as the header and skipped, as are lines that
  * contain only whitespace. The country name is the first comma-separated
  * field. The consumption is read from the fifth field only; any further
  * fields on the same line are ignored instead of being glued onto the number.
  *
  * @param lines the CSV lines, header line first
  * @return one pair per data line, in input order
  * @throws NumberFormatException if the fifth field of a data line is missing
  *                               or is not a number
  */
def process_alcs(lines: List[String]) : List[(String, Double)] =
  for (line <- lines.drop(1) if line.trim.nonEmpty) yield {
    val fields = line.split(",").toList
    val country = fields.headOption.getOrElse("")
    // A missing column becomes "", which fails in toDouble with a NumberFormatException.
    val litres = fields.lift(4).getOrElse("")
    (country, litres.trim.toDouble)
  }
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    51
//def process_pops(lines: List[String]) : Map[String, Long] = ...
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    52
/** Builds a map from country name to population size from the lines of the
  * population CSV.
  *
  * The first line is treated as the header and skipped, as are lines that
  * contain only whitespace. The country name is the first comma-separated
  * field. The population is read from the second field only, with surrounding
  * whitespace (such as a stray carriage return) removed; any further fields on
  * the same line are ignored instead of being glued onto the number. If a
  * country occurs more than once, the last occurrence wins.
  *
  * @param lines the CSV lines, header line first
  * @return the population size for every country in the input
  * @throws NumberFormatException if the second field of a data line is missing
  *                               or is not a whole number
  */
def process_pops(lines: List[String]) : Map[String, Long] = {
  val pairs = for (line <- lines.drop(1) if line.trim.nonEmpty) yield {
    val fields = line.split(",").toList
    val country = fields.headOption.getOrElse("")
    // A missing column becomes "", which fails in toLong with a NumberFormatException.
    val population = fields.lift(1).getOrElse("")
    (country, population.trim.toLong)
  }
  pairs.toMap
}
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    61
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    62
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    63
//(3) Calculate for each country the overall alcohol_consumption using
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    64
//    the data from the alcohol list and the population sizes list. You
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    65
//    should only include countries on the alcohol list that are also
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    66
//    on the population sizes list with the exact same name. Note that
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    67
//    the spelling of some names in the alcohol list differs from the
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    68
//    population sizes list. You can ignore entries where the names differ.
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    69
//    Sort the resulting list according to the country with the highest alcohol
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    70
//    consumption to the country with the lowest alcohol consumption.
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    71
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    72
//def sorted_country_consumption() : List[(String, Long)] = ...
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    73
/** Computes the overall alcohol consumption per country, highest first.
  *
  * Loads the population sizes from `file_population` and the alcohol figures
  * from `url_alcohol`. Every country of the alcohol list whose name also
  * occurs, spelled exactly the same, in the population map is paired with its
  * alcohol figure multiplied by its population, truncated to a Long.
  * Countries without a matching population entry are left out.
  *
  * @return (country, overall consumption) pairs in descending order of consumption
  */
def sorted_country_consumption() : List[(String, Long)] = {
  val populations = process_pops(get_csv_file(file_population))
  val alcoholFigures = process_alcs(get_csv_page(url_alcohol))
  // The Option returned by the lookup drops unknown countries in the flatMap.
  val overall = alcoholFigures.flatMap { case (country, litres) =>
    populations.get(country).map(headcount => (country, (litres * headcount).toLong))
  }
  overall.sortBy(_._2)(Ordering[Long].reverse)
}
161
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    83
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    84
//   Calculate the world consumption of pure alcohol of all countries, which
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    85
//   should be the first element in the tuple below. The second element is
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    86
//   the overall consumption of the first n countries in the sorted list
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    87
//   from above; and finally the double should be the percentage of the
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    88
//   first n countries drinking from the world consumption of alcohol.
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    89
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    90
//def percentage(n: Int) : (Long, Long, Double) = ...
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    91
/** Relates the consumption of the `n` top-ranked countries to the total.
  *
  * The ranking comes from `sorted_country_consumption()`, so every call
  * reloads and re-processes both data sources.
  *
  * @param n number of countries to take from the top of the sorted list
  * @return a triple of (sum over all countries, sum over the first `n`
  *         countries, share of the second in the first as a percentage);
  *         the percentage is not a finite number when the total is 0
  */
def percentage(n: Int) : (Long, Long, Double) = {
  val ranking = sorted_country_consumption()
  val worldTotal = ranking.map(_._2).sum
  val topTotal = ranking.take(n).map(_._2).sum
  // Convert to Double before dividing; Long division would truncate the ratio.
  (worldTotal, topTotal, topTotal.toDouble / worldTotal * 100)
}
6ea450e999e2 updated
Christian Urban <urbanc@in.tum.de>
parents: 152
diff changeset
    98
}