progs/alcohol.scala
author Christian Urban <urbanc@in.tum.de>
Sat, 17 Nov 2018 22:39:02 +0000
changeset 210 63a1376cbebd
parent 128 166bb9b6b20a
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
     1
// Part 2 about Alcohol-Consumption Worldwide
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
     2
//============================================
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     3
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
     4
object CW6b {
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
import io.Source
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
import scala.util._
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
     9
// Fetches the CSV document at `url` and returns its lines in order.
// The content is decoded as ISO-8859-1. All lines are read into a list
// before the underlying source is closed, so no stream stays open after
// the call returns (or after it fails).
def get_csv_page(url: String) : List[String] = {
  val src = Source.fromURL(url)("ISO-8859-1")
  // toList forces the lazy line iterator while the source is still open
  try src.getLines().toList finally src.close()
}
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    11
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    12
// Reads the CSV file named `file` and returns its lines in order.
// The content is decoded as ISO-8859-1. All lines are read into a list
// before the file is closed, so no file handle stays open after the call
// returns (or after it fails).
def get_csv_file(file: String) : List[String] = {
  val src = Source.fromFile(file)("ISO-8859-1")
  // toList forces the lazy line iterator while the file is still open
  try src.getLines().toList finally src.close()
}
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    14
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    15
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    16
// Address of the alcohol-consumption CSV (drinks.csv) that is downloaded
// with get_csv_page.
val url_alcohol: String =
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv"
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    18
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    19
// Name of the population CSV file that is read with get_csv_file.
val file_population: String = "population.csv"
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    21
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    22
// Raw lines of the two data sets. Each source is read exactly once while
// the enclosing object initialises; everything below works on these
// cached lists instead of downloading / re-reading the data again.
val alcs = get_csv_page(url_alcohol)
val pops = get_csv_file(file_population)

// Exploratory checks (results discarded): number of lines in each data set
alcs.size
pops.size
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    30
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    31
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    32
// Converts comma-separated data lines into pairs made of the first column
// (kept as text) and the fifth column (parsed as a Double).
// Every line must have at least five columns and a numeric fifth column;
// otherwise the indexing or the number conversion throws.
def process_alcs(lines: List[String]) : List[(String, Double)] =
  lines.map { line =>
    val fields = line.split(",")
    (fields(0), fields(4).toDouble)
  }
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    38
// Builds a lookup table from comma-separated data lines: the first column
// is the key and the second column, parsed as a Long, is the value.
// If a key occurs on several lines, the last occurrence wins.
def process_pops(lines: List[String]) : Map[String, Long] =
  lines.map { line =>
    val fields = line.split(",")
    fields(0) -> fields(1).toLong
  }.toMap
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    43
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    44
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    45
// Combines both data sets into (name, amount * population) pairs, ordered
// from the largest to the smallest product.
// The first line of each raw data set is dropped before parsing. Names that
// have no entry in the population table are left out of the result.
def sorted_country_consumption() : List[(String, Long)] = {
  val amounts = process_alcs(alcs.drop(1))
  val population = process_pops(pops.drop(1))
  val totals = amounts.collect {
    case (country, amount) if population.contains(country) =>
      (country, (amount * population(country)).toLong)
  }
  // ascending sort followed by reverse yields descending order
  totals.sortBy { case (_, total) => total }.reverse
}
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    53
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
    54
// Exploratory calls (results discarded): the entry at index 9 of the
// sorted list, and the number of entries in it.
sorted_country_consumption().apply(9)
sorted_country_consumption().length
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    56
127
b4def82f3f9f updated
Christian Urban <urbanc@in.tum.de>
parents: 18
diff changeset
    57
// Relates the first `n` entries of sorted_country_consumption() to the
// whole list.
// Returns a triple (sum over all entries, sum over the first n entries,
// share of the second in the first expressed in percent).
def percentage(n: Int) : (Long, Long, Double) = {
  val totals = sorted_country_consumption().map { case (_, total) => total }
  val sum_all = totals.sum
  val sum_n = totals.take(n).sum
  (sum_all, sum_n, (sum_n.toDouble / sum_all.toDouble) * 100.0)
}
18
87e55eb309ed updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    64
128
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
    65
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
    66
// Exploratory calls (results discarded)
percentage(10)
percentage(164)

// Check of the result for n = 164 against fixed expected values; the
// expected triple is parenthesised explicitly so it is passed as one tuple.
assert(percentage(164) == ((28771558364L, 28771558364L, 100.0)))
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
    70
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
    71
}
166bb9b6b20a updated
Christian Urban <urbanc@in.tum.de>
parents: 127
diff changeset
    72