testing1/alcohol.scala
author Christian Urban <urbanc@in.tum.de>
Thu, 23 Nov 2017 10:56:47 +0000
changeset 153 316f9c6cc2ff
parent 152 16dbc95d7d77
child 161 bb43401d82c3
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
128
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     1
// Part 2 about Alcohol-Consumption Worldwide
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     2
//============================================
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     3
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     4
object CW6b {
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
import io.Source
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
import scala.util._
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     9
// Fetches the resource at `url`, decodes it as ISO-8859-1 and returns
// its content as a list of lines.
// Errors raised while opening or reading the URL propagate to the caller.
def get_csv_page(url: String) : List[String] = {
  val src = Source.fromURL(url)("ISO-8859-1")
  // getLines() is lazy, so it is forced with toList *before* closing;
  // the finally clause releases the underlying stream even if reading fails
  try src.getLines().toList
  finally src.close()
}
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    11
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    12
// Reads the file named `file`, decodes it as ISO-8859-1 and returns its
// content as a list of lines.
// Errors raised while opening or reading the file propagate to the caller.
def get_csv_file(file: String) : List[String] = {
  val src = Source.fromFile(file)("ISO-8859-1")
  // getLines() is lazy, so it is forced with toList *before* closing;
  // the finally clause releases the file handle even if reading fails
  try src.getLines().toList
  finally src.close()
}
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    14
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    15
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    16
// Data sources: the alcohol table is fetched from a URL, the population
// table is read from a file given by a relative path.
val url_alcohol: String =
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv"

val file_population: String = "population.csv"

// Probe calls: load both tables, then load them again to look at their
// line counts. The results are discarded; every call re-reads its source.
get_csv_page(url_alcohol)
get_csv_file(file_population)

get_csv_page(url_alcohol).size
get_csv_file(file_population).size

// Raw lines of both tables; the code below drops the first line of each
// before parsing.
val alcs: List[String] = get_csv_page(url_alcohol)
val pops: List[String] = get_csv_file(file_population)
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    30
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    31
// Parses comma-separated lines of the alcohol table into (name, value)
// pairs: column 0 is taken as the name, column 4 is parsed as a Double.
// Lines with fewer than five columns (for example blank lines) or with a
// non-numeric fifth column are skipped rather than aborting the whole run.
def process_alcs(lines: List[String]) : List[(String, Double)] =
  lines.flatMap { l =>
    // match on the array directly: no List conversion, no O(n) indexing
    l.split(",") match {
      case Array(country, _, _, _, litres, _*) =>
        Try(litres.toDouble).toOption.map(d => (country, d)).toList
      case _ => Nil
    }
  }
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    36
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
// Parses comma-separated lines of the population table into a map from
// column 0 (the name) to column 1 parsed as a Long.
// Lines with fewer than two columns or a non-integer second column are
// skipped; surrounding whitespace around the number is tolerated.
// If a name occurs more than once, the last occurrence wins (toMap).
def process_pops(lines: List[String]) : Map[String, Long] =
  lines.flatMap { l =>
    l.split(",") match {
      case Array(country, population, _*) =>
        // toLong does not accept padding, hence the trim
        Try(population.trim.toLong).toOption.map(p => (country, p)).toList
      case _ => Nil
    }
  }.toMap
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    42
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    43
152
16dbc95d7d77 updated
Christian Urban <urbanc@in.tum.de>
parents: 144
diff changeset
    44
// Probe calls on the parsers (first line of each table dropped):
// the parsed alcohol entry at index 1 and the population stored under "Albania"
process_alcs(alcs.drop(1)).apply(1)
process_pops(pops.drop(1)).apply("Albania")
16dbc95d7d77 updated
Christian Urban <urbanc@in.tum.de>
parents: 144
diff changeset
    46
128
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    47
// Combines both tables: for every entry of the alcohol table whose name
// also occurs in the population table, multiplies the alcohol figure by
// the population and converts the product to a Long.
// Entries without a population are left out; the result is ordered from
// the largest to the smallest product.
def sorted_country_consumption() : List[(String, Long)] = {
  val per_country = process_alcs(alcs.drop(1))
  val population = process_pops(pops.drop(1))
  val totals = per_country
    .filter { case (country, _) => population.isDefinedAt(country) }
    .map { case (country, litres) => (country, (litres * population(country)).toLong) }
  // ascending sort followed by reverse, so the largest value comes first
  totals.sortBy(_._2).reverse
}
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    55
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    56
// Probe calls: the first ten entries of the ranking and its length
sorted_country_consumption().take(10)
sorted_country_consumption().length
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    58
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    59
// Returns a triple for the first n entries of the sorted ranking:
//   (sum over all entries, sum over the first n entries,
//    the second as a percentage of the first)
def percentage(n: Int) : (Long, Long, Double) = {
  // only the numeric component of each (name, value) pair is needed
  val amounts = sorted_country_consumption().map(_._2)
  val total = amounts.sum
  val top_n = amounts.take(n).sum
  (total, top_n, (top_n.toDouble / total.toDouble) * 100.0)
}
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    66
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    67
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    68
// Probe calls: share of the first 10 and of the first 164 entries
percentage(n = 10)
percentage(n = 164)
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    70
fd03a2f06286 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    71
}