128
|
1 |
// Part 2 about Alcohol-Consumption Worldwide
|
|
2 |
//============================================
|
|
3 |
|
|
4 |
/** Part 2 of the coursework: worldwide alcohol consumption.
  *
  * Reads a per-country alcohol CSV (from a URL) and a per-country population
  * CSV (from a local file), combines them by exact country name, and reports
  * overall consumption figures.
  */
object CW6b {

  import io.Source
  import scala.util._

  /** URL of the `drinks.csv` file in the fivethirtyeight alcohol-consumption data. */
  val url_alcohol =
    "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv"

  /** Name of the CSV file holding the population sizes. */
  val file_population =
    "population.csv"

  /** Reads every line from a freshly opened source and always closes it.
    *
    * `getLines()` strips the line terminator, so both `\n` and `\r\n`
    * terminated data yield clean lines.
    */
  private def readAllLines(open: => Source): List[String] = {
    val src = open
    try src.getLines().toList
    finally src.close()
  }

  //(1) get_csv_page takes a URL-string as argument and generates a list of
  //    strings corresponding to each line in the downloaded csv-list. The
  //    URL url_alcohol above is one possible argument.

  /** Downloads the CSV at `url` and returns its lines.
    *
    * @param url address of the CSV document
    * @return one string per line, without line terminators
    */
  def get_csv_page(url: String) : List[String] =
    readAllLines(Source.fromURL(url))

  //    get_csv_file takes a file name as argument and reads the content of
  //    the given file. Like above, it generates a list of strings
  //    corresponding to each line in the csv-list. The filename
  //    file_population is one possible argument.

  /** Reads the CSV file `file` and returns its lines.
    *
    * @param file path of the CSV file
    * @return one string per line, without line terminators
    */
  def get_csv_file(file: String) : List[String] =
    readAllLines(Source.fromFile(file))

  //(2) Functions that process the csv-lists. process_alcs extracts the
  //    country name (as String) and the pure alcohol consumption (as
  //    Double). process_pops generates a Map of Strings (country names)
  //    to Long numbers (population sizes).

  /** Turns the alcohol CSV lines into (country, consumption) pairs.
    *
    * The first line is treated as a header and dropped; blank lines are
    * skipped. The country is the first comma-separated field and the
    * consumption is parsed from the field(s) starting at index 4.
    *
    * @param lines raw CSV lines including the header
    * @return pairs in the order the lines appear
    * @throws NumberFormatException if a non-blank line has no parsable
    *                               number in that position
    */
  def process_alcs(lines: List[String]) : List[(String, Double)] =
    for {
      line <- lines.drop(1)
      if line.trim.nonEmpty
      fields = line.split(",").toList
    } yield (fields.take(1).mkString, fields.drop(4).mkString.trim.toDouble)

  /** Turns the population CSV lines into a country -> population map.
    *
    * The first line is treated as a header and dropped; blank lines are
    * skipped. The country is the first comma-separated field; the rest of
    * the line is parsed as the population.
    *
    * @param lines raw CSV lines including the header
    * @return map from country name to population size
    * @throws NumberFormatException if a non-blank line has no parsable
    *                               whole number after the country
    */
  def process_pops(lines: List[String]) : Map[String, Long] = {
    def toOnePair(line: String) : (String, Long) = {
      val fields = line.split(",").toList
      // trim so that stray whitespace or a leftover '\r' does not break toLong
      (fields.take(1).mkString, fields.drop(1).mkString.trim.toLong)
    }
    lines.drop(1).filter(_.trim.nonEmpty).map(toOnePair).toMap
  }

  //(3) Calculate for each country the overall alcohol_consumption using
  //    the data from the alcohol list and the population sizes list. Only
  //    countries on the alcohol list that are also on the population sizes
  //    list with the exact same name are included; entries whose spelling
  //    differs between the two lists are ignored. The resulting list is
  //    sorted from the country with the highest alcohol consumption to the
  //    country with the lowest alcohol consumption.

  /** Overall consumption per country, highest first.
    *
    * Loads `file_population` and `url_alcohol`, multiplies each country's
    * consumption figure by its population (presumably a per-head figure --
    * confirm against the data set) and truncates the product to a Long.
    *
    * @return (country, overall consumption) pairs in descending order
    */
  def sorted_country_consumption() : List[(String, Long)] = {
    val countryToPop = process_pops(get_csv_file(file_population))
    val countryAndAlc = process_alcs(get_csv_page(url_alcohol))
    val countryAndConsumption = for {
      (country, consumption) <- countryAndAlc
      // countries missing from the population map contribute nothing
      population <- countryToPop.get(country).toList
    } yield (country, (consumption * population).toLong)
    countryAndConsumption.sortWith(_._2 > _._2)
  }

  //    Calculate the world consumption of pure alcohol of all countries,
  //    which is the first element in the tuple below. The second element is
  //    the overall consumption of the first n countries in the sorted list
  //    from above; and finally the double is the percentage of the world
  //    consumption of alcohol that the first n countries drink.

  /** World total, total of the top `n` countries, and their share in percent.
    *
    * @param n number of countries to take from the top of the sorted list
    * @return (world consumption, consumption of the first n countries,
    *         percentage of the world consumption drunk by those n);
    *         the percentage is 0.0 when the world consumption is 0
    */
  def percentage(n: Int) : (Long, Long, Double) = {
    val consumptions = sorted_country_consumption().map(_._2)
    val totalAlc = consumptions.sum
    val firstNAlc = consumptions.take(n).sum
    // guard the division so an empty data set does not produce NaN
    val pcntage = if (totalAlc == 0L) 0.0 else (firstNAlc * 1.0 / totalAlc) * 100
    (totalAlc, firstNAlc, pcntage)
  }
}
|