4 object CW6b { |
4 object CW6b { |
5 |
5 |
6 import io.Source |
6 import io.Source |
7 import scala.util._ |
7 import scala.util._ |
8 |
8 |
9 def get_csv_page(url: String) : List[String] = |
9 val url_alcohol = |
10 Source.fromURL(url)("ISO-8859-1").getLines.toList |
10 "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv" |
11 |
11 |
12 def get_csv_file(file: String) : List[String] = |
12 val file_population = |
13 Source.fromFile(file)("ISO-8859-1").getLines.toList |
13 "population.csv" |
14 |
14 |
15 |
15 |
16 val url_alcohol = |
16 //(1) Complete the get_csv_page function below. It takes a URL-string |
17 "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv" |
17 // as argument and generates a list of strings corresponding to each |
|
18 // line in the downloaded csv-list. The URL url_alcohol above is one |
|
19 // possible argument. |
18 |
20 |
19 val file_population = |
21 //def get_csv_page(url: String) : List[String] = ... |
20 "population.csv" |
// Downloads the document at `url` and returns it as a list of lines,
// one list element per line of the csv-list.
//
// url: a URL string understood by Source.fromURL (url_alcohol is one
//      possible argument).
// returns: the lines in document order, without their line terminators;
//          an empty document yields an empty list.
def get_csv_page(url: String) : List[String] = {
  val csv = Source.fromURL(url)
  // getLines() strips both "\n" and "\r\n", so no stray '\r' is left on the
  // last column; toList forces the whole read before the source is closed.
  try csv.getLines().toList
  finally csv.close()
}
|
27 // Complete the get_csv_file function below. It takes a file name |
|
28 // as argument and reads the content of the given file. Like above, |
|
29 // it should generate a list of strings corresponding to each |
|
30 // line in the csv-list. The filename file_population is one possible |
|
31 // argument. |
21 |
32 |
22 get_csv_page(url_alcohol) |
33 //def get_csv_file(file: String) : List[String] = ... |
23 get_csv_file(file_population) |
// Reads the file named `file` and returns it as a list of lines,
// one list element per line of the csv-list.
//
// file: a file name understood by Source.fromFile (file_population is one
//       possible argument).
// returns: the lines in file order, without their line terminators;
//          an empty file yields an empty list.
def get_csv_file(file: String) : List[String] = {
  val csv = Source.fromFile(file)
  // getLines() strips both "\n" and "\r\n", so a trailing '\r' cannot break
  // a later toLong on the last column; toList forces the read before close.
  try csv.getLines().toList
  finally csv.close()
}
|
39 //(2) Complete the functions that process the csv-lists. For |
|
40 // process_alcs extract the country name (as String) and the |
|
41 // pure alcohol consumption (as Double). For process_pops |
|
42 // generate a Map of Strings (country names) to Long numbers |
|
43 // (population sizes). |
24 |
44 |
25 get_csv_page(url_alcohol).size |
45 //def process_alcs(lines: List[String]) : List[(String, Double)] = ... |
26 get_csv_file(file_population).size |
46 def process_alcs(lines: List[String]) : List[(String, Double)] = { |
27 |
47 val beheaded = lines.drop(1) |
28 val alcs = get_csv_page(url_alcohol) |
48 val splitEntries = for (n <- beheaded) yield n.split(",").toList |
29 val pops = get_csv_file(file_population) |
49 for (n <- splitEntries) yield (n.take(1).mkString, n.drop(4).mkString.toDouble) |
30 |
50 } |
31 def process_alcs(lines: List[String]) : List[(String, Double)] = |
51 //def process_pops(lines: List[String]) : Map[String, Long] = ... |
32 for (l <- lines) yield { |
52 def process_pops(lines: List[String]) : Map[String, Long] = { |
33 val entries = l.split(",").toList |
53 val beheaded = lines.drop(1); |
34 (entries(0), entries(4).toDouble) |
54 def toOnePair(line: String) : (String, Long) = { |
|
55 val splitAsList = line.split(",").toList |
|
56 (splitAsList.take(1).mkString, splitAsList.drop(1).mkString.toLong) |
35 } |
57 } |
36 |
58 val splitEntries = for (n <- beheaded) yield toOnePair(n) |
37 def process_pops(lines: List[String]) : Map[String, Long] = |
59 splitEntries.toMap |
38 (for (l <- lines) yield { |
|
39 val entries = l.split(",").toList |
|
40 (entries(0), entries(1).toLong) |
|
41 }).toMap |
|
42 |
|
43 |
|
44 process_alcs(alcs.drop(1))(1) |
|
45 process_pops(pops.drop(1))("Albania") |
|
46 |
|
// Overall consumption per country, ordered from the largest total down.
// The first row of each csv-list is dropped before processing, and only
// countries whose name is a key of the population map are kept.
def sorted_country_consumption() : List[(String, Long)] = {
  val perCountry = process_alcs(alcs.drop(1))
  val population = process_pops(pops.drop(1))
  // A name missing from the population map yields None and is skipped.
  val totals = perCountry.flatMap { case (country, alcohol) =>
    population.get(country).map(people => (country, (alcohol * people).toLong))
  }
  // Ascending sort followed by reverse puts the biggest total first.
  totals.sortBy(_._2).reverse
}
|
55 |
|
56 sorted_country_consumption().take(10) |
|
57 sorted_country_consumption().size |
|
58 |
|
// Returns (total over all countries, total over the first n countries of
// the sorted list, the second as a percentage of the first).
def percentage(n: Int) : (Long, Long, Double) = {
  // Keep only the consumption figures; the country names are not needed.
  val amounts = sorted_country_consumption().map { case (_, amount) => amount }
  val topN = amounts.take(n).sum
  val world = amounts.sum
  (world, topN, (topN.toDouble / world.toDouble) * 100.0)
}
60 } |
66 |
61 |
67 |
62 |
68 percentage(10) |
63 //(3) Calculate for each country the overall alcohol_consumption using |
69 percentage(164) |
64 // the data from the alcohol list and the population sizes list. You |
|
65 // should only include countries on the alcohol list that are also |
|
66 // on the population sizes list with the exact same name. Note that |
|
67 // the spelling of some names in the alcohol list differs from the |
|
68 // population sizes list. You can ignore entries where the names differ. |
|
69 // Sort the resulting list from the country with the highest alcohol |

70 // consumption to the country with the lowest alcohol consumption. |
70 |
71 |
|
72 //def sorted_country_consumption() : List[(String, Long)] = ... |
|
// Builds the list of (country, overall consumption) pairs, largest first.
// Every call re-reads both sources: the population file through
// get_csv_file/process_pops and the alcohol page through
// get_csv_page/process_alcs.
73 def sorted_country_consumption() : List[(String, Long)] = { |

74 val countryToPop = process_pops(get_csv_file(file_population)) |

75 val countryAndAlc = process_alcs(get_csv_page(url_alcohol)) |

// Keep only the alcohol entries whose country name is a key of countryToPop.
76 val countryAndConsumption = countryAndAlc.collect { |

77 case oneCountryAndAlc |

78 if countryToPop.isDefinedAt(oneCountryAndAlc._1) => |

// The .get on the Option cannot fail here: the guard above already checked
// that the key is present. The product is truncated to Long.
79 (oneCountryAndAlc._1, (oneCountryAndAlc._2*countryToPop.get(oneCountryAndAlc._1).get).toLong) |

80 } |

// Descending order on the second tuple element (the consumption).
81 countryAndConsumption.sortWith(_._2 > _._2) |
// NOTE(review): the "71 }" line below looks like it belongs to a second,
// interleaved copy of this file rather than to this function - confirm.
71 } |
82 } |
|
83 |
|
84 // Calculate the world consumption of pure alcohol of all countries, which |
|
85 // should be the first element in the tuple below. The second element is |
|
86 // the overall consumption of the first n countries in the sorted list |
|
87 // from above; and finally the double should be the percentage of the |
|
88 // first n countries drinking from the world consumption of alcohol. |
|
89 |
|
90 //def percentage(n: Int) : (Long, Long, Double) = ... |
|
// Returns a triple: the consumption summed over every country, the
// consumption summed over the first n countries of the sorted list, and
// the second figure expressed as a percentage of the first.
def percentage(n: Int) : (Long, Long, Double) = {
  val ranked = sorted_country_consumption()
  val worldTotal = ranked.foldLeft(0L) { case (acc, (_, amount)) => acc + amount }
  val leadersTotal = ranked.take(n).foldLeft(0L) { case (acc, (_, amount)) => acc + amount }
  // Multiplying by 1.0 first forces floating-point division.
  val share = (leadersTotal*1.0/worldTotal)*100
  (worldTotal, leadersTotal, share)
}
|
98 } |