--- a/testing1/alcohol.scala Wed Nov 29 21:22:29 2017 +0000
+++ b/testing1/alcohol.scala Sun Dec 03 21:11:49 2017 +0000
@@ -6,66 +6,93 @@
import io.Source
import scala.util._
-def get_csv_page(url: String) : List[String] =
- Source.fromURL(url)("ISO-8859-1").getLines.toList
-
-def get_csv_file(file: String) : List[String] =
- Source.fromFile(file)("ISO-8859-1").getLines.toList
-
-
-val url_alcohol =
+val url_alcohol =
"https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv"
-val file_population =
+val file_population =
"population.csv"
-get_csv_page(url_alcohol)
-get_csv_file(file_population)
-get_csv_page(url_alcohol).size
-get_csv_file(file_population).size
-
-val alcs = get_csv_page(url_alcohol)
-val pops = get_csv_file(file_population)
+//(1) Complete the get_csv_page function below. It takes a URL-string
+// as argument and generates a list of strings corresponding to each
+// line in the downloaded csv-list. The URL url_alcohol above is one
+// possible argument.
-def process_alcs(lines: List[String]) : List[(String, Double)] =
- for (l <- lines) yield {
- val entries = l.split(",").toList
- (entries(0), entries(4).toDouble)
- }
-
-def process_pops(lines: List[String]) : Map[String, Long] =
- (for (l <- lines) yield {
- val entries = l.split(",").toList
- (entries(0), entries(1).toLong)
- }).toMap
-
+//def get_csv_page(url: String) : List[String] = ...
+def get_csv_page(url: String) : List[String] = {
+  // Downloads `url` and returns one string per line, without line terminators.
+  val csv = Source.fromURL(url)
+  try csv.mkString.split("\n").map(_.stripSuffix("\r")).toList finally csv.close()
+}
+// Complete the get_csv_file function below. It takes a file name
+// as argument and reads the content of the given file. Like above,
+// it should generate a list of strings corresponding to each
+// line in the csv-list. The filename file_population is one possible
+// argument.
-process_alcs(alcs.drop(1))(1)
-process_pops(pops.drop(1))("Albania")
-
-def sorted_country_consumption() : List[(String, Long)] = {
- val alcs2 = process_alcs(alcs.drop(1))
- val pops2 = process_pops(pops.drop(1))
- val cons_list =
- for ((cname, cons) <- alcs2;
- if pops2.isDefinedAt(cname)) yield (cname, (cons * pops2(cname)).toLong)
- cons_list.sortBy(_._2).reverse
+//def get_csv_file(file: String) : List[String] = ...
+def get_csv_file(file: String) : List[String] = {
+  // Reads `file` and returns one string per line, without line terminators.
+  val csv = Source.fromFile(file)
+  try csv.mkString.split("\n").map(_.stripSuffix("\r")).toList finally csv.close()
}
-
-sorted_country_consumption().take(10)
-sorted_country_consumption().size
+//(2) Complete the functions that process the csv-lists. For
+// process_alcs extract the country name (as String) and the
+// pure alcohol consumption (as Double). For process_pops
+// generate a Map of Strings (country names) to Long numbers
+// (population sizes).
-def percentage(n: Int) : (Long, Long, Double) = {
- val cons_list = sorted_country_consumption()
- val sum_n = cons_list.take(n).map(_._2).sum
- val sum_all = cons_list.map(_._2).sum
- val perc = (sum_n.toDouble / sum_all.toDouble) * 100.0
- (sum_all, sum_n, perc)
+//def process_alcs(lines: List[String]) : List[(String, Double)] = ...
+def process_alcs(lines: List[String]) : List[(String, Double)] = {
+  // Skips the header row, then pairs column 0 (country) with column 4 (alcohol).
+  // Column 4 is read by index so that extra trailing columns are not glued onto it.
+  for (cols <- lines.drop(1).map(_.split(","))) yield (cols(0), cols(4).trim.toDouble)
+}
+//def process_pops(lines: List[String]) : Map[String, Long] = ...
+def process_pops(lines: List[String]) : Map[String, Long] = {
+  // Reads column 1 by index (extra columns are ignored) and trims it,
+  // because toLong rejects surrounding whitespace such as a trailing '\r'.
+  def toOnePair(line: String) : (String, Long) = {
+    val cols = line.split(",")
+    (cols(0), cols(1).trim.toLong)
+  }
+  lines.drop(1).map(toOnePair).toMap // drop(1) skips the csv header row
}
-percentage(10)
-percentage(164)
+//(3) Calculate for each country the overall alcohol_consumption using
+// the data from the alcohol list and the population sizes list. You
+// should only include countries on the alcohol list that are also
+// on the population sizes list with the exact same name. Note that
+// the spelling of some names in the alcohol list differs from the
+// population sizes list. You can ignore entries where the names differ.
+// Sort the resulting list in descending order, from the country with the
+// highest alcohol consumption to the country with the lowest.
+//def sorted_country_consumption() : List[(String, Long)] = ...
+def sorted_country_consumption() : List[(String, Long)] = {
+  // Load both data sets: population by country, and alcohol consumption per country.
+  val popByCountry = process_pops(get_csv_file(file_population))
+  val alcByCountry = process_alcs(get_csv_page(url_alcohol))
+  // Keep only names present in both lists; total = consumption value * population.
+  val totals = alcByCountry.flatMap { case (name, alc) =>
+    popByCountry.get(name).map(pop => (name, (alc * pop).toLong))
+  }
+  totals.sortWith((a, b) => a._2 > b._2) // largest total first
}
+
+// Calculate the world consumption of pure alcohol of all countries, which
+// should be the first element in the tuple below. The second element is
+// the overall consumption of the first n countries in the sorted list
+// from above; and finally the double should be the percentage of the
+// first n countries' consumption relative to the world consumption of alcohol.
+
+//def percentage(n: Int) : (Long, Long, Double) = ...
+def percentage(n: Int) : (Long, Long, Double) = {
+  // Returns (world total, top-n total, top-n share of the world total in percent).
+  val ctryConsump = sorted_country_consumption()
+  val totalAlc = ctryConsump.map(_._2).sum
+  val firstNAlc = ctryConsump.take(n).map(_._2).sum
+  (totalAlc, firstNAlc, (firstNAlc*1.0/totalAlc)*100) // sums reused, not recomputed
+}
+}