testing1/alcohol.scala
changeset 161 6ea450e999e2
parent 152 114a89518aea
--- a/testing1/alcohol.scala	Wed Nov 29 21:22:29 2017 +0000
+++ b/testing1/alcohol.scala	Sun Dec 03 21:11:49 2017 +0000
@@ -6,66 +6,93 @@
 import io.Source
 import scala.util._
 
-def get_csv_page(url: String) : List[String] = 
-  Source.fromURL(url)("ISO-8859-1").getLines.toList
-
-def get_csv_file(file: String) : List[String] = 
-  Source.fromFile(file)("ISO-8859-1").getLines.toList
-
-
-val url_alcohol = 
+val url_alcohol =
   "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv"
 
-val file_population = 
+val file_population =
   "population.csv"
 
-get_csv_page(url_alcohol)
-get_csv_file(file_population)
 
-get_csv_page(url_alcohol).size
-get_csv_file(file_population).size
-
-val alcs = get_csv_page(url_alcohol)
-val pops = get_csv_file(file_population)
+//(1) Complete the get_csv_page function below. It takes a URL-string
+//    as argument and generates a list of strings corresponding to each
+//    line in the downloaded csv-list. The URL url_alcohol above is one
+//    possible argument.
 
-def process_alcs(lines: List[String]) : List[(String, Double)] =
-  for (l <- lines) yield {
-    val entries = l.split(",").toList 
-    (entries(0), entries(4).toDouble) 
-  }
-
-def process_pops(lines: List[String]) : Map[String, Long] =
-  (for (l <- lines) yield {
-    val entries = l.split(",").toList 
-    (entries(0), entries(1).toLong)
-  }).toMap
-
+//def get_csv_page(url: String) : List[String] = ...
+def get_csv_page(url: String) : List[String] = {
+  val csv = Source.fromURL(url)
+  val contents = csv.mkString.split("\n")
+  contents.toList
+}
+//    Complete the get_csv_file function below. It takes a file name
+//    as argument and reads the content of the given file. Like above,
+//    it should generate a list of strings corresponding to each
+//    line in the csv-list. The filename file_population is one possible
+//    argument.
 
-process_alcs(alcs.drop(1))(1)
-process_pops(pops.drop(1))("Albania")
-
-def sorted_country_consumption() : List[(String, Long)] = {
-  val alcs2 = process_alcs(alcs.drop(1))
-  val pops2 = process_pops(pops.drop(1))
-  val cons_list = 
-    for ((cname, cons) <- alcs2; 
-	 if pops2.isDefinedAt(cname)) yield (cname, (cons * pops2(cname)).toLong)
-  cons_list.sortBy(_._2).reverse
+//def get_csv_file(file: String) : List[String] = ...
+def get_csv_file(file: String) : List[String] = {
+  val csv = Source.fromFile(file)
+  val contents = csv.mkString.split("\n")
+  contents.toList
 }
-
-sorted_country_consumption().take(10)
-sorted_country_consumption().size
+//(2) Complete the functions that process the csv-lists. For
+//    process_alcs extract the country name (as String) and the
+//    pure alcohol consumption (as Double). For process_pops
+//    generate a Map of Strings (country names) to Long numbers
+//    (population sizes).
 
-def percentage(n: Int) : (Long, Long, Double) = {
-  val cons_list = sorted_country_consumption()
-  val sum_n = cons_list.take(n).map(_._2).sum
-  val sum_all = cons_list.map(_._2).sum
-  val perc = (sum_n.toDouble / sum_all.toDouble) * 100.0
-  (sum_all, sum_n, perc)
+//def process_alcs(lines: List[String]) : List[(String, Double)] = ...
+def process_alcs(lines: List[String]) : List[(String, Double)] = {
+  val beheaded = lines.drop(1)
+  val splitEntries = for (n <- beheaded) yield n.split(",").toList
+  for (n <- splitEntries) yield (n.take(1).mkString, n.drop(4).mkString.toDouble)
+}
+//def process_pops(lines: List[String]) : Map[String, Long] = ...
+def process_pops(lines: List[String]) : Map[String, Long] = {
+  val beheaded = lines.drop(1);
+  def toOnePair(line: String) : (String, Long) = {
+    val splitAsList = line.split(",").toList
+    (splitAsList.take(1).mkString, splitAsList.drop(1).mkString.toLong)
+  }
+  val splitEntries = for (n <- beheaded) yield toOnePair(n)
+  splitEntries.toMap
 }
 
 
-percentage(10)
-percentage(164)
+//(3) Calculate for each country the overall alcohol consumption using
+//    the data from the alcohol list and the population sizes list. You
+//    should only include countries on the alcohol list that are also
+//    on the population sizes list with the exact same name. Note that
+//    the spelling of some names in the alcohol list differs from the
+//    population sizes list. You can ignore entries where the names differ.
+//    Sort the resulting list from the country with the highest alcohol
+//    consumption to the country with the lowest alcohol consumption.
 
+//def sorted_country_consumption() : List[(String, Long)] = ...
+def sorted_country_consumption() : List[(String, Long)] = {
+  val countryToPop = process_pops(get_csv_file(file_population))
+  val countryAndAlc = process_alcs(get_csv_page(url_alcohol))
+  val countryAndConsumption = countryAndAlc.collect {
+    case oneCountryAndAlc
+    if countryToPop.isDefinedAt(oneCountryAndAlc._1) =>
+    (oneCountryAndAlc._1, (oneCountryAndAlc._2*countryToPop.get(oneCountryAndAlc._1).get).toLong)
+  }
+  countryAndConsumption.sortWith(_._2 > _._2)
 }
+
+//   Calculate the world consumption of pure alcohol of all countries, which
+//   should be the first element in the tuple below. The second element is
+//   the overall consumption of the first n countries in the sorted list
+//   from above; and finally the double should be the percentage of the
+//   world consumption of alcohol that these first n countries account for.
+
+//def percentage(n: Int) : (Long, Long, Double) = ...
+def percentage(n: Int) : (Long, Long, Double) = {
+  val ctryConsump = sorted_country_consumption()
+  val totalAlc = ctryConsump.map(_._2).sum
+  val firstNAlc = ctryConsump.take(n).map(_._2).sum
+  val pcntage = (firstNAlc*1.0/totalAlc)*100;
+  (ctryConsump.map(_._2).sum, ctryConsump.take(n).map(_._2).sum, pcntage)
+}
+}