diff -r 863feeb5c760 -r 6ea450e999e2 testing1/alcohol.scala --- a/testing1/alcohol.scala Wed Nov 29 21:22:29 2017 +0000 +++ b/testing1/alcohol.scala Sun Dec 03 21:11:49 2017 +0000 @@ -6,66 +6,93 @@ import io.Source import scala.util._ -def get_csv_page(url: String) : List[String] = - Source.fromURL(url)("ISO-8859-1").getLines.toList - -def get_csv_file(file: String) : List[String] = - Source.fromFile(file)("ISO-8859-1").getLines.toList - - -val url_alcohol = +val url_alcohol = "https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv" -val file_population = +val file_population = "population.csv" -get_csv_page(url_alcohol) -get_csv_file(file_population) -get_csv_page(url_alcohol).size -get_csv_file(file_population).size - -val alcs = get_csv_page(url_alcohol) -val pops = get_csv_file(file_population) +//(1) Complete the get_csv_page function below. It takes a URL-string +// as argument and generates a list of strings corresponding to each +// line in the downloaded csv-list. The URL url_alcohol above is one +// possible argument. -def process_alcs(lines: List[String]) : List[(String, Double)] = - for (l <- lines) yield { - val entries = l.split(",").toList - (entries(0), entries(4).toDouble) - } - -def process_pops(lines: List[String]) : Map[String, Long] = - (for (l <- lines) yield { - val entries = l.split(",").toList - (entries(0), entries(1).toLong) - }).toMap - +//def get_csv_page(url: String) : List[String] = ... +def get_csv_page(url: String) : List[String] = { + val csv = Source.fromURL(url) + val contents = csv.mkString.split("\n") + contents.toList +} +// Complete the get_csv_file function below. It takes a file name +// as argument and reads the content of the given file. Like above, +// it should generate a list of strings corresponding to each +// line in the csv-list. The filename file_population is one possible +// argument. -process_alcs(alcs.drop(1))(1) -process_pops(pops.drop(1))("Albania") - -def sorted_country_consumption() : List[(String, Long)] = { - val alcs2 = process_alcs(alcs.drop(1)) - val pops2 = process_pops(pops.drop(1)) - val cons_list = - for ((cname, cons) <- alcs2; - if pops2.isDefinedAt(cname)) yield (cname, (cons * pops2(cname)).toLong) - cons_list.sortBy(_._2).reverse +//def get_csv_file(file: String) : List[String] = ... +def get_csv_file(file: String) : List[String] = { + val csv = Source.fromFile(file) + val contents = csv.mkString.split("\n") + contents.toList } - -sorted_country_consumption().take(10) -sorted_country_consumption().size +//(2) Complete the functions that process the csv-lists. For +// process_alcs extract the country name (as String) and the +// pure alcohol consumption (as Double). For process_pops +// generate a Map of Strings (country names) to Long numbers +// (population sizes). -def percentage(n: Int) : (Long, Long, Double) = { - val cons_list = sorted_country_consumption() - val sum_n = cons_list.take(n).map(_._2).sum - val sum_all = cons_list.map(_._2).sum - val perc = (sum_n.toDouble / sum_all.toDouble) * 100.0 - (sum_all, sum_n, perc) +//def process_alcs(lines: List[String]) : List[(String, Double)] = ... +def process_alcs(lines: List[String]) : List[(String, Double)] = { + val beheaded = lines.drop(1) + val splitEntries = for (n <- beheaded) yield n.split(",").toList + for (n <- splitEntries) yield (n.take(1).mkString, n.drop(4).mkString.toDouble) +} +//def process_pops(lines: List[String]) : Map[String, Long] = ... +def process_pops(lines: List[String]) : Map[String, Long] = { + val beheaded = lines.drop(1); + def toOnePair(line: String) : (String, Long) = { + val splitAsList = line.split(",").toList + (splitAsList.take(1).mkString, splitAsList.drop(1).mkString.toLong) + } + val splitEntries = for (n <- beheaded) yield toOnePair(n) + splitEntries.toMap } -percentage(10) -percentage(164) +//(3) Calculate for each country the overall alcohol_consumption using +// the data from the alcohol list and the population sizes list. You +// should only include countries on the alcohol list that are also +// on the population sizes list with the exact same name. Note that +// the spelling of some names in the alcohol list differs from the +// population sizes list. You can ignore entries where the names differ. +// Sort the resulting list according to the country with the highest alcohol +// consumption to the country with the lowest alcohol consumption. +//def sorted_country_consumption() : List[(String, Long)] = ... +def sorted_country_consumption() : List[(String, Long)] = { + val countryToPop = process_pops(get_csv_file(file_population)) + val countryAndAlc = process_alcs(get_csv_page(url_alcohol)) + val countryAndConsumption = countryAndAlc.collect { + case oneCountryAndAlc + if countryToPop.isDefinedAt(oneCountryAndAlc._1) => + (oneCountryAndAlc._1, (oneCountryAndAlc._2*countryToPop.get(oneCountryAndAlc._1).get).toLong) + } + countryAndConsumption.sortWith(_._2 > _._2) } + +// Calculate the world consumption of pure alcohol of all countries, which +// should be the first element in the tuple below. The second element is +// the overall consumption of the first n countries in the sorted list +// from above; and finally the double should be the percentage of the +// first n countries drinking from the the world consumption of alcohol. + +//def percentage(n: Int) : (Long, Long, Double) = ... +def percentage(n: Int) : (Long, Long, Double) = { + val ctryConsump = sorted_country_consumption() + val totalAlc = ctryConsump.map(_._2).sum + val firstNAlc = ctryConsump.take(n).map(_._2).sum + val pcntage = (firstNAlc*1.0/totalAlc)*100; + (ctryConsump.map(_._2).sum, ctryConsump.take(n).map(_._2).sum, pcntage) +} +}