progs/scraper.scala
changeset 99 91145f6d9b0e
parent 93 4794759139ea
child 103 bea2dd1c7e73
equal deleted inserted replaced
98:1f3d89fe9820 99:91145f6d9b0e
    10 conn.setDoOutput(true)
    10 conn.setDoOutput(true)
    11 conn.connect
    11 conn.connect
    12 
    12 
    13 //sending data
    13 //sending data
    14 val wr = new OutputStreamWriter(conn.getOutputStream())
    14 val wr = new OutputStreamWriter(conn.getOutputStream())
    15 //wr.write("Fdate=2012-9-24&Tdate=2012-09-25")
    15 //wr.write("Fdate=2012-9-24&Tdate=2013-08-25")
    16 //wr.write("Fdate=2012-9-18&Tdate=2012-09-25")
    16 //wr.write("Fdate=2012-9-18&Tdate=2012-09-24")
    17 wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
    17 wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
    18 wr.flush
    18 wr.flush
    19 wr.close
    19 wr.close
    20 
    20 
    21 //receiving data
    21 //receiving data
    22 val page = fromInputStream(conn.getInputStream).getLines.mkString("\n")
    22 val page = fromInputStream(conn.getInputStream).getLines.mkString("\n")
    23 
    23 
    24 //println(page)
    24 println(page)
    25 
    25 
    26 // in a regular expression, . does not match newlines,
    26 // in a regular expression, . does not match newlines,
    27 // therefore we have to use [\S\s]
    27 // therefore we have to use [\S\s]
    28 val regex1 = """<tr align="center">[\S\s]*?</tr>""".r
    28 val regex1 = """<tr align="center">[\S\s]*?</tr>""".r
    29 val rows = regex1.findAllIn(page).toList
    29 val rows = regex1.findAllIn(page).toList
    41 val data = rows.map { aux }
    41 val data = rows.map { aux }
    42 
    42 
    43 def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt
    43 def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt
    44 
    44 
    45 //day with highest particle pollution (PM_10)
    45 //day with highest particle pollution (PM_10)
    46 data.sortWith(compare(1)).last
    46 println(data.sortWith(compare(1)).last)
    47 
    47 
    48 //day with highest sulfur dioxide (SO_2)
    48 //day with highest sulfur dioxide (SO_2)
    49 data.sortWith(compare(2)).last
    49 println(data.sortWith(compare(2)).last)
    50 
    50 
    51 //day with highest nitrogen dioxide (NO_2)
    51 //day with highest nitrogen dioxide (NO_2)
    52 data.sortWith(compare(3)).last
    52 println(data.sortWith(compare(3)).last)
    53 
    53 
    54 //days with highest PM_10
    54 //days with highest PM_10
    55 val groups = data.groupBy(_(1).toInt)
    55 val groups = data.groupBy(_(1).toInt)
    56 val max_key = groups.keySet.max
    56 val max_key = groups.keySet.max
    57 groups(max_key)
    57 println(groups(max_key))