added
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Wed, 25 Sep 2013 20:35:54 +0100
changeset 99 91145f6d9b0e
parent 98 1f3d89fe9820
child 100 cbc2270c2938
added
progs/crawler1.scala
progs/crawler3.scala
progs/scraper.scala
--- a/progs/crawler1.scala	Tue Sep 24 23:31:53 2013 +0100
+++ b/progs/crawler1.scala	Wed Sep 25 20:35:54 2013 +0100
@@ -3,10 +3,10 @@
 import scala.util._
 
 // gets the first ~10K of a page
-def get_page(url: String) : String = 
+def get_page(url: String) : String = {
   Try(Source.fromURL(url).take(10000).mkString) getOrElse 
     { println(s"  Problem with: $url"); ""}
-
+}
 
 // regex for URLs
 val http_pattern = """\"https?://[^\"]*\"""".r
--- a/progs/crawler3.scala	Tue Sep 24 23:31:53 2013 +0100
+++ b/progs/crawler3.scala	Wed Sep 25 20:35:54 2013 +0100
@@ -3,9 +3,10 @@
 import scala.util._
 
 // gets the first ~10K of a page
-def get_page(url: String) : String = 
+def get_page(url: String) : String = {
   Try(Source.fromURL(url).take(10000).mkString) getOrElse 
     { println(s"  Problem with: $url"); ""}
+}
 
 // starting URL for the crawler
 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
--- a/progs/scraper.scala	Tue Sep 24 23:31:53 2013 +0100
+++ b/progs/scraper.scala	Wed Sep 25 20:35:54 2013 +0100
@@ -12,8 +12,8 @@
 
 //sending data
 val wr = new OutputStreamWriter(conn.getOutputStream())
-//wr.write("Fdate=2012-9-24&Tdate=2012-09-25")
-//wr.write("Fdate=2012-9-18&Tdate=2012-09-25")
+//wr.write("Fdate=2012-9-24&Tdate=2013-08-25")
+//wr.write("Fdate=2012-9-18&Tdate=2012-09-24")
 wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
 wr.flush
 wr.close
@@ -21,7 +21,7 @@
 //receiving data
 val page = fromInputStream(conn.getInputStream).getLines.mkString("\n")
 
-//println(page)
+println(page)
 
 // regular expression . excludes newlines, 
 // therefore we have to use [\S\s]
@@ -43,15 +43,15 @@
 def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt
 
 //day with highest particle pollution (PM_10)
-data.sortWith(compare(1)).last
+println(data.sortWith(compare(1)).last)
 
 //day with highest sulfur dioxide (SO_2)
-data.sortWith(compare(2)).last
+println(data.sortWith(compare(2)).last)
 
 //day with highest nitrogen dioxide (NO_2)
-data.sortWith(compare(3)).last
+println(data.sortWith(compare(3)).last)
 
 //days with highest PM_10
 val groups = data.groupBy(_(1).toInt)
 val max_key = groups.keySet.max
-groups(max_key)
+println(groups(max_key))