--- a/progs/crawler1.scala Tue Sep 24 23:31:53 2013 +0100
+++ b/progs/crawler1.scala Wed Sep 25 20:35:54 2013 +0100
@@ -3,10 +3,10 @@
import scala.util._
// gets the first ~10K of a page
-def get_page(url: String) : String =
+def get_page(url: String) : String = {
Try(Source.fromURL(url).take(10000).mkString) getOrElse
{ println(s" Problem with: $url"); ""}
-
+}
// regex for URLs
val http_pattern = """\"https?://[^\"]*\"""".r
--- a/progs/crawler3.scala Tue Sep 24 23:31:53 2013 +0100
+++ b/progs/crawler3.scala Wed Sep 25 20:35:54 2013 +0100
@@ -3,9 +3,10 @@
import scala.util._
// gets the first ~10K of a page
-def get_page(url: String) : String =
+def get_page(url: String) : String = {
Try(Source.fromURL(url).take(10000).mkString) getOrElse
{ println(s" Problem with: $url"); ""}
+}
// starting URL for the crawler
val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
--- a/progs/scraper.scala Tue Sep 24 23:31:53 2013 +0100
+++ b/progs/scraper.scala Wed Sep 25 20:35:54 2013 +0100
@@ -12,8 +12,8 @@
//sending data
val wr = new OutputStreamWriter(conn.getOutputStream())
-//wr.write("Fdate=2012-9-24&Tdate=2012-09-25")
-//wr.write("Fdate=2012-9-18&Tdate=2012-09-25")
+//wr.write("Fdate=2012-9-24&Tdate=2013-08-25")
+//wr.write("Fdate=2012-9-18&Tdate=2012-09-24")
wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
wr.flush
wr.close
@@ -21,7 +21,7 @@
//receiving data
val page = fromInputStream(conn.getInputStream).getLines.mkString("\n")
-//println(page)
+println(page)
// in a regular expression, . does not match newlines,
// therefore we have to use [\S\s]
@@ -43,15 +43,15 @@
def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt
//day with highest particle pollution (PM_10)
-data.sortWith(compare(1)).last
+println(data.sortWith(compare(1)).last)
//day with highest sulfur dioxide (SO_2)
-data.sortWith(compare(2)).last
+println(data.sortWith(compare(2)).last)
//day with highest nitrogen dioxide (NO_2)
-data.sortWith(compare(3)).last
+println(data.sortWith(compare(3)).last)
//days with highest PM_10
val groups = data.groupBy(_(1).toInt)
val max_key = groups.keySet.max
-groups(max_key)
+println(groups(max_key))