# HG changeset patch
# User Christian Urban
# Date 1379984213 -3600
# Node ID 9fcd3de53c06859fb0b3ac299d4ae6de5b9d9906
# Parent dbe49327b6c58f3d7dd3db6bd6e74d6e2f9c18ec
updated

diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/app1.scala
--- a/progs/app1.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/app1.scala Tue Sep 24 01:56:53 2013 +0100
@@ -1,12 +1,3 @@
-def get_page(url: String) : String = {
-  try {
-    Source.fromURL(url).take(10000).mkString
-  }
-  catch {
-    case e => {
-      println(" Problem with: " + url)
-      ""
-    }
-  }
-}
-
+def get_page(url: String) : String =
+  Try(Source.fromURL(url).take(10000).mkString) getOrElse
+    { println(s" Problem with: $url"); ""}
diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/app2.scala
--- a/progs/app2.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/app2.scala Tue Sep 24 01:56:53 2013 +0100
@@ -3,13 +3,13 @@
 def unquote(s: String) = s.drop(1).dropRight(1)
 
 def get_all_URLs(page: String) : Set[String] = {
-  (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+  http_pattern.findAllIn(page).map(unquote).toSet
 }
 
 def crawl(url: String, n: Int) : Unit = {
   if (n == 0) ()
   else {
-    println("Visiting: " + n + " " + url)
+    println(s"Visiting: $n $url")
     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
   }
 }
diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/app3.scala
--- a/progs/app3.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/app3.scala Tue Sep 24 01:56:53 2013 +0100
@@ -4,7 +4,7 @@
   if (n == 0) ()
   else if (my_urls.findFirstIn(url) == None) ()
   else {
-    println("Visiting: " + n + " " + url)
+    println(s"Visiting: $n $url")
     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
   }
 }
diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/app4.scala
--- a/progs/app4.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/app4.scala Tue Sep 24 01:56:53 2013 +0100
@@ -6,7 +6,7 @@
 def crawl(url: String, n: Int) : Unit = {
   if (n == 0) ()
   else {
-    println("Visiting: " + n + " " + url)
+    println(s"Visiting: $n $url")
     val page = get_page(url)
     println(email_pattern.findAllIn(page).mkString("\n"))
     for (u <- get_all_URLs(page)) crawl(u, n - 1)
diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/crawler1.scala
--- a/progs/crawler1.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/crawler1.scala Tue Sep 24 01:56:53 2013 +0100
@@ -1,18 +1,11 @@
 import io.Source
 import scala.util.matching.Regex
+import scala.util._
 
 // gets the first ~10K of a page
-def get_page(url: String) : String = {
-  try {
-    Source.fromURL(url).take(10000).mkString
-  }
-  catch {
-    case _ : Throwable => {
-      println(s" Problem with: $url")
-      ""
-    }
-  }
-}
+def get_page(url: String) : String =
+  Try(Source.fromURL(url).take(10000).mkString) getOrElse
+    { println(s" Problem with: $url"); ""}
 
 
 // regex for URLs
diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/crawler2.scala
--- a/progs/crawler2.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/crawler2.scala Tue Sep 24 01:56:53 2013 +0100
@@ -1,18 +1,11 @@
 import io.Source
 import scala.util.matching.Regex
+import scala.util._
 
 // gets the first ~10K of a page
-def get_page(url: String) : String = {
-  try {
-    Source.fromURL(url).take(10000).mkString
-  }
-  catch {
-    case _ : Throwable => {
-      println(s" Problem with: $url")
-      ""
-    }
-  }
-}
+def get_page(url: String) : String =
+  Try(Source.fromURL(url).take(10000).mkString) getOrElse
+    { println(s" Problem with: $url"); ""}
 
 // starting URL for the crawler
 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
@@ -24,7 +17,7 @@
 def unquote(s: String) = s.drop(1).dropRight(1)
 
 def get_all_URLs(page: String) : Set[String] = {
-  (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+  http_pattern.findAllIn(page).map(unquote).toSet
 }
 
 // naive version - searches until a given depth
diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/crawler3.scala
--- a/progs/crawler3.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/crawler3.scala Tue Sep 24 01:56:53 2013 +0100
@@ -1,18 +1,11 @@
 import io.Source
 import scala.util.matching.Regex
+import scala.util._
 
 // gets the first ~10K of a page
-def get_page(url: String) : String = {
-  try {
-    Source.fromURL(url).take(10000).mkString
-  }
-  catch {
-    case _ : Throwable => {
-      println(s" Problem with: $url")
-      ""
-    }
-  }
-}
+def get_page(url: String) : String =
+  Try(Source.fromURL(url).take(10000).mkString) getOrElse
+    { println(s" Problem with: $url"); ""}
 
 // starting URL for the crawler
 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
@@ -27,7 +20,7 @@
 def unquote(s: String) = s.drop(1).dropRight(1)
 
 def get_all_URLs(page: String) : Set[String] = {
-  (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+  http_pattern.findAllIn(page).map(unquote).toSet
 }
 
 // naive version - searches until a given depth
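
Note: all six files receive the same two refactorings. The explicit
try/catch in get_page is replaced by scala.util.Try (available since
Scala 2.10) combined with getOrElse, and string concatenation inside
println is replaced by s-string interpolation. A minimal sketch of the
Try idiom, using made-up inputs purely for illustration:

  import scala.util._

  // Try evaluates its argument and catches non-fatal exceptions:
  // a normal result becomes Success(...), a thrown exception Failure(...);
  // getOrElse then unwraps the result or falls back to the given default.
  Try("123".toInt) getOrElse 0   // ==> 123
  Try("abc".toInt) getOrElse 0   // ==> 0 (NumberFormatException caught)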