diff -r dbe49327b6c5 -r 9fcd3de53c06 progs/crawler3.scala
--- a/progs/crawler3.scala Tue Sep 24 01:12:36 2013 +0100
+++ b/progs/crawler3.scala Tue Sep 24 01:56:53 2013 +0100
@@ -1,18 +1,11 @@
 import io.Source
 import scala.util.matching.Regex
+import scala.util._
 
 // gets the first ~10K of a page
-def get_page(url: String) : String = {
-  try {
-    Source.fromURL(url).take(10000).mkString
-  }
-  catch {
-    case _ : Throwable => {
-      println(s" Problem with: $url")
-      ""
-    }
-  }
-}
+def get_page(url: String) : String =
+  Try(Source.fromURL(url).take(10000).mkString) getOrElse
+    { println(s" Problem with: $url"); ""}
 
 // staring URL for the crawler
 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
@@ -27,7 +20,7 @@
 def unquote(s: String) = s.drop(1).dropRight(1)
 
 def get_all_URLs(page: String) : Set[String] = {
-  (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+  http_pattern.findAllIn(page).map(unquote).toSet
 }
 
 // naive version - seraches until a given depth