diff -r 546f2090ce12 -r a47c4227a0c6 progs/crawler1.scala --- a/progs/crawler1.scala Fri Sep 23 15:22:33 2016 +0100 +++ b/progs/crawler1.scala Sat Sep 24 08:31:04 2016 +0100 @@ -7,8 +7,8 @@ // gets the first 10K of a web-page def get_page(url: String) : String = { - Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString) getOrElse - { println(s" Problem with: $url"); ""} + Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). + getOrElse { println(s" Problem with: $url"); ""} } // regex for URLs @@ -32,9 +32,8 @@ } // some starting URLs for the crawler -//val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc""" +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc""" //val startURL = """http://www.inf.kcl.ac.uk/staff/mcburney""" -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/bsc-projects-16.html""" crawl(startURL, 2)