diff -r 352d15782d35 -r 71fc4a7a7039 progs/crawler2.scala --- a/progs/crawler2.scala Sat May 05 10:31:00 2018 +0100 +++ b/progs/crawler2.scala Fri Jun 01 15:28:37 2018 +0100 @@ -13,7 +13,8 @@ // regexes for URLs and "my" domain val http_pattern = """"https?://[^"]*"""".r -val my_urls = """urbanc""".r /*@\label{myurlline}@*/ +val my_urls = """urban""".r /*@\label{myurlline}@*/ +//val my_urls = """kcl.ac.uk""".r def unquote(s: String) = s.drop(1).dropRight(1) @@ -28,15 +29,16 @@ } /*@\label{changeendline}@*/ else { println(s"Visiting: $n $url") - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + for (u <- get_all_URLs(get_page(url)).par) crawl(u, n - 1) } } // starting URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc""" -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/bsc-projects-16.html""" +val startURL = """https://nms.kcl.ac.uk/christian.urban/""" +//val startURL = """https://nms.kcl.ac.uk/christian.urban/bsc-projects-17.html""" + // can now deal with depth 3 and beyond -crawl(startURL, 2) +crawl(startURL, 3)