equal
deleted
inserted
replaced
21 http_pattern.findAllIn(page).map(unquote).toSet |
21 http_pattern.findAllIn(page).map(unquote).toSet |
22 } |
22 } |
23 |
23 |
24 def crawl(url: String, n: Int) : Unit = { |
24 def crawl(url: String, n: Int) : Unit = { |
25 if (n == 0) () |
25 if (n == 0) () |
26 else if (my_urls.findFirstIn(url) == None) () |
26 else if (my_urls.findFirstIn(url) == None) { |
|
27 println(s"Visiting: $n $url") |
|
28 get_page(url); () |
|
29 } |
27 else { |
30 else { |
28 println(s"Visiting: $n $url") |
31 println(s"Visiting: $n $url") |
29 for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) |
32 for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) |
30 } |
33 } |
31 } |
34 } |
32 |
35 |
33 // starting URL for the crawler |
36 // starting URL for the crawler |
34 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" |
37 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" |
35 |
38 |
36 // can now deal with depth 3 and beyond |
39 // can now deal with depth 3 and beyond |
37 crawl(startURL, 4) |
40 crawl(startURL, 3) |
38 |
41 |
39 |
42 |