equal
deleted
inserted
replaced
5 def get_page(url: String) : String = { |
5 def get_page(url: String) : String = { |
6 try { |
6 try { |
7 Source.fromURL(url).take(10000).mkString |
7 Source.fromURL(url).take(10000).mkString |
8 } |
8 } |
9 catch { |
9 catch { |
10 case e => { |
10 case _ : Throwable => { |
11 println(" Problem with: " + url) |
11 println(s" Problem with: $url") |
12 "" |
12 "" |
13 } |
13 } |
14 } |
14 } |
15 } |
15 } |
16 |
16 |
31 // visits pages potentially more than once |
31 // visits pages potentially more than once |
32 def crawl(url: String, n: Int) : Unit = { |
32 def crawl(url: String, n: Int) : Unit = { |
33 if (n == 0) () |
33 if (n == 0) () |
34 else if (my_urls.findFirstIn(url) == None) () |
34 else if (my_urls.findFirstIn(url) == None) () |
35 else { |
35 else { |
36 println("Visiting: " + n + " " + url) |
36 println(s"Visiting: $n $url") |
37 for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) |
37 for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) |
38 } |
38 } |
39 } |
39 } |
40 |
40 |
41 // can now deal with depth 3 |
41 // can now deal with depth 3 |