equal
deleted
inserted
replaced
/** Fetches the beginning of the page at `url` as a string.
  *
  * At most the first 10000 characters are read. Any non-fatal failure
  * (malformed URL, unreachable host, I/O error, ...) is reported on stdout
  * and turned into an empty string, so callers can treat a broken link as
  * an empty page.
  *
  * @param url address of the page to download
  * @return up to 10000 characters of the page, or "" if it could not be read
  */
def get_page(url: String) : String = {
  import scala.util.control.NonFatal
  try {
    val source = Source.fromURL(url)
    // Close the underlying stream even when reading fails half-way through.
    try source.take(10000).mkString
    finally source.close()
  }
  catch {
    // NonFatal lets OutOfMemoryError, InterruptedException etc. propagate
    // instead of silently masking them as an empty page.
    case NonFatal(_) => {
      println(s" Problem with: $url")
      ""
    }
  }
}
16 |
16 |
/** Recursively crawls the web starting at `url`, printing every string on
  * each fetched page that matches `email_pattern`.
  *
  * Note: no record of visited pages is kept, so a page may be visited
  * more than once.
  *
  * @param url page to visit
  * @param n   remaining crawl depth; nothing is visited when it is zero or
  *            negative
  */
def crawl(url: String, n: Int) : Unit = {
  // `<= 0` rather than `== 0`: a negative depth must not recurse unboundedly.
  if (n <= 0) ()
  //else if (my_urls.findFirstIn(url) == None) ()
  else {
    println(s"Visiting: $n $url")
    val page = get_page(url)
    println(email_pattern.findAllIn(page).mkString("\n"))
    for (u <- get_all_URLs(page)) crawl(u, n - 1)
  }
}