equal
deleted
inserted
replaced
1 import io.Source |
1 import io.Source |
2 import scala.util.matching.Regex |
2 import scala.util.matching.Regex |
|
3 import scala.util._ |
3 |
4 |
4 // gets the first ~10K of a page |
5 // gets the first ~10K of a page |
5 def get_page(url: String) : String = { |
6 def get_page(url: String) : String = |
6 try { |
7 Try(Source.fromURL(url).take(10000).mkString) getOrElse |
7 Source.fromURL(url).take(10000).mkString |
8 { println(s" Problem with: $url"); ""} |
8 } |
|
9 catch { |
|
10 case _ : Throwable => { |
|
11 println(s" Problem with: $url") |
|
12 "" |
|
13 } |
|
14 } |
|
15 } |
|
16 |
9 |
17 |
10 |
18 // regex for URLs |
11 // regex for URLs |
19 val http_pattern = """\"https?://[^\"]*\"""".r |
12 val http_pattern = """\"https?://[^\"]*\"""".r |
20 |
13 |