equal
deleted
inserted
replaced
1 import io.Source |
1 import io.Source |
2 import scala.util.matching.Regex |
2 import scala.util.matching.Regex |
3 import scala.util._ |
3 import scala.util._ |
4 |
4 |
5 // gets the first ~10K of a page |
5 // gets the first ~10K of a page |
6 def get_page(url: String) : String = |
6 def get_page(url: String) : String = { |
7 Try(Source.fromURL(url).take(10000).mkString) getOrElse |
7 Try(Source.fromURL(url).take(10000).mkString) getOrElse |
8 { println(s" Problem with: $url"); ""} |
8 { println(s" Problem with: $url"); ""} |
|
9 } |
9 |
10 |
10 // starting URL for the crawler |
11 // starting URL for the crawler |
11 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" |
12 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" |
12 |
13 |
13 // regex for URLs |
14 // regex for URLs |