equal
deleted
inserted
replaced
4 import io.Source |
4 import io.Source |
5 import scala.util.matching.Regex |
5 import scala.util.matching.Regex |
6 import scala.util._ |
6 import scala.util._ |
7 |
7 |
8 def get_page(url: String) : String = { |
8 def get_page(url: String) : String = { |
9 Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString) getOrElse |
9 Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). |
10 { println(s" Problem with: $url"); ""} |
10 getOrElse { println(s" Problem with: $url"); ""} |
11 } |
11 } |
12 |
12 |
13 // regexes for URLs, for "my" domain and for email addresses |
13 // regexes for URLs, for "my" domain and for email addresses |
14 val http_pattern = """"https?://[^"]*"""".r |
14 val http_pattern = """"https?://[^"]*"""".r |
15 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r /*@\label{emailline}@*/ |
15 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r /*@\label{emailline}@*/ |