diff -r 91145f6d9b0e -r cbc2270c2938 progs/crawler3.scala --- a/progs/crawler3.scala Wed Sep 25 20:35:54 2013 +0100 +++ b/progs/crawler3.scala Thu Sep 26 10:36:24 2013 +0100 @@ -1,3 +1,6 @@ +// This version of the crawler also +// harvests emails from webpages + import io.Source import scala.util.matching.Regex import scala.util._ @@ -16,7 +19,8 @@ val my_urls = """urbanc""".r val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r -// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ +// The regular expression for emails comes from: +// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ def unquote(s: String) = s.drop(1).dropRight(1) @@ -37,7 +41,4 @@ } } -// can now deal with depth 3 -// start on command line crawl(startURL, 3) -