progs/crawler3.scala
changeset 254 dcd4688690ce
parent 242 35104ee14f87
child 329 dbba38a5c2ae
equal deleted inserted replaced
253:75c469893514 254:dcd4688690ce
    15 val my_urls = """urbanc""".r
    15 val my_urls = """urbanc""".r
    16 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
    16 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
    17 
    17 
    18 def unquote(s: String) = s.drop(1).dropRight(1)
    18 def unquote(s: String) = s.drop(1).dropRight(1)
    19 
    19 
    20 def get_all_URLs(page: String) : Set[String] = {
    20 def get_all_URLs(page: String) : Set[String] = 
    21   http_pattern.findAllIn(page).map(unquote).toSet
    21   http_pattern.findAllIn(page).map(unquote).toSet
    22 }
       
    23 
    22 
    24 def print_str(s: String) = 
    23 def print_str(s: String) = 
    25   if (s == "") () else println(s)
    24   if (s == "") () else println(s)
    26 
    25 
    27 def crawl(url: String, n: Int) : Unit = {
    26 def crawl(url: String, n: Int) : Unit = {