progs/app4.scala
changeset 254 dcd4688690ce
parent 97 60a3ba90dd53
child 330 0806e45d873c
equal deleted inserted replaced
253:75c469893514 254:dcd4688690ce
     1 val http_pattern = """\"https?://[^\"]*\"""".r
     1 val http_pattern = """"https?://[^"]*"""".r
     2 val my_urls = """urbanc""".r
     2 val my_urls = """urbanc""".r
     3 val email_pattern = 
     3 val email_pattern = 
     4   """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
     4   """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
       
     5 
       
     6 def print_str(s: String) = 
       
     7   if (s == "") () else println(s)
     5 
     8 
     6 def crawl(url: String, n: Int) : Unit = {
     9 def crawl(url: String, n: Int) : Unit = {
     7   if (n == 0) ()
    10   if (n == 0) ()
     8   else {
    11   else {
     9     println(s"Visiting: $n $url")
    12     println(s"Visiting: $n $url")
    10     val page = get_page(url)
    13     val page = get_page(url)
    11     println(email_pattern.findAllIn(page).mkString("\n"))
    14     print_str(email_pattern.findAllIn(page).mkString("\n"))
    12     for (u <- get_all_URLs(page)) 
    15     for (u <- get_all_URLs(page).par) crawl(u, n - 1)
    13       crawl(u, n - 1)
       
    14   }
    16   }
    15 }
    17 }