progs/app2.scala
changeset 254 dcd4688690ce
parent 97 60a3ba90dd53
equal deleted inserted replaced
253:75c469893514 254:dcd4688690ce
     1 val http_pattern = """\"https?://[^\"]*\"""".r
     1 val http_pattern = """"https?://[^"]*"""".r
     2 
     2 
     3 def unquote(s: String) = s.drop(1).dropRight(1)
     3 def unquote(s: String) = s.drop(1).dropRight(1)
     4 
     4 
     5 def get_all_URLs(page: String) : Set[String] = {
     5 def get_all_URLs(page: String) : Set[String] = 
     6   http_pattern.findAllIn(page).map(unquote).toSet
     6   http_pattern.findAllIn(page).map(unquote).toSet
     7 }
       
     8 
     7 
     9 def crawl(url: String, n: Int) : Unit = {
     8 def crawl(url: String, n: Int) : Unit = {
    10   if (n == 0) ()
     9   if (n == 0) ()
    11   else {
    10   else {
    12     println(s"Visiting: $n $url")
    11     println(s"Visiting: $n $url")
    13     for (u <- get_all_URLs(get_page(url))) 
    12     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
    14       crawl(u, n - 1)
       
    15   }
    13   }
    16 }
    14 }
    17 
    15 
       
    16 crawl(some_start_URL, 2)