progs/app2.scala
changeset 96 9fcd3de53c06
parent 93 4794759139ea
child 97 60a3ba90dd53
equal deleted inserted replaced
95:dbe49327b6c5 96:9fcd3de53c06
     1 val http_pattern = """\"https?://[^\"]*\"""".r
     1 val http_pattern = """\"https?://[^\"]*\"""".r
     2 
     2 
     3 def unquote(s: String) = s.drop(1).dropRight(1)
     3 def unquote(s: String) = s.drop(1).dropRight(1)
     4 
     4 
     5 def get_all_URLs(page: String) : Set[String] = {
     5 def get_all_URLs(page: String) : Set[String] = {
     6   (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
     6   http_pattern.findAllIn(page).map(unquote).toSet
     7 }
     7 }
     8 
     8 
     9 def crawl(url: String, n: Int) : Unit = {
     9 def crawl(url: String, n: Int) : Unit = {
    10   if (n == 0) ()
    10   if (n == 0) ()
    11   else {
    11   else {
    12     println("Visiting: " + n + " " + url)
    12     println(s"Visiting: $n $url")
    13     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
    13     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
    14   }
    14   }
    15 }
    15 }
    16 
    16