progs/crawler1.scala
changeset 254 dcd4688690ce
parent 242 35104ee14f87
child 257 70c307641d05
equal deleted inserted replaced
253:75c469893514 254:dcd4688690ce
    15 val http_pattern = """"https?://[^"]*"""".r
    15 val http_pattern = """"https?://[^"]*"""".r
    16 
    16 
    17 // drops the first and last character from a string
    17 // drops the first and last character from a string
    18 def unquote(s: String) = s.drop(1).dropRight(1)
    18 def unquote(s: String) = s.drop(1).dropRight(1)
    19 
    19 
    20 def get_all_URLs(page: String) : Set[String] = {
    20 def get_all_URLs(page: String) : Set[String] = 
    21   http_pattern.findAllIn(page).map(unquote).toSet
    21   http_pattern.findAllIn(page).map(unquote).toSet
    22 }
    22 
    23 
    23 
    24 // naive version of crawl - searches until a given depth,
    24 // naive version of crawl - searches until a given depth,
    25 // visits pages potentially more than once
    25 // visits pages potentially more than once
    26 def crawl(url: String, n: Int) : Unit = {
    26 def crawl(url: String, n: Int) : Unit = {
    27   if (n == 0) ()
    27   if (n == 0) ()