progs/crawler2.scala
changeset 254 dcd4688690ce
parent 242 35104ee14f87
child 303 4439c56d96cf
equal deleted inserted replaced
253:75c469893514 254:dcd4688690ce
    15 val http_pattern = """"https?://[^"]*"""".r
    15 val http_pattern = """"https?://[^"]*"""".r
    16 val my_urls = """urbanc""".r
    16 val my_urls = """urbanc""".r
    17 
    17 
    18 def unquote(s: String) = s.drop(1).dropRight(1)
    18 def unquote(s: String) = s.drop(1).dropRight(1)
    19 
    19 
    20 def get_all_URLs(page: String) : Set[String] = {
    20 def get_all_URLs(page: String) : Set[String] = 
    21   http_pattern.findAllIn(page).map(unquote).toSet
    21   http_pattern.findAllIn(page).map(unquote).toSet
    22 }
       
    23 
    22 
    24 def crawl(url: String, n: Int) : Unit = {
    23 def crawl(url: String, n: Int) : Unit = {
    25   if (n == 0) ()
    24   if (n == 0) ()
    26   else if (my_urls.findFirstIn(url) == None) { 
    25   else if (my_urls.findFirstIn(url) == None) { 
    27     println(s"Visiting: $n $url")
    26     println(s"Visiting: $n $url")