crawler1.scala
changeset 3 df423d3b7fa1
parent 1 b606c9439fa6
child 7 73cf4406b773
equal deleted inserted replaced
2:6e7da958ba8c 3:df423d3b7fa1
    12       ""
    12       ""
    13     }
    13     }
    14   }
    14   }
    15 }
    15 }
    16 
    16 
    17 // starting URL for the crawler
       
    18 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
       
    19 
    17 
    20 // regex for URLs
    18 // regex for URLs
    21 val http_pattern = """\"https?://[^\"]*\"""".r
    19 val http_pattern = """\"https?://[^\"]*\"""".r
    22 
    20 
    23 def unquote(s: String) = s.drop(1).dropRight(1)
    21 def unquote(s: String) = s.drop(1).dropRight(1)
    34     println("Visiting: " + n + " " + url)
    32     println("Visiting: " + n + " " + url)
    35     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
    33     for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
    36   }
    34   }
    37 }
    35 }
    38 
    36 
       
    37 // starting URL for the crawler
       
    38 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
       
    39 
       
    40 // call on the command line 
    39 crawl(startURL, 2)
    41 crawl(startURL, 2)
    40 
    42