changeset 254 | dcd4688690ce |
parent 242 | 35104ee14f87 |
child 257 | 70c307641d05 |
--- a/progs/crawler1.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/crawler1.scala Sun Sep 21 23:23:43 2014 +0100 @@ -17,9 +17,9 @@ // drops the first and last character from a string def unquote(s: String) = s.drop(1).dropRight(1) -def get_all_URLs(page: String) : Set[String] = { +def get_all_URLs(page: String) : Set[String] = http_pattern.findAllIn(page).map(unquote).toSet -} + // naive version of crawl - searches until a given depth, // visits pages potentially more than once