--- a/progs/crawler1.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler1.scala Wed Apr 06 11:51:33 2016 +0100
@@ -12,13 +12,13 @@
}
// regex for URLs
-val http_pattern = """"https?://[^"]*"""".r
+val http_pattern = """"https?://[^"]*"""".r (*@\label{httpline}@*)
// drops the first and last character from a string
def unquote(s: String) = s.drop(1).dropRight(1)
def get_all_URLs(page: String) : Set[String] =
- http_pattern.findAllIn(page).map(unquote).toSet
+ http_pattern.findAllIn(page).map(unquote).toSet (*@\label{findallline}@*)
// naive version of crawl - searches until a given depth,