progs/crawler3.scala
changeset 550 71fc4a7a7039
parent 428 a47c4227a0c6
equal deleted inserted replaced
549:352d15782d35 550:71fc4a7a7039
    23   if (s == "") () else println(s)
    23   if (s == "") () else println(s)
    24 
    24 
    25 def crawl(url: String, n: Int) : Unit = {
    25 def crawl(url: String, n: Int) : Unit = {
    26   if (n == 0) ()
    26   if (n == 0) ()
    27   else {
    27   else {
    28     println(s"Visiting: $n $url")
    28     println(s"  Visiting: $n $url")
    29     val page = get_page(url)
    29     val page = get_page(url)
    30     print_str(email_pattern.findAllIn(page).mkString("\n")) /*@\label{mainline}@*/
    30     print_str(email_pattern.findAllIn(page).mkString("\n")) /*@\label{mainline}@*/
    31     for (u <- get_all_URLs(page).par) crawl(u, n - 1)
    31     for (u <- get_all_URLs(page).par) crawl(u, n - 1)
    32   }
    32   }
    33 }
    33 }
    34 
    34 
    35 // starting URL for the crawler
    35 // starting URL for the crawler
    36 val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc"""
    36 val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
       
    37 
    37 
    38 
    38 crawl(startURL, 3)
    39 crawl(startURL, 3)