progs/lecture1.scala
changeset 137 53c769a78a1e
parent 125 dcaab8068baa
child 140 ecec79b9ab25
equal deleted inserted replaced
136:c08ba57a7f53 137:53c769a78a1e
   300 
   300 
   301 import io.Source
   301 import io.Source
   302 
   302 
   303 // obtaining a webpage
   303 // obtaining a webpage
   304 val url = """https://nms.kcl.ac.uk/christian.urban/""" 
   304 val url = """https://nms.kcl.ac.uk/christian.urban/""" 
       
   305 val url = """http://api.postcodes.io/postcodes/CR84LQ""" 
   305 Source.fromURL(url)("ISO-8859-1").mkString
   306 Source.fromURL(url)("ISO-8859-1").mkString
   306 
   307 
   307 
   308 
   308 // function for looking up stockmarket data 
   309 // a function for looking up constituency data
   309 def price_lookup(symbol: String) : String = {
   310 def consty_lookup(pcode: String) : String = {
   310   val url = "https://download.finance.yahoo.com/d/quotes.csv?s=" + symbol + "&f=snl1"
   311   val url = "http://api.postcodes.io/postcodes/" + pcode
   311   Source.fromURL(url).mkString.drop(1).dropRight(2)
   312   Source.fromURL(url).mkString.split(",")(16)
   312 }
   313 }
   313 
   314 
   314 price_lookup("GOOG")
   315 consty_lookup("CR84LQ")
   315 price_lookup("AAPL")
   316 consty_lookup("WC2B4BG")
   316 
   317 
   317 
   318 
   318 val companies = 
   319 val places = 
   319   List("GOOG", "AAPL", "MSFT", "IBM", "FB", "YHOO", "AMZN", "BIDU")
   320   List("CR84LQ", "WC2B4BG", "KY169QT", "CB11LY", "CB39AX")
   320 
   321 
   321 for (s <- companies) println(price_lookup(s))
   322 for (s <- places) println(consty_lookup(s))
       
   323 
       
   324 
   322 
   325 
   323 
   326 
   324 // A Web Crawler 
   327 // A Web Crawler 
   325 //===============
   328 //===============
   326 //
   329 //
   327 // the idea is to look for dead links using the
   330 // the idea is to look for dead links using the
   328 // regular expression "https?://[^"]*"
   331 // regular expression "https?://[^"]*"
   329 
   332 
   330 import io.Source
   333 import io.Source
   331 import scala.util.matching.Regex
       
   332 import scala.util._
   334 import scala.util._
   333 
   335 
   334 // gets the first 10K of a web-page
   336 // gets the first 10K of a web-page
   335 def get_page(url: String) : String = {
   337 def get_page(url: String) : String = {
   336   Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString).
   338   Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString).
   356   }
   358   }
   357 }
   359 }
   358 
   360 
   359 // some starting URLs for the crawler
   361 // some starting URLs for the crawler
   360 val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
   362 val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
   361 //val startURL = """http://www.inf.kcl.ac.uk/staff/mcburney"""
   363 //val startURL = """https://nms.kcl.ac.uk/luc.moreau/index.html"""
   362 
   364 
   363 crawl(startURL, 2)
   365 crawl(startURL, 2)
   364 
   366 
   365 
   367 
   366 
   368