// scraper.scala (recovered from changeset b606c9439fa6, Wed Sep 26 02:08:55 2012 +0100)
//
// Posts a date range to the air-report page and prints the returned HTML.
import java.io.OutputStreamWriter
import java.net.URL
import scala.io.Source.fromInputStream

val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp")

// Connect to the URL. setDoOutput(true) must be called before connect()
// so that a request body can be written to the connection.
val conn = url.openConnection()
conn.setRequestProperty("User-Agent", "")
conn.setDoOutput(true)
conn.connect()

// Send the form data. The charset is pinned so the bytes on the wire do not
// depend on the platform default encoding, and the writer is closed even if
// write/flush throws.
val wr = new OutputStreamWriter(conn.getOutputStream(), "UTF-8")
try {
  // Date ranges used previously:
  //wr.write("Fdate=2012-9-24&Tdate=2012-09-25")
  //wr.write("Fdate=2012-9-18&Tdate=2012-09-25")
  wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
  wr.flush()
} finally {
  wr.close()
}

// Receive the response: read every line, join with "\n", and always release
// the underlying stream afterwards.
// NOTE(review): the response is decoded with the default codec, as before;
// confirm the page's actual encoding.
val page = {
  val response = fromInputStream(conn.getInputStream)
  try response.getLines().mkString("\n")
  finally response.close()
}

println(page)

// In a regular expression `.` does not match newlines,
// therefore we have to use [\S\s] to match any character.
val regex1 = """