import java.net.URL
import scala.io.Source.fromInputStream

// Endpoint the date-range form is posted to.
val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp")

// connecting to url
// setDoOutput(true) has to be called before connect, otherwise no request body can be written
val conn = url.openConnection
conn.setDoOutput(true)
conn.connect()

// sending data
// The charset is pinned so the bytes sent do not depend on the platform default;
// the writer is closed even when writing fails.
val wr = new OutputStreamWriter(conn.getOutputStream(), "UTF-8")
try {
  // possible date ranges
  wr.write("Fdate=2012-8-24&Tdate=2012-09-25")
  //wr.write("Fdate=2001-9-18&Tdate=2012-09-25")
  wr.flush()
} finally {
  wr.close()
}

// receiving data as page made of HTML
// The response is read completely into one string (lines joined with "\n"),
// then the underlying source is closed.
val response = fromInputStream(conn.getInputStream)
val page = {
  try {
    response.getLines().mkString("\n")
  } finally {
    response.close()
  }
}

// received data can be seen with
// println(page)

// regular expression for extracting rows:
//  - the usual . would exclude newlines,
//  - therefore we have to use [\S\s], which really
//    matches everything
//  - *? is the "lazy" version of *, so each match stops
//    at the first closing </tr>
val regex1 = """<tr align="center">[\S\s]*?</tr>""".r

// every <tr align="center">...</tr> fragment of the page, in document order
val rows = regex1.findAllIn(page).toList

// data in rows
// println(rows)

// extracting row entries (date, PM_10, SO_2, NO_2)
// the use of (..) allows us to extract the matched text
val regex2 = """<td align="center">([\S\s]*?)</td>""".r

/** Returns the trimmed contents of every `<td align="center">` cell found in `s`.
  *
  * @param s HTML text to scan (typically one table row)
  * @return the cell contents in order of appearance; empty when `s` has no matching cell
  */
def extract(s: String): List[String] =
  // group(1) is the text captured by (..); taking it straight from the Match
  // avoids matching each found cell against regex2 a second time
  regex2.findAllMatchIn(s).map(_.group(1).trim).toList

// data completely extracted: one list of trimmed cell strings per table row
val data = rows.map(extract)

/** Compares two extracted rows by the integer value in column `i`.
  *
  * Returns true when `e` holds the larger value, so `sortWith(compare(i))`
  * orders rows from highest to lowest.
  *
  * @param i index of the column to compare
  * @param e first row
  * @param f second row
  * @throws NumberFormatException if either cell is not an integer
  * @throws IndexOutOfBoundsException if either row has no column `i`
  */
def compare(i: Int)(e: List[String], f: List[String]): Boolean = e(i).toInt > f(i).toInt

// Prints `title`, then the row with the largest integer value in `column`
// (cells joined with ","). maxBy keeps the first row among equal maxima and
// avoids sorting the whole list; empty data prints a notice instead of failing.
def printHighest(title: String, column: Int): Unit = {
  println(title)
  if (data.isEmpty) println("(no data)")
  else println(data.maxBy(_(column).toInt).mkString(","))
}

printHighest("The day with the highest particle pollution (PM_10)", 1)
printHighest("The day with the highest sulfur dioxide (SO_2)", 2)
printHighest("The day with the highest nitro dioxide (NO_2)", 3)

// The reports below list ALL days sharing the maximum reading:
// rows are grouped by the integer value of one column, the largest key is
// picked, and every row in that group is printed on its own line.
// NOTE: keySet.max throws if data is empty (no rows were extracted).

println("The day(s) with the highest PM_10")
val groups1 = data.groupBy(_(1).toInt)
val max_key1 = groups1.keySet.max
println(groups1(max_key1).map(_.mkString(",")).mkString("\n"))

println("The day(s) with the highest SO_2")
val groups2 = data.groupBy(_(2).toInt)
val max_key2 = groups2.keySet.max
println(groups2(max_key2).map(_.mkString(",")).mkString("\n"))

println("The day(s) with the highest NO_2")
val groups3 = data.groupBy(_(3).toInt)
val max_key3 = groups3.keySet.max
println(groups3(max_key3).map(_.mkString(",")).mkString("\n"))