1 // Scala Lecture 3 |
1 // Scala Lecture 3 |
2 //================= |
2 //================= |
3 |
3 |
4 // Pattern Matching |
4 |
5 //================== |
5 // A Web Crawler / Email Harvester |
6 |
6 //================================= |
7 // A powerful tool which is supposed to come to Java in a few years |
|
8 // time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala already |
|
9 // has had it for many years. Other functional languages have had it for |
|
10 // decades. I think I would be really upset if a programming language |
|
11 // I have to use does not have pattern matching....it is just so |
|
12 // useful. ;o) |
|
13 |
|
14 // The general schema: |
|
15 // |
7 // |
16 // expression match { |
8 // the idea is to look for links using the |
17 // case pattern1 => expression1 |
9 // regular expression "https?://[^"]*" and for |
18 // case pattern2 => expression2 |
10 // email addresses using another regex. |
19 // ... |
11 |
20 // case patternN => expressionN |
12 import io.Source |
21 // } |
13 import scala.util._ |
22 |
14 |
23 |
15 // gets the first 10K of a web-page |
24 // remember |
16 def get_page(url: String) : String = { |
25 val lst = List(None, Some(1), Some(2), None, Some(3)).flatten |
17 Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). |
26 |
18 getOrElse { println(s" Problem with: $url"); ""} |
27 |
19 } |
28 def my_flatten(xs: List[Option[Int]]): List[Int] = { |
20 |
29 if (xs == Nil) Nil |
21 // regex for URLs and emails |
30 else if (xs.head == None) my_flatten(xs.tail) |
22 val http_pattern = """"https?://[^"]*"""".r |
31 else xs.head.get :: my_flatten(xs.tail) |
23 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r |
32 } |
24 |
33 |
25 //email_pattern.findAllIn |
34 |
26 // ("foo bla christian@kcl.ac.uk 1234567").toList |
35 |
27 |
36 val lst = List(None, Some(1), Some(2), None, Some(3)) |
28 |
37 |
29 // drops the first and last character from a string |
38 def my_flatten(lst: List[Option[Int]]): List[Int] = lst match { |
// strips one character from each end of s (e.g. the surrounding
// quotes of a matched URL); strings shorter than two characters
// yield the empty string
def unquote(s: String) = s.slice(1, s.length - 1)
39 case Nil => Nil |
31 |
40 case None::xs => my_flatten(xs) |
32 def get_all_URLs(page: String): Set[String] = |
41 case Some(n)::xs => n::my_flatten(xs) |
33 http_pattern.findAllIn(page).map(unquote).toSet |
42 } |
34 |
43 |
35 // naive version of crawl - searches until a given depth, |
44 my_flatten(lst) |
36 // visits pages potentially more than once |
45 |
37 def crawl(url: String, n: Int) : Set[String] = { |
46 Nil == List() |
38 if (n == 0) Set() |
47 |
39 else { |
48 |
40 println(s" Visiting: $n $url") |
49 // another example including a catch-all pattern |
41 val page = get_page(url) |
50 def get_me_a_string(n: Int): String = n match { |
42 val new_emails = email_pattern.findAllIn(page).toSet |
51 case 0 => "zero" |
43 new_emails ++ |
52 case 1 => "one" |
44 (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten |
53 case 2 => "two" |
45 } |
54 case _ => "many" |
46 } |
55 } |
47 |
56 |
48 // some starting URLs for the crawler |
57 get_me_a_string(10) |
49 val startURL = """https://nms.kcl.ac.uk/christian.urban/""" |
58 |
50 |
59 // you can also have cases combined |
51 crawl(startURL, 2) |
60 def season(month: String) = month match { |
52 |
61 case "March" | "April" | "May" => "It's spring" |
53 |
62 case "June" | "July" | "August" => "It's summer" |
54 |
63 case "September" | "October" | "November" => "It's autumn" |
55 // User-defined Datatypes and Pattern Matching |
64 case "December" | "January" | "February" => "It's winter" |
56 //============================================ |
65 } |
57 |
66 |
58 |
67 println(season("November")) |
59 abstract class Exp |
68 |
60 case class N(n: Int) extends Exp |
69 // What happens if no case matches? |
61 case class Plus(e1: Exp, e2: Exp) extends Exp |
70 |
62 case class Times(e1: Exp, e2: Exp) extends Exp |
71 println(season("foobar")) |
63 |
72 |
64 |
73 |
65 |
74 // we can also match more complicated pattern |
66 // string of an Exp |
75 // |
67 // eval of an Exp |
76 // let's look at the Collatz function on binary strings |
68 // simp an Exp |
77 |
69 // Tokens |
78 // adding two binary strings in a very, very lazy manner |
70 // Reverse Polish Notation |
79 |
71 // compute RP |
80 def badd(s1: String, s2: String) : String = |
72 // transform RP into Exp |
81 (BigInt(s1, 2) + BigInt(s2, 2)).toString(2) |
73 // process RP string and generate Exp |
82 |
74 |
83 |
75 |
84 "111".dropRight(1) |
76 |
85 "111".last |
77 def string(e: Exp) : String = e match { |
86 |
78 case N(n) => n.toString |
87 def bcollatz(s: String) : Long = (s.dropRight(1), s.last) match { |
79 case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" |
88 case ("", '1') => 1 // we reached 1 |
80 case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" |
89 case (rest, '0') => 1 + bcollatz(rest) |
81 } |
90 // even number => divide by two |
82 |
91 case (rest, '1') => 1 + bcollatz(badd(s + '1', s)) |
83 val e = Plus(N(9), Times(N(3), N(4))) |
92 // odd number => s + '1' is 2 * s + 1 |
84 |
93 // add another s gives 3 * s + 1 |
85 println(string(e)) |
94 } |
86 |
95 |
87 def eval(e: Exp) : Int = e match { |
96 bcollatz(6.toBinaryString) |
88 case N(n) => n |
97 bcollatz(837799.toBinaryString) |
89 case Plus(e1, e2) => eval(e1) + eval(e2) |
98 bcollatz(100000000000000000L.toBinaryString) |
90 case Times(e1, e2) => eval(e1) * eval(e2) |
99 bcollatz(BigInt("1000000000000000000000000000000000000000000000000000000000000000000000000000").toString(2)) |
91 } |
100 |
92 |
101 |
93 eval(e) |
102 |
94 |
103 |
95 def simp(e: Exp) : Exp = e match { |
104 // User-defined Datatypes |
96 case N(n) => N(n) |
105 //======================== |
97 case Plus(e1, e2) => (simp(e1), simp(e2)) match { |
106 |
98 case (N(0), e2s) => e2s |
107 abstract class Colour |
99 case (e1s, N(0)) => e1s |
108 case object Red extends Colour |
100 case (e1s, e2s) => Plus(e1s, e2s) |
109 case object Green extends Colour |
101 } |
110 case object Blue extends Colour |
102 case Times(e1, e2) => (simp(e1), simp(e2)) match { |
111 |
103 case (N(0), e2s) => N(0) |
112 def fav_colour(c: Colour) : Boolean = c match { |
104 case (e1s, N(0)) => N(0) |
113 case Red => false |
105 case (N(1), e2s) => e2s |
114 case Green => true |
106 case (e1s, N(1)) => e1s |
115 case Blue => false |
107 case (e1s, e2s) => Times(e1s, e2s) |
116 } |
108 } |
117 |
109 } |
118 fav_colour(Green) |
110 |
119 |
111 |
120 |
112 val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) |
121 // actually colors can be written with "object", |
113 println(string(e2)) |
122 // because they do not take any arguments |
114 println(string(simp(e2))) |
123 |
115 |
124 abstract class Day |
116 // Token and Reverse Polish Notation |
125 case object Monday extends Day |
117 abstract class Token |
126 case object Tuesday extends Day |
118 case class T(n: Int) extends Token |
127 case object Wednesday extends Day |
119 case object PL extends Token |
128 case object Thursday extends Day |
120 case object TI extends Token |
129 case object Friday extends Day |
121 |
130 case object Saturday extends Day |
122 def rp(e: Exp) : List[Token] = e match { |
131 case object Sunday extends Day |
123 case N(n) => List(T(n)) |
132 |
124 case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) |
133 abstract class Suit |
125 case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) |
134 case object Spades extends Suit |
126 } |
135 case object Hearts extends Suit |
127 |
136 case object Diamonds extends Suit |
128 def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match { |
137 case object Clubs extends Suit |
129 case (Nil, st) => st.head |
138 |
130 case (T(n)::rest, st) => comp(rest, n::st) |
139 //define function for colour of suits |
131 case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) |
140 |
132 case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) |
141 abstract class Rank |
133 } |
// a rank without arguments is a singleton, so it is a case object
// (a case class without a parameter list is rejected by the compiler)
case object Ace extends Rank
134 |
// a rank without arguments is a singleton, so it is a case object
// (a case class without a parameter list is rejected by the compiler)
case object King extends Rank
135 def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match { |
// a rank without arguments is a singleton, so it is a case object
// (a case class without a parameter list is rejected by the compiler)
case object Queen extends Rank
136 case (Nil, st) => st.head |
// a rank without arguments is a singleton, so it is a case object
// (a case class without a parameter list is rejected by the compiler)
case object Jack extends Rank
137 case (T(n)::rest, st) => exp(rest, N(n)::st) |
146 case class Num(n: Int) extends Rank |
138 case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st) |
147 |
139 case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st) |
148 //define functions for beats |
140 } |
149 //beats Ace _ => true |
141 |
150 //beats _ Ace => false |
142 exp(toks(e2), Nil) |
151 |
143 |
152 |
144 def proc(s: String) = s match { |
153 // ... a bit more useful: Roman Numerals |
145 case "+" => PL |
154 |
146 case "*" => TI |
155 abstract class RomanDigit |
147 case n => T(n.toInt) |
156 case object I extends RomanDigit |
148 } |
157 case object V extends RomanDigit |
149 |
158 case object X extends RomanDigit |
150 |
159 case object L extends RomanDigit |
151 string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)) |
160 case object C extends RomanDigit |
|
161 case object D extends RomanDigit |
|
162 case object M extends RomanDigit |
|
163 |
|
164 type RomanNumeral = List[RomanDigit] |
|
165 |
|
// Converts a list of Roman digits into an Int, reading left to right.
//
// The six subtractive pairs (CM, CD, XC, XL, IX, IV) are recognised
// first; every other digit just contributes its own value. The input
// is not validated, so e.g. List(I,I,I,I) is summed digit by digit.
def RomanNumeral2Int(rs: RomanNumeral): Int = {

  // value of a digit standing on its own
  def worth(d: RomanDigit): Int = d match {
    case I => 1
    case V => 5
    case X => 10
    case L => 50
    case C => 100
    case D => 500
    case M => 1000
  }

  rs match {
    case Nil            => 0
    // subtractive pairs consume two digits at once
    case C :: M :: rest => 900 + RomanNumeral2Int(rest)
    case C :: D :: rest => 400 + RomanNumeral2Int(rest)
    case X :: C :: rest => 90 + RomanNumeral2Int(rest)
    case X :: L :: rest => 40 + RomanNumeral2Int(rest)
    case I :: X :: rest => 9 + RomanNumeral2Int(rest)
    case I :: V :: rest => 4 + RomanNumeral2Int(rest)
    // anything else: a plain digit
    case d :: rest      => worth(d) + RomanNumeral2Int(rest)
  }
}
|
182 |
|
183 RomanNumeral2Int(List(I,V)) // 4 |
|
184 RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) |
|
185 RomanNumeral2Int(List(V,I)) // 6 |
|
186 RomanNumeral2Int(List(I,X)) // 9 |
|
187 RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 |
|
188 RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 |
|
189 |
|
190 |
|
191 |
|
192 // another example |
|
193 //================= |
|
194 |
|
195 // Once upon a time, in a completely fictional country there were Persons... |
|
196 |
|
// The inhabitants of the fictional country.
abstract class Person

// ranks that carry no data are singletons
case object King extends Person
case object Clown extends Person

// ranks that carry data; deg and terr are the degree and territory
// shown by title (succ is not used by the functions visible here)
case class Peer(deg: String, terr: String, succ: Int) extends Person
case class Knight(name: String) extends Person
case class Peasant(name: String) extends Person
|
203 |
|
// Returns the form of address for a person: fixed texts for the
// two singletons, the bare name for a peasant, and a composed
// title for knights and peers (a peer's succ value is ignored).
def title(p: Person): String = p match {
  case Clown              => "My name is Boris Johnson"
  case King               => "His Majesty the King"
  case Peasant(name)      => name
  case Knight(name)       => s"Sir ${name}"
  case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
}
|
212 |
|
213 title(Clown) |
|
214 |
|
215 |
|
216 |
|
// Tests whether p1 strictly outranks p2.
//
// The ranking is King > Peer > Knight > Clown > Peasant. Persons of
// the same rank are never superior to each other. This includes
// (King, King): the function is passed to sortWith as the
// "less than" test, which has to be irreflexive.
def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
  case (King, King)                             => false
  case (King, _)                                => true
  case (_: Peer, Knight(_) | Peasant(_) | Clown) => true
  case (_: Knight, Peasant(_) | Clown)          => true
  case (Clown, Peasant(_))                      => true
  case _                                        => false
}
|
227 |
|
228 val people = List(Knight("David"), |
|
229 Peer("Duke", "Norfolk", 84), |
|
230 Peasant("Christian"), |
|
231 King, |
|
232 Clown) |
|
233 |
|
234 println(people.sortWith(superior(_, _)).mkString(", ")) |
|
235 |
|
236 |
152 |
237 |
153 |
238 |
154 |
239 // Tail recursion |
155 // Tail recursion |
240 //================ |
156 //================ |
267 // call; Scala can do this only for tail-recursive |
183 // call; Scala can do this only for tail-recursive |
268 // functions |
184 // functions |
269 |
185 |
270 |
186 |
271 |
187 |
272 // sudoku again |
188 // Jumping Towers |
|
189 //================ |
|
190 |
|
191 |
|
192 // the first n non-empty suffixes of xs |

193 // for n = 1 this is just xs itself |
|
194 |
|
// Collects xs itself followed by its successive tails, stopping
// after n lists or when xs is used up, whichever comes first.
// The empty list is never part of the result.
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)
|
200 |
|
201 |
|
202 moves(List(5,1,0), 1) |
|
203 moves(List(5,1,0), 2) |
|
204 moves(List(5,1,0), 5) |
|
205 |
|
206 // checks whether a jump tour exists at all |
|
207 // in the second case it needs to be < instead of <= |
|
208 |
|
// Decides whether a jump tour exists.
//
// The empty list counts as success. Otherwise the head x succeeds
// if it is larger than the number of remaining elements, or if one
// of the positions produced by moves(rest, x) succeeds in turn.
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case x :: rest =>
    rest.length < x || moves(rest, x).exists(search)
}
|
214 |
|
215 |
|
216 search(List(5,3,2,5,1,1)) |
|
217 search(List(3,5,1,0,0,0,1)) |
|
218 search(List(3,5,1,0,0,0,0,1)) |
|
219 search(List(3,5,1,0,0,0,1,1)) |
|
220 search(List(3,5,1)) |
|
221 search(List(5,1,1)) |
|
222 search(Nil) |
|
223 search(List(1)) |
|
224 search(List(5,1,1)) |
|
225 search(List(3,5,1,0,0,0,0,0,0,0,0,1)) |
|
226 |
|
227 // generates *all* jump tours |
|
228 // if we are only interested in the shortest one, we could |

229 // short-circuit the calculation and only return List(x) in |
|
230 // case where xs.length < x, because no tour can be shorter |
|
231 // than 1 |
|
232 // |
|
233 |
|
// Lists every jump tour of xs, each tour given as the sequence of
// values jumped from.
//
// Starting at head x, the tours are: the one-step tour List(x) when
// x exceeds the number of remaining elements, plus x prepended to
// every tour of every position produced by moves(rest, x).
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case x :: rest =>
    val onward =
      for {
        next <- moves(rest, x)
        tour <- jumps(next)
      } yield x :: tour
    if (rest.length < x) List(x) :: onward else onward
}
|
242 |
|
243 |
|
244 |
|
245 jumps(List(5,3,2,5,1,1)) |
|
246 jumps(List(3,5,1,2,1,2,1)) |
|
247 jumps(List(3,5,1,2,3,4,1)) |
|
248 jumps(List(3,5,1,0,0,0,1)) |
|
249 jumps(List(3,5,1)) |
|
250 jumps(List(5,1,1)) |
|
251 jumps(Nil) |
|
252 jumps(List(1)) |
|
253 jumps(List(5,1,2)) |
|
254 moves(List(1,2), 5) |
|
255 jumps(List(1,5,1,2)) |
|
256 jumps(List(3,5,1,0,0,0,0,0,0,0,0,1)) |
|
257 |
|
258 jumps(List(5,3,2,5,1,1)).minBy(_.length) |
|
259 jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length) |
|
260 jumps(List(1,3,6,1,0,9)).minBy(_.length) |
|
261 jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length) |
|
262 |
|
263 |
|
264 |
|
265 |
|
266 |
|
267 |
|
268 |
|
269 |
|
270 |
|
271 // Sudoku |
|
272 //======== |
|
273 |
|
274 // THE POINT OF THIS CODE IS NOT TO BE SUPER |
|
275 // EFFICIENT AND FAST, just explaining exhaustive |
|
276 // depth-first search |
|
277 |
273 |
278 |
274 val game0 = """.14.6.3.. |
279 val game0 = """.14.6.3.. |
275 |62...4..9 |
280 |62...4..9 |
276 |.8..5.6.. |
281 |.8..5.6.. |
277 |.6.2....3 |
282 |.6.2....3 |