author | Christian Urban <urbanc@in.tum.de> |
Fri, 23 Nov 2018 01:52:37 +0000 | |
changeset 217 | e689375abcc1 |
parent 194 | 060b081523de |
child 218 | 22705d22c105 |
permissions | -rw-r--r-- |
67 | 1 |
// Scala Lecture 3 |
2 |
//================= |
|
3 |
||
217 | 4 |
|
5 |
// A Web Crawler / Email Harvester |
|
6 |
//================================= |
|
7 |
// |
|
8 |
// the idea is to look for links using the |
|
9 |
// regular expression "https?://[^"]*" and for |
|
10 |
// email addresses using another regex. |
|
11 |
||
12 |
import io.Source |
|
13 |
import scala.util._ |
|
155 | 14 |
|
217 | 15 |
// gets the first 10K of a web-page: reads at most 10000 characters
// from url, decoded as ISO-8859-1; if anything goes wrong (malformed
// URL, I/O error, ...) the problem is reported on stdout and the
// empty string is returned
def get_page(url: String) : String = {
  Try {
    val src = Source.fromURL(url)("ISO-8859-1")
    // always release the underlying stream, even if reading fails
    try src.take(10000).mkString finally src.close()
  }.getOrElse { println(s" Problem with: $url"); "" }
}
|
155 | 20 |
|
217 | 21 |
// regex for URLs and emails |
22 |
val http_pattern = """"https?://[^"]*"""".r |
|
23 |
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r |
|
24 |
||
25 |
//email_pattern.findAllIn |
|
26 |
// ("foo bla christian@kcl.ac.uk 1234567").toList |
|
155 | 27 |
|
28 |
||
217 | 29 |
// drops the first and last character from a string;
// strings with fewer than two characters yield ""
def unquote(s: String) = s.slice(1, s.length - 1)
|
155 | 31 |
|
217 | 32 |
// collects every match of http_pattern in page (a quoted http(s)
// link) and returns the links without their surrounding quotes
def get_all_URLs(page: String): Set[String] = {
  val quoted = http_pattern.findAllIn(page)
  (for (link <- quoted) yield unquote(link)).toSet
}
|
155 | 34 |
|
217 | 35 |
// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
//
// returns the email addresses found on the page at url together
// with those found on all pages reachable from it by following
// links for n - 1 further levels
def crawl(url: String, n: Int) : Set[String] = {
  if (n == 0) Set()
  else {
    println(s" Visiting: $n $url")
    val content = get_page(url)
    val found = email_pattern.findAllIn(content).toSet
    val linked = get_all_URLs(content).flatMap(link => crawl(link, n - 1))
    found ++ linked
  }
}
47 |
||
217 | 48 |
// some starting URLs for the crawler |
49 |
val startURL = """https://nms.kcl.ac.uk/christian.urban/""" |
|
50 |
||
51 |
crawl(startURL, 2) |
|
52 |
||
155 | 53 |
|
54 |
||
217 | 55 |
// User-defined Datatypes and Pattern Matching |
56 |
//============================================ |
|
57 |
||
155 | 58 |
|
217 | 59 |
// abstract syntax of arithmetic expressions: a number, the sum of
// two expressions, or the product of two expressions
// NOTE(review): Exp could additionally be made `sealed` to get
// exhaustiveness checks, provided the whole hierarchy is always
// compiled as one unit - confirm how this file is run
abstract class Exp
// the case classes are final: extending a case class breaks
// the generated equals/hashCode/copy
final case class N(n: Int) extends Exp
final case class Plus(e1: Exp, e2: Exp) extends Exp
final case class Times(e1: Exp, e2: Exp) extends Exp
|
158 | 63 |
|
155 | 64 |
|
65 |
||
217 | 66 |
// string of an Exp |
67 |
// eval of an Exp |
|
68 |
// simp an Exp |
|
69 |
// Tokens |
|
70 |
// Reverse Polish Notation |
|
71 |
// compute RP |
|
72 |
// transform RP into Exp |
|
73 |
// process RP string and generate Exp |
|
155 | 74 |
|
75 |
||
76 |
||
217 | 77 |
// renders an expression as a fully parenthesised infix string
def string(e: Exp) : String = e match {
  case N(value) => value.toString
  case Plus(left, right) => s"(${string(left)} + ${string(right)})"
  case Times(left, right) => s"(${string(left)} * ${string(right)})"
}
|
155 | 82 |
|
217 | 83 |
val e = Plus(N(9), Times(N(3), N(4))) |
84 |
||
85 |
println(string(e)) |
|
155 | 86 |
|
217 | 87 |
// computes the integer value of an expression
def eval(e: Exp) : Int = e match {
  case Times(left, right) => eval(left) * eval(right)
  case Plus(left, right) => eval(left) + eval(right)
  case N(value) => value
}
92 |
||
217 | 93 |
eval(e) |
178 | 94 |
|
217 | 95 |
// simplifies an expression bottom-up using the laws
//   0 + e = e,  e + 0 = e,  0 * e = 0,  e * 0 = 0,
//   1 * e = e,  e * 1 = e
def simp(e: Exp) : Exp = e match {
  case N(value) => N(value)
  case Plus(left, right) => (simp(left), simp(right)) match {
    case (N(0), r) => r
    case (l, N(0)) => l
    case (l, r) => Plus(l, r)
  }
  case Times(left, right) => (simp(left), simp(right)) match {
    // a zero factor wins over a one factor, so it is tested first
    case (N(0), _) | (_, N(0)) => N(0)
    case (N(1), r) => r
    case (l, N(1)) => l
    case (l, r) => Times(l, r)
  }
}
|
178 | 110 |
|
152 | 111 |
|
217 | 112 |
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) |
113 |
println(string(e2)) |
|
114 |
println(string(simp(e2))) |
|
67 | 115 |
|
217 | 116 |
// Token and Reverse Polish Notation |
117 |
// tokens of an expression in Reverse Polish Notation:
// T(n) is the number n, PL stands for + and TI for *
abstract class Token
// final: case classes should not be extended
final case class T(n: Int) extends Token
case object PL extends Token
case object TI extends Token
|
121 |
||
122 |
// translates an expression into Reverse Polish Notation:
// first the tokens of both operands, then the operator token
def rp(e: Exp) : List[Token] = e match {
  case N(value) => T(value) :: Nil
  case Plus(left, right) => rp(left) ++ rp(right) ++ List(PL)
  case Times(left, right) => rp(left) ++ rp(right) ++ List(TI)
}
127 |
||
217 | 128 |
// evaluates a token list in Reverse Polish Notation; stk is the
// stack of intermediate results and its top element is the result
// once all tokens are consumed
def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match {
  case (Nil, stack) => stack.head
  case (T(value) :: tokens, stack) => comp(tokens, value :: stack)
  case (PL :: tokens, top :: next :: stack) => comp(tokens, (top + next) :: stack)
  case (TI :: tokens, top :: next :: stack) => comp(tokens, (top * next) :: stack)
}
|
67 | 134 |
|
217 | 135 |
// rebuilds an expression from a token list in Reverse Polish
// Notation; st is the stack of sub-expressions built so far
def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
  case (Nil, stack) => stack.head
  case (T(value) :: tokens, stack) => exp(tokens, N(value) :: stack)
  // the top of the stack is the right operand, the one below the left
  case (PL :: tokens, right :: left :: stack) => exp(tokens, Plus(left, right) :: stack)
  case (TI :: tokens, right :: left :: stack) => exp(tokens, Times(left, right) :: stack)
}
|
141 |
||
142 |
// round trip: turn e2 into Reverse Polish Notation and back
exp(rp(e2), Nil)
|
143 |
||
144 |
// maps one piece of a Reverse Polish string to its token: "+" and
// "*" become the operator tokens, everything else is read as an
// integer (n.toInt throws NumberFormatException for other input)
//
// the result type is given explicitly; otherwise it is inferred as
// Product with Serializable with Token
def proc(s: String) : Token = s match {
  case "+" => PL
  case "*" => TI
  case n => T(n.toInt)
}
|
67 | 149 |
|
155 | 150 |
|
217 | 151 |
string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)) |
67 | 152 |
|
155 | 153 |
|
154 |
||
155 |
// Tail recursion |
|
156 |
//================ |
|
72 | 157 |
|
67 | 158 |
|
159 |
// factorial by plain recursion: the multiplication happens after
// the recursive call returns, so this is not tail recursive
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}
|
161 |
||
155 | 162 |
fact(10) //ok |
163 |
fact(10000) // produces a stackoverflow |
|
164 |
||
165 |
// factorial with an accumulator: acc carries the product computed
// so far, and the recursive call is the last thing that happens
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc
|
167 |
||
158 | 168 |
factT(10, 1) |
155 | 169 |
factT(100000, 1) |
170 |
||
171 |
// there is a flag for ensuring a function is tail recursive |
|
172 |
import scala.annotation.tailrec |
|
67 | 173 |
|
72 | 174 |
// same accumulator-based factorial; @tailrec makes the compiler
// reject the definition if the recursive call is not a tail call
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  val finished = n == 0
  if (finished) acc
  else factT(n - 1, n * acc)
}
|
177 |
||
178 |
||
179 |
||
155 | 180 |
// for tail-recursive functions the Scala compiler |
71 | 181 |
// generates loop-like code, which does not need |
67 | 182 |
// to allocate stack-space in each recursive |
155 | 183 |
// call; Scala can do this only for tail-recursive |
67 | 184 |
// functions |
185 |
||
155 | 186 |
|
187 |
||
217 | 188 |
// Jumping Towers |
189 |
//================ |
|
190 |
||
191 |
||
192 |
// the first n non-empty suffixes of xs: xs itself, then xs
// without its first element, and so on
// for 1 => the result is just List(xs)
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)
|
200 |
||
201 |
||
202 |
moves(List(5,1,0), 1) |
|
203 |
moves(List(5,1,0), 2) |
|
204 |
moves(List(5,1,0), 5) |
|
205 |
||
206 |
// checks whether a jump tour exists at all
// in the second case the comparison needs to be < instead of <=
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case step :: rest =>
    // either the end is reached directly from here, or a tour
    // must exist from one of the next step positions
    rest.length < step || moves(rest, step).exists(tail => search(tail))
}
|
214 |
||
215 |
||
216 |
search(List(5,3,2,5,1,1)) |
|
217 |
search(List(3,5,1,0,0,0,1)) |
|
218 |
search(List(3,5,1,0,0,0,0,1)) |
|
219 |
search(List(3,5,1,0,0,0,1,1)) |
|
220 |
search(List(3,5,1)) |
|
221 |
search(List(5,1,1)) |
|
222 |
search(Nil) |
|
223 |
search(List(1)) |
|
224 |
search(List(5,1,1)) |
|
225 |
search(List(3,5,1,0,0,0,0,0,0,0,0,1)) |
|
226 |
||
227 |
// generates *all* jump tours
// if we are only interested in the shortest one, we could
// short-circuit the calculation and only return List(x) in
// case where xs.length < x, because no tour can be shorter
// than 1
//
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case step :: rest =>
    // tours that continue from one of the positions reachable from here
    val longer = for {
      next <- moves(rest, step)
      tour <- jumps(next)
    } yield step :: tour
    // the one-step tour exists if this field jumps past the end
    if (rest.length < step) List(step) :: longer else longer
}
|
242 |
||
243 |
||
244 |
||
245 |
jumps(List(5,3,2,5,1,1)) |
|
246 |
jumps(List(3,5,1,2,1,2,1)) |
|
247 |
jumps(List(3,5,1,2,3,4,1)) |
|
248 |
jumps(List(3,5,1,0,0,0,1)) |
|
249 |
jumps(List(3,5,1)) |
|
250 |
jumps(List(5,1,1)) |
|
251 |
jumps(Nil) |
|
252 |
jumps(List(1)) |
|
253 |
jumps(List(5,1,2)) |
|
254 |
moves(List(1,2), 5) |
|
255 |
jumps(List(1,5,1,2)) |
|
256 |
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1)) |
|
257 |
||
258 |
jumps(List(5,3,2,5,1,1)).minBy(_.length) |
|
259 |
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length) |
|
260 |
jumps(List(1,3,6,1,0,9)).minBy(_.length) |
|
261 |
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length) |
|
262 |
||
263 |
||
264 |
||
265 |
||
266 |
||
267 |
||
268 |
||
269 |
||
270 |
||
271 |
// Sudoku |
|
272 |
//======== |
|
273 |
||
274 |
// THE POINT OF THIS CODE IS NOT TO BE SUPER |
|
275 |
// EFFICIENT AND FAST, just explaining exhaustive |
|
276 |
// depth-first search |
|
277 |
||
155 | 278 |
|
279 |
val game0 = """.14.6.3.. |
|
280 |
|62...4..9 |
|
281 |
|.8..5.6.. |
|
282 |
|.6.2....3 |
|
283 |
|.7..1..5. |
|
284 |
|5....9.6. |
|
285 |
|..6.2..3. |
|
286 |
|1..5...92 |
|
287 |
|..7.9.41.""".stripMargin.replaceAll("\\n", "") |
|
53 | 288 |
|
155 | 289 |
type Pos = (Int, Int) |
290 |
val EmptyValue = '.' |
|
291 |
val MaxValue = 9 |
|
292 |
||
293 |
val allValues = "123456789".toList |
|
294 |
val indexes = (0 to 8).toList |
|
295 |
||
296 |
||
297 |
// index of the first empty cell in game, or -1 if there is none
def empty(game: String) = game.indexWhere(_ == EmptyValue)
|
298 |
// a game is done when it has no empty cell left
def isDone(game: String) = empty(game) < 0
|
299 |
// (column, row) of the first empty cell
def emptyPosition(game: String) = {
  val idx = empty(game)
  (idx % MaxValue, idx / MaxValue)
}
|
301 |
||
67 | 302 |
|
155 | 303 |
// the cells of row y, from left to right
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game.charAt(y * MaxValue + col)
|
305 |
// the cells of column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game.charAt(x + row * MaxValue)
|
307 |
||
308 |
// the cells of the 3x3 box that contains pos, column by column
def get_box(game: String, pos: Pos): List[Char] = {
  val (col, row) = pos
  // top-left corner of the box: round down to a multiple of 3
  val left = col - col % 3
  val top = row - row % 3
  for {
    x <- List(left, left + 1, left + 2)
    y <- List(top, top + 1, top + 2)
  } yield game(x + y * MaxValue)
}
|
315 |
||
217 | 316 |
//get_row(game0, 0) |
317 |
//get_row(game0, 1) |
|
318 |
//get_box(game0, (3,1)) |
|
319 |
||
320 |
||
155 | 321 |
// this is not mutable!! - a new string with the character at
// index pos replaced by value is returned, game stays untouched
def update(game: String, pos: Int, value: Char): String = {
  val cells = new java.lang.StringBuilder(game)
  cells.setCharAt(pos, value)
  cells.toString
}
|
324 |
||
325 |
// contents of all cells in the column, the row and the box of pos
// (in this order)
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  val column = get_col(game, x)
  val row = get_row(game, y)
  column ::: row ::: get_box(game, pos)
}
|
327 |
||
328 |
// the values that do not yet occur in the column, row or box of pos
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.filterNot(v => taken.contains(v))
}
|
330 |
||
331 |
//candidates(game0, (0,0)) |
|
332 |
||
333 |
// lays the game out in lines of MaxValue characters,
// starting with a newline
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")
|
335 |
||
158 | 336 |
///////////////////// |
155 | 337 |
// not tail recursive
// returns all solutions of game: fills the first empty cell with
// every candidate value in turn and searches on from there
def search(game: String): List[String] =
  if (isDone(game)) game :: Nil
  else {
    val slot = empty(game)
    for {
      c <- candidates(game, emptyPosition(game))
      solution <- search(update(game, slot, c))
    } yield solution
  }
|
345 |
||
217 | 346 |
search(game0).map(pretty) |
347 |
||
348 |
val game1 = """23.915... |
|
349 |
|...2..54. |
|
350 |
|6.7...... |
|
351 |
|..1.....9 |
|
352 |
|89.5.3.17 |
|
353 |
|5.....6.. |
|
354 |
|......9.5 |
|
355 |
|.16..7... |
|
356 |
|...329..1""".stripMargin.replaceAll("\\n", "") |
|
357 |
||
358 |
||
359 |
// game that is in the hard category |
|
360 |
val game2 = """8........ |
|
361 |
|..36..... |
|
362 |
|.7..9.2.. |
|
363 |
|.5...7... |
|
364 |
|....457.. |
|
365 |
|...1...3. |
|
366 |
|..1....68 |
|
367 |
|..85...1. |
|
368 |
|.9....4..""".stripMargin.replaceAll("\\n", "") |
|
369 |
||
370 |
// game with multiple solutions |
|
371 |
val game3 = """.8...9743 |
|
372 |
|.5...8.1. |
|
373 |
|.1....... |
|
374 |
|8....5... |
|
375 |
|...8.4... |
|
376 |
|...3....6 |
|
377 |
|.......7. |
|
378 |
|.3.5...8. |
|
379 |
|9724...5.""".stripMargin.replaceAll("\\n", "") |
|
380 |
||
381 |
||
382 |
||
383 |
||
384 |
search(game1).map(pretty) |
|
385 |
search(game3).map(pretty) |
|
386 |
search(game2).map(pretty) |
|
387 |
||
388 |
// for measuring time: evaluates code i times and returns the
// total elapsed time in seconds as a string
def time_needed[T](i: Int, code: => T) = {
  val begin = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - begin
  s"${elapsed / 1.0e9} secs"
}
|
395 |
||
396 |
time_needed(1, search(game2)) |
|
397 |
||
155 | 398 |
// tail recursive version that searches
// for all solutions
//
// games is the work list of boards still to be explored, sols
// collects the completed boards found so far; the annotation makes
// the compiler check that every recursive call is a tail call
@scala.annotation.tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest =>
    if (isDone(game)) searchT(rest, game::sols)
    else {
      // the first empty cell is the same for every candidate
      val slot = empty(game)
      val next = candidates(game, emptyPosition(game)).map(c => update(game, slot, c))
      searchT(next ::: rest, sols)
    }
}
411 |
||
158 | 412 |
searchT(List(game3), List()).map(pretty) |
413 |
||
414 |
||
155 | 415 |
// tail recursive version that searches
// for a single solution
//
// games is the work list of boards still to be explored; the first
// completed board found is returned, None if the list runs empty;
// the annotation makes the compiler check the tail calls
@scala.annotation.tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest =>
    if (isDone(game)) Some(game)
    else {
      // the first empty cell is the same for every candidate
      val slot = empty(game)
      val next = candidates(game, emptyPosition(game)).map(c => update(game, slot, c))
      search1T(next ::: rest)
    }
}
428 |
||
158 | 429 |
search1T(List(game3)).map(pretty) |
217 | 430 |
time_needed(10, search1T(List(game3))) |
431 |
||
158 | 432 |
|
155 | 433 |
// game with multiple solutions |
434 |
val game3 = """.8...9743 |
|
435 |
|.5...8.1. |
|
436 |
|.1....... |
|
437 |
|8....5... |
|
438 |
|...8.4... |
|
439 |
|...3....6 |
|
440 |
|.......7. |
|
441 |
|.3.5...8. |
|
442 |
|9724...5.""".stripMargin.replaceAll("\\n", "") |
|
443 |
||
158 | 444 |
searchT(List(game3), Nil).map(pretty) |
155 | 445 |
search1T(List(game3)).map(pretty) |
67 | 446 |
|
77
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
447 |
// Moral: Whenever a recursive function is resource-critical |
158 | 448 |
// (i.e. works with large recursion depth), then you need to |
77
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
449 |
// write it in tail-recursive fashion. |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
450 |
// |
155 | 451 |
// Unfortunately, Scala because of current limitations in |
452 |
// the JVM is not as clever as other functional languages. It can |
|
77
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
453 |
// only optimise "self-tail calls". This excludes the cases of |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
454 |
// multiple functions making tail calls to each other. Well, |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
455 |
// nothing is perfect. |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
456 |
|
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
457 |
|
67 | 458 |
|
459 |
||
71 | 460 |
// Polymorphic Types |
461 |
//=================== |
|
462 |
||
72 | 463 |
// You do not want to write functions like contains, first |
71 | 464 |
// and so on for every type of lists. |
465 |
||
67 | 466 |
|
72 | 467 |
// number of elements of a list of strings
def length_string_list(lst: List[String]): Int =
  if (lst.isEmpty) 0 else 1 + length_string_list(lst.tail)
471 |
||
158 | 472 |
// number of elements of a list of integers
def length_int_list(lst: List[Int]): Int = lst match {
  case _ :: rest => 1 + length_int_list(rest)
  case _ => 0
}
|
67 | 476 |
|
158 | 477 |
length_string_list(List("1", "2", "3", "4")) |
478 |
length_int_list(List(1, 2, 3, 4)) |
|
67 | 479 |
|
158 | 480 |
//----- |
67 | 481 |
// number of elements of a list, for any element type A
def length[A](lst: List[A]): Int =
  if (lst.isEmpty) 0
  else 1 + length(lst.tail)
|
158 | 485 |
length(List("1", "2", "3", "4")) |
486 |
length(List(1, 2, 3, 4)) |
|
53 | 487 |
|
158 | 488 |
// applies f to every element of lst and returns the results
// in the same order, for any element types A and B
def map[A, B](lst: List[A], f: A => B): List[B] = lst match {
  case Nil => Nil
  // the recursion has to call map itself
  case x::xs => f(x)::map(xs, f)
}

// squares every element of the list
map(List(1, 2, 3, 4), (x: Int) => x * x)
|
494 |
||
495 |
||
496 |
||
497 |
||
158 | 498 |
|
499 |
||
155 | 500 |
// Cool Stuff |
501 |
//============ |
|
72 | 502 |
|
155 | 503 |
|
504 |
// Implicits |
|
505 |
//=========== |
|
506 |
// |
|
507 |
// For example adding your own methods to Strings: |
|
508 |
// Imagine you want to increment strings, like |
|
509 |
// |
|
510 |
// "HAL".increment |
|
511 |
// |
|
512 |
// you can avoid ugly fudges, like a MyString, by |
|
513 |
// using implicit conversions. |
|
67 | 514 |
|
515 |
||
155 | 516 |
// adds an increment method to strings: every character is
// replaced by the character with the next code
implicit class MyString(s: String) {
  def increment = s.map(c => (c + 1).toChar)
}
519 |
||
155 | 520 |
"HAL".increment |
67 | 521 |
|
53 | 522 |
|
523 |
||
524 |
||
67 | 525 |
|
526 |