author | Christian Urban <christian.urban@kcl.ac.uk> |
Sun, 22 Nov 2020 03:45:22 +0000 | |
changeset 364 | f1a6fa599d26 |
parent 343 | c8fcc0e0a57f |
child 366 | 1c829680503e |
permissions | -rw-r--r-- |
67 | 1 |
// Scala Lecture 3 |
2 |
//================= |
|
3 |
||
320 | 4 |
// - last week |
5 |
// |
|
6 |
// option type |
|
7 |
// higher-order function |
|
8 |
||
9 |
||
323 | 10 |
// adds two integers; both arguments are given at once
def add(x: Int, y: Int) : Int = {
  x + y
}
11 |
||
12 |
// adds 5 to x by calling add with a fixed first argument
def plus5(x: Int) : Int = {
  val increment = 5
  add(increment, x)
}
|
13 |
||
14 |
plus5(6) |
|
15 |
||
16 |
// curried addition: the arguments are given in two separate
// parameter lists, so add2(3) can be passed around as a function
def add2(x: Int)(y: Int) : Int = {
  x + y
}
|
17 |
||
18 |
// adds 3 to y by supplying both argument lists of the curried add2;
// since add2(3)(y) is fully applied, the result is an Int
def plus3(y: Int) : Int = add2(3)(y)
|
19 |
||
20 |
plus3(9) |
|
21 |
||
22 |
List(1,2,3,4,5).map(add2(3)) |
|
23 |
List(1,2,3,4,5).map(add(3, _)) |
|
24 |
||
25 |
// a position is a pair of integer coordinates
type Pos = (Int, Int)

// keeps a position only if both coordinates are smaller than 5;
// all other positions give None, so that a list of results
// can be flattened into the list of accepted positions
def test(p: Pos) : Option[Pos] = {
  if (p._1 < 5 && p._2 < 5) Some(p) else None
}
|
32 |
||
33 |
val l = List((1,2), (5,3), (2,5), (1,3)) |
|
34 |
||
35 |
l.map(test).flatten |
|
320 | 36 |
|
343 | 37 |
// naive quicksort with "On" function |
38 |
||
39 |
// naive quicksort that orders the elements of xs by their key f(x);
// the head of the list is used as the pivot
def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = xs match {
  // lists with fewer than two elements are already sorted
  case Nil | _ :: Nil => xs
  case pivot :: others => {
    // elements with a strictly smaller key go in front of the pivot,
    // all remaining ones (including equal keys) go behind it
    val (smaller, rest) = others.partition(x => f(x) < f(pivot))
    sortOn(f, smaller) ::: (pivot :: sortOn(f, rest))
  }
}
|
47 |
||
48 |
sortOn(identity, List(99,99,99,98,10,-3,2)) |
|
49 |
sortOn(n => - n, List(99,99,99,98,10,-3,2)) |
|
50 |
||
51 |
||
52 |
||
53 |
||
320 | 54 |
// Recursion Again ;o) |
55 |
//==================== |
|
56 |
||
217 | 57 |
|
58 |
// A Web Crawler / Email Harvester |
|
59 |
//================================= |
|
60 |
// |
|
61 |
// the idea is to look for links using the |
|
62 |
// regular expression "https?://[^"]*" and for |
|
218 | 63 |
// email addresses using yet another regex. |
217 | 64 |
|
65 |
import io.Source |
|
66 |
import scala.util._ |
|
155 | 67 |
|
217 | 68 |
// gets the first 10K of a web-page |
69 |
// gets the first 10K of a web-page; if the page cannot be read,
// a message is printed and the empty string is returned
def get_page(url: String) : String = {
  // Using releases the Source once the prefix has been read and,
  // like Try, turns a thrown exception into a Failure
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); "" }
}
155 | 73 |
|
217 | 74 |
// regex for URLs and emails |
75 |
// regex for http(s) links written inside double quotes;
// a match includes the surrounding quotes
val http_pattern : scala.util.matching.Regex =
  """"https?://[^"]*"""".r

// regex for email addresses: local part, '@', domain and
// a final part of 2 to 6 lower-case letters or dots
val email_pattern : scala.util.matching.Regex =
  """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
|
77 |
||
218 | 78 |
// val s = "foo bla christian@kcl.ac.uk 1234567" |
79 |
// email_pattern.findAllIn(s).toList |
|
155 | 80 |
|
217 | 81 |
// drops the first and last character from a string |
82 |
// removes the first and the last character of a string;
// strings with fewer than two characters become empty
def unquote(s: String) : String = {
  s.slice(1, s.length - 1)
}
|
155 | 83 |
|
217 | 84 |
// collects all quoted links matched by http_pattern in a page
// and returns them, without their quotes, as a set
def get_all_URLs(page: String): Set[String] = {
  val quoted_links = http_pattern.findAllIn(page)
  (for (link <- quoted_links) yield unquote(link)).toSet
}
|
155 | 86 |
|
320 | 87 |
// a naive version of crawl - searches until a given depth, |
217 | 88 |
// visits pages potentially more than once |
320 | 89 |
// naive crawler: prints the visited URL, fetches the page and
// then follows every link found on it with the depth n decreased
// by one; nothing happens once n is 0
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    get_all_URLs(get_page(url)).foreach(crawl(_, n - 1))
  }
}
97 |
||
217 | 98 |
// some starting URLs for the crawler |
99 |
val startURL = """https://nms.kcl.ac.uk/christian.urban/""" |
|
320 | 100 |
|
217 | 101 |
crawl(startURL, 2) |
102 |
||
323 | 103 |
for (x <- List(1,2,3,4,5,6)) println(x) |
318 | 104 |
|
320 | 105 |
// a primitive email harvester |
106 |
// a primitive email harvester: returns the email addresses found
// on the page at url together with those harvested, with depth
// n - 1, from every page it links to; depth 0 yields the empty set
// (the .par call needs the parallel collections to be in scope)
def emails(url: String, n: Int) : Set[String] = n match {
  case 0 => Set()
  case _ => {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val found = email_pattern.findAllIn(page).toSet
    val linked = for (u <- get_all_URLs(page).par) yield emails(u, n - 1)
    found ++ linked.flatten
  }
}
115 |
||
323 | 116 |
emails(startURL, 3) |
218 | 117 |
|
118 |
||
320 | 119 |
// if we want to explore the internet "deeper", then we |
120 |
// first have to parallelise the request of webpages: |
|
121 |
// |
|
122 |
// scala -cp scala-parallel-collections_2.13-0.2.0.jar |
|
123 |
// import scala.collection.parallel.CollectionConverters._ |
|
155 | 124 |
|
125 |
||
126 |
||
320 | 127 |
// another well-known example |
128 |
//============================ |
|
178 | 129 |
|
320 | 130 |
// prints the instruction for moving a single disc between two pegs
def move(from: Char, to: Char) : Unit = {
  println(s"Move disc from $from to $to!")
}
|
67 | 132 |
|
320 | 133 |
// Towers of Hanoi: prints the moves that bring n discs from peg
// `from` to peg `to`, with peg `via` as intermediate storage
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    // first park the n - 1 upper discs on the intermediate peg ...
    hanoi(n - 1, from, to, via)
    // ... then move the remaining disc ...
    move(from, to)
    // ... and finally bring the parked discs on top of it
    hanoi(n - 1, via, from, to)
  }
}
|
67 | 141 |
|
320 | 142 |
hanoi(4, 'A', 'B', 'C') |
67 | 143 |
|
155 | 144 |
|
145 |
||
217 | 146 |
// Jumping Towers |
147 |
//================ |
|
148 |
||
149 |
||
150 |
// the first n suffixes of xs |
|
151 |
// for 1 => include xs |
|
152 |
||
153 |
// returns xs, the tail of xs, the tail of that, and so on;
// the result stops after n lists or as soon as the list is used up
def moves(xs: List[Int], n: Int) : List[List[Int]] = {
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)
}
158 |
||
159 |
||
160 |
moves(List(5,1,0), 1) |
|
161 |
moves(List(5,1,0), 2) |
|
162 |
moves(List(5,1,0), 5) |
|
163 |
||
164 |
// checks whether a jump tour exists at all |
|
165 |
||
166 |
// checks whether a jump tour exists: the empty list counts as
// success; otherwise the head is the maximal jump length, and
// the tour succeeds if the rest is shorter than that or if the
// search succeeds from one of the lists given by moves
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case step :: rest =>
    rest.length < step || moves(rest, step).exists(search)
}
172 |
||
173 |
||
174 |
search(List(5,3,2,5,1,1)) |
|
175 |
search(List(3,5,1,0,0,0,1)) |
|
176 |
search(List(3,5,1,0,0,0,0,1)) |
|
177 |
search(List(3,5,1,0,0,0,1,1)) |
|
178 |
search(List(3,5,1)) |
|
179 |
search(List(5,1,1)) |
|
180 |
search(Nil) |
|
181 |
search(List(1)) |
|
182 |
search(List(5,1,1)) |
|
183 |
search(List(3,5,1,0,0,0,0,0,0,0,0,1)) |
|
184 |
||
185 |
// generates *all* jump tours |
|
321 | 186 |
// if we are only interested in the shortest one, we could |
217 | 187 |
// short-circuit the calculation and only return List(x) in |
188 |
// case where xs.length < x, because no tour can be shorter |
|
189 |
// than 1 |
|
190 |
// |
|
191 |
||
192 |
// generates all jump tours; each tour is the list of the
// jump lengths (head elements) that were used along the way
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case step :: rest => {
    // tours that continue from one of the lists given by moves
    val longer =
      for (landing <- moves(rest, step); tour <- jumps(landing)) yield step :: tour
    // if the rest is shorter than the jump length, the single
    // jump List(step) is a tour by itself and is put in front
    if (rest.length < step) List(step) :: longer else longer
  }
}
|
200 |
||
320 | 201 |
jumps(List(5,3,2,5,1,1)).minBy(_.length) |
217 | 202 |
jumps(List(3,5,1,2,1,2,1)) |
203 |
jumps(List(3,5,1,2,3,4,1)) |
|
204 |
jumps(List(3,5,1,0,0,0,1)) |
|
205 |
jumps(List(3,5,1)) |
|
206 |
jumps(List(5,1,1)) |
|
207 |
jumps(Nil) |
|
208 |
jumps(List(1)) |
|
209 |
jumps(List(5,1,2)) |
|
210 |
moves(List(1,2), 5) |
|
211 |
jumps(List(1,5,1,2)) |
|
212 |
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1)) |
|
213 |
||
214 |
jumps(List(5,3,2,5,1,1)).minBy(_.length) |
|
215 |
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length) |
|
216 |
jumps(List(1,3,6,1,0,9)).minBy(_.length) |
|
217 |
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length) |
|
218 |
||
219 |
||
220 |
||
318 | 221 |
|
222 |
||
223 |
||
320 | 224 |
// User-defined Datatypes |
225 |
//======================== |
|
226 |
||
323 | 227 |
// binary trees with integer leaves and string-labelled inner nodes;
// the class is sealed so that pattern matches over Tree can be
// checked for exhaustiveness by the compiler
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree
|
230 |
||
231 |
List(Leaf(20), Node("foo", Leaf(1), Leaf(2))) |
|
320 | 232 |
|
321 | 233 |
// a closed set of colours; each colour is a single object
sealed trait Colour
case object Red    extends Colour
case object Green  extends Colour
case object Blue   extends Colour
case object Yellow extends Colour
320 | 238 |
|
239 |
||
240 |
// Green is the only favourite colour
def fav_colour(c: Colour) : Boolean = {
  c == Green
}
244 |
||
245 |
fav_colour(Green) |
|
246 |
||
247 |
// ... a tiny bit more useful: Roman Numerals |
|
248 |
||
321 | 249 |
// the seven Roman digits, each represented by an object
sealed trait RomanDigit
case object I extends RomanDigit
case object V extends RomanDigit
case object X extends RomanDigit
case object L extends RomanDigit
case object C extends RomanDigit
case object D extends RomanDigit
case object M extends RomanDigit

// a Roman numeral is a list of Roman digits
type RomanNumeral = List[RomanDigit]
|
259 |
||
323 | 260 |
List(X,I,M,D) |
320 | 261 |
|
262 |
/* |
|
263 |
I -> 1 |
|
264 |
II -> 2 |
|
265 |
III -> 3 |
|
266 |
IV -> 4 |
|
267 |
V -> 5 |
|
268 |
VI -> 6 |
|
269 |
VII -> 7 |
|
270 |
VIII -> 8 |
|
271 |
IX -> 9 |
|
272 |
X -> 10 |
|
273 |
*/ |
|
274 |
||
275 |
// translates a Roman numeral into an Int by reading it from the
// left: the two-digit subtractive forms (CM, CD, XC, XL, IX, IV)
// are recognised first, every other digit counts with its own value
def RomanNumeral2Int(rs: RomanNumeral): Int = {
  // the value of a digit standing on its own
  def single(d: RomanDigit): Int = d match {
    case M => 1000
    case D => 500
    case C => 100
    case L => 50
    case X => 10
    case V => 5
    case I => 1
  }

  rs match {
    case Nil            => 0
    case C :: M :: rest => 900 + RomanNumeral2Int(rest)
    case C :: D :: rest => 400 + RomanNumeral2Int(rest)
    case X :: C :: rest => 90 + RomanNumeral2Int(rest)
    case X :: L :: rest => 40 + RomanNumeral2Int(rest)
    case I :: X :: rest => 9 + RomanNumeral2Int(rest)
    case I :: V :: rest => 4 + RomanNumeral2Int(rest)
    case d :: rest      => single(d) + RomanNumeral2Int(rest)
  }
}
|
291 |
||
292 |
RomanNumeral2Int(List(I,V)) // 4 |
|
293 |
RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) |
|
294 |
RomanNumeral2Int(List(V,I)) // 6 |
|
295 |
RomanNumeral2Int(List(I,X)) // 9 |
|
296 |
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 |
|
297 |
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 |
|
298 |
||
299 |
||
300 |
// String interpolations as patterns |
|
301 |
||
302 |
val date = "2019-11-26" |
|
303 |
val s"$year-$month-$day" = date |
|
304 |
||
305 |
// parses dates of the form year-month-day, day/month/year or
// day.month.year and returns them as (day, month, year);
// None is returned if the string has none of these shapes or
// if one of the three components is not an integer
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // converts the three components, giving None as soon as
  // one of them cannot be read as an Int
  def to_date(day: String, month: String, year: String) : Option[(Int, Int, Int)] =
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)

  date match {
    case s"$year-$month-$day" => to_date(day, month, year)
    case s"$day/$month/$year" => to_date(day, month, year)
    case s"$day.$month.$year" => to_date(day, month, year)
    case _ => None
  }
}
|
318 | 311 |
|
320 | 312 |
parse_date("2019-11-26") |
313 |
parse_date("26/11/2019") |
|
314 |
parse_date("26.11.2019") |
|
315 |
||
316 |
||
317 |
// User-defined Datatypes and Pattern Matching |
|
318 |
//============================================= |
|
319 |
||
320 |
||
321 |
||
322 |
||
323 |
// Tail recursion |
|
324 |
//================ |
|
325 |
||
326 |
||
327 |
// factorial on Long, written with plain (non-tail) recursion:
// the multiplication happens after the recursive call returns
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}
|
329 |
||
330 |
// the same non-tail-recursive factorial, but on BigInt
def factB(n: BigInt): BigInt = {
  if (n != 0) n * factB(n - 1)
  else BigInt(1)
}
|
332 |
||
333 |
factB(100000) |
|
334 |
||
335 |
fact(10) //ok |
|
336 |
fact(10000) // produces a stackoverflow |
|
337 |
||
338 |
// factorial with an accumulator: the product computed so far is
// passed along in acc, so the recursive call is the last action
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}
|
340 |
||
341 |
factT(10, 1) |
|
342 |
println(factT(100000, 1)) |
|
343 |
||
344 |
// there is a flag for ensuring a function is tail recursive |
|
345 |
import scala.annotation.tailrec |
|
346 |
||
347 |
// accumulator version of factorial; the annotation makes the
// compiler reject the definition if it is not tail-recursive
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = n match {
  case zero if zero == 0 => acc
  case _ => factT(n - 1, n * acc)
}
|
350 |
||
351 |
||
352 |
||
353 |
// for tail-recursive functions the Scala compiler |
|
354 |
// generates loop-like code, which does not need |
|
355 |
// to allocate stack-space in each recursive |
|
356 |
// call; Scala can do this only for tail-recursive |
|
357 |
// functions |
|
358 |
||
155 | 359 |
// tail recursive version that searches |
158 | 360 |
// for all solutions |
361 |
||
155 | 362 |
// works through the list of games and collects in sols every game
// for which isDone holds; for any other game the games produced
// from its candidates are put in front of the remaining games
// (isDone, candidates, emptyPosition, empty and update are
// defined outside this section)
// NOTE(review): both emptyPosition(game) and empty(game) are used;
// confirm that this is intended
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game :: rest if isDone(game) => searchT(rest, game :: sols)
  case game :: rest => {
    val successors =
      candidates(game, emptyPosition(game)).map(c => update(game, empty(game), c))
    searchT(successors ::: rest, sols)
  }
}
372 |
||
158 | 373 |
searchT(List(game3), List()).map(pretty) |
374 |
||
375 |
||
155 | 376 |
// tail recursive version that searches |
377 |
// for a single solution |
|
158 | 378 |
|
155 | 379 |
// works through the list of games and returns the first game
// for which isDone holds, or None if the list runs empty; for any
// other game the games produced from its candidates are put in
// front of the remaining games
// (isDone, candidates, emptyPosition, empty and update are
// defined outside this section)
// NOTE(review): both emptyPosition(game) and empty(game) are used;
// confirm that this is intended
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game :: _ if isDone(game) => Some(game)
  case game :: rest => {
    val successors =
      candidates(game, emptyPosition(game)).map(c => update(game, empty(game), c))
    search1T(successors ::: rest)
  }
}
389 |
||
158 | 390 |
search1T(List(game3)).map(pretty) |
217 | 391 |
time_needed(10, search1T(List(game3))) |
392 |
||
158 | 393 |
|
155 | 394 |
// game with multiple solutions |
395 |
val game3 = """.8...9743 |
|
396 |
|.5...8.1. |
|
397 |
|.1....... |
|
398 |
|8....5... |
|
399 |
|...8.4... |
|
400 |
|...3....6 |
|
401 |
|.......7. |
|
402 |
|.3.5...8. |
|
403 |
|9724...5.""".stripMargin.replaceAll("\\n", "") |
|
404 |
||
158 | 405 |
searchT(List(game3), Nil).map(pretty) |
155 | 406 |
search1T(List(game3)).map(pretty) |
67 | 407 |
|
77
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
408 |
// Moral: Whenever a recursive function is resource-critical |
158 | 409 |
// (i.e. works with large recursion depth), then you need to |
77
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
410 |
// write it in tail-recursive fashion. |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
411 |
// |
155 | 412 |
// Unfortunately, Scala because of current limitations in |
413 |
// the JVM is not as clever as other functional languages. It can |
|
77
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
414 |
// only optimise "self-tail calls". This excludes the cases of |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
415 |
// multiple functions making tail calls to each other. Well, |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
416 |
// nothing is perfect. |
3cbe3d90b77f
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
73
diff
changeset
|
417 |