| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Wed, 04 Nov 2020 14:46:03 +0000 | |
| changeset 349 | ef4bb09a01b7 | 
| parent 343 | 51e25cc30483 | 
| child 364 | 18942af74fa1 | 
| permissions | -rw-r--r-- | 
| 67 | 1 | // Scala Lecture 3 | 
| 2 | //================= | |
| 3 | ||
| 320 | 4 | // - last week | 
| 5 | // | |
| 6 | // option type | |
| 7 | // higher-order function | |
| 8 | ||
| 9 | ||
// adds two integers that are handed over together in one argument list
def add(x: Int, y: Int) : Int = {
  x + y
}

// add with its first argument fixed to 5
def plus5(x: Int) : Int = {
  val fixed = 5
  add(fixed, x)
}
| 13 | ||
| 14 | plus5(6) | |
| 15 | ||
// curried addition: the two summands are supplied in separate
// argument lists, so add2(3) on its own is a function Int => Int
def add2(x: Int)(y: Int) : Int = x + y

// add2 with its first argument fixed to 3; because the second
// argument y is supplied as well, the result is an Int
// (not a function Int => Int)
def plus3(y: Int) : Int = add2(3)(y)
| 19 | ||
| 20 | plus3(9) | |
| 21 | ||
| 22 | List(1,2,3,4,5).map(add2(3)) | |
| 23 | List(1,2,3,4,5).map(add(3, _)) | |
| 24 | ||
// a position is a pair of Int coordinates
type Pos = (Int, Int)

// Returns Some(p) if both components of p are smaller than 5 and
// None otherwise. The explicit else-branch is needed: without it
// the result type is not an Option and the results cannot be
// flattened.
def test(p: Pos) : Option[Pos] = {
  if (p._1 < 5 && p._2 < 5) Some(p)
  else None
}
| 32 | ||
| 33 | val l = List((1,2), (5,3), (2,5), (1,3)) | |
| 34 | ||
| 35 | l.map(test).flatten | |
| 320 | 36 | |
| 343 | 37 | // naive quicksort with "On" function | 
| 38 | ||
// Naive quicksort that orders xs by the keys f assigns to the
// elements (ascending); the head of the list serves as pivot.
//   f  - computes the sort key of an element
//   xs - the list to be sorted
// Lists with fewer than two elements are returned unchanged. The
// match avoids traversing the whole list just to test its size,
// and the key of the pivot is computed only once per call.
def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = xs match {
  case Nil | _ :: Nil => xs
  case pivot :: rest => {
    val pivot_key = f(pivot)
    // elements with a strictly smaller key go to the left of the pivot
    val (smaller, others) = rest.partition(f(_) < pivot_key)
    sortOn(f, smaller) ::: pivot :: sortOn(f, others)
  }
}
| 47 | ||
| 48 | sortOn(identity, List(99,99,99,98,10,-3,2)) | |
| 49 | sortOn(n => - n, List(99,99,99,98,10,-3,2)) | |
| 50 | ||
| 51 | ||
| 52 | ||
| 53 | ||
| 320 | 54 | // Recursion Again ;o) | 
| 55 | //==================== | |
| 56 | ||
| 217 | 57 | |
| 58 | // A Web Crawler / Email Harvester | |
| 59 | //================================= | |
| 60 | // | |
| 61 | // the idea is to look for links using the | |
| 62 | // regular expression "https?://[^"]*" and for | |
| 218 | 63 | // email addresses using yet another regex. | 
| 217 | 64 | |
| 65 | import io.Source | |
| 66 | import scala.util._ | |
| 155 | 67 | |
| 217 | 68 | // gets the first 10K of a web-page | 
// Gets the first 10K characters of a web-page (read as ISO-8859-1).
// If the page cannot be fetched, a message is printed and the
// empty string is returned. Using makes sure the underlying
// Source is closed again after reading; like Try it yields a
// Failure for any non-fatal exception.
def get_page(url: String) : String = {
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); ""}
}
| 155 | 73 | |
| 217 | 74 | // regex for URLs and emails | 
// a link is an http(s)-URL enclosed in double quotes; the
// quotes are part of the match
val http_pattern : scala.util.matching.Regex =
  """"https?://[^"]*"""".r

// an email address with three groups: local part, domain and
// a final part of 2 to 6 characters
val email_pattern : scala.util.matching.Regex =
  """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
| 77 | ||
| 218 | 78 | // val s = "foo bla christian@kcl.ac.uk 1234567" | 
| 79 | // email_pattern.findAllIn(s).toList | |
| 155 | 80 | |
| 217 | 81 | // drops the first and last character from a string | 
// removes the first and the last character of s; strings with
// fewer than two characters yield the empty string
def unquote(s: String) = s.slice(1, s.length - 1)
| 155 | 83 | |
// all URLs matched by http_pattern in page, with the first and
// last character of each match removed by unquote
def get_all_URLs(page: String): Set[String] = {
  val quoted_links = http_pattern.findAllIn(page)
  (for (link <- quoted_links) yield unquote(link)).toSet
}
| 155 | 86 | |
| 320 | 87 | // a naive version of crawl - searches until a given depth, | 
| 217 | 88 | // visits pages potentially more than once | 
// Visits url and then every link found on its page, going down
// n levels; n == 0 means there is nothing left to do. Each
// visited page is reported on the console. No record of visited
// pages is kept, so a page can be fetched several times.
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val links = get_all_URLs(get_page(url))
    links.foreach(crawl(_, n - 1))
  }
}
| 97 | ||
| 217 | 98 | // some starting URLs for the crawler | 
| 99 | val startURL = """https://nms.kcl.ac.uk/christian.urban/""" | |
| 320 | 100 | |
| 217 | 101 | crawl(startURL, 2) | 
| 102 | ||
| 323 | 103 | for (x <- List(1,2,3,4,5,6)) println(x) | 
| 318 | 104 | |
| 320 | 105 | // a primitive email harvester | 
// Collects the email addresses on the page at url and on all
// pages reachable from it via links, going down n levels.
// The linked pages are processed via .par, which needs
//   import scala.collection.parallel.CollectionConverters._
// from the scala-parallel-collections library to be in scope.
def emails(url: String, n: Int) : Set[String] = n match {
  case 0 => Set()
  case _ => {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val found_here = email_pattern.findAllIn(page).toSet
    // one set of addresses per linked page, merged into a single set
    val found_below = get_all_URLs(page).par.map(link => emails(link, n - 1)).flatten
    found_here ++ found_below
  }
}
| 115 | ||
| 323 | 116 | emails(startURL, 3) | 
| 218 | 117 | |
| 118 | ||
| 320 | 119 | // if we want to explore the internet "deeper", then we | 
| 120 | // first have to parallelise the request of webpages: | |
| 121 | // | |
| 122 | // scala -cp scala-parallel-collections_2.13-0.2.0.jar | |
| 123 | // import scala.collection.parallel.CollectionConverters._ | |
| 155 | 124 | |
| 125 | ||
| 126 | ||
| 320 | 127 | // another well-known example | 
| 128 | //============================ | |
| 178 | 129 | |
// prints the instruction for moving a single disc
def move(from: Char, to: Char) = {
  val instruction = s"Move disc from $from to $to!"
  println(instruction)
}

// Prints the moves that transfer a tower of n discs from peg
// `from` to peg `to`, with `via` as the auxiliary peg:
// first the n - 1 upper discs are parked on via, then one disc
// is moved, then the parked discs follow.
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    hanoi(n - 1, from, to, via)
    move(from, to)
    hanoi(n - 1, via, from, to)
  }
}
| 67 | 141 | |
| 320 | 142 | hanoi(4, 'A', 'B', 'C') | 
| 67 | 143 | |
| 155 | 144 | |
| 145 | ||
| 217 | 146 | // Jumping Towers | 
| 147 | //================ | |
| 148 | ||
| 149 | ||
| 150 | // the first n non-empty suffixes (tails) of xs, longest first | |
| 151 | // for n = 1 => only xs itself | |
| 152 | ||
// Returns xs followed by its successively shorter non-empty
// tails. The list stops once xs is used up or n has been counted
// down to 0, e.g. moves(List(5,1,0), 2) gives
// List(List(5,1,0), List(1,0)).
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)
| 158 | ||
| 159 | ||
| 160 | moves(List(5,1,0), 1) | |
| 161 | moves(List(5,1,0), 2) | |
| 162 | moves(List(5,1,0), 5) | |
| 163 | ||
| 164 | // checks whether a jump tour exists at all | |
| 165 | ||
// Checks whether a jump tour exists at all: the first number says
// how far one may jump at most. The empty list counts as success;
// otherwise either the jump reaches beyond the rest of the list,
// or one of the positions given by moves must itself succeed.
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case jump :: rest =>
    rest.length < jump || moves(rest, jump).exists(search)
}
| 172 | ||
| 173 | ||
| 174 | search(List(5,3,2,5,1,1)) | |
| 175 | search(List(3,5,1,0,0,0,1)) | |
| 176 | search(List(3,5,1,0,0,0,0,1)) | |
| 177 | search(List(3,5,1,0,0,0,1,1)) | |
| 178 | search(List(3,5,1)) | |
| 179 | search(List(5,1,1)) | |
| 180 | search(Nil) | |
| 181 | search(List(1)) | |
| 182 | search(List(5,1,1)) | |
| 183 | search(List(3,5,1,0,0,0,0,0,0,0,0,1)) | |
| 184 | ||
| 185 | // generates *all* jump tours | |
| 321 | 186 | // if we are only interested in the shortest one, we could | 
| 217 | 187 | // short-circuit the calculation and only return List(x) in | 
| 188 | // case where xs.length < x, because no tour can be shorter | |
| 189 | // than 1 | |
| 190 | // | |
| 191 | ||
// Generates all jump tours: every tour starts with the first
// number of the list and continues with a tour from one of the
// positions given by moves. If the first number is larger than
// the length of the rest, the one-element tour is added in front.
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case jump :: rest => {
    val longer_tours =
      for {
        next <- moves(rest, jump)
        tour <- jumps(next)
      } yield jump :: tour
    if (rest.length < jump) List(jump) :: longer_tours else longer_tours
  }
}
| 200 | ||
| 320 | 201 | jumps(List(5,3,2,5,1,1)).minBy(_.length) | 
| 217 | 202 | jumps(List(3,5,1,2,1,2,1)) | 
| 203 | jumps(List(3,5,1,2,3,4,1)) | |
| 204 | jumps(List(3,5,1,0,0,0,1)) | |
| 205 | jumps(List(3,5,1)) | |
| 206 | jumps(List(5,1,1)) | |
| 207 | jumps(Nil) | |
| 208 | jumps(List(1)) | |
| 209 | jumps(List(5,1,2)) | |
| 210 | moves(List(1,2), 5) | |
| 211 | jumps(List(1,5,1,2)) | |
| 212 | jumps(List(3,5,1,0,0,0,0,0,0,0,0,1)) | |
| 213 | ||
| 214 | jumps(List(5,3,2,5,1,1)).minBy(_.length) | |
| 215 | jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length) | |
| 216 | jumps(List(1,3,6,1,0,9)).minBy(_.length) | |
| 217 | jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length) | |
| 218 | ||
| 219 | ||
| 220 | ||
| 318 | 221 | |
| 222 | ||
| 223 | ||
| 320 | 224 | // User-defined Datatypes | 
| 225 | //======================== | |
| 226 | ||
// Binary trees: a Leaf holds an Int, a Node holds a String label
// and two subtrees. The class is sealed (like Colour and
// RomanDigit), so the compiler can check pattern matches over
// trees for exhaustiveness.
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree
| 230 | ||
| 231 | List(Leaf(20), Node("foo", Leaf(1), Leaf(2)))
 | |
| 320 | 232 | |
// the four colours
sealed abstract class Colour
case object Red extends Colour
case object Green extends Colour
case object Blue extends Colour
case object Yellow extends Colour


// Green is the only favourite colour; the other colours are
// listed explicitly so that the match stays exhaustive
def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case Red | Blue | Yellow => false
}
| 244 | ||
| 245 | fav_colour(Green) | |
| 246 | ||
| 247 | // ... a tiny bit more useful: Roman Numerals | |
| 248 | ||
// the seven Roman digits, one case object per digit
sealed abstract class RomanDigit
case object I extends RomanDigit
case object V extends RomanDigit
case object X extends RomanDigit
case object L extends RomanDigit
case object C extends RomanDigit
case object D extends RomanDigit
case object M extends RomanDigit

// a Roman numeral is a list of Roman digits
type RomanNumeral = List[RomanDigit]
| 259 | ||
| 323 | 260 | List(X,I,M,D) | 
| 320 | 261 | |
| 262 | /* | |
| 263 | I -> 1 | |
| 264 | II -> 2 | |
| 265 | III -> 3 | |
| 266 | IV -> 4 | |
| 267 | V -> 5 | |
| 268 | VI -> 6 | |
| 269 | VII -> 7 | |
| 270 | VIII -> 8 | |
| 271 | IX -> 9 | |
| 272 | X -> 10 | |
| 273 | */ | |
| 274 | ||
// Translates a Roman numeral into an Int by reading it from left
// to right: the two-digit forms CM, CD, XC, XL, IX and IV are
// tried first, then the value of a single digit is added. No
// validity check is made, e.g. List(I,I,I,I) gives 4.
def RomanNumeral2Int(rs: RomanNumeral): Int = {
  // the value of a digit standing on its own
  def value(d: RomanDigit) : Int = d match {
    case I => 1
    case V => 5
    case X => 10
    case L => 50
    case C => 100
    case D => 500
    case M => 1000
  }
  rs match {
    case Nil => 0
    case C :: M :: rest => 900 + RomanNumeral2Int(rest)
    case C :: D :: rest => 400 + RomanNumeral2Int(rest)
    case X :: C :: rest => 90 + RomanNumeral2Int(rest)
    case X :: L :: rest => 40 + RomanNumeral2Int(rest)
    case I :: X :: rest => 9 + RomanNumeral2Int(rest)
    case I :: V :: rest => 4 + RomanNumeral2Int(rest)
    case d :: rest => value(d) + RomanNumeral2Int(rest)
  }
}
| 291 | ||
| 292 | RomanNumeral2Int(List(I,V)) // 4 | |
| 293 | RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) | |
| 294 | RomanNumeral2Int(List(V,I)) // 6 | |
| 295 | RomanNumeral2Int(List(I,X)) // 9 | |
| 296 | RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 | |
| 297 | RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 | |
| 298 | ||
| 299 | ||
| 300 | // String interpolations as patterns | |
| 301 | ||
| 302 | val date = "2019-11-26" | |
| 303 | val s"$year-$month-$day" = date | |
| 304 | ||
// Parses dates of the forms year-month-day, day/month/year and
// day.month.year. The result is Some((day, month, year)), or None
// if the string has none of the three shapes or if one of its
// parts is not an Int (toIntOption is used, so that such input
// gives None instead of a NumberFormatException).
def parse_date(date: String) : Option[(Int, Int, Int)]= {
  // converts the three parts; None as soon as one is not a number
  def to_triple(day: String, month: String, year: String) : Option[(Int, Int, Int)] =
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)

  date match {
    case s"$year-$month-$day" => to_triple(day, month, year)
    case s"$day/$month/$year" => to_triple(day, month, year)
    case s"$day.$month.$year" => to_triple(day, month, year)
    case _ => None
  }
}
| 318 | 311 | |
| 320 | 312 | parse_date("2019-11-26")
 | 
| 313 | parse_date("26/11/2019")
 | |
| 314 | parse_date("26.11.2019")
 | |
| 315 | ||
| 316 | ||
| 317 | // User-defined Datatypes and Pattern Matching | |
| 318 | //============================================= | |
| 319 | ||
| 320 | ||
| 321 | ||
| 322 | ||
| 323 | // Tail recursion | |
| 324 | //================ | |
| 325 | ||
| 326 | ||
// factorial on Long by direct recursion; the multiplication takes
// place after the recursive call returns, so this version is not
// tail recursive
def fact(n: Long): Long = n match {
  case 0L => 1L
  case _ => n * fact(n - 1)
}
| 329 | ||
// the same direct recursion as in fact, but on BigInt
def factB(n: BigInt): BigInt = {
  if (n != 0) n * factB(n - 1)
  else BigInt(1)
}
| 332 | ||
| 333 | factB(100000) | |
| 334 | ||
| 335 | fact(10) //ok | |
| 336 | fact(10000) // produces a stackoverflow | |
| 337 | ||
// factorial with an accumulator: acc carries the product built up
// so far, and the recursive call is the last thing that happens
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}
| 340 | ||
| 341 | factT(10, 1) | |
| 342 | println(factT(100000, 1)) | |
| 343 | ||
| 344 | // there is a flag for ensuring a function is tail recursive | |
| 345 | import scala.annotation.tailrec | |
| 346 | ||
// the accumulator version of factorial again, this time with the
// annotation that makes the compiler reject the definition if it
// is not tail recursive
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = n.signum match {
  case 0 => acc
  case _ => factT(n - 1, n * acc)
}
| 350 | ||
| 351 | ||
| 352 | ||
| 353 | // for tail-recursive functions the Scala compiler | |
| 354 | // generates loop-like code, which does not need | |
| 355 | // to allocate stack-space in each recursive | |
| 356 | // call; Scala can do this only for tail-recursive | |
| 357 | // functions | |
| 358 | ||
| 155 | 359 | // tail recursive version that searches | 
| 158 | 360 | // for all solutions | 
| 361 | ||
// Tail-recursive search for all solutions.
//   games - the games that still have to be looked at
//   sols  - the finished games found so far
// A finished game (isDone) is added to sols; for any other game
// the games obtained by update for each of its candidates are
// put in front of the remaining games. When no games are left,
// sols is returned.
// NOTE(review): isDone, candidates, emptyPosition, empty and update
// are not defined in this part of the file - presumably the Sudoku
// helpers from an earlier lecture; confirm they are in scope.
// @tailrec lets the compiler check that both recursive calls are
// indeed tail calls.
@tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, empty(game), c)) ::: rest, sols)
    }
  }
}
| 372 | ||
| 158 | 373 | searchT(List(game3), List()).map(pretty) | 
| 374 | ||
| 375 | ||
| 155 | 376 | // tail recursive version that searches | 
| 377 | // for a single solution | |
| 158 | 378 | |
// Tail-recursive search for a single solution.
//   games - the games that still have to be looked at
// Returns the first finished game (isDone) that is encountered,
// or None once no games are left. For an unfinished game the
// games obtained by update for each of its candidates are put in
// front of the remaining games.
// NOTE(review): isDone, candidates, emptyPosition, empty and update
// are not defined in this part of the file - presumably the Sudoku
// helpers from an earlier lecture; confirm they are in scope.
// @tailrec lets the compiler check that the recursive call is
// indeed a tail call.
@tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, empty(game), c)) ::: rest)
    }
  }
}
| 389 | ||
| 158 | 390 | search1T(List(game3)).map(pretty) | 
| 217 | 391 | time_needed(10, search1T(List(game3))) | 
| 392 | ||
| 158 | 393 | |
| 155 | 394 | // game with multiple solutions | 
// game with multiple solutions: the nine rows of the grid,
// joined into a single string of 81 characters
val game3 = List(
  ".8...9743",
  ".5...8.1.",
  ".1.......",
  "8....5...",
  "...8.4...",
  "...3....6",
  ".......7.",
  ".3.5...8.",
  "9724...5."
).mkString
| 404 | ||
| 158 | 405 | searchT(List(game3), Nil).map(pretty) | 
| 155 | 406 | search1T(List(game3)).map(pretty) | 
| 67 | 407 | |
| 77 | 408 | // Moral: Whenever a recursive function is resource-critical | 
| 158 | 409 | // (i.e. works with large recursion depth), then you need to | 
| 77 | 410 | // write it in tail-recursive fashion. | 
| 411 | // | |
| 155 | 412 | // Unfortunately, Scala, because of current limitations in | 
| 413 | // the JVM, is not as clever as other functional languages. It can | |
| 77 | 414 | // only optimise "self-tail calls". This excludes the cases of | 
| 415 | // multiple functions making tail calls to each other. Well, | |
| 416 | // nothing is perfect. | |
| 417 | ||
| 418 | ||
| 67 | 419 | |
| 420 | ||
| 71 | 421 | |
| 67 | 422 | |
| 335 | 423 | |
| 424 | ||
| 425 | ||
| 426 | ||
| 427 | //************ | |
| 428 | // Either | |
// two Either values that both hold an Int on the right
val either1 : Either[Exception, Int] = Right(1)
val either2 : Either[Exception, Int] = Right(2)

// adds the two right-hand values; this is what the
// for-comprehension over both values amounts to
either1.flatMap(one => either2.map(two => one + two))