// Scala Lecture 3
//=================

// last week:
// higher-order functions
// maps

// - recursion
// - Sudoku
// - string interpolations
// - Pattern-Matching

// NOTE: these are lecture notes meant to be evaluated piece by piece
// (REPL / worksheet); some names are deliberately defined more than once.

// A Recursive Web Crawler / Email Harvester
//===========================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page; on any (non-fatal) problem a
// message is printed and the empty string is returned.
// Using(...) makes sure the underlying Source is closed again, no
// matter whether reading succeeds or fails.
def get_page(url: String) : String = {
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); ""}
}

// regex for URLs and emails
val http_pattern = """"https?://[^"]*"""".r
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//test case:
//email_pattern.findAllIn
//  ("foo bla christian@kcl.ac.uk 1234567").toList

// drops the first and last character from a string
def unquote(s: String) : String = s.drop(1).dropRight(1)

// all (double-quoted) http/https links occurring in a page,
// with the surrounding quotes removed
def get_all_URLs(page: String): Set[String] =
  http_pattern.findAllIn(page).map(unquote).toSet

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
def crawl(url: String, n: Int) : Unit = {
  if (n == 0) ()
  else {
    println(s" Visiting: $n $url")
    for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

crawl(startURL, 2)

// a primitive email harvester: collects the email addresses on the
// page at url and, recursively, on all pages linked from it (depth n)
def emails(url: String, n: Int) : Set[String] = {
  if (n == 0) Set()
  else {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    new_emails ++ (for (u <- get_all_URLs(page)) yield emails(u, n - 1)).flatten
  }
}

emails(startURL, 2)


// Sudoku
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search

// a game is a string of 81 characters (row after row);
// '.' stands for an empty field
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a position is (column, row)
type Pos = (Int, Int)
val EmptyValue = '.'
val MaxValue = 9

// one row of the game per line
def pretty(game: String): String =
  "\n" + (game.grouped(MaxValue).mkString("\n"))

pretty(game0)

val allValues = "123456789".toList
val indexes = (0 to 8).toList

// index of the first empty field, or -1 if there is none
def empty(game: String) : Int = game.indexOf(EmptyValue)
def isDone(game: String) : Boolean = empty(game) == -1
// (column, row) of the first empty field;
// only meaningful if the game is not done yet
def emptyPosition(game: String) : Pos = {
  val e = empty(game)
  (e % MaxValue, e / MaxValue)
}

def get_row(game: String, y: Int) : List[Char] =
  indexes.map(col => game(y * MaxValue + col))
def get_col(game: String, x: Int) : List[Char] =
  indexes.map(row => game(x + row * MaxValue))

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)

// the nine fields of the 3x3 box that contains pos
def get_box(game: String, pos: Pos): List[Char] = {
  def base(p: Int): Int = (p / 3) * 3   // first column/row of the box
  val x0 = base(pos._1)
  val y0 = base(pos._2)
  val ys = (y0 until y0 + 3).toList
  (x0 until x0 + 3).toList
    .flatMap(x => ys.map(y => game(x + y * MaxValue)))
}

//get_box(game0, (3, 1))

// this is not mutable!!
def update(game: String, pos: Int, value: Char): String =
  game.updated(pos, value)

// everything already present in the column, row and box of pos
def toAvoid(game: String, pos: Pos): List[Char] =
  (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))

// the values that can still be placed at pos
def candidates(game: String, pos: Pos): List[Char] =
  allValues.diff(toAvoid(game, pos))

//candidates(game0, (0,0))

// exhaustive depth-first search: fills the first empty field with
// every candidate and recurses; returns all solutions of the game
def search(game: String): List[String] = {
  // .par is provided by the scala-parallel-collections module; the
  // import is local so that search does not depend on an import
  // further down in the file
  import scala.collection.parallel.CollectionConverters._
  if (isDone(game)) List(game)
  else {
    val cs = candidates(game, emptyPosition(game))
    cs.par.map(c => search(update(game, empty(game), c))).flatten.toList
  }
}

pretty(game0)
search(game0).map(pretty)

val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")

search(game1).map(pretty)

// a game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

search(game2).map(pretty)

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

search(game3).map(pretty).foreach(println)

// for measuring time: runs code i times and
// reports the total time as a string
def time_needed[T](i: Int, code: => T) : String = {
  val start = System.nanoTime()
  for (j <- 1 to i) code
  val end = System.nanoTime()
  s"${(end - start) / 1.0e9} secs"
}

time_needed(2, search(game2))

// concurrency
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
// import scala.collection.parallel.CollectionConverters._


// String Interpolations
//=======================

def cube(n: Int) : Int = n * n * n

val n = 3
println("The cube of " + n + " is " + cube(n) + ".")

println(s"The cube of $n is ${cube(n)}.")

// or even
println(s"The cube of $n is ${n * n * n}.")

// helpful for debugging purposes
//
//     "The most effective debugging tool is still careful
//          thought, coupled with judiciously placed print
//                                             statements."
//       — Brian W. Kernighan, in Unix for Beginners (1979)

def gcd_db(a: Int, b: Int) : Int = {
  println(s"Function called with $a and $b.")
  if (b == 0) a else gcd_db(b, a % b)
}

gcd_db(48, 18)


// Recursion Again ;o)
//====================

// another well-known example: Towers of Hanoi
//=============================================

def move(from: Char, to: Char) : Unit =
  println(s"Move disc from $from to $to!")

// moves n discs from peg `from` to peg `to`, using `via` as buffer
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n == 0) ()
  else {
    hanoi(n - 1, from, to, via)
    move(from, to)
    hanoi(n - 1, via, from, to)
  }
}

hanoi(4, 'A', 'B', 'C')


// Pattern Matching
//==================

// A powerful tool which has even landed in Java during
// the last few years (https://inside.java/2021/06/13/podcast-017/).
// ...Scala already has it for many years and the concept is
// older than your friendly lecturer, that is stone old  ;o)

// The general schema:
//
//    expression match {
//       case pattern1 => expression1
//       case pattern2 => expression2
//       ...
//       case patternN => expressionN
//    }

// recall
def len(xs: List[Int]) : Int = {
  if (xs == Nil) 0
  else 1 + len(xs.tail)
}

// the same function written with pattern-matching
// (replaces the definition above when evaluated in the REPL)
def len(xs: List[Int]) : Int = xs match {
  case Nil => 0
  case hd::tail => 1 + len(tail)
}

def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
  lst match {
    case Nil => Nil
    case x::xs => f(x)::my_map_int(xs, f)
  }

def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] =
  opt match {
    case None => None
    case Some(x) => Some(f(x))
  }

my_map_option(None, x => x * x)
my_map_option(Some(8), x => x * x)

// you can also have cases combined
def season(month: String) : String = month match {
  case "March" | "April" | "May" => "It's spring"
  case "June" | "July" | "August" => "It's summer"
  case "September" | "October" | "November" => "It's autumn"
  case "December" => "It's winter"
  case "January" | "February" => "It's unfortunately winter"
  case _ => "Wrong month"
}

// pattern-match on integers
// (only intended for n >= 0)
def fib(n: Int) : Int = n match {
  case 0 | 1 => 1
  case n => fib(n - 1) + fib(n - 2)
}

fib(10)

// pattern-match on results

// Silly: fizz buzz
def fizz_buzz(n: Int) : String = (n % 3, n % 5) match {
  case (0, 0) => "fizz buzz"
  case (0, _) => "fizz"
  case (_, 0) => "buzz"
  case _ => n.toString
}

for (n <- 1 to 20)
  println(fizz_buzz(n))

// guards in pattern-matching
// (cases are tried from top to bottom, the first match wins)
def foo(xs: List[Int]) : String = xs match {
  case Nil => s"this list is empty"
  case x :: xs if x % 2 == 0 => s"the first elemnt is even"
  case x :: y :: rest if x == y => s"this has two elemnts that are the same"
  case hd :: tl => s"this list is standard $hd::$tl"
}

foo(Nil)
foo(List(1,2,3))
foo(List(1,2))
foo(List(1,1,2,3))
foo(List(2,2,2,3))

// Trees
abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree

val lf = Leaf(20)
val tr = Node("foo", Leaf(10), Leaf(23))

val lst : List[Tree] = List(lf, tr)

abstract class Colour
case object Red extends Colour
case object Green extends Colour
case object Blue extends Colour
case object Yellow extends Colour

def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case _ => false
}

fav_colour(Blue)

// ...
a tiny bit more useful: Roman Numeralssealed abstract class RomanDigit case object I extends RomanDigit case object V extends RomanDigit case object X extends RomanDigit case object L extends RomanDigit case object C extends RomanDigit case object D extends RomanDigit case object M extends RomanDigit type RomanNumeral = List[RomanDigit] List(X,I,M,A)/*I -> 1II -> 2III -> 3IV -> 4V -> 5VI -> 6VII -> 7VIII -> 8IX -> 9X -> 10*/def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { case Nil => 0 case M::r => 1000 + RomanNumeral2Int(r) case C::M::r => 900 + RomanNumeral2Int(r) case D::r => 500 + RomanNumeral2Int(r) case C::D::r => 400 + RomanNumeral2Int(r) case C::r => 100 + RomanNumeral2Int(r) case X::C::r => 90 + RomanNumeral2Int(r) case L::r => 50 + RomanNumeral2Int(r) case X::L::r => 40 + RomanNumeral2Int(r) case X::r => 10 + RomanNumeral2Int(r) case I::X::r => 9 + RomanNumeral2Int(r) case V::r => 5 + RomanNumeral2Int(r) case I::V::r => 4 + RomanNumeral2Int(r) case I::r => 1 + RomanNumeral2Int(r)}RomanNumeral2Int(List(I,V)) // 4RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)RomanNumeral2Int(List(V,I)) // 6RomanNumeral2Int(List(I,X)) // 9RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017abstract class Rexpcase object ZERO extends Rexp // matches nothingcase object ONE extends Rexp // matches the empty stringcase class CHAR(c: Char) extends Rexp // matches a character ccase class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternativecase class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequencecase class STAR(r: Rexp) extends Rexp // stardef depth(r: Rexp) : Int = r match { case ZERO => 1 case ONE => 1 case CHAR(_) => 1 case ALT(r1, r2) => 1 + List(depth(r1), depth(r2)).max case SEQ(r1, r2) => 1 + List(depth(r1), depth(r2)).max case STAR(r1) => 1 + depth(r1)}// expressions (essentially trees)abstract class Expcase class N(n: Int) extends Exp // for numberscase class Plus(e1: Exp, e2: Exp) extends Expcase class Times(e1: 
Exp, e2: Exp) extends Expdef string(e: Exp) : String = e match { case N(n) => s"$n" case Plus(e1, e2) => s"(${string(e1)} + ${string(e2)})" case Times(e1, e2) => s"(${string(e1)} * ${string(e2)})"}val e = Plus(N(9), Times(N(3), N(4)))e.toStringprintln(string(e))def eval(e: Exp) : Int = e match { case N(n) => n case Plus(e1, e2) => eval(e1) + eval(e2) case Times(e1, e2) => eval(e1) * eval(e2) }println(eval(e))// simplification rules:// e + 0, 0 + e => e // e * 0, 0 * e => 0// e * 1, 1 * e => e//// (....9 ....)def simp(e: Exp) : Exp = e match { case N(n) => N(n) case Plus(e1, e2) => (simp(e1), simp(e2)) match { case (N(0), e2s) => e2s case (e1s, N(0)) => e1s case (e1s, e2s) => Plus(e1s, e2s) } case Times(e1, e2) => (simp(e1), simp(e2)) match { case (N(0), _) => N(0) case (_, N(0)) => N(0) case (N(1), e2s) => e2s case (e1s, N(1)) => e1s case (e1s, e2s) => Times(e1s, e2s) } }val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))println(string(e2))println(string(simp(e2)))// String interpolations as patternsval date = "2019-11-26"val s"$year-$month-$day" = datedef parse_date(date: String) : Option[(Int, Int, Int)]= date match { case s"$year-$month-$day" => Some((day.toInt, month.toInt, year.toInt)) case s"$day/$month/$year" => Some((day.toInt, month.toInt, year.toInt)) case s"$day.$month.$year" => Some((day.toInt, month.toInt, year.toInt)) case _ => None} parse_date("2019-11-26")parse_date("26/11/2019")parse_date("26.11.2019")// Map type (upper-case)//=======================// Note the difference between map and Mapval m = Map(1 -> "one", 2 -> "two", 10 -> "many")List((1, "one"), (2, "two"), (10, "many")).toMapm.get(1)m.get(4)m.getOrElse(1, "")m.getOrElse(4, "")val new_m = m + (10 -> "ten")new_m.get(10)val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)// groupBy function on Mapsval lst = List("one", "two", "three", "four", "five")lst.groupBy(_.head)lst.groupBy(_.length)lst.groupBy(_.length).get(3)val grps = lst.groupBy(_.length)grps.keySet// Tail 
// recursion
//================

def fact(n: BigInt): BigInt =
  if (n == 0) 1 else n * fact(n - 1)

fact(10)              //ok
fact(10000)           // produces a stackoverflow

// version with an accumulator: the recursive call is the
// last thing the function does
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)

factT(10, 1)
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

// same function as above; with the annotation the compiler
// rejects the definition if it is not tail-recursive
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)

// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions

def length(xs: List[Int]) : Int = xs match {
  case Nil => 0
  case _ :: tail => 1 + length(tail)
}

@tailrec
def lengthT(xs: List[Int], acc : Int) : Int = xs match {
  case Nil => acc
  case _ :: tail => lengthT(tail, 1 + acc)
}

lengthT(List.fill(10000000)(1), 0)


// Aside: concurrency
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar

for (n <- (1 to 10)) println(n)

import scala.collection.parallel.CollectionConverters._

for (n <- (1 to 10).par) println(n)


// for measuring time: evaluates code exactly n times and
// returns the total elapsed time in seconds
def time_needed[T](n: Int, code: => T) : Double = {
  val start = System.nanoTime()
  // 1 to n (not 0 to n), otherwise code would run n + 1 times
  for (i <- (1 to n)) code
  val end = System.nanoTime()
  (end - start) / 1.0e9
}

val list = (1L to 10_000_000L).toList
time_needed(10, for (n <- list) yield n + 42)
time_needed(10, for (n <- list.par) yield n + 42)

// ...but par does not make everything faster

list.sum
list.par.sum

time_needed(10, list.sum)
time_needed(10, list.par.sum)


// Mutable vs Immutable
//======================
//
// Remember:
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)

// But what the heck....lets try to count to 100000 in parallel
//
// requires
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar

import scala.collection.parallel.CollectionConverters._

// deliberately broken demo: cnt is updated from a parallel loop
// without any synchronisation, so the printed number is typically
// smaller than 100000
def test() = {
  var cnt = 0
  for(i <- (1 to 100_000).par) cnt += 1
  println(s"Should be 100000: $cnt")
}

test()

// Or
// Q: Count
// how many elements are in the intersections of
// two sets?
// A; IMPROPER WAY (mutable counter)

// counts the elements of A that are also in B.
// The loop must stay sequential: the mutable counter is not
// synchronised, so iterating over A.par would be a data race
// and give an unpredictable (usually too small) result.
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var count = 0
  for (x <- A; if B contains x) count += 1
  count
}

val A = (0 to 999).toSet
val B = (0 to 999 by 4).toSet

count_intersection(A, B)

// but do not try to add .par to the for-loop above

//propper parallel version
// (no shared mutable state: count combines the partial results itself)
def count_intersection2(A: Set[Int], B: Set[Int]) : Int =
  A.par.count(x => B contains x)

count_intersection2(A, B)