diff -r 2403c931a32f -r 0e6ca70496c1 progs/lecture3.scala --- a/progs/lecture3.scala Mon Nov 06 21:49:55 2023 +0000 +++ b/progs/lecture3.scala Fri Dec 08 00:54:36 2023 +0000 @@ -1,37 +1,218 @@ // Scala Lecture 3 //================= -// - Higher-Order functions -// - maps (behind for-comprehensions) +// last week: +// higher-order functions +// maps +// - recursion +// - Sudoku +// - string interpolations // - Pattern-Matching -def fib(n: Int) : Int = n match { - case 0 => 1 - case 1 => 1 - case n => fib(n - 1) + fib(n - 2) +// A Recursive Web Crawler / Email Harvester +//=========================================== +// +// the idea is to look for links using the +// regular expression "https?://[^"]*" and for +// email addresses using another regex. + +import io.Source +import scala.util._ + +// gets the first 10K of a web-page +def get_page(url: String) : String = { + Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). + getOrElse { println(s" Problem with: $url"); ""} +} + +// regex for URLs and emails +val http_pattern = """"https?://[^"]*"""".r +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +//test case: +//email_pattern.findAllIn +// ("foo bla christian@kcl.ac.uk 1234567").toList + + +// drops the first and last character from a string +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String): Set[String] = + http_pattern.findAllIn(page).map(unquote).toSet + +// naive version of crawl - searches until a given depth, +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println(s" Visiting: $n $url") + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +// some starting URLs for the crawler +val startURL = """https://nms.kcl.ac.uk/christian.urban/""" + +crawl(startURL, 2) + + +// a primitive email harvester +def emails(url: String, n: Int) : Set[String] = { + if (n == 0) Set() + else { + println(s" Visiting: $n $url") + val page = get_page(url) + val new_emails = email_pattern.findAllIn(page).toSet + new_emails ++ (for (u <- get_all_URLs(page)) yield emails(u, n - 1)).flatten + } +} + +emails(startURL, 2) + + + +// Sudoku +//======== + +// THE POINT OF THIS CODE IS NOT TO BE SUPER +// EFFICIENT AND FAST, just explaining exhaustive +// depth-first search + + +val game0 = """.14.6.3.. + |62...4..9 + |.8..5.6.. + |.6.2....3 + |.7..1..5. + |5....9.6. + |..6.2..3. + |1..5...92 + |..7.9.41.""".stripMargin.replaceAll("\\n", "") + +type Pos = (Int, Int) +val EmptyValue = '.' +val MaxValue = 9 + +def pretty(game: String): String = + "\n" + (game.grouped(MaxValue).mkString("\n")) + +pretty(game0) + + +val allValues = "123456789".toList +val indexes = (0 to 8).toList + +def empty(game: String) = game.indexOf(EmptyValue) +def isDone(game: String) = empty(game) == -1 +def emptyPosition(game: String) : Pos = { + val e = empty(game) + (e % MaxValue, e / MaxValue) +} + +def get_row(game: String, y: Int) = + indexes.map(col => game(y * MaxValue + col)) +def get_col(game: String, x: Int) = + indexes.map(row => game(x + row * MaxValue)) + +//get_row(game0, 0) +//get_row(game0, 1) +//get_col(game0, 0) + +def get_box(game: String, pos: Pos): List[Char] = { + def base(p: Int): Int = (p / 3) * 3 + val x0 = base(pos._1) + val y0 = base(pos._2) + val ys = (y0 until y0 + 3).toList + (x0 until x0 + 3).toList + .flatMap(x => ys.map(y => game(x + y * MaxValue))) } -abstract class Rexp -case object ZERO extends Rexp // matches nothing -case object ONE extends Rexp // matches the empty string -case class CHAR(c: Char) extends Rexp // matches a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star +//get_box(game0, (3, 1)) + + +// this is not mutable!! +def update(game: String, pos: Int, value: Char): String = + game.updated(pos, value) + +def toAvoid(game: String, pos: Pos): List[Char] = + (get_col(game, pos._1) ++ + get_row(game, pos._2) ++ + get_box(game, pos)) -def depth(r: Rexp) : Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALT(r1, r2) => 1 + List(depth(r1), depth(r2)).max - case SEQ(r1, r2) => 1 + List(depth(r1), depth(r2)).max - case STAR(r1) => 1 + depth(r1) +def candidates(game: String, pos: Pos): List[Char] = + allValues.diff(toAvoid(game, pos)) + +//candidates(game0, (0,0)) + + +def search(game: String): List[String] = { + if (isDone(game)) List(game) + else { + val cs = candidates(game, emptyPosition(game)) + cs.par.map(c => search(update(game, empty(game), c))).flatten.toList + } } +pretty(game0) +search(game0).map(pretty) -// - String-Interpolations +val game1 = """23.915... + |...2..54. + |6.7...... + |..1.....9 + |89.5.3.17 + |5.....6.. + |......9.5 + |.16..7... + |...329..1""".stripMargin.replaceAll("\\n", "") + +search(game1).map(pretty) + +// a game that is in the hard category +val game2 = """8........ + |..36..... + |.7..9.2.. + |.5...7... + |....457.. + |...1...3. + |..1....68 + |..85...1. + |.9....4..""".stripMargin.replaceAll("\\n", "") + +search(game2).map(pretty) + +// game with multiple solutions +val game3 = """.8...9743 + |.5...8.1. + |.1....... + |8....5... + |...8.4... + |...3....6 + |.......7. + |.3.5...8. + |9724...5.""".stripMargin.replaceAll("\\n", "") + +search(game3).map(pretty).foreach(println) + +// for measuring time +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + s"${(end - start) / 1.0e9} secs" +} + +time_needed(2, search(game2)) + + +// concurrency +// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar +// import scala.collection.parallel.CollectionConverters._ + + + // String Interpolations //======================= @@ -63,19 +244,6 @@ gcd_db(48, 18) -// naive quicksort with "On" function - -def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = { - if (xs.size < 2) xs - else { - val pivot = xs.head - val (left, right) = xs.partition(f(_) < f(pivot)) - sortOn(f, left) ::: pivot :: sortOn(f, right.tail) - } -} - -sortOn(identity, List(99,99,99,98,10,-3,2)) -sortOn(n => - n, List(99,99,99,98,10,-3,2)) // Recursion Again ;o) @@ -101,8 +269,103 @@ -// User-defined Datatypes -//======================== +// Pattern Matching +//================== + +// A powerful tool which has even landed in Java during +// the last few years (https://inside.java/2021/06/13/podcast-017/). +// ...Scala already has it for many years and the concept is +// older than your friendly lecturer, that is stone old ;o) + +// The general schema: +// +// expression match { +// case pattern1 => expression1 +// case pattern2 => expression2 +// ... +// case patternN => expressionN +// } + + +// recall +def len(xs: List[Int]) : Int = { + if (xs == Nil) 0 + else 1 + len(xs.tail) +} + +def len(xs: List[Int]) : Int = xs match { + case Nil => 0 + case hd::tail => 1 + len(tail) +} + + +def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = + lst match { + case Nil => Nil + case x::xs => f(x)::my_map_int(xs, f) + } + +def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] = + opt match { + case None => None + case Some(x) => Some(f(x)) + } + +my_map_option(None, x => x * x) +my_map_option(Some(8), x => x * x) + + +// you can also have cases combined +def season(month: String) : String = month match { + case "March" | "April" | "May" => "It's spring" + case "June" | "July" | "August" => "It's summer" + case "September" | "October" | "November" => "It's autumn" + case "December" => "It's winter" + case "January" | "February" => "It's unfortunately winter" + case _ => "Wrong month" +} + +// pattern-match on integers + +def fib(n: Int) : Int = n match { + case 0 | 1 => 1 + case n => fib(n - 1) + fib(n - 2) +} + +fib(10) + +// pattern-match on results + +// Silly: fizz buzz +def fizz_buzz(n: Int) : String = (n % 3, n % 5) match { + case (0, 0) => "fizz buzz" + case (0, _) => "fizz" + case (_, 0) => "buzz" + case _ => n.toString +} + +for (n <- 1 to 20) + println(fizz_buzz(n)) + +// guards in pattern-matching + +def foo(xs: List[Int]) : String = xs match { + case Nil => s"this list is empty" + case x :: xs if x % 2 == 0 + => s"the first elemnt is even" + case x :: y :: rest if x == y + => s"this has two elemnts that are the same" + case hd :: tl => s"this list is standard $hd::$tl" +} + +foo(Nil) +foo(List(1,2,3)) +foo(List(1,2)) +foo(List(1,1,2,3)) +foo(List(2,2,2,3)) + + +// Trees abstract class Tree case class Leaf(x: Int) extends Tree @@ -182,6 +445,27 @@ RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 +abstract class Rexp +case object ZERO extends Rexp // matches nothing +case object ONE extends Rexp // matches the empty string +case class CHAR(c: Char) extends Rexp // matches a character c +case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star + +def depth(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + List(depth(r1), depth(r2)).max + case SEQ(r1, r2) => 1 + List(depth(r1), depth(r2)).max + case STAR(r1) => 1 + depth(r1) +} + + + + + // expressions (essentially trees) abstract class Exp @@ -254,22 +538,44 @@ parse_date("26.11.2019") -// guards in pattern-matching + + +// Map type (upper-case) +//======================= + +// Note the difference between map and Map + +val m = Map(1 -> "one", 2 -> "two", 10 -> "many") + +List((1, "one"), (2, "two"), (10, "many")).toMap + +m.get(1) +m.get(4) + +m.getOrElse(1, "") +m.getOrElse(4, "") + +val new_m = m + (10 -> "ten") -def foo(xs: List[Int]) : String = xs match { - case Nil => s"this list is empty" - case x :: xs if x % 2 == 0 - => s"the first elemnt is even" - case x :: y :: rest if x == y - => s"this has two elemnts that are the same" - case hd :: tl => s"this list is standard $hd::$tl" -} +new_m.get(10) + +val m2 = for ((k, v) <- m) yield (k, v.toUpperCase) + + + +// groupBy function on Maps +val lst = List("one", "two", "three", "four", "five") +lst.groupBy(_.head) -foo(Nil) -foo(List(1,2,3)) -foo(List(1,2)) -foo(List(1,1,2,3)) -foo(List(2,2,2,3)) +lst.groupBy(_.length) + +lst.groupBy(_.length).get(3) + +val grps = lst.groupBy(_.length) +grps.keySet + + + // Tail recursion //================ @@ -316,125 +622,89 @@ lengthT(List.fill(10000000)(1), 0) -// Sudoku -//======== - -// uses Strings for games - -type Pos = (Int, Int) -val emptyValue = '.' -val maxValue = 9 - -val allValues = "123456789".toList -val indexes = (0 to 8).toList -def empty(game: String) = game.indexOf(emptyValue) -def isDone(game: String) = empty(game) == -1 -def emptyPosition(game: String) : Pos = - (empty(game) % maxValue, empty(game) / maxValue) + -def get_row(game: String, y: Int) = indexes.map(col => game(y * maxValue + col)) -def get_col(game: String, x: Int) = indexes.map(row => game(x + row * maxValue)) +// Aside: concurrency +// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar -def get_box(game: String, pos: Pos): List[Char] = { - def base(p: Int): Int = (p / 3) * 3 - val x0 = base(pos._1) - val y0 = base(pos._2) - for (x <- (x0 until x0 + 3).toList; - y <- (y0 until y0 + 3).toList) yield game(x + y * maxValue) -} +for (n <- (1 to 10)) println(n) + +import scala.collection.parallel.CollectionConverters._ + +for (n <- (1 to 10).par) println(n) -def update(game: String, pos: Int, value: Char): String = - game.updated(pos, value) - -def toAvoid(game: String, pos: Pos): List[Char] = - (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos)) - -def candidates(game: String, pos: Pos): List[Char] = - allValues.diff(toAvoid(game, pos)) - -def search(game: String): List[String] = { - if (isDone(game)) List(game) - else - candidates(game, emptyPosition(game)). - map(c => search(update(game, empty(game), c))).flatten -} - - -def search1T(games: List[String]): Option[String] = games match { - case Nil => None - case game::rest => { - if (isDone(game)) Some(game) - else { - val cs = candidates(game, emptyPosition(game)) - search1T(cs.map(c => update(game, empty(game), c)) ::: rest) - } - } +// for measuring time +def time_needed[T](n: Int, code: => T) = { + val start = System.nanoTime() + for (i <- (0 to n)) code + val end = System.nanoTime() + (end - start) / 1.0e9 } -def pretty(game: String): String = - "\n" + (game.sliding(maxValue, maxValue).mkString(",\n")) +val list = (1L to 10_000_000L).toList +time_needed(10, for (n <- list) yield n + 42) +time_needed(10, for (n <- list.par) yield n + 42) - -// tail recursive version that searches -// for all solutions +// ...but par does not make everything faster -def searchT(games: List[String], sols: List[String]): List[String] = games match { - case Nil => sols - case game::rest => { - if (isDone(game)) searchT(rest, game::sols) - else { - val cs = candidates(game, emptyPosition(game)) - searchT(cs.map(c => update(game, empty(game), c)) ::: rest, sols) - } - } -} +list.sum +list.par.sum -searchT(List(game3), List()).map(pretty) +time_needed(10, list.sum) +time_needed(10, list.par.sum) -// tail recursive version that searches -// for a single solution +// Mutable vs Immutable +//====================== +// +// Remember: +// - no vars, no ++i, no += +// - no mutable data-structures (no Arrays, no ListBuffers) -def search1T(games: List[String]): Option[String] = games match { - case Nil => None - case game::rest => { - if (isDone(game)) Some(game) - else { - val cs = candidates(game, emptyPosition(game)) - search1T(cs.map(c => update(game, empty(game), c)) ::: rest) - } - } +// But what the heck....lets try to count to 1 Mio in parallel +// +// requires +// scala-cli --extra-jars scala- parallel-collections_3-1.0.4.jar + +import scala.collection.parallel.CollectionConverters._ + +def test() = { + var cnt = 0 + + for(i <- (1 to 100_000).par) cnt += 1 + + println(s"Should be 100000: $cnt") } -search1T(List(game3)).map(pretty) -time_needed(10, search1T(List(game3))) +test() + +// Or +// Q: Count how many elements are in the intersections of +// two sets? +// A; IMPROPER WAY (mutable counter) + +def count_intersection(A: Set[Int], B: Set[Int]) : Int = { + var count = 0 + for (x <- A.par; if B contains x) count += 1 + count +} + +val A = (0 to 999).toSet +val B = (0 to 999 by 4).toSet + +count_intersection(A, B) + +// but do not try to add .par to the for-loop above -// game with multiple solutions -val game3 = """.8...9743 - |.5...8.1. - |.1....... - |8....5... - |...8.4... - |...3....6 - |.......7. - |.3.5...8. - |9724...5.""".stripMargin.replaceAll("\\n", "") +//propper parallel version +def count_intersection2(A: Set[Int], B: Set[Int]) : Int = + A.par.count(x => B contains x) -searchT(List(game3), Nil).map(pretty) -search1T(List(game3)).map(pretty) +count_intersection2(A, B) -// Moral: Whenever a recursive function is resource-critical -// (i.e. works with large recursion depth), then you need to -// write it in tail-recursive fashion. -// -// Unfortuantely, Scala because of current limitations in -// the JVM is not as clever as other functional languages. It can -// only optimise "self-tail calls". This excludes the cases of -// multiple functions making tail calls to each other. Well, -// nothing is perfect.