--- a/progs/lecture3.scala Mon Nov 06 21:49:55 2023 +0000
+++ b/progs/lecture3.scala Fri Dec 08 00:54:36 2023 +0000
@@ -1,37 +1,218 @@
// Scala Lecture 3
//=================
-// - Higher-Order functions
-// - maps (behind for-comprehensions)
+// last week:
+// higher-order functions
+// maps
+// - recursion
+// - Sudoku
+// - string interpolations
// - Pattern-Matching
-def fib(n: Int) : Int = n match {
- case 0 => 1
- case 1 => 1
- case n => fib(n - 1) + fib(n - 2)
+// A Recursive Web Crawler / Email Harvester
+//===========================================
+//
+// the idea is to look for links using the
+// regular expression "https?://[^"]*" and for
+// email addresses using another regex.
+
+import io.Source
+import scala.util._
+
+// Fetches at most the first 10,000 characters of a web page, decoded as
+// ISO-8859-1. If anything goes wrong (malformed URL, network error, ...)
+// a message is printed and the empty string is returned.
+def get_page(url: String) : String = {
+  // Using closes the Source (and the stream behind it) once the text has
+  // been read, or if reading fails; a bare Try(...) would leave it open.
+  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
+    getOrElse { println(s" Problem with: $url"); ""}
+}
+
+// regex for URLs and emails
+val http_pattern = """"https?://[^"]*"""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+//test case:
+//email_pattern.findAllIn
+// ("foo bla christian@kcl.ac.uk 1234567").toList
+
+
+// strips one character from each end of a string
+// (used to remove the quotes around a matched link)
+def unquote(s: String) = s.slice(1, s.length - 1)
+
+// collects every quoted http(s) link matched by http_pattern in the
+// page text, with the surrounding quotes removed
+def get_all_URLs(page: String): Set[String] = {
+  val quoted = http_pattern.findAllIn(page)
+  (for (link <- quoted) yield unquote(link)).toSet
+}
+
+// naive crawler: follows links down to depth n and prints each page
+// it visits; the same page may be visited more than once
+def crawl(url: String, n: Int) : Unit =
+  if (n != 0) {
+    println(s" Visiting: $n $url")
+    get_all_URLs(get_page(url)).foreach(link => crawl(link, n - 1))
+  }
+
+// some starting URLs for the crawler
+val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
+
+crawl(startURL, 2)
+
+
+// a primitive email harvester: gathers the addresses found on the page
+// at `url` and, recursively, on all pages linked from it (depth n)
+def emails(url: String, n: Int) : Set[String] = n match {
+  case 0 => Set()
+  case _ =>
+    println(s" Visiting: $n $url")
+    val page = get_page(url)
+    val found = email_pattern.findAllIn(page).toSet
+    found ++ get_all_URLs(page).flatMap(link => emails(link, n - 1))
+}
+
+emails(startURL, 2)
+
+
+
+// Sudoku
+//========
+
+// THE POINT OF THIS CODE IS NOT TO BE SUPER
+// EFFICIENT AND FAST, just explaining exhaustive
+// depth-first search
+
+
+val game0 = """.14.6.3..
+ |62...4..9
+ |.8..5.6..
+ |.6.2....3
+ |.7..1..5.
+ |5....9.6.
+ |..6.2..3.
+ |1..5...92
+ |..7.9.41.""".stripMargin.replaceAll("\\n", "")
+
+type Pos = (Int, Int)
+val EmptyValue = '.'
+val MaxValue = 9
+
+// renders a game string as rows of MaxValue characters,
+// one row per line, preceded by a newline
+def pretty(game: String): String =
+  game.grouped(MaxValue).mkString("\n", "\n", "")
+
+pretty(game0)
+
+
+val allValues = "123456789".toList
+val indexes = (0 to 8).toList
+
+// index of the first empty cell in the game string, or -1 if there is none
+def empty(game: String) = game.indexWhere(_ == EmptyValue)
+// a game is done once no empty cell is left
+def isDone(game: String) = empty(game) < 0
+// (column, row) coordinates of the first empty cell
+def emptyPosition(game: String) : Pos = {
+  val idx = empty(game)
+  val row = idx / MaxValue
+  val col = idx % MaxValue
+  (col, row)
+}
+
+// all characters in row y (left to right)
+def get_row(game: String, y: Int) =
+  for (col <- indexes) yield game(y * MaxValue + col)
+// all characters in column x (top to bottom)
+def get_col(game: String, x: Int) =
+  for (row <- indexes) yield game(x + row * MaxValue)
+
+//get_row(game0, 0)
+//get_row(game0, 1)
+//get_col(game0, 0)
+
+// all characters of the 3x3 box that contains position pos
+def get_box(game: String, pos: Pos): List[Char] = {
+  // top-left corner of the box: round each coordinate down to a multiple of 3
+  val xStart = (pos._1 / 3) * 3
+  val yStart = (pos._2 / 3) * 3
+  for (x <- List.range(xStart, xStart + 3);
+       y <- List.range(yStart, yStart + 3)) yield game(x + y * MaxValue)
}
-abstract class Rexp
-case object ZERO extends Rexp // matches nothing
-case object ONE extends Rexp // matches the empty string
-case class CHAR(c: Char) extends Rexp // matches a character c
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
-case class STAR(r: Rexp) extends Rexp // star
+//get_box(game0, (3, 1))
+
+
+// Returns a new game string in which the character at index `pos` is
+// replaced by `value`. Nothing is mutated: `game` itself stays unchanged.
+def update(game: String, pos: Int, value: Char): String =
+ game.updated(pos, value)
+
+// every character already present in the column, row and box of pos
+// (in that order; duplicates and empty cells included)
+def toAvoid(game: String, pos: Pos): List[Char] = {
+  val (x, y) = pos
+  List(get_col(game, x), get_row(game, y), get_box(game, pos)).flatten
+}
-def depth(r: Rexp) : Int = r match {
- case ZERO => 1
- case ONE => 1
- case CHAR(_) => 1
- case ALT(r1, r2) => 1 + List(depth(r1), depth(r2)).max
- case SEQ(r1, r2) => 1 + List(depth(r1), depth(r2)).max
- case STAR(r1) => 1 + depth(r1)
+// the values from allValues that do not yet occur in the column,
+// row or box of pos
+def candidates(game: String, pos: Pos): List[Char] = {
+  val taken = toAvoid(game, pos).toSet
+  allValues.filterNot(taken)
+}
+
+//candidates(game0, (0,0))
+
+
+// Exhaustive depth-first search: fills the first empty cell with each
+// candidate value in turn and returns all completed games found this way.
+def search(game: String): List[String] = {
+  // `.par` comes from the parallel-collections module; importing it here
+  // makes the function usable even though the file-level import only
+  // appears further down in the file.
+  import scala.collection.parallel.CollectionConverters._
+  if (isDone(game)) List(game)
+  else {
+    val slot = empty(game) // index of the cell to fill, computed once
+    val cs = candidates(game, emptyPosition(game))
+    // each candidate is explored as its own sub-search, in parallel
+    cs.par.flatMap(c => search(update(game, slot, c))).toList
+  }
}
+pretty(game0)
+search(game0).map(pretty)
-// - String-Interpolations
+val game1 = """23.915...
+ |...2..54.
+ |6.7......
+ |..1.....9
+ |89.5.3.17
+ |5.....6..
+ |......9.5
+ |.16..7...
+ |...329..1""".stripMargin.replaceAll("\\n", "")
+
+search(game1).map(pretty)
+
+// a game that is in the hard category
+val game2 = """8........
+ |..36.....
+ |.7..9.2..
+ |.5...7...
+ |....457..
+ |...1...3.
+ |..1....68
+ |..85...1.
+ |.9....4..""".stripMargin.replaceAll("\\n", "")
+
+search(game2).map(pretty)
+
+// game with multiple solutions
+val game3 = """.8...9743
+ |.5...8.1.
+ |.1.......
+ |8....5...
+ |...8.4...
+ |...3....6
+ |.......7.
+ |.3.5...8.
+ |9724...5.""".stripMargin.replaceAll("\\n", "")
+
+search(game3).map(pretty).foreach(println)
+
+// for measuring time: evaluates `code` i times and reports the
+// total elapsed time as a string such as "0.42 secs"
+def time_needed[T](i: Int, code: => T) = {
+  val begin = System.nanoTime()
+  (1 to i).foreach(_ => code)
+  val elapsed = System.nanoTime() - begin
+  s"${elapsed / 1.0e9} secs"
+}
+
+time_needed(2, search(game2))
+
+
+// concurrency
+// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
+// import scala.collection.parallel.CollectionConverters._
+
+
+
// String Interpolations
//=======================
@@ -63,19 +244,6 @@
gcd_db(48, 18)
-// naive quicksort with "On" function
-
-def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = {
- if (xs.size < 2) xs
- else {
- val pivot = xs.head
- val (left, right) = xs.partition(f(_) < f(pivot))
- sortOn(f, left) ::: pivot :: sortOn(f, right.tail)
- }
-}
-
-sortOn(identity, List(99,99,99,98,10,-3,2))
-sortOn(n => - n, List(99,99,99,98,10,-3,2))
// Recursion Again ;o)
@@ -101,8 +269,103 @@
-// User-defined Datatypes
-//========================
+// Pattern Matching
+//==================
+
+// A powerful tool which has even landed in Java during
+// the last few years (https://inside.java/2021/06/13/podcast-017/).
+// ...Scala has had it for many years and the concept is
+// older than your friendly lecturer, that is, stone-old ;o)
+
+// The general schema:
+//
+// expression match {
+// case pattern1 => expression1
+// case pattern2 => expression2
+// ...
+// case patternN => expressionN
+// }
+
+
+// recall: length of a list, written with if-else
+def len(xs: List[Int]) : Int =
+  if (xs.isEmpty) 0 else len(xs.tail) + 1
+
+// the same function written with pattern matching
+def len(xs: List[Int]) : Int = xs match {
+  case _ :: rest => len(rest) + 1
+  case Nil => 0
+}
+
+
+// applies f to every element of the list, front to back
+def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
+  if (lst.isEmpty) Nil
+  else f(lst.head) :: my_map_int(lst.tail, f)
+
+// applies f to the value inside the option, if there is one
+def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] =
+  for (x <- opt) yield f(x)
+
+my_map_option(None, x => x * x)
+my_map_option(Some(8), x => x * x)
+
+
+// maps an (English, capitalised) month name to a remark about its season;
+// anything that is not a month name yields "Wrong month"
+def season(month: String) : String = {
+  val spring = Set("March", "April", "May")
+  val summer = Set("June", "July", "August")
+  val autumn = Set("September", "October", "November")
+  if (spring(month)) "It's spring"
+  else if (summer(month)) "It's summer"
+  else if (autumn(month)) "It's autumn"
+  else if (month == "December") "It's winter"
+  else if (month == "January" || month == "February") "It's unfortunately winter"
+  else "Wrong month"
+}
+
+// pattern-match on integers
+
+// Fibonacci numbers with fib(0) = fib(1) = 1
+def fib(n: Int) : Int =
+  if (n == 0 || n == 1) 1
+  else fib(n - 1) + fib(n - 2)
+
+fib(10)
+
+// pattern-match on results
+
+// Silly: fizz buzz, decided by divisibility by 3 and by 5
+def fizz_buzz(n: Int) : String = {
+  val byThree = n % 3 == 0
+  val byFive = n % 5 == 0
+  if (byThree && byFive) "fizz buzz"
+  else if (byThree) "fizz"
+  else if (byFive) "buzz"
+  else n.toString
+}
+
+for (n <- 1 to 20)
+ println(fizz_buzz(n))
+
+// guards in pattern-matching
+
+// classifies a list: empty, starting with an even number, starting with
+// two equal elements, or none of these (checked in that order)
+def foo(xs: List[Int]) : String = xs match {
+  case Nil => "this list is empty"
+  case first :: others =>
+    if (first % 2 == 0) "the first elemnt is even"
+    else others match {
+      case second :: _ if second == first =>
+        "this has two elemnts that are the same"
+      case _ => s"this list is standard $first::$others"
+    }
+}
+
+foo(Nil)
+foo(List(1,2,3))
+foo(List(1,2))
+foo(List(1,1,2,3))
+foo(List(2,2,2,3))
+
+
+// Trees
abstract class Tree
case class Leaf(x: Int) extends Tree
@@ -182,6 +445,27 @@
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
+// Regular expressions as a datatype: an abstract base class with one
+// case object / case class per constructor, so that functions over
+// regular expressions can be written by pattern matching.
+abstract class Rexp
+case object ZERO extends Rexp // matches nothing
+case object ONE extends Rexp // matches the empty string
+case class CHAR(c: Char) extends Rexp // matches a character c
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
+case class STAR(r: Rexp) extends Rexp // star
+
+// depth of a regular expression seen as a tree: atoms have depth 1,
+// every constructor adds one level on top of its deepest argument
+def depth(r: Rexp) : Int = r match {
+  case ZERO | ONE | CHAR(_) => 1
+  case ALT(left, right) => 1 + (depth(left) max depth(right))
+  case SEQ(left, right) => 1 + (depth(left) max depth(right))
+  case STAR(inner) => 1 + depth(inner)
+}
+
+
+
+
+
// expressions (essentially trees)
abstract class Exp
@@ -254,22 +538,44 @@
parse_date("26.11.2019")
-// guards in pattern-matching
+
+
+// Map type (upper-case)
+//=======================
+
+// Note the difference between map and Map
+
+val m = Map(1 -> "one", 2 -> "two", 10 -> "many")
+
+List((1, "one"), (2, "two"), (10, "many")).toMap
+
+m.get(1)
+m.get(4)
+
+m.getOrElse(1, "")
+m.getOrElse(4, "")
+
+val new_m = m + (10 -> "ten")
-def foo(xs: List[Int]) : String = xs match {
- case Nil => s"this list is empty"
- case x :: xs if x % 2 == 0
- => s"the first elemnt is even"
- case x :: y :: rest if x == y
- => s"this has two elemnts that are the same"
- case hd :: tl => s"this list is standard $hd::$tl"
-}
+new_m.get(10)
+
+val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)
+
+
+
+// groupBy function: groups the elements of a collection into a Map
+val lst = List("one", "two", "three", "four", "five")
+lst.groupBy(_.head)
-foo(Nil)
-foo(List(1,2,3))
-foo(List(1,2))
-foo(List(1,1,2,3))
-foo(List(2,2,2,3))
+lst.groupBy(_.length)
+
+lst.groupBy(_.length).get(3)
+
+val grps = lst.groupBy(_.length)
+grps.keySet
+
+
+
// Tail recursion
//================
@@ -316,125 +622,89 @@
lengthT(List.fill(10000000)(1), 0)
-// Sudoku
-//========
-
-// uses Strings for games
-
-type Pos = (Int, Int)
-val emptyValue = '.'
-val maxValue = 9
-
-val allValues = "123456789".toList
-val indexes = (0 to 8).toList
-def empty(game: String) = game.indexOf(emptyValue)
-def isDone(game: String) = empty(game) == -1
-def emptyPosition(game: String) : Pos =
- (empty(game) % maxValue, empty(game) / maxValue)
+
-def get_row(game: String, y: Int) = indexes.map(col => game(y * maxValue + col))
-def get_col(game: String, x: Int) = indexes.map(row => game(x + row * maxValue))
+// Aside: concurrency
+// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
-def get_box(game: String, pos: Pos): List[Char] = {
- def base(p: Int): Int = (p / 3) * 3
- val x0 = base(pos._1)
- val y0 = base(pos._2)
- for (x <- (x0 until x0 + 3).toList;
- y <- (y0 until y0 + 3).toList) yield game(x + y * maxValue)
-}
+for (n <- (1 to 10)) println(n)
+
+import scala.collection.parallel.CollectionConverters._
+
+for (n <- (1 to 10).par) println(n)
-def update(game: String, pos: Int, value: Char): String =
- game.updated(pos, value)
-
-def toAvoid(game: String, pos: Pos): List[Char] =
- (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))
-
-def candidates(game: String, pos: Pos): List[Char] =
- allValues.diff(toAvoid(game, pos))
-
-def search(game: String): List[String] = {
- if (isDone(game)) List(game)
- else
- candidates(game, emptyPosition(game)).
- map(c => search(update(game, empty(game), c))).flatten
-}
-
-
-def search1T(games: List[String]): Option[String] = games match {
- case Nil => None
- case game::rest => {
- if (isDone(game)) Some(game)
- else {
- val cs = candidates(game, emptyPosition(game))
- search1T(cs.map(c => update(game, empty(game), c)) ::: rest)
- }
- }
+// for measuring time: evaluates `code` n times and returns the total
+// elapsed time in seconds
+def time_needed[T](n: Int, code: => T) = {
+  val start = System.nanoTime()
+  // 1 to n (rather than 0 to n) so that `code` runs exactly n times
+  for (i <- (1 to n)) code
+  val end = System.nanoTime()
+  (end - start) / 1.0e9
}
-def pretty(game: String): String =
- "\n" + (game.sliding(maxValue, maxValue).mkString(",\n"))
+val list = (1L to 10_000_000L).toList
+time_needed(10, for (n <- list) yield n + 42)
+time_needed(10, for (n <- list.par) yield n + 42)
-
-// tail recursive version that searches
-// for all solutions
+// ...but par does not make everything faster
-def searchT(games: List[String], sols: List[String]): List[String] = games match {
- case Nil => sols
- case game::rest => {
- if (isDone(game)) searchT(rest, game::sols)
- else {
- val cs = candidates(game, emptyPosition(game))
- searchT(cs.map(c => update(game, empty(game), c)) ::: rest, sols)
- }
- }
-}
+list.sum
+list.par.sum
-searchT(List(game3), List()).map(pretty)
+time_needed(10, list.sum)
+time_needed(10, list.par.sum)
-// tail recursive version that searches
-// for a single solution
+// Mutable vs Immutable
+//======================
+//
+// Remember:
+// - no vars, no ++i, no +=
+// - no mutable data-structures (no Arrays, no ListBuffers)
-def search1T(games: List[String]): Option[String] = games match {
- case Nil => None
- case game::rest => {
- if (isDone(game)) Some(game)
- else {
- val cs = candidates(game, emptyPosition(game))
- search1T(cs.map(c => update(game, empty(game), c)) ::: rest)
- }
- }
+// But what the heck....let's try to count to 100,000 in parallel
+//
+// requires
+// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
+
+import scala.collection.parallel.CollectionConverters._
+
+// Demonstration of shared mutable state in a parallel loop: `cnt` is
+// incremented from inside a `.par` for-loop without any synchronisation,
+// so the value printed at the end can differ from 100000.
+def test() = {
+ var cnt = 0
+
+ for(i <- (1 to 100_000).par) cnt += 1
+
+ println(s"Should be 100000: $cnt")
}
-search1T(List(game3)).map(pretty)
-time_needed(10, search1T(List(game3)))
+test()
+
+// Or
+// Q: How many elements are in the intersection of
+// two sets?
+// A: IMPROPER WAY (mutable counter)
+
+// Counts the elements of A that also occur in B with a mutable counter.
+// The loop is deliberately sequential: iterating over `A.par` would turn
+// the unsynchronised `count += 1` into a data race and make the result
+// unreliable.
+def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
+  var count = 0
+  for (x <- A; if B contains x) count += 1
+  count
+}
+
+val A = (0 to 999).toSet
+val B = (0 to 999 by 4).toSet
+
+count_intersection(A, B)
+
+// but do not try to add .par to the for-loop above
-// game with multiple solutions
-val game3 = """.8...9743
- |.5...8.1.
- |.1.......
- |8....5...
- |...8.4...
- |...3....6
- |.......7.
- |.3.5...8.
- |9724...5.""".stripMargin.replaceAll("\\n", "")
+// proper parallel version: no mutable counter, the parallel
+// collection does the counting itself
+def count_intersection2(A: Set[Int], B: Set[Int]) : Int = {
+  val inB: Int => Boolean = B.contains
+  A.par.count(inB)
+}
-searchT(List(game3), Nil).map(pretty)
-search1T(List(game3)).map(pretty)
+count_intersection2(A, B)
-// Moral: Whenever a recursive function is resource-critical
-// (i.e. works with large recursion depth), then you need to
-// write it in tail-recursive fashion.
-//
-// Unfortuantely, Scala because of current limitations in
-// the JVM is not as clever as other functional languages. It can
-// only optimise "self-tail calls". This excludes the cases of
-// multiple functions making tail calls to each other. Well,
-// nothing is perfect.