// Scala Lecture 3
//=================
// last week:
// higher-order functions
// maps
// - recursion
// - Sudoku
// - string interpolations
// - Pattern-Matching
// A Recursive Web Crawler / Email Harvester
//===========================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.
import io.Source
import scala.util._
// gets the first 10K of a web-page
// Fetches the first 10,000 characters of a web page, decoded as ISO-8859-1.
//
// url: address of the page to download
// returns: up to 10,000 characters of the page; if anything non-fatal goes
//          wrong (malformed URL, connection problem, ...) a message is
//          printed and the empty string is returned instead
def get_page(url: String) : String = {
  // Using closes the Source once the prefix has been read (or reading failed);
  // like Try it only captures non-fatal exceptions.
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); "" }
}
// regex for URLs and emails
// http_pattern matches a double-quoted http/https URL; the match includes
// the surrounding quotes (they are removed later with unquote)
val http_pattern = """"https?://[^"]*"""".r
// email_pattern has three groups: local part, domain and a 2-6 character
// ending; only lower-case letters are accepted by the character classes
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
//test case:
//email_pattern.findAllIn
// ("foo bla christian@kcl.ac.uk 1234567").toList
// drops the first and last character from a string
// Removes the first and the last character of s (e.g. the quotes around a
// matched URL); strings shorter than two characters become "".
def unquote(s: String) = s.slice(1, s.length - 1)
// Collects every quoted http(s) link found in page, with the quotes
// stripped; duplicates collapse because the result is a Set.
def get_all_URLs(page: String): Set[String] = {
  val links = for (quoted <- http_pattern.findAllIn(page)) yield unquote(quoted)
  links.toSet
}
// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
// Naive crawler: prints the page being visited and then follows every link
// on it, until the remaining depth n reaches 0. Pages reachable by several
// paths are visited several times.
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val links = get_all_URLs(get_page(url))
    links.foreach(link => crawl(link, n - 1))
  }
}
// some starting URLs for the crawler
// entry page for both the crawler and the email harvester
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
// visit the start page and the pages it links to (depth 2);
// this downloads pages via get_page
crawl(startURL, 2)
// a primitive email harvester
// Email harvester: returns the addresses found on the page at url together
// with those found on all pages reachable from it within n - 1 further
// steps. Depth 0 yields the empty set.
def emails(url: String, n: Int) : Set[String] = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val found = email_pattern.findAllIn(page).toSet
    // one result set per link, merged afterwards
    val fromLinks = get_all_URLs(page).map(link => emails(link, n - 1)).flatten
    found ++ fromLinks
  }
  else Set()
}
// harvest addresses from the start page and the pages it links to (depth 2);
// this downloads pages via get_page
emails(startURL, 2)
// Sudoku
//========
// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search
// A puzzle is stored as one string: the nine rows are written one per line
// for readability, then the margins and newlines are removed so that the
// rows end up concatenated. '.' marks an empty cell.
val game0 = """.14.6.3..
|62...4..9
|.8..5.6..
|.6.2....3
|.7..1..5.
|5....9.6.
|..6.2..3.
|1..5...92
|..7.9.41.""".stripMargin.replaceAll("\\n", "")
// a position is an (x, y) pair: column first, then row (see emptyPosition)
type Pos = (Int, Int)
// character used for a cell that is not filled in yet
val EmptyValue = '.'
// number of cells per row; used to convert between (x, y) and string index
val MaxValue = 9
// Renders a game string as text: a leading newline followed by the game cut
// into chunks of MaxValue characters, one chunk per line.
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")
// show the start grid row by row
pretty(game0)
// the characters that may be placed into a cell
val allValues = "123456789".toList
// the nine row / column indices 0..8
val indexes = (0 to 8).toList
// Index of the first empty cell in the game string, or -1 if there is none.
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// A game is finished when no empty cell is left (empty reports -1 then).
def isDone(game: String) = empty(game) < 0
// Position of the first empty cell as (column, row), derived from its index
// in the flat game string.
def emptyPosition(game: String) : Pos = {
  val idx = empty(game)
  val column = idx % MaxValue
  val row = idx / MaxValue
  (column, row)
}
// The cells of row y, from left to right.
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game(y * MaxValue + col)
// The cells of column x, from top to bottom.
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * MaxValue)
//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
// The nine cells of the 3x3 box that contains pos, listed column by column.
def get_box(game: String, pos: Pos): List[Char] = {
  val (px, py) = pos
  // top-left corner of the box: round each coordinate down to a multiple of 3
  val left = (px / 3) * 3
  val top = (py / 3) * 3
  for {
    x <- (left until left + 3).toList
    y <- (top until top + 3).toList
  } yield game(x + y * MaxValue)
}
//get_box(game0, (3, 1))
// this is not mutable!!
// Returns a copy of game in which the cell at index pos holds value;
// the original string is left untouched.
def update(game: String, pos: Int, value: Char): String = {
  val changed = game.updated(pos, value)
  changed
}
// All characters already present in the column, the row and the box of pos
// (duplicates and empty markers included).
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  val inColumn = get_col(game, x)
  val inRow = get_row(game, y)
  val inBox = get_box(game, pos)
  inColumn ::: inRow ::: inBox
}
// The values from allValues that do not yet occur in the column, row or box
// of pos, i.e. the values that may still be placed there.
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.filterNot(v => taken.contains(v))
}
//candidates(game0, (0,0))
// Exhaustive depth-first search: fills the first empty cell with every
// candidate value and solves the resulting games recursively. Returns all
// solutions reachable from game (the game itself if it is already complete).
// Needs the .par extension from scala.collection.parallel.CollectionConverters
// to be in scope.
def search(game: String): List[String] = {
  if (!isDone(game)) {
    val slot = empty(game)
    val options = candidates(game, emptyPosition(game))
    options.par.flatMap(value => search(update(game, slot, value))).toList
  }
  else List(game)
}
// show the first puzzle and all solutions the search finds for it
pretty(game0)
search(game0).map(pretty)
// a second puzzle, in the same one-string encoding as game0
val game1 = """23.915...
|...2..54.
|6.7......
|..1.....9
|89.5.3.17
|5.....6..
|......9.5
|.16..7...
|...329..1""".stripMargin.replaceAll("\\n", "")
search(game1).map(pretty)
// a game that is in the hard category
val game2 = """8........
|..36.....
|.7..9.2..
|.5...7...
|....457..
|...1...3.
|..1....68
|..85...1.
|.9....4..""".stripMargin.replaceAll("\\n", "")
search(game2).map(pretty)
// game with multiple solutions
val game3 = """.8...9743
|.5...8.1.
|.1.......
|8....5...
|...8.4...
|...3....6
|.......7.
|.3.5...8.
|9724...5.""".stripMargin.replaceAll("\\n", "")
// print every solution on its own block of lines
search(game3).map(pretty).foreach(println)
// for measuring time
// Evaluates code i times and reports the total wall-clock time as a string
// of the form "<seconds> secs". code is passed by name, so it is
// re-evaluated on every round.
def time_needed[T](i: Int, code: => T) = {
  val begin = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - begin
  s"${elapsed / 1.0e9} secs"
}
// time two complete searches of the hard puzzle
time_needed(2, search(game2))
// concurrency
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
// import scala.collection.parallel.CollectionConverters._
// String Interpolations
//=======================
// Third power of n (ordinary Int arithmetic).
def cube(n: Int) : Int = {
  val square = n * n
  square * n
}
val n = 3
// plain string concatenation ...
println("The cube of " + n + " is " + cube(n) + ".")
// ... versus an s-interpolated string: $n inserts a value, ${...} an expression
println(s"The cube of $n is ${cube(n)}.")
// or even
println(s"The cube of $n is ${n * n * n}.")
// helpful for debugging purposes
//
// "The most effective debugging tool is still careful
// thought, coupled with judiciously placed print
// statements."
// — Brian W. Kernighan, in Unix for Beginners (1979)
// Greatest common divisor by Euclid's algorithm; every call announces its
// arguments, which makes the recursion visible.
def gcd_db(a: Int, b: Int) : Int = {
  println(s"Function called with $a and $b.")
  b match {
    case 0 => a
    case _ => gcd_db(b, a % b)
  }
}
gcd_db(48, 18)
// Recursion Again ;o)
//====================
// another well-known example: Towers of Hanoi
//=============================================
// Prints the instruction for moving the top disc from peg `from` to peg `to`.
def move(from: Char, to: Char) = {
  val instruction = "Move disc from " + from + " to " + to + "!"
  println(instruction)
}
// Towers of Hanoi: prints the moves that transfer n discs from peg `from`
// to peg `to`, using peg `via` as the spare one.
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    // park the n - 1 smaller discs on the spare peg
    hanoi(n - 1, from, to, via)
    // move the largest disc to its destination
    move(from, to)
    // bring the parked discs on top of it
    hanoi(n - 1, via, from, to)
  }
}
hanoi(4, 'A', 'B', 'C')
// Pattern Matching
//==================
// A powerful tool which has even landed in Java during
// the last few years (https://inside.java/2021/06/13/podcast-017/).
// ...Scala already has it for many years and the concept is
// older than your friendly lecturer, that is stone old ;o)
// The general schema:
//
// expression match {
// case pattern1 => expression1
// case pattern2 => expression2
// ...
// case patternN => expressionN
// }
// recall
// Length of a list, written with an explicit emptiness test.
def len(xs: List[Int]) : Int =
  if (xs.isEmpty) 0 else len(xs.tail) + 1
// Length of a list, this time by pattern matching on its shape.
def len(xs: List[Int]) : Int = xs match {
  case _ :: rest => len(rest) + 1
  case Nil => 0
}
// Applies f to every element of lst, keeping the order of the elements.
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
  lst match {
    case head :: rest => f(head) :: my_map_int(rest, f)
    case Nil => Nil
  }
// Applies f to the value inside opt, if there is one; None stays None.
def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] =
  opt match {
    case Some(value) => Some(f(value))
    case None => None
  }
my_map_option(None, x => x * x)
my_map_option(Some(8), x => x * x)
// you can also have cases combined
// Maps an English month name to a remark about its season; several literal
// patterns are combined with | in one case. Anything else is a wrong month.
def season(month: String) : String = month match {
  case "December" => "It's winter"
  case "January" | "February" => "It's unfortunately winter"
  case "March" | "April" | "May" => "It's spring"
  case "June" | "July" | "August" => "It's summer"
  case "September" | "October" | "November" => "It's autumn"
  case _ => "Wrong month"
}
// pattern-match on integers
// Fibonacci numbers with fib(0) = fib(1) = 1, by matching on the integer.
def fib(n: Int) : Int = n match {
  case 0 => 1
  case 1 => 1
  case _ => fib(n - 1) + fib(n - 2)
}
fib(10)
// pattern-match on results
// Silly: fizz buzz
// Fizz buzz: "fizz" for multiples of 3, "buzz" for multiples of 5,
// "fizz buzz" for multiples of both, the number itself otherwise.
def fizz_buzz(n: Int) : String = {
  val byThree = n % 3 == 0
  val byFive = n % 5 == 0
  if (byThree && byFive) "fizz buzz"
  else if (byThree) "fizz"
  else if (byFive) "buzz"
  else n.toString
}
(1 to 20).foreach(n => println(fizz_buzz(n)))
// guards in pattern-matching
// Describes a list; the cases are tried from top to bottom, and a case with
// a guard only applies when its condition holds.
def foo(xs: List[Int]) : String = xs match {
  case Nil => "this list is empty"
  case first :: _ if first % 2 == 0 =>
    "the first elemnt is even"
  case first :: second :: _ if first == second =>
    "this has two elemnts that are the same"
  case first :: others => s"this list is standard $first::$others"
}
foo(Nil)
foo(List(1,2,3))
foo(List(1,2))
foo(List(1,1,2,3))
foo(List(2,2,2,3))
// Trees
// Binary trees: a Leaf carries a number, a Node carries a label and two
// subtrees. The base class is sealed so that all variants are known here and
// pattern matches over a Tree can be checked for exhaustiveness.
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree
// some example trees
val lf = Leaf(20)
val tr = Node("foo", Leaf(10), Leaf(23))
val lst : List[Tree] = List(lf, tr)
// A fixed set of colours, one case object each. The base class is sealed so
// that the compiler knows the complete list of colours.
sealed abstract class Colour
case object Red extends Colour
case object Green extends Colour
case object Blue extends Colour
case object Yellow extends Colour
// Only Green counts as the favourite colour.
def fav_colour(c: Colour) : Boolean = c == Green
fav_colour(Blue)
// ... a tiny bit more useful: Roman Numerals
// The seven Roman digits; a Roman numeral is a list of them.
sealed abstract class RomanDigit
case object I extends RomanDigit
case object V extends RomanDigit
case object X extends RomanDigit
case object L extends RomanDigit
case object C extends RomanDigit
case object D extends RomanDigit
case object M extends RomanDigit
type RomanNumeral = List[RomanDigit]
// an example numeral; note that something like List(X,I,M,A) is not a
// RomanNumeral, because A is not one of the digits declared above
List(X,I,M)
/*
I -> 1
II -> 2
III -> 3
IV -> 4
V -> 5
VI -> 6
VII -> 7
VIII -> 8
IX -> 9
X -> 10
*/
// Converts a Roman numeral into its Int value. The subtractive pairs
// (CM, CD, XC, XL, IX, IV) are recognised first; every other digit simply
// contributes its own value. The numeral is not checked for validity.
def RomanNumeral2Int(rs: RomanNumeral): Int = {
  // value of a single digit standing on its own
  def digitValue(d: RomanDigit): Int = d match {
    case M => 1000
    case D => 500
    case C => 100
    case L => 50
    case X => 10
    case V => 5
    case I => 1
  }
  rs match {
    case Nil => 0
    case C :: M :: rest => 900 + RomanNumeral2Int(rest)
    case C :: D :: rest => 400 + RomanNumeral2Int(rest)
    case X :: C :: rest => 90 + RomanNumeral2Int(rest)
    case X :: L :: rest => 40 + RomanNumeral2Int(rest)
    case I :: X :: rest => 9 + RomanNumeral2Int(rest)
    case I :: V :: rest => 4 + RomanNumeral2Int(rest)
    case digit :: rest => digitValue(digit) + RomanNumeral2Int(rest)
  }
}
RomanNumeral2Int(List(I,V)) // 4
RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I)) // 6
RomanNumeral2Int(List(I,X)) // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
// Regular expressions as a data type. The base class is sealed so that
// functions matching on a Rexp can be checked for missing cases.
sealed abstract class Rexp
case object ZERO extends Rexp // matches nothing
case object ONE extends Rexp // matches the empty string
case class CHAR(c: Char) extends Rexp // matches a character c
case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
case class STAR(r: Rexp) extends Rexp // star
// Height of the syntax tree of a regular expression: atoms have depth 1,
// every constructor adds one level on top of its deepest argument.
def depth(r: Rexp) : Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case ALT(left, right) => 1 + math.max(depth(left), depth(right))
  case SEQ(left, right) => 1 + math.max(depth(left), depth(right))
  case STAR(inner) => 1 + depth(inner)
}
// expressions (essentially trees)
// Arithmetic expressions as trees. The base class is sealed so that the
// functions matching on an Exp can be checked for missing cases.
sealed abstract class Exp
case class N(n: Int) extends Exp // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp
case class Times(e1: Exp, e2: Exp) extends Exp
// Pretty-prints an expression in infix notation; every Plus and Times is
// wrapped in parentheses.
def string(e: Exp) : String = e match {
  case Plus(left, right) => s"(${string(left)} + ${string(right)})"
  case Times(left, right) => s"(${string(left)} * ${string(right)})"
  case N(value) => s"$value"
}
val e = Plus(N(9), Times(N(3), N(4)))
e.toString
println(string(e))
// Computes the value of an expression.
def eval(e: Exp) : Int = e match {
  case Plus(left, right) => eval(left) + eval(right)
  case Times(left, right) => eval(left) * eval(right)
  case N(value) => value
}
println(eval(e))
// simplification rules:
// e + 0, 0 + e => e
// e * 0, 0 * e => 0
// e * 1, 1 * e => e
//
// (....9 ....)
// Simplifies an expression bottom-up: both arguments are simplified first,
// then the rules for adding 0 and for multiplying by 0 or 1 are applied to
// the results.
def simp(e: Exp) : Exp = e match {
  case N(n) => N(n)
  case Plus(left, right) =>
    val l = simp(left)
    val r = simp(right)
    if (l == N(0)) r
    else if (r == N(0)) l
    else Plus(l, r)
  case Times(left, right) =>
    val l = simp(left)
    val r = simp(right)
    if (l == N(0) || r == N(0)) N(0)
    else if (l == N(1)) r
    else if (r == N(1)) l
    else Times(l, r)
}
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))
// String interpolations as patterns
val date = "2019-11-26"
// an s-interpolated string used as a pattern: splits date at the dashes and
// binds the three parts to year, month and day (all of them Strings)
val s"$year-$month-$day" = date
// Parses a date written as year-month-day, day/month/year or day.month.year.
//
// returns: Some((day, month, year)) if the string has one of the three shapes
//          and all three parts are integers; None otherwise (in particular
//          when a part is not a number, instead of throwing an exception)
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // converts the three textual parts; None as soon as one is not an Int
  def toDate(day: String, month: String, year: String): Option[(Int, Int, Int)] =
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)

  date match {
    case s"$year-$month-$day" => toDate(day, month, year)
    case s"$day/$month/$year" => toDate(day, month, year)
    case s"$day.$month.$year" => toDate(day, month, year)
    case _ => None
  }
}
parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")
// Map type (upper-case)
//=======================
// Note the difference between map and Map
// a Map from Int keys to String values, built from key -> value pairs
val m = Map(1 -> "one", 2 -> "two", 10 -> "many")
// the same Map obtained from a list of pairs
List((1, "one"), (2, "two"), (10, "many")).toMap
// get returns an Option: Some(value) for a known key, None otherwise
m.get(1)
m.get(4)
// getOrElse returns the value, or the given default for an unknown key
m.getOrElse(1, "")
m.getOrElse(4, "")
// adding a binding for an existing key gives a new Map with that key
// replaced; m itself is unchanged
val new_m = m + (10 -> "ten")
new_m.get(10)
// a for-comprehension over a Map iterates over its (key, value) pairs
val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)
// groupBy on a List produces a Map from each key to the elements with that key
val lst = List("one", "two", "three", "four", "five")
lst.groupBy(_.head)
lst.groupBy(_.length)
lst.groupBy(_.length).get(3)
val grps = lst.groupBy(_.length)
// the distinct lengths that occur in lst
grps.keySet
// Tail recursion
//================
// Factorial, written so that the multiplication happens after the recursive
// call returns (i.e. not tail-recursive).
def fact(n: BigInt): BigInt = {
  if (n != 0) n * fact(n - 1)
  else 1
}
// a small argument is fine, a large one needs one stack frame per call
fact(10) //ok
fact(10000) // produces a stackoverflow
// Factorial with an accumulator: the recursive call is the last thing the
// function does, and acc carries the product computed so far. Call it with
// acc = 1 to obtain n!.
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}
// start the accumulator at 1
factT(10, 1)
// a much larger argument than the fact(10000) call above
println(factT(100000, 1))
// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec
// Same accumulator-based factorial; the annotation makes the compiler reject
// the definition if the recursive call is not in tail position.
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}
// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions
// Length of a list; the addition happens after the recursive call returns,
// so this version is not tail-recursive.
def length(xs: List[Int]) : Int =
  if (xs.isEmpty) 0 else length(xs.tail) + 1
// Tail-recursive length: acc counts the elements seen so far, so the result
// is acc plus the number of elements in xs. Call it with acc = 0.
@tailrec
def lengthT(xs: List[Int], acc : Int) : Int = {
  if (xs.isEmpty) acc
  else lengthT(xs.tail, acc + 1)
}
// count a list of ten million elements, starting the accumulator at 0
lengthT(List.fill(10000000)(1), 0)
// Aside: concurrency
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
// ordinary (sequential) loop over the range
for (n <- (1 to 10)) println(n)
// this import provides the .par method used below
import scala.collection.parallel.CollectionConverters._
// the same loop over the parallel version of the range
for (n <- (1 to 10).par) println(n)
// for measuring time
// Evaluates code exactly n times and returns the total wall-clock time in
// seconds. code is passed by name, so it is re-evaluated on every round.
//
// n: number of evaluations (nothing is evaluated for n <= 0)
// returns: elapsed seconds as a Double
def time_needed[T](n: Int, code: => T) = {
  val start = System.nanoTime()
  // 1 to n, not 0 to n: the latter would run the code n + 1 times
  for (_ <- 1 to n) code
  val end = System.nanoTime()
  (end - start) / 1.0e9
}
// ten million Longs to work on
val list = (1L to 10_000_000L).toList
// the same transformation, timed on the plain list and on its parallel version
time_needed(10, for (n <- list) yield n + 42)
time_needed(10, for (n <- list.par) yield n + 42)
// ...but par does not make everything faster
list.sum
list.par.sum
// summing, timed on the plain list and on its parallel version
time_needed(10, list.sum)
time_needed(10, list.par.sum)
// Mutable vs Immutable
//======================
//
// Remember:
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)
// But what the heck....lets try to count to 1 Mio in parallel
//
// requires
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
import scala.collection.parallel.CollectionConverters._
// Demonstration of why mutable state and parallelism do not mix: a single
// var is incremented from the body of a loop over a parallel range and the
// final value is printed next to the expected one.
def test() = {
var cnt = 0
// cnt += 1 is an unsynchronised read-then-write on a shared variable, so
// with .par increments may get lost and the printed count can be too small
for(i <- (1 to 100_000).par) cnt += 1
println(s"Should be 100000: $cnt")
}
test()
// Or
// Q: Count how many elements are in the intersections of
// two sets?
// A: IMPROPER WAY (mutable counter)
// Counts how many elements of A also occur in B, using a mutable counter
// (shown as the improper, imperative way of doing it).
//
// A, B: the two sets to intersect
// returns: the number of elements contained in both sets
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var count = 0
  // the loop has to stay sequential: count += 1 is an unsynchronised update
  // of a shared variable, so iterating over A.par could lose increments
  for (x <- A; if B contains x) count += 1
  count
}
// the numbers 0..999 and the multiples of 4 among them
val A = (0 to 999).toSet
val B = (0 to 999 by 4).toSet
count_intersection(A, B)
// but do not try to add .par to the for-loop above
// proper parallel version
// Counts the elements of A that also occur in B without any mutable state:
// the parallel collection's own count method does the counting.
def count_intersection2(A: Set[Int], B: Set[Int]) : Int = {
  val parallelA = A.par
  parallelA.count(elem => B.contains(elem))
}
count_intersection2(A, B)