// Scala Lecture 3
//=================
+ −
// last week:+ −
// higher-order functions+ −
// maps+ −
+ −
// - recursion+ −
// - Sudoku+ −
// - string interpolations+ −
// - Pattern-Matching+ −
+ −
// A Recursive Web Crawler / Email Harvester+ −
//===========================================+ −
//+ −
// the idea is to look for links using the+ −
// regular expression "https?://[^"]*" and for+ −
// email addresses using another regex.+ −
+ −
import io.Source+ −
import scala.util._+ −
+ −
// gets the first 10K of a web-page+ −
// Fetches at most the first 10000 characters of the page at `url`,
// decoded as ISO-8859-1. Any failure (malformed URL, network problem, ...)
// is reported on stdout and results in the empty string.
def get_page(url: String) : String = {
  // Using closes the Source (and its underlying stream) once the
  // characters have been read, even if reading throws
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); "" }
}
+ −
// regex for URLs and emails+ −
// Matches a double-quoted http/https URL; the quotes are part of the match.
val http_pattern =
  new scala.util.matching.Regex(""""https?://[^"]*"""")
// Matches an email address; the three groups are the part before the @,
// the domain, and the final suffix of 2 to 6 characters.
val email_pattern =
  new scala.util.matching.Regex("""([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""")
+ −
//test case:+ −
//email_pattern.findAllIn+ −
// ("foo bla christian@kcl.ac.uk 1234567").toList+ −
+ −
+ −
// drops the first and last character from a string+ −
// Removes the first and the last character of `s` (e.g. surrounding quotes);
// strings shorter than two characters become the empty string.
def unquote(s: String) = s.slice(1, s.length - 1)
+ −
// Collects every quoted URL found in `page` (via http_pattern) and
// returns them, without the quotes, as a set.
def get_all_URLs(page: String): Set[String] = {
  val quoted = http_pattern.findAllIn(page)
  (for (q <- quoted) yield unquote(q)).toSet
}
+ −
// naive version of crawl - searches until a given depth,+ −
// visits pages potentially more than once+ −
// Prints `url` and then recursively crawls every link found on that page,
// with `n` as the remaining depth; nothing happens once n reaches 0.
// Pages are not remembered, so a page can be visited more than once.
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val links = get_all_URLs(get_page(url))
    links.foreach(link => crawl(link, n - 1))
  }
}
+ −
// some starting URLs for the crawler+ −
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""+ −
+ −
crawl(startURL, 2)+ −
+ −
+ −
// a primitive email harvester+ −
// Returns the email addresses found on the page at `url` together with
// those found on linked pages, following links with `n` as the remaining
// depth (n == 0 yields the empty set).
def emails(url: String, n: Int) : Set[String] = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val found = email_pattern.findAllIn(page).toSet
    // harvest the linked pages with one level less of depth
    val nested = get_all_URLs(page).flatMap(link => emails(link, n - 1))
    found ++ nested
  }
  else Set.empty
}
+ −
emails(startURL, 2)+ −
+ −
+ −
+ −
// Sudoku + −
//========+ −
+ −
// THE POINT OF THIS CODE IS NOT TO BE SUPER+ −
// EFFICIENT AND FAST, just explaining exhaustive+ −
// depth-first search+ −
+ −
+ −
val game0 = """.14.6.3..+ −
|62...4..9+ −
|.8..5.6..+ −
|.6.2....3+ −
|.7..1..5.+ −
|5....9.6.+ −
|..6.2..3.+ −
|1..5...92+ −
|..7.9.41.""".stripMargin.replaceAll("\\n", "")+ −
+ −
// A board position as (column, row).
type Pos = (Int, Int)
// Character that marks an unfilled cell.
val EmptyValue = '.'
// Width of a row (and largest digit).
val MaxValue = 9

// Renders a board string with one row of MaxValue characters per line,
// preceded by a newline.
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")
+ −
pretty(game0)+ −
+ −
+ −
// the digits that can be placed into a cell
val allValues = ('1' to '9').toList
// valid row and column indices
val indexes = List.range(0, 9)
+ −
// Index of the first unfilled cell in the board string, or -1 if none.
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// A board is finished when it has no unfilled cell left.
def isDone(game: String) = empty(game) < 0
// (column, row) of the first unfilled cell.
def emptyPosition(game: String) : Pos = {
  val idx = empty(game)
  val column = idx % MaxValue
  val row = idx / MaxValue
  (column, row)
}
+ −
// All cells of row `y`, from left to right.
def get_row(game: String, y: Int) = {
  val start = y * MaxValue
  for (col <- indexes) yield game(start + col)
}
// All cells of column `x`, from top to bottom.
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(row * MaxValue + x)
+ −
//get_row(game0, 0)+ −
//get_row(game0, 1)+ −
//get_col(game0, 0)+ −
+ −
// All cells of the 3x3 box that contains position `pos`,
// listed column by column.
def get_box(game: String, pos: Pos): List[Char] = {
  val (px, py) = pos
  // top-left corner of the box: round each coordinate down to a multiple of 3
  val left = px - px % 3
  val top = py - py % 3
  val rows = List.range(top, top + 3)
  for (x <- List.range(left, left + 3); y <- rows)
    yield game(x + y * MaxValue)
}
+ −
+ −
//get_box(game0, (3, 1))+ −
+ −
+ −
// this is not mutable!!+ −
// Returns a copy of `game` whose character at index `pos` is `value`;
// the argument string itself is left untouched.
def update(game: String, pos: Int, value: Char): String = {
  val copy = new java.lang.StringBuilder(game.length).append(game)
  copy.setCharAt(pos, value)
  copy.toString
}
+ −
// The characters already present in the column, the row and the box
// of `pos` (in that order, duplicates included).
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  List(get_col(game, x), get_row(game, y), get_box(game, pos)).flatten
}
+ −
// The digits that can still be placed at `pos`, i.e. all digits that do not
// yet occur in its column, row or box.
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.filterNot(taken.contains)
}
+ −
//candidates(game0, (0,0))+ −
+ −
+ −
// Exhaustive depth-first search: returns every completed board reachable
// from `game` by filling the first unfilled cell with each candidate digit.
// NOTE: .par needs scala.collection.parallel.CollectionConverters._ in scope.
def search(game: String): List[String] = {
  if (!isDone(game)) {
    val slot = empty(game)
    val options = candidates(game, emptyPosition(game))
    // try all candidates for the slot in parallel and collect the solutions
    options.par.map(c => search(update(game, slot, c))).flatten.toList
  }
  else List(game)
}
+ −
pretty(game0)+ −
search(game0).map(pretty)+ −
+ −
val game1 = """23.915...+ −
|...2..54.+ −
|6.7......+ −
|..1.....9+ −
|89.5.3.17+ −
|5.....6..+ −
|......9.5+ −
|.16..7...+ −
|...329..1""".stripMargin.replaceAll("\\n", "")+ −
+ −
search(game1).map(pretty)+ −
+ −
// a game that is in the hard category+ −
val game2 = """8........+ −
|..36.....+ −
|.7..9.2..+ −
|.5...7...+ −
|....457..+ −
|...1...3.+ −
|..1....68+ −
|..85...1.+ −
|.9....4..""".stripMargin.replaceAll("\\n", "")+ −
+ −
search(game2).map(pretty)+ −
+ −
// game with multiple solutions+ −
val game3 = """.8...9743+ −
|.5...8.1.+ −
|.1.......+ −
|8....5...+ −
|...8.4...+ −
|...3....6+ −
|.......7.+ −
|.3.5...8.+ −
|9724...5.""".stripMargin.replaceAll("\\n", "")+ −
+ −
search(game3).map(pretty).foreach(println)+ −
+ −
// for measuring time+ −
// Evaluates `code` i times and returns the total elapsed
// wall-clock time, formatted as "<seconds> secs".
def time_needed[T](i: Int, code: => T) = {
  val before = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - before
  s"${elapsed / 1.0e9} secs"
}
+ −
time_needed(2, search(game2))+ −
+ −
+ −
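// concurrency
// search above calls .par, which needs the parallel-collections jar:
//   scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
// and this import in scope before search is defined:
//   import scala.collection.parallel.CollectionConverters._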
+ −
+ −
+ −
+ −
// String Interpolations+ −
//=======================+ −
+ −
// n to the power of three
def cube(n: Int) : Int = {
  val square = n * n
  square * n
}
+ −
val n = 3+ −
println("The cube of " + n + " is " + cube(n) + ".")+ −
+ −
println(s"The cube of $n is ${cube(n)}.")+ −
+ −
// or even+ −
+ −
println(s"The cube of $n is ${n * n * n}.")+ −
+ −
// helpful for debugging purposes+ −
//+ −
// "The most effective debugging tool is still careful + −
// thought, coupled with judiciously placed print + −
// statements."+ −
// — Brian W. Kernighan, in Unix for Beginners (1979)+ −
+ −
+ −
// Euclid's gcd algorithm that prints its arguments on every call
// (for illustrating print-debugging).
def gcd_db(a: Int, b: Int) : Int = {
  println(s"Function called with $a and $b.")
  b match {
    case 0 => a
    case _ => gcd_db(b, a % b)
  }
}
+ −
gcd_db(48, 18)+ −
+ −
+ −
+ −
+ −
// Recursion Again ;o)+ −
//====================+ −
+ −
+ −
// another well-known example: Towers of Hanoi+ −
//=============================================+ −
+ −
// prints the instruction for moving a single disc
def move(from: Char, to: Char) =
  println("Move disc from " + from + " to " + to + "!")
+ −
// Prints the moves that transfer `n` discs from peg `from` to peg `to`,
// using peg `via` as intermediate storage.
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    // park the n - 1 smaller discs on the spare peg
    hanoi(n - 1, from, to, via)
    // move the largest disc to the target
    move(from, to)
    // bring the smaller discs from the spare peg onto the target
    hanoi(n - 1, via, from, to)
  }
}
+ −
hanoi(4, 'A', 'B', 'C')+ −
+ −
+ −
+ −
// Pattern Matching+ −
//==================+ −
+ −
// A powerful tool which has even landed in Java during
// the last few years (https://inside.java/2021/06/13/podcast-017/).
// ...Scala has had it for many years, and the concept is
// older than your friendly lecturer, that is stone old ;o)
+ −
// The general schema:+ −
//+ −
// expression match {+ −
// case pattern1 => expression1+ −
// case pattern2 => expression2+ −
// ...+ −
// case patternN => expressionN+ −
// }+ −
+ −
+ −
// recall+ −
// length of a list, written with if-then-else and tail
def len(xs: List[Int]) : Int =
  if (xs.isEmpty) 0 else len(xs.tail) + 1
+ −
// length of a list, written with pattern matching
def len(xs: List[Int]) : Int = xs match {
  case _ :: rest => len(rest) + 1
  case Nil => 0
}
+ −
+ −
// applies `f` to every element of `lst`, keeping the order
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
  lst match {
    case head :: rest => f(head) :: my_map_int(rest, f)
    case Nil => Nil
  }
+ −
// applies `f` to the value inside `opt`, if there is one
def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] =
  opt match {
    case Some(value) => Some(f(value))
    case None => None
  }
+ −
my_map_option(None, x => x * x)+ −
my_map_option(Some(8), x => x * x)+ −
+ −
+ −
// you can also have cases combined+ −
// Maps an (English, capitalised) month name to a sentence about its
// season; any other string gives "Wrong month".
def season(month: String) : String = {
  val spring = Set("March", "April", "May")
  val summer = Set("June", "July", "August")
  val autumn = Set("September", "October", "November")
  if (spring.contains(month)) "It's spring"
  else if (summer.contains(month)) "It's summer"
  else if (autumn.contains(month)) "It's autumn"
  else if (month == "December") "It's winter"
  else if (month == "January" || month == "February") "It's unfortunately winter"
  else "Wrong month"
}
+ −
// pattern-match on integers+ −
+ −
// Fibonacci numbers with fib(0) = fib(1) = 1
def fib(n: Int) : Int = n match {
  case 0 => 1
  case 1 => 1
  case k => fib(k - 2) + fib(k - 1)
}
+ −
fib(10)+ −
+ −
// pattern-match on results+ −
+ −
// Silly: fizz buzz+ −
// "fizz" for multiples of 3, "buzz" for multiples of 5, "fizz buzz" for
// multiples of both, otherwise the number itself as a string
def fizz_buzz(n: Int) : String = {
  val byThree = n % 3 == 0
  val byFive = n % 5 == 0
  if (byThree && byFive) "fizz buzz"
  else if (byThree) "fizz"
  else if (byFive) "buzz"
  else n.toString
}
+ −
for (n <- 1 to 20) + −
println(fizz_buzz(n))+ −
+ −
// guards in pattern-matching+ −
+ −
// Describes a list; the cases are tried from top to bottom and the
// guards (the `if` parts) restrict when a case applies.
def foo(xs: List[Int]) : String = xs match {
  case Nil => "this list is empty"
  case first :: _ if first % 2 == 0 =>
    "the first elemnt is even"
  case first :: second :: _ if first == second =>
    "this has two elemnts that are the same"
  case hd :: tl => "this list is standard " + hd + "::" + tl
}
+ −
foo(Nil)+ −
foo(List(1,2,3))+ −
foo(List(1,2))+ −
foo(List(1,1,2,3))+ −
foo(List(2,2,2,3))+ −
+ −
+ −
// Trees+ −
+ −
// Binary trees: a leaf carries a number, a node a label and two subtrees.
// Sealed, so all cases are known and pattern matches over Tree can be
// checked for exhaustiveness.
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree
+ −
val lf = Leaf(20)+ −
val tr = Node("foo", Leaf(10), Leaf(23))+ −
+ −
val lst : List[Tree] = List(lf, tr)+ −
+ −
+ −
// A fixed set of colours. Sealed, so the compiler knows that these four
// case objects are the only colours.
sealed abstract class Colour
case object Red extends Colour
case object Green extends Colour
case object Blue extends Colour
case object Yellow extends Colour
+ −
+ −
// only Green counts as the favourite colour
def fav_colour(c: Colour) : Boolean = c == Green
+ −
fav_colour(Blue)+ −
+ −
+ −
// ... a tiny bit more useful: Roman Numerals+ −
+ −
// the seven Roman digits
sealed trait RomanDigit
case object I extends RomanDigit
case object V extends RomanDigit
case object X extends RomanDigit
case object L extends RomanDigit
case object C extends RomanDigit
case object D extends RomanDigit
case object M extends RomanDigit

// a Roman numeral is a list of digits, most significant first
type RomanNumeral = List[RomanDigit]
+ −
List(X,I,M,A)+ −
+ −
/*+ −
I -> 1+ −
II -> 2+ −
III -> 3+ −
IV -> 4+ −
V -> 5+ −
VI -> 6+ −
VII -> 7+ −
VIII -> 8+ −
IX -> 9+ −
X -> 10+ −
*/+ −
+ −
// Converts a Roman numeral into its integer value by consuming the digits
// from the front. The subtractive pairs (CM, CD, XC, XL, IX, IV) are listed
// before the single-digit case of their first digit, so they win.
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
  case Nil => 0
  case M :: rest      => RomanNumeral2Int(rest) + 1000
  case C :: M :: rest => RomanNumeral2Int(rest) + 900
  case D :: rest      => RomanNumeral2Int(rest) + 500
  case C :: D :: rest => RomanNumeral2Int(rest) + 400
  case C :: rest      => RomanNumeral2Int(rest) + 100
  case X :: C :: rest => RomanNumeral2Int(rest) + 90
  case L :: rest      => RomanNumeral2Int(rest) + 50
  case X :: L :: rest => RomanNumeral2Int(rest) + 40
  case X :: rest      => RomanNumeral2Int(rest) + 10
  case I :: X :: rest => RomanNumeral2Int(rest) + 9
  case V :: rest      => RomanNumeral2Int(rest) + 5
  case I :: V :: rest => RomanNumeral2Int(rest) + 4
  case I :: rest      => RomanNumeral2Int(rest) + 1
}
+ −
RomanNumeral2Int(List(I,V)) // 4+ −
RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)+ −
RomanNumeral2Int(List(V,I)) // 6+ −
RomanNumeral2Int(List(I,X)) // 9+ −
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979+ −
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017+ −
+ −
+ −
// Regular expressions as a tree-like datatype. Sealed, so that matches
// over Rexp can be checked for exhaustiveness.
sealed abstract class Rexp
case object ZERO extends Rexp                    // matches nothing
case object ONE extends Rexp                     // matches the empty string
case class CHAR(c: Char) extends Rexp            // matches a character c
case class ALT(r1: Rexp, r2: Rexp) extends Rexp  // alternative
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  // sequence
case class STAR(r: Rexp) extends Rexp            // star
+ −
// Depth of a regular expression seen as a tree: the atoms have depth 1,
// every constructor adds 1 to the depth of its deepest argument.
def depth(r: Rexp) : Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case ALT(left, right) => math.max(depth(left), depth(right)) + 1
  case SEQ(left, right) => math.max(depth(left), depth(right)) + 1
  case STAR(inner) => depth(inner) + 1
}
+ −
+ −
+ −
+ −
+ −
// expressions (essentially trees)+ −
+ −
// Arithmetic expressions. Sealed, so that matches over Exp can be
// checked for exhaustiveness.
sealed abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp
case class Times(e1: Exp, e2: Exp) extends Exp
+ −
// renders an expression in infix notation, fully parenthesised
def string(e: Exp) : String = e match {
  case N(value) => value.toString
  case Plus(left, right) => "(" + string(left) + " + " + string(right) + ")"
  case Times(left, right) => "(" + string(left) + " * " + string(right) + ")"
}
+ −
val e = Plus(N(9), Times(N(3), N(4)))+ −
e.toString+ −
println(string(e))+ −
+ −
// calculates the integer value of an expression
def eval(e: Exp) : Int = e match {
  case Times(left, right) => eval(left) * eval(right)
  case Plus(left, right) => eval(left) + eval(right)
  case N(value) => value
}
+ −
println(eval(e))+ −
+ −
// simplification rules:+ −
// e + 0, 0 + e => e + −
// e * 0, 0 * e => 0+ −
// e * 1, 1 * e => e+ −
//+ −
// (....9 ....)+ −
+ −
// Simplifies an expression bottom-up: both arguments are simplified first,
// then the rules  e + 0, 0 + e => e;  e * 0, 0 * e => 0;  e * 1, 1 * e => e
// are applied to the results.
def simp(e: Exp) : Exp = e match {
  case N(n) => N(n)
  case Plus(left, right) =>
    val ls = simp(left)
    val rs = simp(right)
    (ls, rs) match {
      case (N(0), other) => other
      case (other, N(0)) => other
      case _ => Plus(ls, rs)
    }
  case Times(left, right) =>
    val ls = simp(left)
    val rs = simp(right)
    (ls, rs) match {
      case (N(0), _) => N(0)
      case (_, N(0)) => N(0)
      case (N(1), other) => other
      case (other, N(1)) => other
      case _ => Times(ls, rs)
    }
}
+ −
+ −
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))+ −
println(string(e2))+ −
println(string(simp(e2)))+ −
+ −
+ −
+ −
// String interpolations as patterns+ −
+ −
val date = "2019-11-26"+ −
val s"$year-$month-$day" = date+ −
+ −
// Parses dates of the form year-month-day, day/month/year or day.month.year.
// Returns Some((day, month, year)) if the string has one of these shapes and
// all three fields are integers, and None otherwise (instead of throwing a
// NumberFormatException on non-numeric fields).
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // first split the string into its (day, month, year) fields ...
  val fields = date match {
    case s"$year-$month-$day" => Some((day, month, year))
    case s"$day/$month/$year" => Some((day, month, year))
    case s"$day.$month.$year" => Some((day, month, year))
    case _ => None
  }
  // ... then convert the fields; any field that is not a number gives None
  fields.flatMap { case (day, month, year) =>
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)
  }
}
+ −
parse_date("2019-11-26")+ −
parse_date("26/11/2019")+ −
parse_date("26.11.2019")+ −
+ −
+ −
+ −
+ −
// Map type (upper-case)+ −
//=======================+ −
+ −
// Note the difference between map and Map+ −
+ −
val m = Map(1 -> "one", 2 -> "two", 10 -> "many")+ −
+ −
List((1, "one"), (2, "two"), (10, "many")).toMap+ −
+ −
m.get(1)+ −
m.get(4)+ −
+ −
m.getOrElse(1, "")+ −
m.getOrElse(4, "")+ −
+ −
val new_m = m + (10 -> "ten")+ −
+ −
new_m.get(10)+ −
+ −
val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)+ −
+ −
+ −
+ −
// groupBy function on Lists (the result is a Map)
val lst = List("one", "two", "three", "four", "five")+ −
lst.groupBy(_.head)+ −
+ −
lst.groupBy(_.length)+ −
+ −
lst.groupBy(_.length).get(3)+ −
+ −
val grps = lst.groupBy(_.length)+ −
grps.keySet+ −
+ −
+ −
+ −
+ −
// Tail recursion+ −
//================+ −
+ −
// Factorial, directly recursive (not tail-recursive): the multiplication
// happens after the recursive call returns.
def fact(n: BigInt): BigInt =
  if (n != 0) n * fact(n - 1) else 1
+ −
fact(10) //ok+ −
fact(10000) // produces a stackoverflow+ −
+ −
+ −
// Factorial with an accumulator: `acc` carries the product computed so
// far, so the recursive call is the last thing the function does.
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc
+ −
factT(10, 1)+ −
println(factT(100000, 1))+ −
+ −
// there is a flag for ensuring a function is tail recursive+ −
import scala.annotation.tailrec+ −
+ −
// Same accumulator-based factorial; @tailrec makes the compiler reject
// the definition if the recursive call is not in tail position.
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = n match {
  case zero if zero == 0 => acc
  case _ => factT(n - 1, n * acc)
}
+ −
+ −
+ −
// for tail-recursive functions the Scala compiler+ −
// generates loop-like code, which does not need+ −
// to allocate stack-space in each recursive+ −
// call; Scala can do this only for tail-recursive+ −
// functions+ −
+ −
// length of a list; the addition happens after the recursive call,
// so this version is not tail-recursive
def length(xs: List[Int]) : Int =
  if (xs.isEmpty) 0 else length(xs.tail) + 1
+ −
// tail-recursive length: `acc` counts the elements seen so far
@tailrec
def lengthT(xs: List[Int], acc : Int) : Int =
  if (xs.isEmpty) acc else lengthT(xs.tail, acc + 1)
+ −
lengthT(List.fill(10000000)(1), 0)+ −
+ −
+ −
+ −
+ −
+ −
+ −
+ −
// Aside: concurrency + −
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar + −
+ −
for (n <- (1 to 10)) println(n)+ −
+ −
import scala.collection.parallel.CollectionConverters._+ −
+ −
for (n <- (1 to 10).par) println(n)+ −
+ −
+ −
// for measuring time+ −
// Evaluates `code` exactly n times and returns the total elapsed
// wall-clock time in seconds.
def time_needed[T](n: Int, code: => T) = {
  val start = System.nanoTime()
  // 1 to n (not 0 to n), so that the code runs n times rather than n + 1
  for (i <- (1 to n)) code
  val end = System.nanoTime()
  (end - start) / 1.0e9
}
+ −
val list = (1L to 10_000_000L).toList+ −
time_needed(10, for (n <- list) yield n + 42)+ −
time_needed(10, for (n <- list.par) yield n + 42)+ −
+ −
// ...but par does not make everything faster+ −
+ −
list.sum+ −
list.par.sum+ −
+ −
time_needed(10, list.sum)+ −
time_needed(10, list.par.sum)+ −
+ −
+ −
// Mutable vs Immutable+ −
//======================+ −
//+ −
// Remember:+ −
// - no vars, no ++i, no +=+ −
// - no mutable data-structures (no Arrays, no ListBuffers)+ −
+ −
// But what the heck....let's try to count to 100,000 in parallel
//
// requires
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
+ −
import scala.collection.parallel.CollectionConverters._+ −
+ −
// Increments a shared mutable counter from a parallel loop and prints
// the result together with the value one would expect.
def test() = {
  var cnt = 0

  // the increments are not synchronised between the parallel workers
  (1 to 100_000).par.foreach(_ => cnt += 1)

  println(s"Should be 100000: $cnt")
}
+ −
test()+ −
+ −
// Or+ −
// Q: Count how many elements are in the intersections of + −
// two sets?+ −
// A; IMPROPER WAY (mutable counter)+ −
+ −
// Counts the elements of A that are also in B using a mutable counter
// (the improper, imperative way). The loop must stay sequential: the
// counter is not synchronised, so iterating over A.par would lose updates
// and return wrong counts.
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var count = 0
  for (x <- A; if B contains x) count += 1
  count
}
+ −
val A = (0 to 999).toSet+ −
val B = (0 to 999 by 4).toSet+ −
+ −
count_intersection(A, B)+ −
+ −
// but do not try to add .par to the for-loop above+ −
+ −
+ −
// proper parallel version
// Counts the elements of A that are also in B without any mutable state;
// the counting is done by the parallel collection itself.
def count_intersection2(A: Set[Int], B: Set[Int]) : Int = {
  val parallelA = A.par
  parallelA.count(B.contains)
}
+ −
count_intersection2(A, B)+ −
+ −
+ −