// Scala Lecture 3
//=================
// - last week
//
// option type
// higher-order function
// Recursion Again ;o)
//====================
// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.
import io.Source
import scala.util._
// Fetches (at most) the first 10K characters of a web-page.
//
// The page is decoded as ISO-8859-1. Any non-fatal problem (malformed
// URL, unreachable host, ...) is reported on stdout and yields the
// empty string, so that a crawl can simply carry on.
def get_page(url: String) : String = {
  Try {
    val src = Source.fromURL(url)("ISO-8859-1")
    // always release the underlying stream, even if reading
    // fails half-way through
    try src.take(10000).mkString finally src.close()
  }.getOrElse { println(s" Problem with: $url"); "" }
}
// regular expressions for double-quoted http(s)-URLs and for
// (lower-case) email addresses
val http_pattern =
  new scala.util.matching.Regex(""""https?://[^"]*"""")
val email_pattern =
  new scala.util.matching.Regex("""([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""")
// val s = "foo bla christian@kcl.ac.uk 1234567"
// email_pattern.findAllIn(s).toList
// strips the first and the last character (e.g. surrounding quotes);
// strings with fewer than two characters become empty
def unquote(s: String) = s.slice(1, s.length - 1)
// all distinct URLs that occur (in double quotes) on a page
def get_all_URLs(page: String): Set[String] = {
  val quoted = http_pattern.findAllIn(page)
  (for (q <- quoted) yield unquote(q)).toSet
}
// naive crawler: follows links up to depth n and reports every
// visit; a page reachable via several links is visited repeatedly
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    get_all_URLs(get_page(url)).foreach(link => crawl(link, n - 1))
  }
}
// starting URL for the crawler; with depth 2 the call below visits
// the start page and every page it links to
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
crawl(startURL, 2)
// primitive email harvester: collects the email addresses found on
// the page at url and, recursively, on the pages linked from it
// (n is the remaining depth; every visit is reported on stdout)
def emails(url: String, n: Int) : Set[String] = {
  if (n == 0) Set.empty[String]
  else {
    println(s" Visiting: $n $url")
    val content = get_page(url)
    val found = email_pattern.findAllIn(content).toSet
    val linked = get_all_URLs(content).flatMap(link => emails(link, n - 1))
    found ++ linked
  }
}
emails(startURL, 2)
// if we want to explore the internet "deeper", then we
// first have to parallelise the request of webpages:
//
// scala -cp scala-parallel-collections_2.13-0.2.0.jar
// import scala.collection.parallel.CollectionConverters._
// another well-known example
//============================
// prints a single disc move
def move(from: Char, to: Char) = {
  val msg = s"Move disc from $from to $to!"
  println(msg)
}
// Towers of Hanoi: prints the moves that transfer n discs from
// peg `from` to peg `to`, using `via` as the intermediate peg
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit =
  n match {
    case 0 => ()
    case _ =>
      hanoi(n - 1, from, to, via)   // park the n - 1 smaller discs on via
      move(from, to)                // move the largest disc
      hanoi(n - 1, via, from, to)   // put the smaller discs back on top
  }
hanoi(4, 'A', 'B', 'C')
// Jumping Towers
//================
// the first n non-empty suffixes of xs, longest first
// (for n = 1 this is just xs itself)
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)
moves(List(5,1,0), 1)
moves(List(5,1,0), 2)
moves(List(5,1,0), 5)
// checks whether a jump tour exists at all: the head of the list
// says how far we may jump at most; a tour exists if that jump
// leaves the list or if a tour exists from a reachable field
def search(xs: List[Int]) : Boolean =
  if (xs.isEmpty) true
  else {
    val step = xs.head
    val rest = xs.tail
    rest.length < step || moves(rest, step).exists(tour => search(tour))
  }
search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))
// generates *all* jump tours
// if we are only interested in the shortest one, we could
// short-circuit the calculation and only return List(x) in
// the case where xs.length < x, because no tour can be shorter
// than 1
//
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case step :: rest =>
    // tours that continue from one of the fields reachable in this jump
    val longer = moves(rest, step).flatMap(next => jumps(next).map(step :: _))
    // the jump may also leave the list directly
    if (rest.length < step) List(step) :: longer else longer
}
jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)
jumps(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))
jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)
// User-defined Datatypes
//========================
// Colour is the common supertype of the three colour constants
// below; each constant is a singleton case object
abstract class Colour
case object Red extends Colour
case object Green extends Colour
case object Blue extends Colour
// Green is the only favourite colour
def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case Red | Blue => false
}
fav_colour(Green)
// ... a tiny bit more useful: Roman Numerals
// one case object per Roman digit
abstract class RomanDigit
case object I extends RomanDigit
case object V extends RomanDigit
case object X extends RomanDigit
case object L extends RomanDigit
case object C extends RomanDigit
case object D extends RomanDigit
case object M extends RomanDigit
// a Roman numeral is a list of digits, most significant digit first
type RomanNumeral = List[RomanDigit]
List(X,I)
/*
I -> 1
II -> 2
III -> 3
IV -> 4
V -> 5
VI -> 6
VII -> 7
VIII -> 8
IX -> 9
X -> 10
*/
// converts a Roman numeral into its integer value; the list is
// consumed from the left, no validity check is performed
// (e.g. List(I,I,I,I) is accepted and gives 4)
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
  // subtractive pairs
  case C::M::rest => 900 + RomanNumeral2Int(rest)
  case C::D::rest => 400 + RomanNumeral2Int(rest)
  case X::C::rest => 90 + RomanNumeral2Int(rest)
  case X::L::rest => 40 + RomanNumeral2Int(rest)
  case I::X::rest => 9 + RomanNumeral2Int(rest)
  case I::V::rest => 4 + RomanNumeral2Int(rest)
  // single digits
  case M::rest => 1000 + RomanNumeral2Int(rest)
  case D::rest => 500 + RomanNumeral2Int(rest)
  case C::rest => 100 + RomanNumeral2Int(rest)
  case L::rest => 50 + RomanNumeral2Int(rest)
  case X::rest => 10 + RomanNumeral2Int(rest)
  case V::rest => 5 + RomanNumeral2Int(rest)
  case I::rest => 1 + RomanNumeral2Int(rest)
  // nothing left
  case Nil => 0
}
RomanNumeral2Int(List(I,V)) // 4
RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I)) // 6
RomanNumeral2Int(List(I,X)) // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
// String interpolations as patterns
// (the val-pattern below binds year, month and day to the parts
// of date that are separated by "-")
val date = "2019-11-26"
val s"$year-$month-$day" = date
// Parses dates of the form yyyy-mm-dd, dd/mm/yyyy or dd.mm.yyyy.
//
// Returns Some((day, month, year)) on success, and None if the string
// has none of the three shapes or if one of its components is not
// an integer (this used to throw a NumberFormatException).
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // converts the three components; None as soon as one is not a number
  def triple(day: String, month: String, year: String) : Option[(Int, Int, Int)] =
    scala.util.Try((day.toInt, month.toInt, year.toInt)).toOption
  date match {
    case s"$year-$month-$day" => triple(day, month, year)
    case s"$day/$month/$year" => triple(day, month, year)
    case s"$day.$month.$year" => triple(day, month, year)
    case _ => None
  }
}
parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")
// User-defined Datatypes and Pattern Matching
//=============================================
// trees
// arithmetic expressions as trees: numbers are the leaves,
// Plus and Times are the inner nodes with two sub-expressions each
abstract class Exp
case class N(n: Int) extends Exp // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp
case class Times(e1: Exp, e2: Exp) extends Exp
// fully parenthesised infix representation of an expression
def string(e: Exp) : String = e match {
  case Times(l, r) => "(" + string(l) + " * " + string(r) + ")"
  case Plus(l, r) => "(" + string(l) + " + " + string(r) + ")"
  case N(n) => n.toString
}
// sample expression: 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
println(string(e))
// computes the integer value of an expression
def eval(e: Exp) : Int = e match {
  case N(value) => value
  case Plus(l, r) =>
    val left = eval(l)
    val right = eval(r)
    left + right
  case Times(l, r) =>
    val left = eval(l)
    val right = eval(r)
    left * right
}
println(eval(e))
// simplifies an expression bottom-up using
//   0 + e = e + 0 = e,   0 * e = e * 0 = 0,   1 * e = e * 1 = e
def simp(e: Exp) : Exp = {
  // sum of two already simplified expressions
  def plus(l: Exp, r: Exp) : Exp = (l, r) match {
    case (N(0), _) => r
    case (_, N(0)) => l
    case _ => Plus(l, r)
  }
  // product of two already simplified expressions
  def times(l: Exp, r: Exp) : Exp = (l, r) match {
    case (N(0), _) | (_, N(0)) => N(0)
    case (N(1), _) => r
    case (_, N(1)) => l
    case _ => Times(l, r)
  }
  e match {
    case Plus(e1, e2) => plus(simp(e1), simp(e2))
    case Times(e1, e2) => times(simp(e1), simp(e2))
    case N(n) => N(n)
  }
}
// sample expression (0 + 1) * (0 + 9), which simp reduces to 9
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))
// Tokens and Reverse Polish Notation
// T(n) is a number token, PL stands for "+" and TI for "*"
abstract class Token
case class T(n: Int) extends Token
case object PL extends Token
case object TI extends Token
// translates an expression into reverse Polish notation:
// first both operands, then the operator
def rp(e: Exp) : List[Token] = e match {
  case N(n) => T(n) :: Nil
  case Plus(l, r) => (rp(l) ++ rp(r)) :+ PL
  case Times(l, r) => (rp(l) ++ rp(r)) :+ TI
}
println(string(e2))
println(rp(e2))
// evaluates a token list in reverse Polish notation with the help
// of a stack of intermediate results (st); the result is the top
// of the stack once all tokens are consumed
// (an operator with fewer than two values on the stack is not
// matched by any case, and an empty final stack has no head)
def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
  case (PL :: tokens, a :: b :: stack) => comp(tokens, (a + b) :: stack)
  case (TI :: tokens, a :: b :: stack) => comp(tokens, (a * b) :: stack)
  case (T(n) :: tokens, stack) => comp(tokens, n :: stack)
  case (Nil, stack) => stack.head
}
comp(rp(e), Nil)
// turns a string into a token: "+" and "*" are the operators,
// everything else is read as an integer
def proc(s: String) : Token =
  if (s == "+") PL
  else if (s == "*") TI
  else T(s.toInt)
comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)
// Sudoku
//========
// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search
// a 9x9 grid stored row by row in a single string without
// newlines; '.' marks an empty cell
val game0 = """.14.6.3..
|62...4..9
|.8..5.6..
|.6.2....3
|.7..1..5.
|5....9.6.
|..6.2..3.
|1..5...92
|..7.9.41.""".stripMargin.replaceAll("\\n", "")
// a position on the board is a (column, row) pair
type Pos = (Int, Int)
// marker for an empty cell
val EmptyValue = '.'
// number of cells per row / column
val MaxValue = 9
// the digits that can be placed in a cell
val allValues = ('1' to '9').toList
// valid row and column indexes
val indexes = List.range(0, 9)
// index of the first empty cell, or -1 if there is none
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// a game is done when no empty cell is left
def isDone(game: String) = empty(game) < 0
// (column, row) of the first empty cell
def emptyPosition(game: String) = {
  val idx = empty(game)
  (idx % MaxValue, idx / MaxValue)
}
// the cells of row y, from left to right
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game(y * MaxValue + col)
// the cells of column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * MaxValue)
// the nine cells of the 3x3 box that contains pos
def get_box(game: String, pos: Pos): List[Char] = {
  val (col, row) = pos
  // top-left corner of the box
  val x0 = col - col % 3
  val y0 = row - row % 3
  for {
    x <- List(x0, x0 + 1, x0 + 2)
    y <- List(y0, y0 + 1, y0 + 2)
  } yield game(x + y * MaxValue)
}
//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))
// this is not mutable!! - the given game stays untouched, the
// result is a copy in which the cell at index pos holds value
def update(game: String, pos: Int, value: Char): String = {
  val copy = new java.lang.StringBuilder(game.length).append(game)
  copy.setCharAt(pos, value)
  copy.toString
}
// all values in the column, the row and the box of pos
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  get_col(game, x) ::: get_row(game, y) ::: get_box(game, pos)
}
// the values that can still be placed at pos
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos).toSet
  allValues.filterNot(taken)
}
//candidates(game0, (0,0))
// one row per line, preceded by an empty line
def pretty(game: String): String = {
  val rows = game.sliding(MaxValue, MaxValue)
  rows.mkString("\n", "\n", "")
}
// exhaustive depth-first search: fills the first empty cell with
// every candidate value in turn and collects all solutions
def search(game: String): List[String] = {
  if (isDone(game)) List(game)
  else {
    val slot = empty(game)
    candidates(game, emptyPosition(game)).flatMap(c => search(update(game, slot, c)))
  }
}
search(game0).map(pretty)
// further sample games, in the same one-string-per-grid format
// ('.' marks an empty cell)
val game1 = """23.915...
|...2..54.
|6.7......
|..1.....9
|89.5.3.17
|5.....6..
|......9.5
|.16..7...
|...329..1""".stripMargin.replaceAll("\\n", "")
// game that is in the hard category
val game2 = """8........
|..36.....
|.7..9.2..
|.5...7...
|....457..
|...1...3.
|..1....68
|..85...1.
|.9....4..""".stripMargin.replaceAll("\\n", "")
// game with multiple solutions
val game3 = """.8...9743
|.5...8.1.
|.1.......
|8....5...
|...8.4...
|...3....6
|.......7.
|.3.5...8.
|9724...5.""".stripMargin.replaceAll("\\n", "")
search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)
// for measuring time: evaluates code i times and reports the
// total elapsed wall-clock time in seconds
// (code is a by-name parameter, so it is re-evaluated in each round)
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  for (_ <- 1 to i) code
  val end = System.nanoTime()
  // interpolation instead of the deprecated Double + String
  s"${(end - start) / 1.0e9} secs"
}
time_needed(1, search(game2))
// Tail recursion
//================
// factorial on Longs (not tail-recursive: the multiplication
// happens after the recursive call returns)
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}
// factorial on BigInts (also not tail-recursive)
def factB(n: BigInt): BigInt = {
  if (n != 0) n * factB(n - 1)
  else BigInt(1)
}
factB(100000)
fact(10) //ok
fact(10000) // produces a stackoverflow
// factorial with an accumulator: the recursive call is the last
// thing the function does; acc carries the product so far
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}
factT(10, 1)
println(factT(100000, 1))
// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec
// same accumulator-based factorial; the annotation makes the
// compiler reject the definition if it is not tail-recursive
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  val finished = n == 0
  if (finished) acc else factT(n - 1, n * acc)
}
// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions
// tail recursive version that searches
// for all solutions
//
// games is the work-list of boards that still have to be explored,
// sols accumulates the solutions found so far (newest first).
// @tailrec lets the compiler check that the recursion really is
// compiled into a loop.
@tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest =>
    if (isDone(game)) searchT(rest, game::sols)
    else {
      // index of the cell to fill; the same for every candidate
      val pos = empty(game)
      val cs = candidates(game, emptyPosition(game))
      // successors go to the front of the work-list (depth-first)
      searchT(cs.map(c => update(game, pos, c)) ::: rest, sols)
    }
}
searchT(List(game3), List()).map(pretty)
// tail recursive version that searches
// for a single solution
//
// games is the work-list of boards that still have to be explored;
// the result is the first completed board encountered, or None if
// the work-list runs empty. @tailrec lets the compiler check that
// the recursion really is compiled into a loop.
@tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest =>
    if (isDone(game)) Some(game)
    else {
      // index of the cell to fill; the same for every candidate
      val pos = empty(game)
      val cs = candidates(game, emptyPosition(game))
      // successors go to the front of the work-list (depth-first)
      search1T(cs.map(c => update(game, pos, c)) ::: rest)
    }
}
search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))
// game with multiple solutions
// (same grid as the game3 defined further up; repeated here so
// that this part can be run on its own)
val game3 = """.8...9743
|.5...8.1.
|.1.......
|8....5...
|...8.4...
|...3....6
|.......7.
|.3.5...8.
|9724...5.""".stripMargin.replaceAll("\\n", "")
searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)
// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
//
// Unfortunately, Scala, because of current limitations in
// the JVM, is not as clever as other functional languages. It can
// only optimise "self-tail calls". This excludes the cases of
// multiple functions making tail calls to each other. Well,
// nothing is perfect.