// Scala Lecture 3
//=================


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.

import io.Source
import scala.util._

// gets the first 10K (10000 characters) of a web-page, decoded
// as ISO-8859-1; if anything goes wrong while opening or reading
// the URL, the problem is printed and the empty string is returned
def get_page(url: String) : String = {
  Try {
    val source = Source.fromURL(url)("ISO-8859-1")
    // always release the underlying stream, even if reading fails
    try source.take(10000).mkString finally source.close()
  }.getOrElse { println(s" Problem with: $url"); "" }
}

// regex for URLs and emails
//  - http_pattern matches an http/https link *including* the
//    surrounding double quotes
//  - email_pattern matches lower-case email addresses; the three
//    groups are local part, domain and top-level domain
val http_pattern: scala.util.matching.Regex = """"https?://[^"]*"""".r
val email_pattern: scala.util.matching.Regex =
  """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

// val s = "foo bla christian@kcl.ac.uk 1234567"
// email_pattern.findAllIn(s).toList

// drops the first and last character from a string
// (strings shorter than two characters become empty)
def unquote(s: String): String = s.drop(1).dropRight(1)

// all distinct links in a page; http_pattern matches the links
// together with their quotes, which unquote strips off again
def get_all_URLs(page: String): Set[String] =
  http_pattern.findAllIn(page).map(unquote).toSet

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
//
// returns all email addresses found on the page at url and on
// the pages reachable from it within n levels; the links of a
// page are crawled in parallel (.par)
def crawl(url: String, n: Int) : Set[String] = {
  if (n == 0) Set()
  else {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    new_emails ++ get_all_URLs(page).par.flatMap(u => crawl(u, n - 1))
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
crawl(startURL, 2)


// User-defined Datatypes and Pattern Matching
//=============================================

// arithmetic expressions built from numbers, + and *
abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp     // e1 + e2
case class Times(e1: Exp, e2: Exp) extends Exp    // e1 * e2

// fully parenthesised string representation of an expression
def string(e: Exp) : String = e match {
  case N(n) => n.toString
  case Plus(e1, e2) => s"(${string(e1)} + ${string(e2)})"
  case Times(e1, e2) => s"(${string(e1)} * ${string(e2)})"
}

// example expression: 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
println(string(e))

// evaluates an expression to the integer it denotes
def eval(e: Exp) : Int = e match {
  case N(n) => n
  case Plus(e1, e2) => eval(e1) + eval(e2)
  case Times(e1, e2) => eval(e1) * eval(e2)
}

// simplifies an expression bottom-up using the laws
//   0 + e = e,  e + 0 = e,  0 * e = 0,  e * 0 = 0,
//   1 * e = e,  e * 1 = e
// the subexpressions are simplified first, so that zeros and
// ones produced further down can be eliminated further up
def simp(e: Exp) : Exp = e match {
  case N(n) => N(n)
  case Plus(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), e2s) => e2s
    case (e1s, N(0)) => e1s
    case (e1s, e2s) => Plus(e1s, e2s)
  }
  case Times(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), _) => N(0)
    case (_, N(0)) => N(0)
    case (N(1), e2s) => e2s
    case (e1s, N(1)) => e1s
    case (e1s, e2s) => Times(e1s, e2s)
  }
}

println(eval(e))

// (0 + 1) * (0 + 9), before and after simplification
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))

// Tokens and Reverse Polish Notation
abstract class Token
case class T(n: Int) extends Token   // a number
case object PL extends Token         // the + operator
case object TI extends Token         // the * operator

// translates an expression into reverse Polish notation:
// first the tokens of both operands, then the operator
def rp(e: Exp) : List[Token] = e match {
  case N(n) => List(T(n))
  case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
  case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
}
println(string(e2))
println(rp(e2))

// evaluates a token list in reverse Polish notation;
// st is the stack of intermediate results (top is the head)
// malformed input (an operator with fewer than two operands, or
// nothing left on the stack at the end) raises an
// IllegalArgumentException instead of a bare MatchError or
// NoSuchElementException
def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
  case (Nil, result::_) => result
  case (T(n)::rest, st) => comp(rest, n::st)
  case (PL::rest, n1::n2::st) => comp(rest, (n1 + n2)::st)
  case (TI::rest, n1::n2::st) => comp(rest, (n1 * n2)::st)
  case _ =>
    throw new IllegalArgumentException(s"malformed RPN input: tokens $ls, stack $st")
}

comp(rp(e), Nil)

// turns a single word into a token; everything that is not
// "+" or "*" must be an integer literal (s.toInt throws a
// NumberFormatException otherwise)
def proc(s: String) : Token = s match {
  case "+" => PL
  case "*" => TI
  case _ => T(s.toInt)
}

comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)



// fully parenthesised string representation of an expression
def string(e: Exp) : String = e match {
  case N(n) => n.toString
  case Plus(e1, e2) => s"(${string(e1)} + ${string(e2)})"
  case Times(e1, e2) => s"(${string(e1)} * ${string(e2)})"
}

// example expression: 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))

println(string(e))

// evaluates an expression to the integer it denotes
def eval(e: Exp) : Int = e match {
  case N(n) => n
  case Plus(e1, e2) => eval(e1) + eval(e2)
  case Times(e1, e2) => eval(e1) * eval(e2)
}

eval(e)

// simplifies an expression bottom-up using the laws
//   0 + e = e,  e + 0 = e,  0 * e = 0,  e * 0 = 0,
//   1 * e = e,  e * 1 = e
def simp(e: Exp) : Exp = e match {
  case N(n) => N(n)
  case Plus(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), e2s) => e2s
    case (e1s, N(0)) => e1s
    case (e1s, e2s) => Plus(e1s, e2s)
  }
  case Times(e1, e2) => (simp(e1), simp(e2)) match {
    // the other operand is irrelevant once one side is 0
    case (N(0), _) => N(0)
    case (_, N(0)) => N(0)
    case (N(1), e2s) => e2s
    case (e1s, N(1)) => e1s
    case (e1s, e2s) => Times(e1s, e2s)
  }
}


// (0 + 1) * (0 + 9), before and after simplification
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))

// Token and Reverse Polish Notation
abstract class Token
case class T(n: Int) extends Token   // a number
case object PL extends Token         // the + operator
case object TI extends Token         // the * operator

// translates an expression into reverse Polish notation:
// first the tokens of both operands, then the operator
def rp(e: Exp) : List[Token] = e match {
  case N(n) => List(T(n))
  case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
  case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
}

// evaluates a token list in reverse Polish notation;
// stk is the stack of intermediate results (top is the head)
// malformed input (an operator with fewer than two operands, or
// nothing left on the stack at the end) raises an
// IllegalArgumentException instead of a bare MatchError or
// NoSuchElementException
def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match {
  case (Nil, result::_) => result
  case (T(n)::rest, st) => comp(rest, n::st)
  case (PL::rest, n1::n2::st) => comp(rest, (n1 + n2)::st)
  case (TI::rest, n1::n2::st) => comp(rest, (n1 * n2)::st)
  case _ =>
    throw new IllegalArgumentException(s"malformed RPN input: tokens $ts, stack $stk")
}

// rebuilds the expression tree from a token list in reverse
// Polish notation; st is the stack of already built subtrees
// n1 is the top of the stack, i.e. the operand pushed last, so
// it becomes the *right* argument of the operator
// malformed input raises an IllegalArgumentException
def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
  case (Nil, result::_) => result
  case (T(n)::rest, st) => exp(rest, N(n)::st)
  case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st)
  case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st)
  case _ =>
    throw new IllegalArgumentException(s"malformed RPN input: tokens $ts, stack $st")
}

// round trip: expression -> RPN tokens -> expression
// (rp is the translation into tokens; no function toks exists)
exp(rp(e2), Nil)

// turns a single word into a token; everything that is not
// "+" or "*" must be an integer literal (n.toInt throws a
// NumberFormatException otherwise)
// the explicit result type keeps the compiler from inferring
// Product with Serializable with Token
def proc(s: String) : Token = s match {
  case "+" => PL
  case "*" => TI
  case n => T(n.toInt)
}


string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))



// Tail recursion
//================


// factorial, not tail recursive: the multiplication happens
// after the recursive call returns, so every call needs its own
// stack frame (n is expected to be non-negative)
def fact(n: Long): Long =
  if (n == 0) 1 else n * fact(n - 1)

// the same non-tail-recursive factorial, but on BigInt so that
// the result cannot overflow (n is expected to be non-negative)
def factB(n: BigInt): BigInt =
  if (n == 0) 1 else n * factB(n - 1)

factB(100000)

fact(10)     //ok
fact(10000)  // produces a stackoverflow

// factorial with an accumulator: the recursive call is the last
// thing the function does; call it with acc = 1
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)

factT(10, 1)
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive:
// with @tailrec the compiler rejects the definition if the
// recursive call is not in tail position
import scala.annotation.tailrec

@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)


// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions


// Jumping Towers
//================


// the first n suffixes of xs, starting with xs itself
// (for n == 1 the result contains just xs); there are at
// most xs.length of them, however large n is
def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
  case (Nil, _) => Nil
  case (_, 0) => Nil
  case (_::rest, k) => xs :: moves(rest, k - 1)
}


moves(List(5,1,0), 1)
moves(List(5,1,0), 2)
moves(List(5,1,0), 5)

// checks whether a jump tour exists at all
//
// the head x of the list is the maximal jump length from the
// current field: if x is bigger than the number of remaining
// fields we can jump off the end, otherwise we try every field
// reachable with at most x steps
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case x::rest =>
    if (rest.length < x) true else moves(rest, x).exists(search(_))
}


search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))

// generates *all* jump tours
// if we are only interested in the shortest one, we could
// short-circuit the calculation and only return List(x) in
// the case where xs.length < x, because no tour can be shorter
// than 1
//
// a tour is the list of field values visited; every tour of a
// reachable suffix is extended at the front with the current x
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case x::rest => {
    val children = moves(rest, x)
    val results = children.flatMap(cs => jumps(cs).map(x :: _))
    // x alone is a tour if we can jump off the end from here
    if (rest.length < x) List(x) :: results else results
  }
}

println(jumps(List(5,3,2,5,1,1)).minBy(_.length))
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)
jumps(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))

// the shortest tours
jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)



// Sudoku
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


// a game is a string of 81 characters, row after row;
// '.' stands for an empty field
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a position is a pair (column, row), both counted from 0
type Pos = (Int, Int)
val EmptyValue = '.'
val MaxValue = 9       // side length of the board

val allValues = "123456789".toList
val indexes = (0 to 8).toList


// index of the first empty field in the game string, or -1 if there is none
def empty(game: String): Int = game.indexOf(EmptyValue)
// a game is done when no empty field is left
def isDone(game: String): Boolean = empty(game) == -1
// (column, row) of the first empty field; only meaningful if
// the game is not done yet
def emptyPosition(game: String): (Int, Int) = {
  val idx = empty(game)
  (idx % MaxValue, idx / MaxValue)
}


// the nine characters of row y
def get_row(game: String, y: Int): List[Char] =
  indexes.map(col => game(y * MaxValue + col))
// the nine characters of column x
def get_col(game: String, x: Int): List[Char] =
  indexes.map(row => game(x + row * MaxValue))

// the nine characters of the 3x3 box that contains pos
def get_box(game: String, pos: Pos): List[Char] = {
  // rounds a coordinate down to the first column/row of its box
  def base(p: Int): Int = (p / 3) * 3
  val x0 = base(pos._1)
  val y0 = base(pos._2)
  val ys = (y0 until y0 + 3).toList
  (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
}

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))


// this is not mutable!!
// returns a *new* game string in which the character at index
// pos is replaced by value; the original game is left untouched
def update(game: String, pos: Int, value: Char): String =
  game.updated(pos, value)

// all characters that already occur in the column, the row or
// the box of pos (with duplicates, and including '.')
def toAvoid(game: String, pos: Pos): List[Char] =
  get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos)

// the digits that can still be placed at pos
def candidates(game: String, pos: Pos): List[Char] =
  allValues.diff(toAvoid(game, pos))

//candidates(game0, (0,0))

// renders a game as nine lines of nine characters,
// preceded by a newline
def pretty(game: String): String =
  "\n" + (game.sliding(MaxValue, MaxValue).mkString("\n"))


// exhaustive depth-first search: fills the first empty field
// with every candidate (in parallel) and collects the solutions
// of all resulting games
def search(game: String): List[String] = {
  if (isDone(game)) List(game)
  else {
    val idx = empty(game)   // the same field for every candidate
    val cs = candidates(game, emptyPosition(game))
    cs.par.map(c => search(update(game, idx, c))).toList.flatten
  }
}

search(game0).map(pretty)

// another game, in the same 81-character format
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")


// game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")


search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)

// for measuring time
// runs code i times (code is passed by name, so it is
// re-evaluated on every round) and reports the total elapsed
// time in seconds as a string
def time_needed[T](i: Int, code: => T): String = {
  val start = System.nanoTime()
  for (j <- 1 to i) code
  val end = System.nanoTime()
  s"${(end - start) / 1.0e9} secs"
}

time_needed(1, search(game2))

// tail recursive version that searches
// for all solutions
//
// games is the work list of boards still to be explored and
// sols accumulates the finished boards; the successors of a
// board are put in front of the work list (depth-first)
// the annotation makes the compiler check the tail recursion
@scala.annotation.tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val idx = empty(game)   // the same field for every candidate
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, idx, c)) ::: rest, sols)
    }
  }
}

searchT(List(game3), List()).map(pretty)


// tail recursive version that searches
// for a single solution
//
// stops at the first finished board in the work list; the
// result is None if the work list runs empty first
// the annotation makes the compiler check the tail recursion
@scala.annotation.tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val idx = empty(game)   // the same field for every candidate
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, idx, c)) ::: rest)
    }
  }
}

search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))


// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)

// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
//
// Unfortunately, Scala, because of current limitations in
// the JVM, is not as clever as other functional languages. It can
// only optimise "self-tail calls". This excludes the cases of
// multiple functions making tail calls to each other. Well,
// nothing is perfect.
