// Scala Lecture 3
//=================
// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.
import io.Source
import scala.util._
// Downloads at most the first 10000 characters of a web page,
// decoded as ISO-8859-1.
//
// Any failure while opening or reading the URL is reported on
// standard output and the empty string is returned instead.
def get_page(url: String) : String = {
  Try {
    val src = Source.fromURL(url)("ISO-8859-1")
    // always release the underlying stream, even if reading fails
    try src.take(10000).mkString finally src.close()
  }.getOrElse { println(s" Problem with: $url"); "" }
}
// regex for URLs and emails
// http_pattern matches a double-quoted http:// or https:// URL; the
// surrounding quote characters are part of the match.
val http_pattern = """"https?://[^"]*"""".r
// email_pattern has three groups: the part before the "@", the host part
// and a final 2-6 character suffix; only lower-case letters are accepted.
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
// val s = "foo bla christian@kcl.ac.uk 1234567"
// email_pattern.findAllIn(s).toList
// Removes the first and the last character of a string (for example
// the quotes around a matched URL); strings with fewer than two
// characters become the empty string.
def unquote(s: String) = {
  val lastIndex = s.length - 1
  s.slice(1, lastIndex)
}
// Collects every quoted URL found in the page, with the quotes removed.
def get_all_URLs(page: String): Set[String] = {
  val quotedLinks = http_pattern.findAllIn(page)
  (for (quoted <- quotedLinks) yield unquote(quoted)).toSet
}
// Naive crawler: collects the email addresses on the page at `url` and
// on all pages reachable from it by following at most n - 1 links.
// Pages are not remembered, so the same page can be visited repeatedly.
def crawl(url: String, n: Int) : Set[String] = n match {
  case 0 => Set()
  case _ =>
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val emailsHere = email_pattern.findAllIn(page).toSet
    // the linked pages are crawled in parallel
    val emailsBelow = get_all_URLs(page).par.map(link => crawl(link, n - 1))
    emailsHere ++ emailsBelow.flatten
}
// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
// crawl the start page and the pages it links to directly (depth 2);
// this accesses the network
crawl(startURL, 2)
// User-defined Datatypes and Pattern Matching
//=============================================
// Arithmetic expressions: numbers, sums and products.
abstract class Exp
case class N(n: Int) extends Exp // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp // for e1 + e2
case class Times(e1: Exp, e2: Exp) extends Exp // for e1 * e2
// Renders an expression as a fully parenthesised infix string.
def string(e: Exp) : String = e match {
  case N(value) => value.toString
  case Plus(left, right) => s"(${string(left)} + ${string(right)})"
  case Times(left, right) => s"(${string(left)} * ${string(right)})"
}
// sample expression 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
// prints (9 + (3 * 4))
println(string(e))
// Computes the integer value of an expression.
def eval(e: Exp) : Int = e match {
  case N(value) => value
  case Plus(left, right) =>
    val lhs = eval(left)
    val rhs = eval(right)
    lhs + rhs
  case Times(left, right) =>
    val lhs = eval(left)
    val rhs = eval(right)
    lhs * rhs
}
// Simplifies an expression bottom-up using the rules
//   0 + e = e,  e + 0 = e,  0 * e = 0,  e * 0 = 0,  1 * e = e,  e * 1 = e
def simp(e: Exp) : Exp = e match {
  case Plus(left, right) =>
    val (l, r) = (simp(left), simp(right))
    if (l == N(0)) r
    else if (r == N(0)) l
    else Plus(l, r)
  case Times(left, right) =>
    val (l, r) = (simp(left), simp(right))
    if (l == N(0) || r == N(0)) N(0)
    else if (l == N(1)) r
    else if (r == N(1)) l
    else Times(l, r)
  case N(n) => N(n)
}
// evaluate the sample expression e defined above
println(eval(e))
// (0 + 1) * (0 + 9): every zero and the factor 1 can be simplified away
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
// prints ((0 + 1) * (0 + 9))
println(string(e2))
// prints 9
println(string(simp(e2)))
// Tokens and Reverse Polish Notation
// Tokens of an expression in reverse Polish notation.
abstract class Token
case class T(n: Int) extends Token // a number
case object PL extends Token // the operator +
case object TI extends Token // the operator *
// Translates an expression into reverse Polish notation: both operands
// first, then the operator.
def rp(e: Exp) : List[Token] = e match {
  case N(value) => List(T(value))
  case Plus(left, right) => (rp(left) ++ rp(right)) :+ PL
  case Times(left, right) => (rp(left) ++ rp(right)) :+ TI
}
// the infix form of e2 next to its reverse Polish token list
println(string(e2))
// prints List(T(0), T(1), PL, T(0), T(9), PL, TI)
println(rp(e2))
// Evaluates a token list in reverse Polish notation with the help of a
// stack of intermediate results; the final result is the top of the stack.
// There is no case for an operator that finds fewer than two numbers
// on the stack.
def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
  case (Nil, stack) =>
    stack.head
  case (T(number) :: remaining, stack) =>
    comp(remaining, number :: stack)
  case (PL :: remaining, top :: next :: below) =>
    comp(remaining, (top + next) :: below)
  case (TI :: remaining, top :: next :: below) =>
    comp(remaining, (top * next) :: below)
}
// evaluate e via its reverse Polish form, starting with an empty stack
comp(rp(e), Nil)
// Turns a single word into a token: "+" and "*" are the operators,
// everything else is read as an integer via toInt.
def proc(s: String) : Token =
  if (s == "+") PL
  else if (s == "*") TI
  else T(s.toInt)
// tokenise a space-separated postfix string and evaluate it;
// ((1 + 2) * 4 + 5) + 3 gives 20
comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)
// Pretty-prints an expression in infix notation; every sum and product
// is wrapped in parentheses.
def string(e: Exp) : String = {
  // shared layout for both binary operators
  def infix(left: Exp, op: String, right: Exp): String =
    "(" + string(left) + op + string(right) + ")"

  e match {
    case Plus(l, r) => infix(l, " + ", r)
    case Times(l, r) => infix(l, " * ", r)
    case N(n) => n.toString
  }
}
// the expression 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
// prints (9 + (3 * 4))
println(string(e))
// Evaluates an expression to the integer it denotes.
def eval(e: Exp) : Int = e match {
  case Plus(a, b) => eval(a) + eval(b)
  case Times(a, b) => eval(a) * eval(b)
  case N(k) => k
}
// value of the expression e defined above
eval(e)
// Removes superfluous zeros and ones from an expression: the
// sub-expressions are simplified first, then the rules for 0 + e, e + 0,
// 0 * e, e * 0, 1 * e and e * 1 are applied to the results.
def simp(e: Exp) : Exp = e match {
  case N(k) => N(k)
  case Plus(a, b) =>
    (simp(a), simp(b)) match {
      case (N(0), right) => right
      case (left, N(0)) => left
      case (left, right) => Plus(left, right)
    }
  case Times(a, b) =>
    (simp(a), simp(b)) match {
      case (N(0), _) | (_, N(0)) => N(0)
      case (N(1), right) => right
      case (left, N(1)) => left
      case (left, right) => Times(left, right)
    }
}
// (0 + 1) * (0 + 9), an expression with removable zeros and ones
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
// prints ((0 + 1) * (0 + 9))
println(string(e2))
// prints 9
println(string(simp(e2)))
// Token and Reverse Polish Notation
// Tokens of reverse Polish notation: numbers and the two operators.
abstract class Token
case class T(n: Int) extends Token // number token
case object PL extends Token // plus
case object TI extends Token // times
// Flattens an expression into its reverse Polish token sequence.
def rp(e: Exp) : List[Token] = {
  // operands first, operator last
  def postfix(left: Exp, right: Exp, op: Token): List[Token] =
    rp(left) ::: rp(right) ::: List(op)

  e match {
    case N(n) => T(n) :: Nil
    case Plus(e1, e2) => postfix(e1, e2, PL)
    case Times(e1, e2) => postfix(e1, e2, TI)
  }
}
// Stack machine for reverse Polish notation: numbers are pushed, an
// operator replaces the two topmost numbers by their sum or product,
// and the top of the stack is returned once all tokens are consumed.
def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match {
  case (Nil, results) =>
    results.head
  case (T(value) :: tokens, results) =>
    comp(tokens, value :: results)
  case (PL :: tokens, x :: y :: results) =>
    comp(tokens, (x + y) :: results)
  case (TI :: tokens, x :: y :: results) =>
    comp(tokens, (x * y) :: results)
}
// Rebuilds an expression tree from reverse Polish tokens. It works like
// comp, but the stack holds expressions; the operand pushed earlier
// becomes the left argument of the operator.
def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
  case (Nil, stack) =>
    stack.head
  case (T(value) :: tokens, stack) =>
    exp(tokens, N(value) :: stack)
  case (PL :: tokens, right :: left :: stack) =>
    exp(tokens, Plus(left, right) :: stack)
  case (TI :: tokens, right :: left :: stack) =>
    exp(tokens, Times(left, right) :: stack)
}
// round trip: translate e2 to reverse Polish tokens with rp and rebuild
// the expression tree from them
exp(rp(e2), Nil)
// Converts one word of a postfix string into a token; anything that is
// not "+" or "*" is parsed as an integer with toInt.
def proc(s: String) =
  if (s == "+") PL
  else if (s == "*") TI
  else T(s.toInt)
// parse a postfix string into an expression and print it in infix form,
// giving ((((1 + 2) * 4) + 5) + 3)
string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))
// Tail recursion
//================
// Factorial on Long. Not tail recursive: the multiplication happens
// after the recursive call returns.
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}
// Factorial on BigInt, so the result cannot overflow; still not
// tail recursive.
def factB(n: BigInt): BigInt = {
  if (n != 0) n * factB(n - 1)
  else BigInt(1)
}
// Sample calls. Neither fact nor factB is tail recursive, so every
// pending multiplication keeps a stack frame alive and deep recursion
// can exhaust the stack.
factB(100000)
fact(10) //ok
fact(10000) // produces a stackoverflow
// Factorial with an accumulator: the running product is passed along,
// so the recursive call is the last thing the function does.
def factT(n: BigInt, acc: BigInt): BigInt = {
  val finished = n == 0
  if (finished) acc
  else factT(n - 1, n * acc)
}
// the accumulator starts at 1
factT(10, 1)
println(factT(100000, 1))
// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec
// Accumulator-based factorial; the annotation makes the compiler reject
// the definition if the recursion is not in tail position.
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}
// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions
// Jumping Towers
//================
// the first n non-empty suffixes of xs, longest first;
// for n = 1 this is just xs itself
// (a negative n yields all non-empty suffixes)
def moves(xs: List[Int], n: Int) : List[List[Int]] = {
  val suffixes = xs.tails.filter(_.nonEmpty)
  val wanted = if (n < 0) suffixes else suffixes.take(n)
  wanted.toList
}
// List(List(5, 1, 0))
moves(List(5,1,0), 1)
// List(List(5, 1, 0), List(1, 0))
moves(List(5,1,0), 2)
// List(List(5, 1, 0), List(1, 0), List(0)) - there are only three suffixes
moves(List(5,1,0), 5)
// Checks whether a jump tour exists at all: the head of the list says
// how far one may jump; a tour exists if that jump leads past the end of
// the list, or if a tour exists from one of the reachable positions.
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case step :: rest =>
    val jumpsOverEnd = rest.length < step
    jumpsOverEnd || moves(rest, step).exists(search(_))
}
// sample towers, including the empty and the one-element list
search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))
// generates *all* jump tours; each tour is the list of jump lengths
// taken along the way
//
// if we are only interested in the shortest one, we could
// short-circuit the calculation and only return List(x) in the
// case where xs.length < x, because no tour can be shorter than 1
//
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case step :: rest =>
    // tours that continue from one of the reachable positions
    val longer = for {
      next <- moves(rest, step)
      tour <- jumps(next)
    } yield step :: tour
    // a single jump suffices when it leads past the end of the list
    if (rest.length < step) List(step) :: longer else longer
}
// Sample calls; minBy(_.length) selects a shortest tour and fails if
// there is no tour at all.
println(jumps(List(5,3,2,5,1,1)).minBy(_.length))
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)
jumps(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))
jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)
// Sudoku
//========
// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search
// A puzzle is one 81-character string, row after row (the line breaks
// are removed); '.' marks an empty field.
val game0 = """.14.6.3..
|62...4..9
|.8..5.6..
|.6.2....3
|.7..1..5.
|5....9.6.
|..6.2..3.
|1..5...92
|..7.9.41.""".stripMargin.replaceAll("\\n", "")
// a position is a pair (column, row)
type Pos = (Int, Int)
// character of an unfilled field
val EmptyValue = '.'
// number of fields per row and per column
val MaxValue = 9
// the characters that may be entered into a field
val allValues = "123456789".toList
// row and column indices
val indexes = (0 to 8).toList
// index of the first empty field in the game string, -1 if there is none
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// a game is finished when no empty field is left
def isDone(game: String) = empty(game) < 0
// (column, row) of the first empty field
def emptyPosition(game: String) = {
  val index = empty(game)
  (index % MaxValue, index / MaxValue)
}
// the fields of row y, from left to right
def get_row(game: String, y: Int) = {
  val rowStart = y * MaxValue
  for (col <- indexes) yield game(rowStart + col)
}
// the fields of column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * MaxValue)
// the nine fields of the 3x3 box that contains the position pos
def get_box(game: String, pos: Pos): List[Char] = {
  val (col, row) = pos
  // top-left corner of the box: round down to a multiple of three
  val left = (col / 3) * 3
  val top = (row / 3) * 3
  for {
    x <- List(left, left + 1, left + 2)
    y <- List(top, top + 1, top + 2)
  } yield game(x + y * MaxValue)
}
//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))
// this is not mutable!!
// Returns a new game string in which the character at index pos is
// replaced by value; the string passed in is left unchanged.
def update(game: String, pos: Int, value: Char): String =
game.updated(pos, value)
// every character in the column, the row and the box of pos
// (duplicates and empty fields included)
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  get_col(game, x) ::: get_row(game, y) ::: get_box(game, pos)
}
// the values that do not yet occur in the column, row or box of pos
def candidates(game: String, pos: Pos): List[Char] = {
  val blocked = toAvoid(game, pos)
  allValues diff blocked
}
//candidates(game0, (0,0))
// lays the game string out as one row per line, preceded by a newline
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")
// Exhaustive depth-first search: fills the first empty field with every
// candidate value (the candidates are tried in parallel) and collects
// all completed games.
def search(game: String): List[String] = {
  if (!isDone(game)) {
    val options = candidates(game, emptyPosition(game))
    val solved = options.par.map(value => search(update(game, empty(game), value)))
    solved.toList.flatten
  }
  else List(game)
}
// all solutions of game0, formatted row by row
search(game0).map(pretty)
// further puzzles in the same encoding as game0
val game1 = """23.915...
|...2..54.
|6.7......
|..1.....9
|89.5.3.17
|5.....6..
|......9.5
|.16..7...
|...329..1""".stripMargin.replaceAll("\\n", "")
// game that is in the hard category
val game2 = """8........
|..36.....
|.7..9.2..
|.5...7...
|....457..
|...1...3.
|..1....68
|..85...1.
|.9....4..""".stripMargin.replaceAll("\\n", "")
// game with multiple solutions
val game3 = """.8...9743
|.5...8.1.
|.1.......
|8....5...
|...8.4...
|...3....6
|.......7.
|.3.5...8.
|9724...5.""".stripMargin.replaceAll("\\n", "")
// solve the three puzzles
search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)
// for measuring time: evaluates code i times and reports the total
// elapsed wall-clock time of all runs in seconds
def time_needed[T](i: Int, code: => T) = {
  val started = System.nanoTime()
  (1 to i).foreach(_ => code)
  val finished = System.nanoTime()
  val seconds = (finished - started) / 1.0e9
  s"$seconds secs"
}
// time a single run of the search on game2
time_needed(1, search(game2))
// tail recursive version that searches for all solutions
//
// games is the work list of boards still to be explored and sols
// collects the finished boards; the successors of a board are put in
// front of the work list, so the search is depth-first.
def searchT(games: List[String], sols: List[String]): List[String] =
  if (games.isEmpty) sols
  else {
    val game = games.head
    val pending = games.tail
    if (isDone(game)) searchT(pending, game :: sols)
    else {
      val options = candidates(game, emptyPosition(game))
      val successors = options.map(value => update(game, empty(game), value))
      searchT(successors ::: pending, sols)
    }
  }
// all solutions of game3; the work list starts with the puzzle itself
searchT(List(game3), List()).map(pretty)
// tail recursive version that searches for a single solution
//
// stops at the first finished board found in the work list; None means
// the work list ran empty without a finished board.
def search1T(games: List[String]): Option[String] =
  if (games.isEmpty) None
  else {
    val game = games.head
    if (isDone(game)) Some(game)
    else {
      val options = candidates(game, emptyPosition(game))
      val successors = options.map(value => update(game, empty(game), value))
      search1T(successors ::: games.tail)
    }
  }
// first solution of game3, if any
search1T(List(game3)).map(pretty)
// total time of ten runs
time_needed(10, search1T(List(game3)))
// game with multiple solutions
// (same board as the game3 defined earlier)
val game3 = """.8...9743
|.5...8.1.
|.1.......
|8....5...
|...8.4...
|...3....6
|.......7.
|.3.5...8.
|9724...5.""".stripMargin.replaceAll("\\n", "")
// all solutions versus just one solution
searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)
// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
//
// Unfortunately, Scala, because of current limitations in
// the JVM is not as clever as other functional languages. It can
// only optimise "self-tail calls". This excludes the cases of
// multiple functions making tail calls to each other. Well,
// nothing is perfect.
// Polymorphic Types
//===================
// You do not want to write functions like contains, first
// and so on for every type of lists.
// length of a list of strings, counted recursively
def length_string_list(lst: List[String]): Int =
  if (lst.isEmpty) 0
  else 1 + length_string_list(lst.tail)
// the same function again, this time for lists of integers
def length_int_list(lst: List[Int]): Int =
  if (lst.isEmpty) 0
  else 1 + length_int_list(lst.tail)
// both calls give 4, but each element type needs its own function
length_string_list(List("1", "2", "3", "4"))
length_int_list(List(1, 2, 3, 4))
//-----
// polymorphic length: one definition for lists of any element type A
def length[A](lst: List[A]): Int =
  if (lst.isEmpty) 0
  else 1 + length(lst.tail)
// the same function now works for strings and integers alike
length(List("1", "2", "3", "4"))
length(List(1, 2, 3, 4))
// Polymorphic map: applies f to every element of lst and returns the
// results in the same order.
def map[A, B](lst: List[A], f: A => B): List[B] = lst match {
  case Nil => Nil
  // recurse on the polymorphic function itself
  case x::xs => f(x)::map(xs, f)
}
// squares of 1 to 4, i.e. List(1, 4, 9, 16); the parameter type of the
// function literal has to be written out because lst and f share one
// parameter list
map(List(1, 2, 3, 4), (x: Int) => x * x)
// Cool Stuff
//============
// Implicits
//===========
//
// For example adding your own methods to Strings:
// Imagine you want to increment strings, like
//
// "HAL".increment
//
// you can avoid ugly fudges, like a MyString, by
// using implicit conversions.
// Adds an increment method to strings: every character is replaced by
// the character with the next code.
implicit class MyString(s: String) {
  def increment = s.map(c => (c + 1).toChar)
}
// the implicit class makes increment available on plain strings; gives "IBM"
"HAL".increment