// Scala Lecture 3
//=================

// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page; on any non-fatal failure a
// message is printed and the empty string is returned
def get_page(url: String) : String =
  Try {
    val src = Source.fromURL(url)("ISO-8859-1")
    // always release the underlying stream, even if reading fails
    try src.take(10000).mkString finally src.close()
  }.getOrElse { println(s" Problem with: $url"); "" }

// regex for URLs and emails
val http_pattern = """"https?://[^"]*"""".r
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

// val s = "foo bla christian@kcl.ac.uk 1234567"
// email_pattern.findAllIn(s).toList

// drops the first and last character from a string
def unquote(s: String) = s.drop(1).dropRight(1)

// all quoted http(s) links occurring in a page, without the quotes
def get_all_URLs(page: String): Set[String] =
  http_pattern.findAllIn(page).map(unquote).toSet

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once;
// returns the set of email addresses found on the way
//
// NOTE: .par is part of the standard library only up to Scala 2.12;
// from 2.13 on it lives in the scala-parallel-collections module and
// needs import scala.collection.parallel.CollectionConverters._
def crawl(url: String, n: Int) : Set[String] = {
  // n <= 0 (rather than n == 0) so that a negative depth terminates too
  if (n <= 0) Set()
  else {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    new_emails ++ (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

crawl(startURL, 2)


// User-defined Datatypes and Pattern Matching
//=============================================

abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp
case class Times(e1: Exp, e2: Exp) extends Exp

// fully parenthesised infix rendering of an expression
def string(e: Exp) : String = e match {
  case N(n) => n.toString
  case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")"
  case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")"
}

val e = Plus(N(9), Times(N(3), N(4)))
println(string(e))

// evaluates an expression to its integer value
def eval(e: Exp) : Int = e match {
  case N(n) => n
  case Plus(e1, e2) => eval(e1) + eval(e2)
  case Times(e1, e2) => eval(e1) * eval(e2)
}

// simplifies bottom-up using  0 + e = e,  e + 0 = e,
// 0 * e = 0,  e * 0 = 0,  1 * e = e  and  e * 1 = e
def simp(e: Exp) : Exp = e match {
  case N(n) => N(n)
  case Plus(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), e2s) => e2s
    case (e1s, N(0)) => e1s
    case (e1s, e2s) => Plus(e1s, e2s)
  }
  case Times(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), _) => N(0)
    case (_, N(0)) => N(0)
    case (N(1), e2s) => e2s
    case (e1s, N(1)) => e1s
    case (e1s, e2s) => Times(e1s, e2s)
  }
}

println(eval(e))

val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))


// Tokens and Reverse Polish Notation

abstract class Token
case class T(n: Int) extends Token
case object PL extends Token
case object TI extends Token

// translates an expression into reverse polish notation
def rp(e: Exp) : List[Token] = e match {
  case N(n) => List(T(n))
  case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
  case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
}

println(string(e2))
println(rp(e2))

// evaluates a token list in reverse polish notation using the
// stack st; the result is the top of the stack once all tokens
// are consumed; malformed input (an operator with fewer than two
// operands, or no result at the end) is reported with an
// IllegalArgumentException instead of an opaque MatchError
def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
  case (Nil, result::_) => result
  case (T(n)::rest, st) => comp(rest, n::st)
  case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
  case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
  case _ =>
    throw new IllegalArgumentException(s"malformed RPN input: tokens = $ls, stack = $st")
}

comp(rp(e), Nil)

// turns a single word into a token; anything that is not an
// operator must be an integer literal (toInt throws otherwise)
def proc(s: String) : Token = s match {
  case "+" => PL
  case "*" => TI
  case _ => T(s.toInt)
}

comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)

// rebuilds the expression tree from a token list in reverse polish
// notation; n1 is the top of the stack, i.e. the right operand;
// malformed input is reported like in comp
def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
  case (Nil, result::_) => result
  case (T(n)::rest, st) => exp(rest, N(n)::st)
  case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st)
  case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st)
  case _ =>
    throw new IllegalArgumentException(s"malformed RPN input: tokens = $ts, stack = $st")
}

// (the source called an undefined `toks` here; rp is the function
// that produces the token list)
exp(rp(e2), Nil)

// (a second, equivalent definition of proc used to be repeated
// here; the one above is reused)
string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))


// Tail recursion
//================

// NOTE(review): only the tail " 1)" of this definition survived in
// the source; it is reconstructed from the calls fact(10) and
// fact(10000) below and from the shape of factB - please confirm
def fact(n: Long): Long =
  if (n == 0) 1 else n * fact(n - 1)

def factB(n: BigInt): BigInt =
  if (n == 0) 1 else n * factB(n - 1)

factB(100000)   // not tail-recursive: this depth may exhaust the default JVM stack

fact(10)        //ok
fact(10000)     // produces a stackoverflow

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

// (the source defined factT twice, first without and then with the
// annotation; both had the same body, so only the checked one is kept)
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)

factT(10, 1)
println(factT(100000, 1))

// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions


// Jumping Towers
//================

// the first n suffixes of xs
// for 1 => include xs
def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
  case (Nil, _) => Nil
  case (xs, 0) => Nil
  case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
}

moves(List(5,1,0), 1)
moves(List(5,1,0), 2)
moves(List(5,1,0), 5)

// checks whether a jump tour exists at all
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case (x::xs) =>
    // x is large enough to jump past the end of the list
    if (xs.length < x) true else moves(xs, x).exists(search(_))
}

search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))

// generates *all* jump tours
// if we are only interested in the shortest one, we could
// short-circuit the calculation and only return List(x) in
// the case where xs.length < x, because no tour can be shorter
// than 1
//
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case (x::xs) => {
    val children = moves(xs, x)
    val results = children.flatMap(cs => jumps(cs).map(x :: _))
    if (xs.length < x) List(x) :: results else results
  }
}

println(jumps(List(5,3,2,5,1,1)).minBy(_.length))
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)
jumps(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))

jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)


// Sudoku
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search

val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a position is (column, row), both 0-based
type Pos = (Int, Int)
val EmptyValue = '.'
val MaxValue = 9

val allValues = "123456789".toList
val indexes = (0 to 8).toList

// index of the first empty field, or -1 if there is none
def empty(game: String) = game.indexOf(EmptyValue)
def isDone(game: String) = empty(game) == -1
def emptyPosition(game: String) = {
  val i = empty(game)
  (i % MaxValue, i / MaxValue)
}

def get_row(game: String, y: Int) =
  indexes.map(col => game(y * MaxValue + col))
def get_col(game: String, x: Int) =
  indexes.map(row => game(x + row * MaxValue))

// the nine fields of the 3x3 box that contains pos
def get_box(game: String, pos: Pos): List[Char] = {
  def base(p: Int): Int = (p / 3) * 3
  val x0 = base(pos._1)
  val y0 = base(pos._2)
  val ys = (y0 until y0 + 3).toList
  (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
}

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))

// this is not mutable!!
def update(game: String, pos: Int, value: Char): String =
  game.updated(pos, value)

// all characters already used in the column, row and box of pos
def toAvoid(game: String, pos: Pos): List[Char] =
  (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))

def candidates(game: String, pos: Pos): List[Char] =
  allValues.diff(toAvoid(game, pos))

//candidates(game0, (0,0))

def pretty(game: String): String =
  "\n" + (game.sliding(MaxValue, MaxValue).mkString("\n"))

// exhaustive depth-first search: fills the first empty field with
// every candidate in turn and collects all completed games
// (see the note at crawl about .par on Scala 2.13+)
def search(game: String): List[String] = {
  if (isDone(game)) List(game)
  else {
    val pos = empty(game)
    val cs = candidates(game, emptyPosition(game))
    cs.par.map(c => search(update(game, pos, c))).toList.flatten
  }
}

search(game0).map(pretty)

// NOTE(review): the fifth row was empty in the source, which gave a
// 72-character board and made get_row/get_col index out of bounds;
// "89.5.3.17" is restored from the upstream lecture material -
// please confirm
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")

// game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)

// for measuring time: runs code i times and reports the
// total elapsed wall-clock time in seconds
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  for (j <- 1 to i) code
  val end = System.nanoTime()
  s"${(end - start) / 1.0e9} secs"
}

time_needed(1, search(game2))

// tail recursive version that searches
// for all solutions
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val pos = empty(game)
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, pos, c)) ::: rest, sols)
    }
  }
}

searchT(List(game3), List()).map(pretty)

// tail recursive version that searches
// for a single solution
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val pos = empty(game)
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, pos, c)) ::: rest)
    }
  }
}

search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))

// game with multiple solutions
// (game3 was defined a second time here with identical contents;
// the definition above is reused)
searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)

// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
//
// Unfortunately, Scala because of current limitations in
// the JVM is not as clever as other functional languages. It can
// only optimise "self-tail calls". This excludes the cases of
// multiple functions making tail calls to each other. Well,
// nothing is perfect.