// progs/lecture3.scala
// author Christian Urban <urbanc@in.tum.de>
// Fri, 16 Aug 2019 08:45:21 +0100
// changeset 277 acaf2099406a
// parent 223 c6453f3547ec
// child 318 029e2862bb4e
// permissions -rw-r--r--
// updated

// Scala Lecture 3
//=================


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.

import io.Source
import scala.util._

// Gets (at most) the first 10K characters of a web-page, decoded as
// ISO-8859-1. Any failure (malformed URL, network problem, ...) is
// reported on stdout and yields the empty string.
def get_page(url: String) : String = {
  Try {
    val src = Source.fromURL(url)("ISO-8859-1")
    // close the underlying stream, even if reading fails half-way
    try src.take(10000).mkString finally src.close()
  }.getOrElse { println(s"  Problem with: $url"); "" }
}

// regex for URLs and emails
//
// http_pattern matches a double-quoted http/https URL; the match
// includes the two surrounding quote characters
val http_pattern = """"https?://[^"]*"""".r
// email_pattern has three groups (the part before the @, the domain
// and a final part of 2 to 6 characters); only lower-case letters
// are accepted
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//  val s = "foo bla christian@kcl.ac.uk 1234567"
//  email_pattern.findAllIn(s).toList

// strips the first and the last character of a string
// (strings shorter than two characters become empty)
def unquote(s: String) = s.slice(1, s.length - 1)

// all distinct URLs found (in double quotes) on a page,
// returned without the quotes
def get_all_URLs(page: String): Set[String] = {
  val links = for (quoted <- http_pattern.findAllIn(page)) yield unquote(quoted)
  links.toSet
}

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
//
// Returns the email addresses found on the page at url and on the
// pages reached from it by following links, n levels deep in total.
// A depth of zero or less visits nothing and returns the empty set
// (a negative depth would otherwise never reach the base case).
def crawl(url: String, n: Int) : Set[String] = {
  if (n <= 0) Set()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    // the links of a page are followed in parallel
    new_emails ++ 
      (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
// depth 2: the start page plus every page it links to
crawl(startURL, 2)



// User-defined Datatypes and Pattern Matching
//=============================================

// arithmetic expressions built from numbers, addition and multiplication
abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp     // for e1 + e2
case class Times(e1: Exp, e2: Exp) extends Exp    // for e1 * e2

// fully parenthesised infix rendering of an expression
def string(e: Exp) : String = e match {
  case N(v) => v.toString
  case Plus(l, r) => s"(${string(l)} + ${string(r)})"
  case Times(l, r) => s"(${string(l)} * ${string(r)})"
}

// example expression: 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
println(string(e))

// the integer value denoted by an expression
def eval(e: Exp) : Int = e match {
  case N(v) => v
  case Plus(l, r) =>
    val lhs = eval(l)
    lhs + eval(r)
  case Times(l, r) =>
    val lhs = eval(l)
    lhs * eval(r)
}

// simplifies an expression bottom-up using the laws
//   0 + e = e + 0 = e,   0 * e = e * 0 = 0,   1 * e = e * 1 = e
def simp(e: Exp) : Exp = e match {
  case N(v) => N(v)
  case Plus(l, r) =>
    val (ls, rs) = (simp(l), simp(r))
    if (ls == N(0)) rs
    else if (rs == N(0)) ls
    else Plus(ls, rs)
  case Times(l, r) =>
    val (ls, rs) = (simp(l), simp(r))
    if (ls == N(0) || rs == N(0)) N(0)
    else if (ls == N(1)) rs
    else if (rs == N(1)) ls
    else Times(ls, rs)
}

println(eval(e))

// e2 = (0 + 1) * (0 + 9); simp first removes the additions of 0
// and then the multiplication by 1, leaving just N(9)
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))

// Tokens and Reverse Polish Notation
abstract class Token
case class T(n: Int) extends Token   // a number
case object PL extends Token         // the operator +
case object TI extends Token         // the operator *

// translates an expression into reverse Polish notation:
// both operands first, then the operator
def rp(e: Exp) : List[Token] = e match {
  case N(v) => T(v) :: Nil
  case Plus(l, r) => (rp(l) ++ rp(r)) :+ PL
  case Times(l, r) => (rp(l) ++ rp(r)) :+ TI
}
// e2 in infix notation and as a token list in reverse Polish notation
println(string(e2))
println(rp(e2))

// evaluates a token list in reverse Polish notation with the help
// of a stack: numbers are pushed, an operator replaces the two
// topmost entries by their sum/product; the result is the top of
// the final stack
def comp(ls: List[Token], st: List[Int]) : Int =
  ls.foldLeft(st) {
    case (stack, T(n)) => n :: stack
    case (a :: b :: below, PL) => (a + b) :: below
    case (a :: b :: below, TI) => (a * b) :: below
  }.head

// evaluates e via its reverse-Polish form, starting with an empty stack
comp(rp(e), Nil)

// turns one item of an RPN string into a token; everything
// except "+" and "*" is read as a number
def proc(s: String) : Token =
  if (s == "+") PL
  else if (s == "*") TI
  else T(s.toInt)

// evaluates an RPN string directly: the items are split at the
// spaces and turned into tokens by proc
comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)




// fully parenthesised infix rendering of an expression
def string(e: Exp) : String = e match {
  case N(v) => v.toString
  case Plus(l, r) => s"(${string(l)} + ${string(r)})"
  case Times(l, r) => s"(${string(l)} * ${string(r)})"
}

// example expression: 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))

println(string(e))

// the integer value denoted by an expression
def eval(e: Exp) : Int = e match {
  case N(v) => v
  case Plus(l, r) =>
    val lhs = eval(l)
    lhs + eval(r)
  case Times(l, r) =>
    val lhs = eval(l)
    lhs * eval(r)
}

// value of the example expression 9 + (3 * 4)
eval(e)

// simplifies an expression bottom-up: additions of 0 and
// multiplications by 1 are dropped, multiplications by 0 become 0
def simp(e: Exp) : Exp = e match {
  case N(v) => N(v)
  case Plus(l, r) => (simp(l), simp(r)) match {
    case (N(0), right) => right
    case (left, N(0)) => left
    case (left, right) => Plus(left, right)
  }
  case Times(l, r) => (simp(l), simp(r)) match {
    case (N(0), _) | (_, N(0)) => N(0)
    case (N(1), right) => right
    case (left, N(1)) => left
    case (left, right) => Times(left, right)
  }
}


// e2 = (0 + 1) * (0 + 9), printed before and after simplification
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))

// Token and Reverse Polish Notation
abstract class Token
case class T(n: Int) extends Token   // a number
case object PL extends Token         // the operator +
case object TI extends Token         // the operator *

// translates an expression into reverse Polish notation:
// both operands first, then the operator
def rp(e: Exp) : List[Token] = e match {
  case N(v) => T(v) :: Nil
  case Plus(l, r) => (rp(l) ++ rp(r)) :+ PL
  case Times(l, r) => (rp(l) ++ rp(r)) :+ TI
}

// evaluates a token list in reverse Polish notation: numbers are
// pushed onto the stack, an operator replaces the two topmost
// entries by their sum/product; the answer is the top of the
// final stack
def comp(ts: List[Token], stk: List[Int]) : Int =
  ts.foldLeft(stk) {
    case (stack, T(n)) => n :: stack
    case (a :: b :: below, PL) => (a + b) :: below
    case (a :: b :: below, TI) => (a * b) :: below
  }.head

// rebuilds an expression from a token list in reverse Polish
// notation; works like comp, but the stack holds expressions.
// The topmost stack entry is the right operand of an operator.
def exp(ts: List[Token], st: List[Exp]) : Exp =
  ts.foldLeft(st) {
    case (stack, T(n)) => N(n) :: stack
    case (r :: l :: below, PL) => Plus(l, r) :: below
    case (r :: l :: below, TI) => Times(l, r) :: below
  }.head

// round trip: e2 is turned into reverse-Polish tokens by rp and
// rebuilt from them by exp
exp(rp(e2), Nil)

// turns one item of an RPN string into a token; everything except
// "+" and "*" must be readable as an Int (toInt throws a
// NumberFormatException otherwise).
// The result type is given explicitly so that it is Token and not
// a wider type inferred from the three branches.
def proc(s: String) : Token = s match {
  case "+" => PL
  case "*" => TI
  case n => T(n.toInt)
}


// from an RPN string to the fully parenthesised infix form
string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))



// Tail recursion
//================


// factorial on Longs - NOT tail recursive: the multiplication
// happens after the recursive call returns
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}

// factorial on BigInts - same (non-tail) recursion as fact,
// but the result cannot overflow
def factB(n: BigInt): BigInt =
  if (n != 0) n * factB(n - 1) else 1

// NOTE(review): factB is not tail recursive either, so a depth of
// 100000 presumably also exhausts the stack with default JVM
// settings - confirm
factB(100000)

// results above fact(20) no longer fit into a Long
fact(10)              //ok
fact(10000)           // produces a stackoverflow

// factorial with an accumulator: the product so far is passed
// along in acc, so the recursive call is the very last action
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc

// the accumulator has to start with 1
factT(10, 1)
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  // acc carries the product computed so far
  if (n.signum == 0) acc
  else factT(n - 1, acc * n)



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions



// Jumping Towers
//================


// the first n non-empty suffixes of xs
// for n = 1 => only xs itself



def moves(xs: List[Int], n: Int) : List[List[Int]] =
  // stop when the list is used up or n suffixes were produced
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)


moves(List(5,1,0), 1)   // List(List(5,1,0))
moves(List(5,1,0), 2)   // List(List(5,1,0), List(1,0))
moves(List(5,1,0), 5)   // List(List(5,1,0), List(1,0), List(0))

// checks whether a jump tour exists at all: from a field with
// value x one can either jump beyond the end of the list (if fewer
// than x fields remain) or continue from one of the next x fields
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case x :: rest =>
    rest.length < x || moves(rest, x).exists(next => search(next))
}


// some test cases; the empty list and every list whose first field
// is larger than the number of remaining fields give true
search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))

// generates *all* jump tours
//    if we are only interested in the shortest one, we could
//    short-circuit the calculation and only return List(x) in
//    the case where xs.length < x, because no tour can be shorter
//    than 1
// 

def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case x :: rest =>
    // every tour that continues from one of the next x fields,
    // with the current field put in front
    val longer = for {
      suffix <- moves(rest, x)
      tour <- jumps(suffix)
    } yield x :: tour
    // the end can be reached directly from here: x alone is a tour
    if (rest.length < x) List(x) :: longer else longer
}

// minBy(_.length) picks a shortest tour out of all tours found
println(jumps(List(5,3,2,5,1,1)).minBy(_.length))
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)                 // no tours: Nil
jumps(List(1))             // List(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)        // List(List(1,2), List(2))
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))

// shortest tours for some larger examples
jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)









// Sudoku 
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


// a game is written as 9 rows of 9 characters, where '.' stands
// for an empty field; the margins and newlines are removed so that
// the rows end up one after the other in a single string
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a position is a pair (x, y) = (column, row), both counted from 0
type Pos = (Int, Int)
val EmptyValue = '.'      // marks an empty field
val MaxValue = 9          // also the length of a row/column

val allValues = "123456789".toList   // the characters a field can hold
val indexes = (0 to 8).toList        // row and column indexes


// index of the first empty field in the game string, or -1 if
// there is none
def empty(game: String) = game.indexOf(EmptyValue)
// a game is done when no empty field is left
def isDone(game: String) = empty(game) < 0
// the first empty field as a position (column, row)
def emptyPosition(game: String) = {
  val idx = empty(game)
  (idx % MaxValue, idx / MaxValue)
}


// the characters in row y, from left to right
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game(y * MaxValue + col)
// the characters in column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * MaxValue)

// the nine characters of the 3x3 box that contains position pos
def get_box(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  // coordinates of the top-left corner of the box
  val x0 = x - x % 3
  val y0 = y - y % 3
  for {
    col <- (x0 until x0 + 3).toList
    row <- (y0 until y0 + 3).toList
  } yield game(col + row * MaxValue)
}

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))


// this is not mutable!!
// (updated gives back a new string in which the character at
// index pos is replaced by value)
def update(game: String, pos: Int, value: Char): String = 
  game.updated(pos, value)

// everything found in the column, the row and the box of pos
// (duplicates and empty fields included)
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  get_col(game, x) ::: get_row(game, y) ::: get_box(game, pos)
}

// the values that can still be placed at pos
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues diff taken
}

//candidates(game0, (0,0))

// one row per line, preceded by a newline
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")


// exhaustive depth-first search: fills the first empty field with
// every candidate (in parallel) and collects all solutions
def search(game: String): List[String] =
  if (isDone(game)) List(game)
  else {
    val slot = empty(game)
    val options = candidates(game, emptyPosition(game))
    options.par.map(c => search(update(game, slot, c))).toList.flatten
  }

// all solutions of game0, each one pretty-printed
search(game0).map(pretty)

// further example games, written in the same format as game0
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")


// game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")


// search returns every solution, so game3 yields more than one
search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)

// for measuring time: runs code i times and reports the
// total elapsed time in seconds
def time_needed[T](i: Int, code: => T) = {
  val t0 = System.nanoTime()
  (1 to i).foreach(_ => code)
  val t1 = System.nanoTime()
  s"${(t1 - t0) / 1.0e9} secs"
}

// time for one complete search of the hard game
time_needed(1, search(game2))

// tail recursive version that searches 
// for all solutions
//
// games is the list of (partially filled) games that still have to
// be explored, sols collects the finished games found so far (the
// one found last comes first). The annotation makes the compiler
// check that the function really is tail recursive.
@tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val slot = empty(game)    // index of the first empty field
      val cs = candidates(game, emptyPosition(game))
      // the successor games go to the front => depth-first search
      searchT(cs.map(c => update(game, slot, c)) ::: rest, sols)
    }
  }
}

// starts with game3 as the only game to explore and no solutions yet
searchT(List(game3), List()).map(pretty)


// tail recursive version that searches 
// for a single solution
//
// games is the list of (partially filled) games that still have to
// be explored; the first finished game that is encountered is
// returned, None means there is no solution. The annotation makes
// the compiler check that the function really is tail recursive.
@tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val slot = empty(game)    // index of the first empty field
      val cs = candidates(game, emptyPosition(game))
      // the successor games go to the front => depth-first search
      search1T(cs.map(c => update(game, slot, c)) ::: rest)
    }
  }
}

// a single solution of game3, and the time for finding it 10 times
search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))


// game with multiple solutions
// NOTE(review): this definition has the same contents as the
// earlier game3; a second val with the same name is only accepted
// when the parts are evaluated one by one (REPL/worksheet) - confirm
// how the file is meant to be run
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

// all solutions vs. a single solution
searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)

// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
// 
// Unfortunately, Scala, because of current limitations in 
// the JVM is not as clever as other functional languages. It can 
// only optimise "self-tail calls". This excludes the cases of 
// multiple functions making tail calls to each other. Well,
// nothing is perfect.