progs/lecture3.scala
author Christian Urban <urbanc@in.tum.de>
Tue, 19 Nov 2019 06:38:20 +0000
changeset 322 755d165633ec
parent 321 7b0055205ec9
child 323 1f8005b4cdf6
permissions -rw-r--r--
updated

// Scala Lecture 3
//=================

// - last week
//
// option type 
// higher-order function



// Recursion Again ;o)
//====================


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page
def get_page(url: String) : String = {
  Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); ""}
}

// regex for URLs and emails
val http_pattern = """"https?://[^"]*"""".r
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//  val s = "foo bla christian@kcl.ac.uk 1234567"
//  email_pattern.findAllIn(s).toList

// drops the first and last character from a string
def unquote(s: String) = s.drop(1).dropRight(1)

def get_all_URLs(page: String): Set[String] = 
  http_pattern.findAllIn(page).map(unquote).toSet

// a naive version of crawl - searches until a given depth,
// visits pages potentially more than once
def crawl(url: String, n: Int) : Unit = {
  if (n == 0) ()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    for (u <- get_all_URLs(page)) crawl(u, n - 1)
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

crawl(startURL, 2)



// a primitive email harvester
def emails(url: String, n: Int) : Set[String] = {
  if (n == 0) Set()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    new_emails ++ (for (u <- get_all_URLs(page)) yield emails(u, n - 1)).flatten
  }
}

emails(startURL, 2)


// if we want to explore the internet "deeper", then we
// first have to parallelise the request of webpages:
//
// scala -cp scala-parallel-collections_2.13-0.2.0.jar 
// import scala.collection.parallel.CollectionConverters._



// another well-known example
//============================

def move(from: Char, to: Char) =
  println(s"Move disc from $from to $to!")

def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n == 0) ()
  else {
    hanoi(n - 1, from, to, via)
    move(from, to)
    hanoi(n - 1, via, from, to)
  }
} 

hanoi(4, 'A', 'B', 'C')



// Jumping Towers
//================


// the first n prefixes of xs
// for 1 => include xs

def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
  case (Nil, _) => Nil
  case (xs, 0) => Nil
  case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
}


moves(List(5,1,0), 1)
moves(List(5,1,0), 2)
moves(List(5,1,0), 5)

// checks whether a jump tour exists at all

def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case x::xs =>
    if (xs.length < x) true 
    else moves(xs, x).exists(search(_))
}


search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))

// generates *all* jump tours
//    if we are only interested in the shortest one, we could
//    shortcircut the calculation and only return List(x) in
//    case where xs.length < x, because no tour can be shorter
//    than 1
// 

def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case x::xs => {
    val children = moves(xs, x)
    val results = children.map(cs => jumps(cs).map(x :: _)).flatten
    if (xs.length < x) List(x)::results else results
  }
}

jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)
jumps(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))

jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)






// User-defined Datatypes
//========================


sealed abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour


def fav_colour(c: Colour) : Boolean = c match {
  case Red   => false
  case Green => true
  case Blue  => false 
}

fav_colour(Green)

// ... a tiny bit more useful: Roman Numerals

sealed abstract class RomanDigit 
case object I extends RomanDigit 
case object V extends RomanDigit 
case object X extends RomanDigit 
case object L extends RomanDigit 
case object C extends RomanDigit 
case object D extends RomanDigit 
case object M extends RomanDigit 

type RomanNumeral = List[RomanDigit] 

List(X,I)

/*
I    -> 1
II   -> 2
III  -> 3
IV   -> 4
V    -> 5
VI   -> 6
VII  -> 7
VIII -> 8
IX   -> 9
X    -> 10
*/

def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { 
  case Nil => 0
  case M::r    => 1000 + RomanNumeral2Int(r)  
  case C::M::r => 900 + RomanNumeral2Int(r)
  case D::r    => 500 + RomanNumeral2Int(r)
  case C::D::r => 400 + RomanNumeral2Int(r)
  case C::r    => 100 + RomanNumeral2Int(r)
  case X::C::r => 90 + RomanNumeral2Int(r)
  case L::r    => 50 + RomanNumeral2Int(r)
  case X::L::r => 40 + RomanNumeral2Int(r)
  case X::r    => 10 + RomanNumeral2Int(r)
  case I::X::r => 9 + RomanNumeral2Int(r)
  case V::r    => 5 + RomanNumeral2Int(r)
  case I::V::r => 4 + RomanNumeral2Int(r)
  case I::r    => 1 + RomanNumeral2Int(r)
}

RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017


// String interpolations as patterns

val date = "2019-11-26"
val s"$year-$month-$day" = date

def parse_date(date: String) : Option[(Int, Int, Int)]= date match {
  case s"$year-$month-$day" => Some((day.toInt, month.toInt, year.toInt))
  case s"$day/$month/$year" => Some((day.toInt, month.toInt, year.toInt))
  case s"$day.$month.$year" => Some((day.toInt, month.toInt, year.toInt))
  case _ => None
} 

parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")


// User-defined Datatypes and Pattern Matching
//=============================================

// trees

sealed abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp
case class Times(e1: Exp, e2: Exp) extends Exp

def string(e: Exp) : String = e match {
  case N(n) => s"$n"
  case Plus(e1, e2) => s"(${string(e1)} + ${string(e2)})" 
  case Times(e1, e2) => s"(${string(e1)} * ${string(e2)})"
}

val e = Plus(N(9), Times(N(3), N(4)))
println(string(e))

def eval(e: Exp) : Int = e match {
  case N(n) => n
  case Plus(e1, e2) => eval(e1) + eval(e2) 
  case Times(e1, e2) => eval(e1) * eval(e2) 
}

println(eval(e))

def simp(e: Exp) : Exp = e match {
  case N(n) => N(n)
  case Plus(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), e2s) => e2s
    case (e1s, N(0)) => e1s
    case (e1s, e2s) => Plus(e1s, e2s)
  }  
  case Times(e1, e2) => (simp(e1), simp(e2)) match {
    case (N(0), _) => N(0)
    case (_, N(0)) => N(0)
    case (N(1), e2s) => e2s
    case (e1s, N(1)) => e1s
    case (e1s, e2s) => Times(e1s, e2s)
  }  
}


val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))


// Tokens and Reverse Polish Notation
sealed abstract class Token
case class T(n: Int) extends Token
case object PL extends Token
case object TI extends Token

def rp(e: Exp) : List[Token] = e match {
  case N(n) => List(T(n))
  case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) 
  case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) 
}
println(string(e2))
println(rp(e2))

def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
  case (Nil, st) => st.head 
  case (T(n)::rest, st) => comp(rest, n::st)
  case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
  case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
}

comp(rp(e), Nil)

def proc(s: String) : Token = s match {
  case  "+" => PL
  case  "*" => TI
  case  _ => T(s.toInt) 
}

comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)




// Sudoku 
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

type Pos = (Int, Int)
val EmptyValue = '.'
val MaxValue = 9

val allValues = "123456789".toList
val indexes = (0 to 8).toList


def empty(game: String) = game.indexOf(EmptyValue)
def isDone(game: String) = empty(game) == -1 
def emptyPosition(game: String) = 
  (empty(game) % MaxValue, empty(game) / MaxValue)


def get_row(game: String, y: Int) = 
  indexes.map(col => game(y * MaxValue + col))
def get_col(game: String, x: Int) = 
  indexes.map(row => game(x + row * MaxValue))

def get_box(game: String, pos: Pos): List[Char] = {
    def base(p: Int): Int = (p / 3) * 3
    val x0 = base(pos._1)
    val y0 = base(pos._2)
    val ys = (y0 until y0 + 3).toList
    (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
}

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))


// this is not mutable!!
def update(game: String, pos: Int, value: Char): String = 
  game.updated(pos, value)

def toAvoid(game: String, pos: Pos): List[Char] = 
  (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))

def candidates(game: String, pos: Pos): List[Char] = 
  allValues.diff(toAvoid(game, pos))

//candidates(game0, (0,0))

def pretty(game: String): String = 
  "\n" + (game.sliding(MaxValue, MaxValue).mkString("\n"))


def search(game: String): List[String] = {
  if (isDone(game)) List(game)
  else {
    val cs = candidates(game, emptyPosition(game))
    cs.map(c => search(update(game, empty(game), c))).toList.flatten
  }
}

search(game0).map(pretty)

val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")


// game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")


search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)

// for measuring time
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  for (j <- 1 to i) code
  val end = System.nanoTime()
  ((end - start) / 1.0e9) + " secs"
}

time_needed(1, search(game2))




// Tail recursion
//================


def fact(n: Long): Long = 
  if (n == 0) 1 else n * fact(n - 1)

def factB(n: BigInt): BigInt = 
  if (n == 0) 1 else n * factB(n - 1)

factB(100000)

fact(10)              //ok
fact(10000)           // produces a stackoverflow

def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)

factT(10, 1)
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n == 0) acc else factT(n - 1, n * acc)



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions

// tail recursive version that searches 
// for all solutions

def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, empty(game), c)) ::: rest, sols)
    }
  }
}

searchT(List(game3), List()).map(pretty)


// tail recursive version that searches 
// for a single solution

def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, empty(game), c)) ::: rest)
    }
  }
}

search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))


// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)

// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
// 
// Unfortuantely, Scala because of current limitations in 
// the JVM is not as clever as other functional languages. It can 
// only optimise "self-tail calls". This excludes the cases of 
// multiple functions making tail calls to each other. Well,
// nothing is perfect.