// progs/lecture3.scala
// author Christian Urban <urbanc@in.tum.de>
// Fri, 22 Nov 2019 16:41:45 +0000
// changeset 323 1f8005b4cdf6
// parent 321 7b0055205ec9
// child 325 ca9c1cf929fa
// permissions -rw-r--r--
// updated

// Scala Lecture 3
//=================

// - last week
//
// option type 
// higher-order function


// plain addition taking both arguments at once
def add(x: Int, y: Int) : Int = {
  val sum = x + y
  sum
}

// adds 5 by fixing the first argument of add
def plus5(x: Int) : Int = add(x = 5, y = x)

plus5(6)

// curried addition: add2(3) is a function that still waits for y
def add2(x: Int)(y: Int) : Int = x + y

// add2 is applied to both arguments here, so the result
// is an Int and not a function of type Int => Int
def plus3(y: Int) : Int = add2(3)(y)

plus3(9)

// giving the curried add2 only its first argument yields a function
List(1,2,3,4,5).map(add2(3))
// the underscore turns the uncurried add into a one-argument function
List(1,2,3,4,5).map(add(3, _))

type Pos = (Int, Int)

// keeps a position only if both coordinates are below 5;
// the else-branch is needed so that the result has type
// Option[Pos] - an if without else would be typed as Any
// and the flatten below would not compile
def test(p: Pos) : Option[Pos] = {
  if (p._1 < 5 && p._2 < 5) Some(p) else None
}

val l = List((1,2), (5,3), (2,5), (1,3))

// flatten drops the Nones and unwraps the Somes
l.map(test).flatten

// Recursion Again ;o)
//====================


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page; if the page cannot be
// fetched, a message is printed and the empty string returned
def get_page(url: String) : String = {
  // Using closes the source again once the prefix has been read;
  // like Try it turns exceptions (also from opening the URL)
  // into a Failure
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); ""}
}

// regex for URLs and emails
// http_pattern: an http or https URL inside double quotes; the
//   surrounding quotes are part of the match
// email_pattern: three groups - the part before the @, the domain,
//   and a final part of 2 to 6 characters; only lower-case letters
//   are accepted
val http_pattern = """"https?://[^"]*"""".r
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//  val s = "foo bla christian@kcl.ac.uk 1234567"
//  email_pattern.findAllIn(s).toList

// strips one character from each end of a string
// (used for removing the quotes around a matched URL)
def unquote(s: String) = s.slice(1, s.length - 1)

// the set of all quoted URLs occurring in page, with the quotes removed
def get_all_URLs(page: String): Set[String] = {
  val quoted = http_pattern.findAllIn(page)
  (for (q <- quoted) yield unquote(q)).toSet
}

// a naive version of crawl - follows links down to depth n
// and may visit the same page more than once
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s"  Visiting: $n $url")
    val links = get_all_URLs(get_page(url))
    links.foreach(link => crawl(link, n - 1))
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

// prints the pages visited when following links up to depth 2
// (this fetches the pages, so it needs network access)
crawl(startURL, 2)

for (x <- List(1,2,3,4,5,6)) println(x)

// a primitive email harvester
// returns the email addresses found on the page at url together
// with those found by following links, up to depth n
def emails(url: String, n: Int) : Set[String] = {
  if (n == 0) Set()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    // .par processes the linked pages in parallel; under Scala 2.13
    // this presumably needs the parallel-collections jar and import
    // mentioned in the comment further below - confirm for your setup
    new_emails ++ (for (u <- get_all_URLs(page).par) yield emails(u, n - 1)).flatten
  }
}

emails(startURL, 3)


// if we want to explore the internet "deeper", then we
// first have to parallelise the request of webpages:
//
// scala -cp scala-parallel-collections_2.13-0.2.0.jar 
// import scala.collection.parallel.CollectionConverters._



// another well-known example
//============================

// prints the instruction for moving a single disc
def move(from: Char, to: Char) = {
  val instruction = s"Move disc from $from to $to!"
  println(instruction)
}

// Towers of Hanoi: prints the moves for shifting n discs from
// peg `from` to peg `to`, using `via` as the spare peg
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = n match {
  case 0 => ()
  case _ =>
    hanoi(n - 1, from, to, via)   // park the n - 1 upper discs on the spare peg
    move(from, to)                // shift the remaining disc
    hanoi(n - 1, via, from, to)   // put the parked discs on top of it
}

hanoi(4, 'A', 'B', 'C')



// Jumping Towers
//================


// the first n (non-empty) suffixes of xs
// for 1 => include xs

// xs itself, then xs without its first element, and so on;
// stops after n lists or when xs is used up
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)


moves(List(5,1,0), 1)
moves(List(5,1,0), 2)
moves(List(5,1,0), 5)

// checks whether a jump tour exists at all

def search(xs: List[Int]) : Boolean = {
  if (xs.isEmpty) true
  else {
    val step = xs.head
    val rest = xs.tail
    // either the jump leaves the field directly, or the tour
    // continues from one of the next `step` fields
    rest.length < step || moves(rest, step).exists(search(_))
  }
}


search(List(5,3,2,5,1,1))
search(List(3,5,1,0,0,0,1))
search(List(3,5,1,0,0,0,0,1))
search(List(3,5,1,0,0,0,1,1))
search(List(3,5,1))
search(List(5,1,1))
search(Nil)
search(List(1))
search(List(5,1,1))
search(List(3,5,1,0,0,0,0,0,0,0,0,1))

// generates *all* jump tours
//    if we are only interested in the shortest one, we could
//    short-circuit the calculation and only return List(x) in
//    case where xs.length < x, because no tour can be shorter
//    than 1
// 

// every tour is given as the list of jump lengths used
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case step :: rest =>
    // the tours that continue from one of the next `step` fields
    val longer = for {
      next <- moves(rest, step)
      tour <- jumps(next)
    } yield step :: tour
    // a single jump is a tour on its own if it leaves the field
    if (rest.length < step) List(step) :: longer else longer
}

jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))
jumps(List(5,1,1))
jumps(Nil)
jumps(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))

jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)






// User-defined Datatypes
//========================

// binary trees: numbers at the leaves, labelled inner nodes;
// sealed like the other datatypes in this file, so that the
// compiler can check pattern matches over Tree for exhaustiveness
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree

List(Leaf(20), Node("foo", Leaf(1), Leaf(2)))

// an enumeration-like datatype with four values; because Colour is
// sealed, all of its cases have to be declared in this file
sealed abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour
case object Yellow extends Colour


// true for Green only
def fav_colour(c: Colour) : Boolean =
  c == Green

fav_colour(Green)

// ... a tiny bit more useful: Roman Numerals

// one case object per Roman digit
sealed abstract class RomanDigit 
case object I extends RomanDigit 
case object V extends RomanDigit 
case object X extends RomanDigit 
case object L extends RomanDigit 
case object C extends RomanDigit 
case object D extends RomanDigit 
case object M extends RomanDigit 

// a Roman numeral is a list of digits
type RomanNumeral = List[RomanDigit] 

List(X,I,M,D)

/*
I    -> 1
II   -> 2
III  -> 3
IV   -> 4
V    -> 5
VI   -> 6
VII  -> 7
VIII -> 8
IX   -> 9
X    -> 10
*/

// converts a Roman numeral into the number it denotes; the
// two-digit subtractive forms (like IV or IX) are tried before
// the single digits, and no check for well-formedness is made
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
  // subtractive pairs
  case C::M::rest => 900 + RomanNumeral2Int(rest)
  case C::D::rest => 400 + RomanNumeral2Int(rest)
  case X::C::rest => 90 + RomanNumeral2Int(rest)
  case X::L::rest => 40 + RomanNumeral2Int(rest)
  case I::X::rest => 9 + RomanNumeral2Int(rest)
  case I::V::rest => 4 + RomanNumeral2Int(rest)
  // single digits
  case M::rest => 1000 + RomanNumeral2Int(rest)
  case D::rest => 500 + RomanNumeral2Int(rest)
  case C::rest => 100 + RomanNumeral2Int(rest)
  case L::rest => 50 + RomanNumeral2Int(rest)
  case X::rest => 10 + RomanNumeral2Int(rest)
  case V::rest => 5 + RomanNumeral2Int(rest)
  case I::rest => 1 + RomanNumeral2Int(rest)
  // nothing left
  case Nil => 0
}

RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017


// String interpolations as patterns

val date = "2019-11-26"
// an s-interpolator used as a pattern: binds year, month and
// day to the parts of date that are separated by the dashes
val s"$year-$month-$day" = date

// parses dates of the shapes yyyy-mm-dd, dd/mm/yyyy and dd.mm.yyyy;
// the result is always ordered (day, month, year)
// returns None if the string has none of these shapes or if one
// of the three components is not a number
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // toIntOption instead of toInt, so that a non-numeric part
  // gives None rather than a NumberFormatException
  def numbers(day: String, month: String, year: String) : Option[(Int, Int, Int)] =
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)

  date match {
    case s"$year-$month-$day" => numbers(day, month, year)
    case s"$day/$month/$year" => numbers(day, month, year)
    case s"$day.$month.$year" => numbers(day, month, year)
    case _ => None
  }
}

parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")


// User-defined Datatypes and Pattern Matching
//=============================================

// trees



// expressions

// arithmetic expressions built from numbers, addition and
// multiplication
sealed abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp     // for e1 + e2
case class Times(e1: Exp, e2: Exp) extends Exp    // for e1 * e2

// renders an expression as a string; every Plus and Times
// is put into parentheses
def string(e: Exp) : String = e match {
  case Times(l, r) => "(" + string(l) + " * " + string(r) + ")"
  case Plus(l, r) => "(" + string(l) + " + " + string(r) + ")"
  case N(value) => value.toString
}

val e = Plus(N(9), Times(N(3), N(4)))
println(string(e))

// calculates the value of an expression
def eval(e: Exp) : Int = e match {
  case Times(l, r) =>
    val lv = eval(l)
    val rv = eval(r)
    lv * rv
  case Plus(l, r) =>
    val lv = eval(l)
    val rv = eval(r)
    lv + rv
  case N(value) => value
}

println(eval(e))

// simplifies an expression bottom-up using the laws
//   0 + e = e + 0 = e,  0 * e = e * 0 = 0,  1 * e = e * 1 = e
def simp(e: Exp) : Exp = e match {
  case Plus(l, r) =>
    val (ls, rs) = (simp(l), simp(r))
    if (ls == N(0)) rs
    else if (rs == N(0)) ls
    else Plus(ls, rs)
  case Times(l, r) =>
    val (ls, rs) = (simp(l), simp(r))
    if (ls == N(0) || rs == N(0)) N(0)
    else if (ls == N(1)) rs
    else if (rs == N(1)) ls
    else Times(ls, rs)
  case N(n) => N(n)
}


val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))


// Tokens and Reverse Polish Notation
sealed abstract class Token
case class T(n: Int) extends Token   // a number
case object PL extends Token         // the token for Plus / "+"
case object TI extends Token         // the token for Times / "*"

// translates an expression into reverse Polish notation:
// first both operands, then the operator
def rp(e: Exp) : List[Token] = e match {
  case Times(l, r) => (rp(l) ++ rp(r)) :+ TI
  case Plus(l, r) => (rp(l) ++ rp(r)) :+ PL
  case N(value) => T(value) :: Nil
}
println(string(e2))
println(rp(e2))

// evaluates a token list in reverse Polish notation; st is the
// stack of intermediate results, with its top at the head
// (there are no cases for an operator with fewer than two numbers
// on the stack, or for an empty stack at the end)
def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
  case (T(num) :: tokens, stack) => comp(tokens, num :: stack)
  case (TI :: tokens, top :: next :: stack) => comp(tokens, (top * next) :: stack)
  case (PL :: tokens, top :: next :: stack) => comp(tokens, (top + next) :: stack)
  case (Nil, stack) => stack.head
}

comp(rp(e), Nil)

// turns a string into a token; everything apart
// from "+" and "*" has to be a number
def proc(s: String) : Token =
  if (s == "+") PL
  else if (s == "*") TI
  else T(s.toInt)

comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)




// Sudoku 
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


// a game is a single string holding the nine rows one after
// the other (margins and newlines are stripped); a dot stands
// for an empty field
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// positions are used below as (column, row) pairs
type Pos = (Int, Int)
val EmptyValue = '.'   // marks an empty field
val MaxValue = 9       // number of fields per row and per column

// the values a field can hold, and the valid row/column indexes
val allValues = "123456789".toList
val indexes = (0 to 8).toList


// index of the first empty field in the string (-1 if there is none)
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// a game is done once no empty field is left
def isDone(game: String) = empty(game) < 0
// the first empty field as a (column, row) pair
def emptyPosition(game: String) = {
  val idx = empty(game)
  (idx % MaxValue, idx / MaxValue)
}


// the nine fields of row y, from left to right
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game(y * MaxValue + col)
// the nine fields of column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * MaxValue)

// the nine fields of the 3x3 box containing pos, listed
// column by column
def get_box(game: String, pos: Pos): List[Char] = {
    val (x, y) = pos
    // upper-left corner of the box
    val left = x - x % 3
    val top = y - y % 3
    for {
      col <- List(left, left + 1, left + 2)
      row <- List(top, top + 1, top + 2)
    } yield game(col + row * MaxValue)
}

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
//get_box(game0, (3, 1))


// this is not mutable!!
// returns a new string with the character at index pos
// replaced by value; game itself is left unchanged
def update(game: String, pos: Int, value: Char): String = 
  game.updated(pos, value)

// all entries in the column, row and box of pos
// (this includes the empty markers)
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  get_col(game, x) ::: get_row(game, y) ::: get_box(game, pos)
}

// the values that can still be placed at pos
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.filterNot(v => taken.contains(v))
}

//candidates(game0, (0,0))

// one row per line, preceded by a newline
def pretty(game: String): String = {
  val rows = game.grouped(MaxValue)
  rows.mkString("\n", "\n", "")
}


// exhaustive depth-first search: fills the first empty field
// with every candidate in turn and collects all solutions
def search(game: String): List[String] =
  if (isDone(game)) List(game)
  else {
    val idx = empty(game)
    val choices = candidates(game, emptyPosition(game))
    choices.flatMap(c => search(update(game, idx, c)))
  }

search(game0).map(pretty)

// another game, in the same format as game0
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")


// game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")


search(game1).map(pretty)
search(game3).map(pretty)
search(game2).map(pretty)

// for measuring time: runs code i times and reports
// the total time in seconds
def time_needed[T](i: Int, code: => T) = {
  val before = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - before
  s"${elapsed / 1.0e9} secs"
}

time_needed(1, search(game2))




// Tail recursion
//================


// factorial on Long; not tail-recursive, since the
// multiplication is done after the recursive call returns
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}

// the same with arbitrary-precision numbers
def factB(n: BigInt): BigInt =
  if (n != 0) n * factB(n - 1) else 1

factB(100000)

fact(10)              //ok
fact(10000)           // produces a stackoverflow

// factorial with an accumulator: the recursive call is the
// last thing that happens, the result so far is passed in acc
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc

factT(10, 1)
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

// with the annotation the compiler rejects the definition
// if the function is not tail-recursive
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  val finished = n == 0
  if (finished) acc else factT(n - 1, n * acc)
}



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions

// tail recursive version that searches 
// for all solutions
//
// games is the work-list of games that still need to be explored,
// sols accumulates the finished games (each new solution is put
// at the front); @tailrec makes the compiler check that the
// function really is tail-recursive

@tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val idx = empty(game)   // computed once, not once per candidate
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, idx, c)) ::: rest, sols)
    }
  }
}

searchT(List(game3), List()).map(pretty)


// tail recursive version that searches 
// for a single solution
//
// games is the work-list of games that still need to be explored;
// the search stops at the first finished game and returns None if
// the work-list runs empty; @tailrec makes the compiler check that
// the function really is tail-recursive

@tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val idx = empty(game)   // computed once, not once per candidate
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, idx, c)) ::: rest)
    }
  }
}

search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))


// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)

// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
// 
// Unfortunately, because of current limitations in the JVM,
// Scala is not as clever as other functional languages. It can 
// only optimise "self-tail calls". This excludes the cases of 
// multiple functions making tail calls to each other. Well,
// nothing is perfect.