// progs/lecture2.scala
// author Christian Urban <urbanc@in.tum.de>
// Fri, 30 Nov 2018 14:13:11 +0000
// changeset 227 b5f3e814a710
// parent 212 4bda49ec24da
// child 266 ca48ac1d3c3e
// permissions -rw-r--r--
// updated

// Scala Lecture 2
//=================

// UNFINISHED BUSINESS from Lecture 1
//====================================


// for measuring time
//
// Evaluates `code` exactly n times and returns the total elapsed
// wall-clock time in seconds. `code` is a by-name parameter, so it
// is re-evaluated in every iteration; for n <= 0 it is never run.
def time_needed[T](n: Int, code: => T) = {
  val start = System.nanoTime()
  // 1 to n (rather than 0 to n), so that code runs n times and not n + 1 times
  for (i <- (1 to n)) code
  val end = System.nanoTime()
  // nanoTime is in nanoseconds; convert to seconds
  (end - start) / 1.0e9
}


// a list containing the numbers 1 to 1,000,000
val list = (1 to 1000000).toList
// timing the sequential traversal of the list...
time_needed(10, for (n <- list) yield n + 42)
// ...and the same traversal over the parallel view of the list
time_needed(10, for (n <- list.par) yield n + 42)

// (ONLY WORKS OUT-OF-THE-BOX IN SCALA 2.11.8, not in SCALA 2.12)
// (would need to have this wrapped into a function, or
//  REPL called with scala -Yrepl-class-based)


// Just for Fun: Mutable vs Immutable
//====================================
//
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)


// Q: Count how many elements are in the intersections of 
//    two sets?

// Counts the elements of A that are also in B. This is the
// imperative version: it visits every element of A and bumps
// a mutable counter whenever the element is a member of B.
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var hits = 0
  A.foreach { elem =>
    if (B(elem)) hits = hits + 1
  }
  hits
}

// two small test sets: the numbers 1 to 1000 and every fourth of them
val A = (1 to 1000).toSet
val B = (1 to 1000 by 4).toSet

// B is a subset of A, so this counts the elements of B
count_intersection(A, B)

// but do not try to add .par to the for-loop above


// proper parallel version: no shared mutable counter; the
// counting is left to the parallel collection itself
def count_intersection2(A: Set[Int], B: Set[Int]) : Int = {
  val parallelA = A.par
  parallelA.count(B.contains)
}

// same result as count_intersection(A, B)
count_intersection2(A, B)


// bigger sets for comparing the running times
// (this re-defines A and B, which works in the REPL)
val A = (1 to 1000000).toSet
val B = (1 to 1000000 by 4).toSet

// sequential version versus parallel version
time_needed(100, count_intersection(A, B))
time_needed(100, count_intersection2(A, B))



// For-Comprehensions Again
//==========================

// the first produces a result, while the second does not

// with yield: the for-comprehension evaluates to a new list
// containing n * n for every n
for (n <- List(1, 2, 3, 4, 5)) yield n * n


// without yield: the body is only run for its side-effect
// (here printing each n)
for (n <- List(1, 2, 3, 4, 5)) println(n)



// Higher-Order Functions
//========================

// functions can take functions as arguments

// tests whether x is divisible by 2
def even(x: Int) : Boolean = {
  val remainder = x % 2
  remainder == 0
}
// tests whether x is not divisible by 2
// (x % 2 is -1 for negative odd numbers, so the remainder is
//  compared against 0 rather than against 1)
def odd(x: Int) : Boolean = x % 2 != 0

// the numbers 1 to 10
val lst = (1 to 10).toList

// three ways of passing the function even to filter:
// an explicit anonymous function, the placeholder syntax,
// and just the name of the function
lst.filter(x => even(x))
lst.filter(even(_))
lst.filter(even)

// how many elements satisfy even
lst.count(even)


// the first element satisfying even, wrapped in an Option
lst.find(even)

// a list of pairs for the sorting examples
val ps = List((3, 0), (3, 2), (4, 2), (2, 2), (2, 0), (1, 1), (1, 0))

// sortWith takes the comparison as a function:
// descending and ascending order
lst.sortWith(_ > _)
lst.sortWith(_ < _)

// lexicographic "less than" on pairs: the first components
// decide, unless they are equal, in which case the second
// components are compared
def lex(x: (Int, Int), y: (Int, Int)) : Boolean = (x, y) match {
  case ((x1, x2), (y1, y2)) =>
    if (x1 != y1) x1 < y1 else x2 < y2
}

// sorting the pairs lexicographically
ps.sortWith(lex)

// sorting by the first, respectively second, component only
ps.sortBy(_._1)
ps.sortBy(_._2)

// a pair with the largest first, respectively second, component
ps.maxBy(_._1)
ps.maxBy(_._2)



// maps (lower-case)
//===================

// twice the argument
def double(x: Int): Int = 2 * x
// the argument multiplied with itself
def square(x: Int): Int = {
  val factor = x
  factor * x
}



// the numbers 1 to 10
val lst = (1 to 10).toList

// map applies a function to every element; here every number
// is turned into the pair (double, square)
lst.map(x => (double(x), square(x)))

lst.map(square)

// this is actually how for-yield is defined in Scala:
// the following two lines compute the same list

lst.map(n => square(n))
for (n <- lst) yield square(n)

// this can be iterated

lst.map(square).filter(_ > 4)

lst.map(square).filter(_ > 4).map(square)


// let's define our own functions
// type of functions, for example f: Int => Int

// all elements except the first one
lst.tail

// our own map for lists of integers: applies f to the head
// and recurses over the tail until the list is empty
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = {
  if (lst.isEmpty) Nil
  else {
    val first = f(lst.head)
    first :: my_map_int(lst.tail, f)
  }
}

// gives the same list as lst.map(square)
my_map_int(lst, square)


// the same function written with pattern matching (a kind of
// switch statement on steroids; more about it later on):
// the list is either empty, or consists of a first element
// and a rest
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = lst match {
  case head :: rest =>
    val mapped = f(head)
    mapped :: my_map_int(rest, f)
  case Nil => Nil
}


// other function types
//
// f1: (Int, Int) => Int
// f2: List[String] => Option[Int]
// ... 

// the numbers 1 to 10, used in the examples that follow
val lst = (1 to 10).toList

// Sums up f(x) for all elements x in lst; the empty list gives 0.
//
// The work is done by a tail-recursive helper that carries the
// running total in an accumulator, so that long lists do not
// overflow the stack. f is applied to the elements from left to
// right.
def sumOf(f: Int => Int, lst: List[Int]): Int = {
  @scala.annotation.tailrec
  def loop(rest: List[Int], acc: Int): Int = rest match {
    case Nil => acc
    case x::xs => loop(xs, acc + f(x))
  }
  loop(lst, 0)
}

// sum of the squares of all elements
def sum_squares(lst: List[Int]) = sumOf(n => square(n), lst)
// sum of the cubes of all elements
def sum_cubes(lst: List[Int])   = {
  val cube = (x: Int) => x * x * x
  sumOf(cube, lst)
}

// applying both functions to lst
sum_squares(lst)
sum_cubes(lst)

// let's try it with factorial
//
// fact(n) = n * (n - 1) * ... * 1, with fact(0) = 1.
// Negative arguments are rejected with an IllegalArgumentException,
// because the recursion would otherwise never reach the base case.
// The result is an Int, so it silently overflows for larger n.
def fact(n: Int) : Int = {
  require(n >= 0, s"fact is only defined for non-negative numbers, but got $n")
  if (n == 0) 1 else n * fact(n - 1)
}

// sum of the factorials of all elements
def sum_fact(lst: List[Int]) = sumOf(n => fact(n), lst)
// applying sum_fact to lst
sum_fact(lst)





// Map type (upper-case)
//=======================

// Note the difference between map and Map

// all numbers from 1 up to, but excluding, n that divide n
// without remainder
def factors(n: Int) : List[Int] = {
  val divisors = for (d <- 1 until n if n % d == 0) yield d
  divisors.toList
}


// the numbers 1 to 10 (a val: the list is never re-assigned)
val ls = (1 to 10).toList

// every number paired with the list of its factors
val facs = ls.map(n => (n, factors(n)))

// searching the list of pairs for the entry with key 4
facs.find(_._1 == 4)

// works for lists of pairs
facs.toMap


// a lookup in a Map returns an Option; getOrElse supplies a
// default for keys that are not in the Map
facs.toMap.get(4)
facs.toMap.getOrElse(42, Nil)

val facsMap = facs.toMap

// Maps are immutable: + produces a new Map with the additional entry
val facsMap0 = facsMap + (0 -> List(1,2,3,4,5))
facsMap0.get(1)

// adding an entry for an existing key overrides it in the new
// Map only; facsMap itself is unchanged
val facsMap4 = facsMap + (1 -> List(1,2,3,4,5))
facsMap.get(1)
facsMap4.get(1)

// groupBy produces a Map: here from the length of a string
// to the list of all strings of that length
val ls = List("one", "two", "three", "four", "five")
ls.groupBy(_.length)

// there is no string of length 2, so this lookup gives None
ls.groupBy(_.length).get(2)



// Option type
//=============

// in Java, if something unusual happens, you return null;
//
// in Scala you use Option
//   - if the value is present, you use Some(value)
//   - if no value is present, you use None


// find returns the first matching element wrapped in Some,
// or None if no element matches
List(7,2,3,4,5,6).find(_ < 4)
List(5,6,7,8,9).find(_ < 4)

// operations on options

val lst = List(None, Some(1), Some(2), None, Some(3))

// flatten drops the Nones and unwraps the Somes
lst.flatten

// get unwraps a Some, but throws an exception for None
Some(1).get
None.get

// isDefined tests whether a value is present
Some(1).isDefined
None.isDefined


None.isDefined

val ps = List((3, 0), (3, 2), (4, 2), (2, 0), (1, 0), (1, 1))

// division that avoids dividing by zero: None if y is 0,
// otherwise the quotient wrapped in Some
for ((x, y) <- ps) yield {
  if (y == 0) None else Some(x / y)
}

// getOrElse is for setting a default value

val lst = List(None, Some(1), Some(2), None, Some(3))

// every None is replaced by the default 0
for (x <- lst) yield x.getOrElse(0)




// error handling with Option (no exceptions)
//
//  Try(something).getOrElse(what_to_do_in_an_exception)
//
import scala.util._
import io.Source


// reading a web-page without any error handling
// NOTE(review): the host here is inf.ucl.ac.uk, while the two lines
// below use inf.kcl.ac.uk -- presumably a deliberately failing
// request to show the exception; confirm
Source.fromURL("""http://www.inf.ucl.ac.uk/staff/urbanc/""").mkString

// with Try: if an exception is raised, the empty string is returned
Try(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString).getOrElse("")

// with Try and Option: Some(page) if reading succeeds, otherwise None
Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None)


// a function that turns strings into numbers (similar to .toInt)
// (the string below is not a number, so parseInt throws an exception)
Integer.parseInt("12u34")


// parses s as an integer: Some(number) if s can be parsed,
// None if parsing raises an exception
def get_me_an_int(s: String) : Option[Int] = {
  val attempt = Try(Integer.parseInt(s))
  attempt.toOption
}

// a mix of strings that are numbers and strings that are not
val lst = List("12345", "foo", "5432", "bar", "x21", "456")
// Some(n) for the numbers, None for everything else
for (x <- lst) yield get_me_an_int(x)

// summing all the numbers
// (flatten throws away the Nones before the sum is taken)

lst.map(get_me_an_int).flatten.sum
lst.map(get_me_an_int).flatten.sum


// flatMap combines the map and the flatten in a single step
lst.flatMap(get_me_an_int).map(_.toString)


// This may not look any better than working with null in Java, but to
// see the value, you have to put yourself in the shoes of the
// consumer of the get_me_an_int function, and imagine you didn't
// write that function.
//
// In Java, if you didn't write this function, you'd have to depend on
// the Javadoc of the get_me_an_int. If you didn't look at the Javadoc, 
// you might not know that get_me_an_int could return a null, and your 
// code could potentially throw a NullPointerException.



// even Scala is not immune to problems like this:

// indexOf gives the position of the element...
List(5,6,7,8,9).indexOf(7)
// ...but the "magic" value -1 if the element is not in the list
List(5,6,7,8,9).indexOf(10)
// and using -1 as an index raises an exception
List(5,6,7,8,9)(-1)



// Pattern Matching
//==================

// A powerful tool which is supposed to come to Java in a few years'
// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala has
// already had it for many years ;o)

// The general schema:
//
//    expression match {
//       case pattern1 => expression1
//       case pattern2 => expression2
//       ...
//       case patternN => expressionN
//    }




// remember? flatten removes the Nones and unwraps the Somes
val lst = List(None, Some(1), Some(2), None, Some(3)).flatten


// our own flatten for lists of optional integers: looks at
// the first element, keeps its value if it is a Some, skips
// it if it is a None, and recurses over the rest of the list
def my_flatten(xs: List[Option[Int]]): List[Int] = xs match {
  case Nil => Nil
  case first :: rest => first match {
    case Some(v) => v :: my_flatten(rest)
    case None => my_flatten(rest)
  }
}


// another example: the numbers 0, 1 and 2 count as "small",
// every other number as "big"
def get_me_a_string(n: Int): String = {
  if (n == 0 || n == 1 || n == 2) "small"
  else "big"
}

// 0 is matched by the first case
get_me_a_string(0)


// several values can lead to the same result: here the
// months are grouped into seasons; there is intentionally
// no case for strings that are not the name of a month
def season(month: String) = {
  val spring = Set("March", "April", "May")
  val summer = Set("June", "July", "August")
  val autumn = Set("September", "October", "November")
  val lateWinter = Set("January", "February")
  month match {
    case m if spring(m) => "It's spring"
    case m if summer(m) => "It's summer"
    case m if autumn(m) => "It's autumn"
    case "December" => "It's winter"
    case m if lateWinter(m) => "It's unfortunately winter"
  }
}
 
// November is covered by the autumn case
println(season("November"))

// What happens if no case matches?
// (season has no default case, so this raises a MatchError)
println(season("foobar"))


// Silly: fizz buzz
// "fizz" for multiples of 3, "buzz" for multiples of 5,
// "fizz buzz" for multiples of both, and otherwise the
// number itself as a string
def fizz_buzz(n: Int) : String = {
  val byThree = n % 3 == 0
  val byFive = n % 5 == 0
  if (byThree && byFive) "fizz buzz"
  else if (byThree) "fizz"
  else if (byFive) "buzz"
  else n.toString
}

// printing fizz_buzz for the numbers 0 to 20, one per line
for (n <- 0 to 20) 
 println(fizz_buzz(n))


// User-defined Datatypes
//========================


// a datatype with three values: Colour is the type, and
// Red, Green and Blue are the only values defined for it here
abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour

// pattern matching on a user-defined datatype:
// only Green is a favourite colour
def fav_colour(c: Colour) : Boolean = c match {
  case Green      => true
  case Red | Blue => false
}

// Green is the only colour for which the result is true
fav_colour(Green)


// ... a bit more useful: Roman Numerals

// the seven Roman digits as values of the type RomanDigit
abstract class RomanDigit 
case object I extends RomanDigit 
case object V extends RomanDigit 
case object X extends RomanDigit 
case object L extends RomanDigit 
case object C extends RomanDigit 
case object D extends RomanDigit 
case object M extends RomanDigit 

// a Roman numeral is represented as a list of Roman digits
type RomanNumeral = List[RomanDigit] 

// an example of a Roman numeral in the list representation
List(X,I)

// The first ten Roman numerals and the numbers they stand for.
// This table is a note for the reader and not code: names such
// as II or VIII are not defined as identifiers.
//
//   I    -> 1
//   II   -> 2
//   III  -> 3
//   IV   -> 4
//   V    -> 5
//   VI   -> 6
//   VII  -> 7
//   VIII -> 8
//   IX   -> 9
//   X    -> 10

// Translates a Roman numeral into the number it stands for.
//
// The two-digit combinations (CM, CD, XC, XL, IX, IV) are
// tried first; if none of them is at the front of the list,
// the first digit counts with its own value. In both cases
// the rest of the list is translated recursively and added.
def RomanNumeral2Int(rs: RomanNumeral): Int = {
  // the value of a single Roman digit
  def digit(d: RomanDigit): Int = d match {
    case M => 1000
    case D => 500
    case C => 100
    case L => 50
    case X => 10
    case V => 5
    case I => 1
  }
  rs match {
    case Nil => 0
    case C::M::rest => 900 + RomanNumeral2Int(rest)
    case C::D::rest => 400 + RomanNumeral2Int(rest)
    case X::C::rest => 90 + RomanNumeral2Int(rest)
    case X::L::rest => 40 + RomanNumeral2Int(rest)
    case I::X::rest => 9 + RomanNumeral2Int(rest)
    case I::V::rest => 4 + RomanNumeral2Int(rest)
    case d::rest    => digit(d) + RomanNumeral2Int(rest)
  }
}

// some tests; the expected results are given in the comments
RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017


// another example
//=================

// Once upon a time, in a complete fictional 
// country there were Persons...


// the different kinds of Persons: some of them carry data
// (case classes), others are just single values (case objects)
abstract class Person
case object King extends Person
case class Peer(deg: String, terr: String, succ: Int) extends Person
case class Knight(name: String) extends Person
case class Peasant(name: String) extends Person
// Clown is used by the ranking and by the list of people further
// below, so it needs to be defined as a Person as well
case object Clown extends Person


// how a Person is addressed; the succ component of a Peer
// plays no role for the title
def title(p: Person): String = p match {
  case King => "His Majesty the King"
  case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
  case Knight(name) => s"Sir ${name}"
  case Peasant(name) => name
  case Clown => "The Clown"
}

// Tests whether p1 ranks above p2: the King is above
// everybody, a Peer is above Knights, Peasants and the Clown,
// a Knight is above Peasants and the Clown, and the Clown is
// above Peasants. In all other cases the result is false.
def superior(p1: Person, p2: Person): Boolean = p1 match {
  case King => true
  case _: Peer => p2 match {
    case Knight(_) | Peasant(_) | Clown => true
    case _ => false
  }
  case _: Knight => p2 match {
    case Peasant(_) | Clown => true
    case _ => false
  }
  case Clown => p2 match {
    case Peasant(_) => true
    case _ => false
  }
  case _ => false
}

// one Person of every kind, in no particular order
val people = List(Knight("David"), 
                  Peer("Duke", "Norfolk", 84), 
                  Peasant("Christian"), 
                  King, 
                  Clown)

// sorted by rank, printed one Person per line
println(people.sortWith(superior).mkString("\n"))

// the doubled backslash stands for a backslash character, so
// this prints a backslash followed by n and not a line break
print("123\\n456")


// Tail recursion
//================


// Factorial on Longs. The multiplication happens after the
// recursive call returns, so this function is NOT tail-recursive
// and needs stack space for every call.
// Negative arguments are rejected with an IllegalArgumentException,
// because the recursion would otherwise never reach the base case.
def fact(n: Long): Long = {
  require(n >= 0, s"fact is only defined for non-negative numbers, but got $n")
  if (n == 0) 1 else n * fact(n - 1)
}

// small arguments are fine, but every call needs its own
// stack frame, which is a problem for large arguments
fact(10)              //ok
fact(10000)           // produces a stackoverflow

// Factorial with an accumulator: the result so far is passed
// along in acc, and the recursive call is the last thing the
// function does. factT(n, 1) calculates the factorial of n.
// Negative arguments are rejected with an IllegalArgumentException,
// because the recursion would otherwise never reach n == 0.
def factT(n: BigInt, acc: BigInt): BigInt = {
  require(n >= 0, s"factT is only defined for non-negative numbers, but got $n")
  if (n == 0) acc else factT(n - 1, n * acc)
}

// the accumulator starts with 1; also large arguments work
factT(10, 1)
factT(100000, 1)

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

// The annotation makes the compiler check that the function is
// really tail-recursive (otherwise it reports an error).
// factT(n, 1) calculates the factorial of n; the result so far
// is passed along in acc.
// Negative arguments are rejected with an IllegalArgumentException,
// because the recursion would otherwise never reach n == 0.
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  require(n >= 0, s"factT is only defined for non-negative numbers, but got $n")
  if (n == 0) acc else factT(n - 1, n * acc)
}



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page
//
// The page is read with the ISO-8859-1 encoding and at most the
// first 10000 characters are returned. If anything goes wrong
// (for example the URL is malformed or cannot be read), a message
// is printed and the empty string is returned.
def get_page(url: String) : String = {
  Try {
    val source = Source.fromURL(url)("ISO-8859-1")
    // the source is closed again, even if reading fails half-way
    try source.take(10000).mkString finally source.close()
  }.getOrElse { println(s"  Problem with: $url"); ""}
}

// regex for URLs and emails

// a URL in double quotes: the quotes are part of the match
val http_pattern = """"https?://[^"]*"""".r
// an email address of the form name@domain.tld
// (only lower-case letters are matched)
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//email_pattern.findAllIn
//  ("foo bla christian@kcl.ac.uk 1234567").toList


// removes the first and the last character of a string;
// strings with fewer than two characters become empty
def unquote(s: String) = {
  val lastIndex = s.length - 1
  s.slice(1, lastIndex)
}

// the set of all URLs found in a page: every match of
// http_pattern, with the surrounding quotes removed
def get_all_URLs(page: String): Set[String] = {
  val quoted = http_pattern.findAllIn(page)
  (for (q <- quoted) yield unquote(q)).toSet
}

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
//
// Returns the set of all email addresses found on the page at url
// and on the pages reachable from it in fewer than n steps.
// A depth of 0 (or a negative depth) visits nothing and gives the
// empty set; testing n <= 0 ensures the recursion also stops when
// crawl is called with a negative depth.
def crawl(url: String, n: Int) : Set[String] = {
  if (n <= 0) Set()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    // the emails of this page together with the emails of all
    // pages it links to (crawled with one step less)
    new_emails ++ (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

// visits the start page and the pages it links to
crawl(startURL, 2)







// Sudoku
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


// a game is a single string of 81 characters: the nine rows of
// the grid written one after the other (stripMargin removes the
// margins and replaceAll the line breaks); '.' is an empty field
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a position is a pair (x, y): column first, then row
type Pos = (Int, Int)
// the character for an empty field
val EmptyValue = '.'
// the length of a row, respectively column
val MaxValue = 9

// the characters that can be filled in, and the valid
// row/column indices
val allValues = "123456789".toList
val indexes = (0 to 8).toList




// index of the first empty field in the game string, or -1 if
// there is none
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// a game is done if it has no empty field left
def isDone(game: String) = empty(game) < 0
// the first empty field as (column, row) position
def emptyPosition(game: String) = {
  val index = empty(game)
  (index % MaxValue, index / MaxValue)
}


// the characters in row y, from left to right
def get_row(game: String, y: Int) = {
  val offset = y * MaxValue
  for (col <- indexes) yield game(offset + col)
}
// the characters in column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(row * MaxValue + x)

// the nine characters of the 3x3 box that contains the
// position pos; the box is traversed column by column
def get_box(game: String, pos: Pos): List[Char] = {
    // rounds a coordinate down to the start of its box (0, 3 or 6)
    def boxStart(p: Int): Int = p - (p % 3)
    val (col, row) = pos
    val firstCol = boxStart(col)
    val firstRow = boxStart(row)
    for {
      x <- (firstCol until firstCol + 3).toList
      y <- (firstRow until firstRow + 3).toList
    } yield game(x + y * MaxValue)
}


//get_row(game0, 0)
//get_row(game0, 1)
//get_box(game0, (3,1))

// a new game string in which the character at index pos is
// replaced by value; the original string is left unchanged
def update(game: String, pos: Int, value: Char): String = {
  val changed = game.updated(pos, value)
  changed
}

// all characters that already occur in the column, the row
// and the box of the position pos (in this order)
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  List.concat(get_col(game, x), get_row(game, y), get_box(game, pos))
}

// the values that can still be placed at position pos: all
// values without the ones that have to be avoided
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.diff(taken)
}

//candidates(game0, (0,0))

// the game string cut into rows of length MaxValue, every
// row on its own line and with a line break at the beginning
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")

// Exhaustive depth-first search for all solutions of a game:
// a game without empty fields is a solution by itself;
// otherwise every candidate is tried for the first empty
// field, and the solutions of all the resulting games are
// collected in one list.
def search(game: String): List[String] = {
  if (isDone(game)) List(game)
  else {
    val index = empty(game)
    val options = candidates(game, emptyPosition(game))
    options.flatMap { c => search(update(game, index, c)) }
  }
}


// another game, in the same string representation as game0
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")


// game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")


// all solutions of the first two games, in a readable format
search(game0).map(pretty)
search(game1).map(pretty)

// for measuring time: evaluates code i times and reports the
// average time for a single evaluation, in seconds, as a string
// ending in " secs"
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  (1 to i).foreach(_ => code)
  val end = System.nanoTime()
  val elapsed = end - start
  // nanoseconds per run (a whole number), converted to seconds
  val perRun = elapsed / i / 1.0e9
  s"$perRun secs"
}

// the hard game
search(game2).map(pretty)
// the number of different solutions of the game with
// multiple solutions
search(game3).distinct.length
// the average time of three runs of each search
time_needed(3, search(game2))
time_needed(3, search(game3))