progs/lecture2.scala
author Christian Urban <urbanc@in.tum.de>
Mon, 04 Nov 2019 11:48:37 +0000
changeset 310 335079d938aa
parent 309 b192bc772613
child 316 8b57dd326a91
permissions -rw-r--r--
updated

// Scala Lecture 2
//=================

// UNFINISHED BUSINESS from Lecture 1
//====================================


// for measuring time
/** Measures how long repeated evaluation of `code` takes.
  *
  * @param n    number of times `code` is evaluated
  * @param code by-name expression, re-evaluated on every iteration
  * @return     total wall-clock time of all `n` evaluations, in seconds
  */
def time_needed[T](n: Int, code: => T): Double = {
  val start = System.nanoTime()
  // `1 to n` gives exactly n evaluations (`0 to n` would give n + 1)
  for (_ <- 1 to n) code
  val end = System.nanoTime()
  (end - start) / 1.0e9
}


// a list with one million elements, used as workload for the timings
val list = (1 to 1000000).toList
// sequential version
time_needed(10, for (n <- list) yield n + 42)
// the same computation on a parallel collection
time_needed(10, for (n <- list.par) yield n + 42)

// (the .par version needs a library and 'magic' option -Yrepl-class-based)




// Just for Fun: Mutable vs Immutable
//====================================
//
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)


// Q: How many elements are in the intersection of
//    two sets?

// Counts the elements of A that also occur in B.
// Written on purpose with a mutable counter and a for-loop.
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var hits = 0
  for (elem <- A if B(elem)) {
    hits = hits + 1
  }
  hits
}

// A holds 1..1000; B holds every fourth number starting at 1 (1, 5, 9, ...)
val A = (1 to 1000).toSet
val B = (1 to 1000 by 4).toSet

// every element of B is also in A, so the result is the size of B
count_intersection(A, B)

// but do not try to add .par to the for-loop above


// proper parallel version: there is no shared mutable counter,
// the parallel collection does the counting itself
def count_intersection2(A: Set[Int], B: Set[Int]) : Int = {
  val parallelA = A.par
  parallelA.count(B.contains)
}

count_intersection2(A, B)


// the same experiment with much larger sets
// (A and B are re-defined, as one does in the REPL)
val A = (1 to 1000000).toSet
val B = (1 to 1000000 by 4).toSet

// mutable sequential version versus the parallel version
time_needed(100, count_intersection(A, B))
time_needed(100, count_intersection2(A, B))



// For-Comprehensions Again
//==========================

// the first produces a result, while the second does not
// with yield: evaluates to List(1, 4, 9, 16, 25)
for (n <- List(1, 2, 3, 4, 5)) yield n * n


// without yield: only the printing happens, the overall value is ()
for (n <- List(1, 2, 3, 4, 5)) println(n)


// String Interpolations
//=======================

val n = 3
// building the message by string concatenation
// NOTE(review): square is only defined further down in this file --
// presumably it is still in scope from an earlier session; confirm
println("The square of " + n + " is " + square(n) + ".")

// the same message as an s-interpolated string
println(s"The square of ${n} is ${square(n)}.")


// helpful for debugging purposes
//
//         "The most effective debugging tool is still careful thought, 
//          coupled with judiciously placed print statements."
//                   — Brian W. Kernighan, in Unix for Beginners (1979)


// Greatest common divisor by Euclid's algorithm; prints its arguments
// on every call so that the recursion can be followed.
def gcd_db(a: Int, b: Int) : Int = {
  println(s"Function called with ${a} and ${b}.")
  b match {
    case 0 => a
    case _ => gcd_db(b, a % b)
  }
}

// prints one line per (recursive) call
gcd_db(48, 18)


// Asserts/Testing
//=================

// NOTE(review): gcd is not defined in this file -- presumably it comes
// from Lecture 1; confirm
// an assert whose condition holds does nothing
assert(gcd(48, 18) == 6)

// an assert whose condition is false raises an error with the given message
// (the expected value 5 looks deliberately wrong to show a failure)
assert(gcd(48, 18) == 5, "The gcd test failed")



// Higher-Order Functions
//========================

// functions can take functions as arguments

// true iff x is divisible by 2, i.e. its lowest bit is not set
def even(x: Int) : Boolean = (x & 1) == 0
// true iff x is not divisible by 2; the test is != 0 because
// -3 % 2 is -1 in Scala, so comparing with 1 misses negative odd numbers
def odd(x: Int) : Boolean = x % 2 != 0

val lst = (1 to 10).toList

// three equivalent ways of handing the predicate to filter:
// explicit lambda, placeholder syntax, and the function name itself
lst.filter(x => even(x))
lst.filter(even(_))
lst.filter(even)

// number of elements satisfying the predicate
lst.count(even)


// first element satisfying the predicate, wrapped in an Option
lst.find(even)

val ps = List((3, 0), (3, 2), (4, 2), (2, 2), (2, 0), (1, 1), (1, 0))

// sortWith takes the "comes before" test as argument:
// first descending, then ascending order
lst.sortWith(_ > _)
lst.sortWith(_ < _)

// lexicographic "less than" on pairs: the first components decide,
// the second components only break ties
def lex(x: (Int, Int), y: (Int, Int)) : Boolean = {
  val (x1, x2) = x
  val (y1, y2) = y
  x1 < y1 || (x1 == y1 && x2 < y2)
}

// sorting the pairs with our own lexicographic ordering
ps.sortWith(lex)

// sortBy sorts according to a key extracted from each element:
// here the first, respectively the second component
ps.sortBy(_._1)
ps.sortBy(_._2)

// maxBy returns an element whose key is maximal
ps.maxBy(_._1)
ps.maxBy(_._2)



// maps (lower-case)
//===================

// twice the argument
def double(x: Int): Int = 2 * x

// the argument multiplied with itself
def square(x: Int): Int = {
  x * x
}



val lst = (1 to 10).toList

// map applies a function to every element; here it builds pairs
lst.map(x => (double(x), square(x)))

lst.map(square)

// this is actually how for-comprehensions
// are defined in Scala: the next two lines are equivalent

lst.map(n => square(n))
for (n <- lst) yield square(n)

// calls to map and filter can be chained

lst.map(square).filter(_ > 4)

lst.map(square).filter(_ > 4).map(square)


// let's define our own functions
// type of functions, for example f: Int => Int

// the list without its first element
lst.tail

// own version of map for lists of Ints, written with head and tail:
// applies f to the first element and recurses on the remaining ones
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
  if (lst.nonEmpty) f(lst.head) :: my_map_int(lst.tail, f)
  else Nil

// squares every element of lst
my_map_int(lst, square)


// same function using pattern matching: a kind
// of switch statement on steroids (see more later on)

// the cons case takes the list apart into first element and remainder,
// the Nil case ends the recursion
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = {
  lst match {
    case first :: remaining => f(first) :: my_map_int(remaining, f)
    case Nil => Nil
  }
}


// other function types
//
// f1: (Int, Int) => Int
// f2: List[String] => Option[Int]
// ... 
// the numbers 1 to 10 as test data
val lst = (1 to 10).toList

// applies f to every element of lst and adds up the results;
// the empty list gives 0
def sumOf(f: Int => Int, lst: List[Int]): Int =
  if (lst.isEmpty) 0
  else f(lst.head) + sumOf(f, lst.tail)

// sum of the squares of all elements
def sum_squares(lst: List[Int]) = sumOf(n => square(n), lst)
// sum of the cubes of all elements
def sum_cubes(lst: List[Int])   = sumOf(n => n * n * n, lst)

sum_squares(lst)
sum_cubes(lst)

// let's try it with factorial
def fact(n: Int) : Int = n match {
  case 0 => 1
  case _ => n * fact(n - 1)
}

// sum of the factorials of all elements
def sum_fact(lst: List[Int]) = sumOf(n => fact(n), lst)
sum_fact(lst)





// Map type (upper-case)
//=======================

// Note the difference between map and Map

// all divisors of n that are strictly smaller than n, in ascending order
def factors(n: Int) : List[Int] =
  for (candidate <- List.range(1, n) if n % candidate == 0) yield candidate


// the numbers 1 to 10 (a val, since ls is never re-assigned)
val ls = (1 to 10).toList

// pairs every number with the list of its factors
val facs = ls.map(n => (n, factors(n)))

// looking up an entry by searching through the list of pairs
facs.find(_._1 == 4)

// works for lists of pairs
facs.toMap


// get returns an Option; getOrElse supplies a default for a missing key
facs.toMap.get(4)
facs.toMap.getOrElse(42, Nil)

val facsMap = facs.toMap

// + yields a new Map with an additional binding ...
val facsMap0 = facsMap + (0 -> List(1,2,3,4,5))
facsMap0.get(1)

// ... or with a replaced binding; facsMap itself is left unchanged
val facsMap4 = facsMap + (1 -> List(1,2,3,4,5))
facsMap.get(1)
facsMap4.get(1)

// groupBy builds a Map from a key (here the string length)
// to the list of all elements having that key
val ls = List("one", "two", "three", "four", "five")
ls.groupBy(_.length)

// no string in ls has length 2, so this lookup gives None
ls.groupBy(_.length).get(2)



// Option type (again)
//=====================

// remember, in Java if something unusual happens,
// you return null;
//
// in Scala you use Option
//   - if the value is present, you use Some(value)
//   - if no value is present, you use None


// find returns Some(first match) ...
List(7,2,3,4,5,6).find(_ < 4)
// ... or None if no element satisfies the predicate
List(5,6,7,8,9).find(_ < 4)

// operations on options

val lst = List(None, Some(1), Some(2), None, Some(3))

// flatten drops the Nones and unwraps the Somes: List(1, 2, 3)
lst.flatten

// get unwraps a Some, but throws an exception on None
Some(1).get
None.get

// isDefined tests whether a value is present
Some(1).isDefined
None.isDefined


None.isDefined

val ps = List((3, 0), (3, 2), (4, 2), (2, 0), (1, 0), (1, 1))

// division that cannot fail: a zero divisor gives None instead of an exception
for ((x, y) <- ps) yield {
  if (y == 0) None else Some(x / y)
}

// getOrElse is for setting a default value

val lst = List(None, Some(1), Some(2), None, Some(3))

// every None is replaced by the default 0: List(0, 1, 2, 0, 3)
for (x <- lst) yield x.getOrElse(0)




// error handling with Option (no exceptions)
//
//  Try(something).getOrElse(what_to_do_in_an_exception)
//
import scala.util._
import io.Source


// without Try: any problem with the download surfaces as an exception
// NOTE(review): this URL says "ucl" while the two below say "kcl" --
// possibly deliberate in order to provoke a failure; confirm
Source.fromURL("""http://www.inf.ucl.ac.uk/staff/urbanc/""").mkString

// with Try: a failure is turned into the default value ""
Try(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString).getOrElse("")

// the same idea with an Option: Some(page) on success, None on failure
Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None)


// a function that turns strings into numbers (similar to .toInt)
// "12u34" is not a number, so this call throws a NumberFormatException
Integer.parseInt("12u34")


// Some(number) if s can be parsed as an Int, otherwise None;
// the exception thrown by parseInt is caught by Try
def get_me_an_int(s: String) : Option[Int] = {
  val attempt = Try(Integer.parseInt(s))
  attempt.toOption
}

val lst = List("12345", "foo", "5432", "bar", "x21", "456")
// strings that are not numbers become None
for (x <- lst) yield get_me_an_int(x)

// summing up all the numbers

// flatten removes the Nones, so only the three parsable numbers are added
lst.map(get_me_an_int).flatten.sum
lst.map(get_me_an_int).flatten.sum


// flatMap combines the map and the flatten step
lst.flatMap(get_me_an_int).map(_.toString)


// This may not look any better than working with null in Java, but to
// see the value, you have to put yourself in the shoes of the
// consumer of the get_me_an_int function, and imagine you didn't
// write that function.
//
// In Java, if you didn't write this function, you'd have to depend on
// the Javadoc of the get_me_an_int. If you didn't look at the Javadoc, 
// you might not know that get_me_an_int could return a null, and your 
// code could potentially throw a NullPointerException.



// even Scala is not immune to problems like this:

// found at index 2
List(5,6,7,8,9).indexOf(7)
// not found: signalled by the magic value -1 rather than by an Option
List(5,6,7,8,9).indexOf(10)
// an out-of-range index throws an exception
List(5,6,7,8,9)(-1)



// Pattern Matching
//==================

// A powerful tool which is supposed to come to Java in a few years'
// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala has
// already had it for many years ;o)

// The general schema:
//
//    expression match {
//       case pattern1 => expression1
//       case pattern2 => expression2
//       ...
//       case patternN => expressionN
//    }




// remember?
// flatten keeps only the values inside the Somes
val lst = List(None, Some(1), Some(2), None, Some(3)).flatten


// own version of flatten for lists of optional Ints:
// a Some contributes its value, a None contributes nothing
def my_flatten(xs: List[Option[Int]]): List[Int] = xs match {
  case Some(value) :: rest => value :: my_flatten(rest)
  case None :: rest => my_flatten(rest)
  case Nil => Nil
}


// another example
// "small" for 0, 1 and 2; "big" for every other number
def get_me_a_string(n: Int): String = n match {
  case small if (0 to 2).contains(small) => "small"
  case _ => "big"
}

// 0 is one of the listed alternatives, so the result is "small"
get_me_a_string(0)


// you can also have cases combined
// Maps an (English, capitalised) month name to a sentence about its season.
// Alternatives separated by | share one right-hand side.
// The match has no default case: any string that is not one of the
// twelve month names below raises a scala.MatchError.
def season(month: String) : String = month match {
  case "March" | "April" | "May" => "It's spring"
  case "June" | "July" | "August" => "It's summer"
  case "September" | "October" | "November" => "It's autumn"
  case "December" => "It's winter"
  case "January" | "February" => "It's unfortunately winter"
}
 
println(season("November"))

// What happens if no case matches?
// ("foobar" is not covered by any case of season)
println(season("foobar"))


// Days of the months
// Number of days of the given month (English name, capitalised).
// February is counted with 28 days, that is leap years are ignored.
// Any other string raises a scala.MatchError.
def days(month: String) : Int = month match {
  case "January" | "March" | "May" | "July" |
       "August" | "October" | "December" => 31
  case "April" | "June" | "September" | "November" => 30
  case "February" => 28
}




// Silly: fizz buzz
// "fizz" for multiples of 3, "buzz" for multiples of 5,
// "fizz buzz" for multiples of both, otherwise the number itself
def fizz_buzz(n: Int) : String = {
  val byThree = n % 3 == 0
  val byFive = n % 5 == 0
  (byThree, byFive) match {
    case (true, true) => "fizz buzz"
    case (true, false) => "fizz"
    case (false, true) => "buzz"
    case (false, false) => n.toString
  }
}

// prints the fizz-buzz answer for 0 up to and including 20, one per line
for (n <- 0 to 20) 
 println(fizz_buzz(n))


// User-defined Datatypes
//========================


// a user-defined datatype with the three values Red, Green and Blue
abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour

// only Green counts as a favourite colour
def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case Red | Blue => false
}

// Green is the one colour for which the answer is true
fav_colour(Green)


// ... a tiny bit more useful: Roman Numerals

// the seven Roman digits as a user-defined datatype
abstract class RomanDigit 
case object I extends RomanDigit 
case object V extends RomanDigit 
case object X extends RomanDigit 
case object L extends RomanDigit 
case object C extends RomanDigit 
case object D extends RomanDigit 
case object M extends RomanDigit 

// a Roman numeral is simply a list of such digits
type RomanNumeral = List[RomanDigit] 

// a list of two Roman digits
List(X,I)

/*
I -> 1
II -> 2
III  -> 3
IV -> 4
V -> 5
VI -> 6
VII -> 7
VIII -> 8
IX -> 9
X -> 10
*/

// Translates a Roman numeral into its Int value by working through
// the list of digits from left to right.
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
  case Nil => 0
  // subtractive pairs: they have to be tried before the single-digit
  // cases that start with the same digit
  case C::M::rest => 900 + RomanNumeral2Int(rest)
  case C::D::rest => 400 + RomanNumeral2Int(rest)
  case X::C::rest => 90 + RomanNumeral2Int(rest)
  case X::L::rest => 40 + RomanNumeral2Int(rest)
  case I::X::rest => 9 + RomanNumeral2Int(rest)
  case I::V::rest => 4 + RomanNumeral2Int(rest)
  // single digits
  case M::rest => 1000 + RomanNumeral2Int(rest)
  case D::rest => 500 + RomanNumeral2Int(rest)
  case C::rest => 100 + RomanNumeral2Int(rest)
  case L::rest => 50 + RomanNumeral2Int(rest)
  case X::rest => 10 + RomanNumeral2Int(rest)
  case V::rest => 5 + RomanNumeral2Int(rest)
  case I::rest => 1 + RomanNumeral2Int(rest)
}

// some examples; the expected results are given on the right
RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017


// another example
//=================

// Once upon a time, in a completely fictional 
// country there were Persons...


// the ranks of the fictional country, from King down to Peasant
abstract class Person
case object King extends Person
case class Peer(deg: String, terr: String, succ: Int) extends Person
case class Knight(name: String) extends Person
case class Peasant(name: String) extends Person
// Clown is used by superior and by the people list, so it must be declared
case object Clown extends Person


// the form of address for a person
def title(p: Person): String = p match {
  case King => "His Majesty the King"
  case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
  case Knight(name) => s"Sir ${name}"
  case Peasant(name) => name
  case Clown => "The Clown"
}

// true iff p1 ranks strictly above p2; the order of ranks is
// King, Peer, Knight, Clown, Peasant
def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
  // nobody is superior to themselves; sortWith needs a strict ordering
  case (King, King) => false
  case (King, _) => true
  case (Peer(_,_,_), Knight(_)) => true
  case (Peer(_,_,_), Peasant(_)) => true
  case (Peer(_,_,_), Clown) => true
  case (Knight(_), Peasant(_)) => true
  case (Knight(_), Clown) => true
  case (Clown, Peasant(_)) => true
  case _ => false
}

// one person of each rank, in no particular order
val people = List(Knight("David"), 
                  Peer("Duke", "Norfolk", 84), 
                  Peasant("Christian"), 
                  King, 
                  Clown)

// sorts with superior as the "comes before" test and prints one person per line
println(people.sortWith(superior).mkString("\n"))


// String interpolations as patterns

val date = "2000-01-01"
// an s-interpolator used as a pattern: binds year, month and day
// to the three parts of the string between the dashes
val s"$year-$month-$day" = date

// Parses a date written either as year-month-day or as day/month/year.
// Returns Some((year, month, day)) if the string has one of the two
// shapes and all three parts are numbers, otherwise None.
def parse_date(date: String): Option[(Int, Int, Int)] = {
  // toIntOption instead of toInt: a non-numeric part gives None
  // rather than a NumberFormatException
  def numbers(year: String, month: String, day: String) =
    for (y <- year.toIntOption;
         m <- month.toIntOption;
         d <- day.toIntOption) yield (y, m, d)

  date match {
    case s"$year-$month-$day" => numbers(year, month, day)
    case s"$day/$month/$year" => numbers(year, month, day)
    case _ => None
  }
} 


// Recursion
//===========

/* a, b, c

aa         aaa
ab         baa 
ac         caa 
ba  =>     ......
bb
bc
ca
cb
cc

*/

// all strings of length l that can be built from the characters in cs;
// every such string is a first character followed by a string of length l - 1
def perms(cs: List[Char], l: Int) : List[String] =
  l match {
    case 0 => List("")
    case _ => cs.flatMap(c => perms(cs, l - 1).map(s => s"$c$s"))
  }

// the nine two-letter strings over a, b and c
perms("abc".toList, 2)

// prints a single move of a disc from one peg to another
def move(from: Char, to: Char) : Unit = {
  println(s"Move disc from $from to $to!")
}

// Towers of Hanoi: moves n discs from peg `from` to peg `to`,
// using peg `via` as intermediate storage
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit =
  if (n != 0) {
    hanoi(n - 1, from, to, via)   // park the n - 1 smaller discs on `via`
    move(from, to)                // move the largest disc to its target
    hanoi(n - 1, via, from, to)   // put the smaller discs on top of it
  }

// 4 discs need 2^4 - 1 = 15 moves; with 40 discs the call would print
// 2^40 - 1 (about 10^12) lines and practically never finish
hanoi(4, 'A', 'B', 'C')


// Tail Recursion
//================


// factorial on Longs; not tail-recursive, because the multiplication
// is done after the recursive call has returned
def fact(n: Long): Long = n match {
  case 0L => 1L
  case _ => n * fact(n - 1)
}

// fact multiplies only after the recursive call has returned,
// so every pending call has to be kept around
fact(10)              //ok
fact(10000)           // produces a stackoverflow

// factorial with an accumulator: the recursive call is the last thing
// the function does, and acc carries the product computed so far
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc

// the second argument is the accumulator; it starts at 1
factT(10, 1)
factT(100000, 1)

// there is an annotation for ensuring a function is tail recursive
import scala.annotation.tailrec

// with the annotation the compiler rejects the definition
// if the recursive call is not in tail position
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n.signum == 0) acc
  else factT(n - 1, acc * n)
}



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.

import io.Source
import scala.util._

// gets the first 10K (10000 characters) of a web-page, read as ISO-8859-1;
// if anything goes wrong, a message is printed and "" is returned
def get_page(url: String) : String = {
  Try {
    val source = Source.fromURL(url)("ISO-8859-1")
    // always release the underlying connection, also when reading fails
    try source.take(10000).mkString finally source.close()
  }.getOrElse { println(s"  Problem with: $url"); ""}
}

// regex for URLs and emails
// a double-quoted string that starts with http:// or https://
// (the surrounding quotes are part of the match)
val http_pattern = """"https?://[^"]*"""".r
// three groups: the part before the @, the domain name and the ending;
// only lower-case letters are accepted
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//test case:
//email_pattern.findAllIn
//  ("foo bla christian@kcl.ac.uk 1234567").toList


// removes the first and the last character from a string;
// strings shorter than three characters give ""
def unquote(s: String) = s.slice(1, s.length - 1)

// the set of all quoted URLs occurring in a page, with the quotes removed
def get_all_URLs(page: String): Set[String] = {
  val quoted = http_pattern.findAllIn(page)
  quoted.map(link => unquote(link)).toSet
}

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
// Collects the email addresses found on the page at url and, recursively,
// on all pages linked from it; n bounds the depth of the search.
def crawl(url: String, n: Int) : Set[String] = n match {
  case 0 => Set()
  case _ =>
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    // emails from the linked pages, one level less deep
    val deeper = get_all_URLs(page).flatMap(link => crawl(link, n - 1))
    new_emails ++ deeper
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

// visits the start page and the pages it links to (depth 2)
crawl(startURL, 2)







// Sudoku
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


// a game is a string of 81 characters, row after row;
// stripMargin and replaceAll remove the layout of the literal
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a position is a (column, row) pair
type Pos = (Int, Int)
// the character marking an empty field
val emptyValue = '.'
// the number of fields per row and per column
val maxValue = 9

// the characters that can be put into a field
val allValues = "123456789".toList
// the row and column numbers
val indexes = (0 to 8).toList


// index of the first empty field in the string, or -1 if there is none
def empty(game: String) = game.indexWhere(_ == emptyValue)
// a game is done when it has no empty field left
def isDone(game: String) = empty(game) < 0
// the first empty field as a (column, row) position
def emptyPosition(game: String) : Pos = {
  val index = empty(game)
  (index % maxValue, index / maxValue)
}


// the characters of row y, from left to right
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game(y * maxValue + col)
// the characters of column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * maxValue)

// the nine characters of the 3x3 box that contains position pos
def get_box(game: String, pos: Pos): List[Char] = {
    val (col, row) = pos
    // top-left corner of the box: round down to a multiple of 3
    val left = col - col % 3
    val top = row - row % 3
    List.range(left, left + 3).flatMap { x =>
      List.range(top, top + 3).map(y => game(x + y * maxValue))
    }
}         


//get_row(game0, 0)
//get_row(game0, 1)
//get_box(game0, (3,1))

// a copy of the game in which the field at index pos holds value
def update(game: String, pos: Int, value: Char): String = {
  val changed = game.updated(pos, value)
  changed
}

// all characters in the column, the row and the box of position pos
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  val column = get_col(game, x)
  val row = get_row(game, y)
  column ::: row ::: get_box(game, pos)
}

// the values that can still be put at position pos
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues diff taken
}

//candidates(game0, (0, 0))

// the game as a multi-line string: a leading newline, then one row per line
def pretty(game: String): String = {
  val rows = game.grouped(maxValue)
  rows.mkString("\n", "\n", "")
}

// Exhaustive depth-first search: tries every candidate for the first
// empty field and collects all completed games.
def search(game: String): List[String] =
  if (!isDone(game)) {
    val options = candidates(game, emptyPosition(game))
    options.flatMap(c => search(update(game, empty(game), c)))
  }
  else List(game)

// an easy game
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")


// a game that is in the slightly harder category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

// a game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")


// all solutions of a game, each one formatted with pretty
search(game0).map(pretty)
search(game1).map(pretty)

// for measuring time
// evaluates code i times and reports the time per evaluation
// (in seconds) as a string
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  (1 to i).foreach(_ => code)
  val end = System.nanoTime()
  val secsPerRun = (end - start) / i / 1.0e9
  s"${secsPerRun} secs"
}

search(game2).map(pretty)
// the number of different solutions found for game3
search(game3).distinct.length
// timing the search, three runs each
time_needed(3, search(game2))
time_needed(3, search(game3))





// if you like verbosity, you can fully specify the function literal. 
// Don't go telling that to people, though
(1 to 100).filter((x: Int) => x % 2 == 0).sum 

// As x is known to be an Int anyway, you can omit that part
(1 to 100).filter(x => x % 2 == 0).sum

// As each parameter (only x in this case) is used only once
// you can use the wizardly placeholder syntax
(1 to 100).filter(_ % 2 == 0).sum

// But if you want to re-use your literal, you can also put it in a value
// In this case, explicit types are required because there's nothing to infer from
// (the parameter type goes inside the literal; the value is bound with =)
val isEven = (x: Int) => x % 2 == 0
// the value can be passed wherever a function Int => Boolean is expected
(1 to 100).filter(isEven).sum