// Scala Lecture 2
//=================
// UNFINISHED BUSINESS from Lecture 1
//====================================
// for measuring time
/** Evaluates `code` exactly `n` times and reports the total elapsed
  * wall-clock time.
  *
  * @param n    how often `code` is evaluated (not at all if `n <= 0`)
  * @param code by-name expression; it is re-evaluated on every iteration
  * @return the elapsed time for all runs together, in seconds
  */
def time_needed[T](n: Int, code: => T): Double = {
  val start = System.nanoTime()
  // `1 to n` gives exactly n evaluations (`0 to n` would give n + 1)
  for (_ <- 1 to n) code
  val end = System.nanoTime()
  // nanoTime counts nanoseconds; scale to seconds
  (end - start) / 1.0e9
}
// Time the same for-yield over a one-million-element list, first on
// the plain list and then on its parallel (.par) counterpart.
val list = (1 to 1000000).toList
time_needed(10, for (n <- list) yield n + 42)
time_needed(10, for (n <- list.par) yield n + 42)
// (ONLY WORKS OUT-OF-THE-BOX IN SCALA 2.11.8, not in SCALA 2.12)
// (would need to have this wrapped into a function, or
// REPL called with scala -Yrepl-class-based)
// Just for Fun: Mutable vs Immutable
//====================================
//
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)
// Q: How many elements are in the intersection of
// two sets?
// Imperative version: walks over A with a for-loop and bumps a
// mutable counter for every element that B contains as well.
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var hits = 0
  for (elem <- A) {
    if (B(elem)) hits += 1
  }
  hits
}
// A holds 1..1000, B every fourth number starting at 1
val A = (1 to 1000).toSet
val B = (1 to 1000 by 4).toSet
count_intersection(A, B)
// but do not try to add .par to the for-loop above
// proper parallel version
// Side-effect-free version: counts, on the parallel view of A, the
// elements for which membership in B holds.
def count_intersection2(A: Set[Int], B: Set[Int]) : Int =
  A.par.count(B.contains)
// run the parallel version on the small sets
count_intersection2(A, B)
// rebind A and B to much larger sets and time both versions
// (re-declaring a val like this relies on line-by-line REPL evaluation)
val A = (1 to 1000000).toSet
val B = (1 to 1000000 by 4).toSet
time_needed(100, count_intersection(A, B))
time_needed(100, count_intersection2(A, B))
// For-Comprehensions Again
//==========================
// the first produces a result, while the second does not
// with yield: evaluates to a new list holding the squares
for (n <- List(1, 2, 3, 4, 5)) yield n * n
// without yield: only prints each element; the result is Unit
for (n <- List(1, 2, 3, 4, 5)) println(n)
// Higher-Order Functions
//========================
// functions can take functions as arguments
// An Int is even exactly when its lowest bit is not set.
def even(x: Int) : Boolean = (x & 1) == 0
// True for every odd Int. The remainder is compared against 0 because
// % keeps the sign of the dividend (-3 % 2 is -1), so testing for
// `== 1` would miss negative odd numbers.
def odd(x: Int) : Boolean = x % 2 != 0
val lst = (1 to 10).toList
// three equivalent ways of handing the predicate `even` to filter
lst.filter(x => even(x))
lst.filter(even(_))
lst.filter(even)
// count: how many elements satisfy the predicate
lst.count(even)
// find: the first element satisfying the predicate, as an Option
lst.find(even)
// a list of pairs for the sorting examples
val ps = List((3, 0), (3, 2), (4, 2), (2, 2), (2, 0), (1, 1), (1, 0))
// sortWith takes the comparison as a two-argument function
lst.sortWith(_ > _)
lst.sortWith(_ < _)
// Lexicographic "less than" on pairs: the first components decide,
// unless they are equal, in which case the second components do.
def lex(x: (Int, Int), y: (Int, Int)) : Boolean = (x, y) match {
  case ((a1, b1), (a2, b2)) if a1 == a2 => b1 < b2
  case ((a1, _), (a2, _)) => a1 < a2
}
// sort the pairs with the hand-written lexicographic comparison
ps.sortWith(lex)
// sortBy orders by a key extracted from each element
ps.sortBy(_._1)
ps.sortBy(_._2)
// maxBy picks an element whose key is maximal
ps.maxBy(_._1)
ps.maxBy(_._2)
// maps (lower-case)
//===================
// Twice the argument.
def double(x: Int): Int = 2 * x
// The argument multiplied by itself.
def square(x: Int): Int = {
  val squared = x * x
  squared
}
val lst = (1 to 10).toList
// map applies the given function to every element of the list
lst.map(x => (double(x), square(x)))
lst.map(square)
// this is actually how a for-yield expression is defined in Scala:
// the next two lines mean the same
lst.map(n => square(n))
for (n <- lst) yield square(n)
// such calls can be chained
lst.map(square).filter(_ > 4)
lst.map(square).filter(_ > 4).map(square)
// let's define our own functions
// type of functions, for example f: Int => Int
// (tail is the list without its first element)
lst.tail
// Hand-written map for lists of Ints, built from an explicit
// emptiness test together with head and tail.
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
  if (lst.isEmpty) Nil
  else {
    val first = f(lst.head)
    first :: my_map_int(lst.tail, f)
  }
// square every element of lst with the hand-written map
my_map_int(lst, square)
// same function using pattern matching: a kind
// of switch statement on steroids (see more later on)
// The pattern-matching variant: a non-empty list is split into its
// first element and the remainder, the empty list maps to itself.
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = lst match {
  case head :: rest => f(head) :: my_map_int(rest, f)
  case Nil => Nil
}
// other function types
//
// f1: (Int, Int) => Int
// f2: List[String] => Option[Int]
// ...
// the numbers 1 to 10, used by the summation examples
val lst = (1 to 10).toList
// Adds up f(x) for every x in lst; the empty list sums to 0.
def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
  case head :: rest => f(head) + sumOf(f, rest)
  case Nil => 0
}
// Sum of the squares of all elements, expressed through sumOf.
def sum_squares(lst: List[Int]): Int = sumOf(n => square(n), lst)
// Sum of the cubes of all elements, expressed through sumOf.
def sum_cubes(lst: List[Int]): Int = {
  def cube(x: Int): Int = x * x * x
  sumOf(cube, lst)
}
// both sums over the numbers 1 to 10
sum_squares(lst)
sum_cubes(lst)
// let's try it with factorial
// Factorial by plain recursion: 0 is the base case, every other
// argument multiplies itself with the factorial of its predecessor.
def fact(n: Int) : Int = n match {
  case 0 => 1
  case _ => n * fact(n - 1)
}
// Sum of the factorials of all elements, expressed through sumOf.
def sum_fact(lst: List[Int]): Int = sumOf(n => fact(n), lst)
// 1! + 2! + ... + 10!
sum_fact(lst)
// Map type (upper-case)
//=======================
// Note the difference between map and Map
// All divisors of n that are smaller than n itself, in ascending
// order (so factors(1) is empty).
def factors(n: Int) : List[Int] =
  (for (candidate <- 1 until n if n % candidate == 0) yield candidate).toList
// The numbers 1 to 10, each paired with its list of divisors.
// `ls` is a val: the binding is never reassigned, so no var is needed.
val ls = (1 to 10).toList
val facs = ls.map(n => (n, factors(n)))
// find the pair whose first component is 4
facs.find(_._1 == 4)
// works for lists of pairs
facs.toMap
// get returns an Option; getOrElse supplies a default for absent keys
facs.toMap.get(4)
facs.toMap.getOrElse(42, Nil)
val facsMap = facs.toMap
// + produces a new map; facsMap itself is not modified
val facsMap0 = facsMap + (0 -> List(1,2,3,4,5))
facsMap0.get(1)
// a binding for the already present key 1 replaces the old one,
// but only in the new map
val facsMap4 = facsMap + (1 -> List(1,2,3,4,5))
facsMap.get(1)
facsMap4.get(1)
// groupBy builds a Map from each key (here the string length) to the
// elements that produce this key
val ls = List("one", "two", "three", "four", "five")
ls.groupBy(_.length)
// none of the strings has length 2, so this lookup gives None
ls.groupBy(_.length).get(2)
// Option type
//=============
// in Java, if something unusual happens, you return null;
//
//in Scala you use Option
// - if the value is present, you use Some(value)
// - if no value is present, you use None
// find gives Some(first match), or None if no element qualifies
List(7,2,3,4,5,6).find(_ < 4)
List(5,6,7,8,9).find(_ < 4)
// operations on options
val lst = List(None, Some(1), Some(2), None, Some(3))
// flatten drops the Nones and unwraps the Somes
lst.flatten
Some(1).get
// get on None throws a NoSuchElementException
None.get
Some(1).isDefined
None.isDefined
None.isDefined
val ps = List((3, 0), (3, 2), (4, 2), (2, 0), (1, 0), (1, 1))
// a division by zero is reported as None instead of throwing
for ((x, y) <- ps) yield {
if (y == 0) None else Some(x / y)
}
// getOrElse is for setting a default value
val lst = List(None, Some(1), Some(2), None, Some(3))
for (x <- lst) yield x.getOrElse(0)
// error handling with Option (no exceptions)
//
// Try(something).getOrElse(what_to_do_in_an_exception)
//
import scala.util._
import io.Source
// without Try, a failing download surfaces as an exception
Source.fromURL("""http://www.inf.ucl.ac.uk/staff/urbanc/""").mkString
// with Try, a failure is replaced by the given default value
Try(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString).getOrElse("")
Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None)
// a function that turns strings into numbers (similar to .toInt)
// parseInt itself throws a NumberFormatException on input like this
Integer.parseInt("12u34")
// Parses s as a decimal Int: Some(number) on success, None when
// parsing throws.
def get_me_an_int(s: String) : Option[Int] =
  Try(Integer.parseInt(s)).toOption
val lst = List("12345", "foo", "5432", "bar", "x21", "456")
// every string becomes Some(number) or None
for (x <- lst) yield get_me_an_int(x)
// summing all the numbers
// (flatten keeps only the successfully parsed values)
lst.map(get_me_an_int).flatten.sum
lst.map(get_me_an_int).flatten.sum
// flatMap does the map and the flatten in a single step
lst.flatMap(get_me_an_int).map(_.toString)
// This may not look any better than working with null in Java, but to
// see the value, you have to put yourself in the shoes of the
// consumer of the get_me_an_int function, and imagine you didn't
// write that function.
//
// In Java, if you didn't write this function, you'd have to depend on
// the Javadoc of the get_me_an_int. If you didn't look at the Javadoc,
// you might not know that get_me_an_int could return a null, and your
// code could potentially throw a NullPointerException.
// even Scala is not immune to problems like this:
// indexOf finds 7 at index 2 ...
List(5,6,7,8,9).indexOf(7)
// ... but reports "not found" with the sentinel value -1
List(5,6,7,8,9).indexOf(10)
// and indexing a list with -1 throws an IndexOutOfBoundsException
List(5,6,7,8,9)(-1)
// Pattern Matching
//==================
// A powerful tool which is supposed to come to Java in a few years'
// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala has
// already had it for many years ;o)
// The general schema:
//
// expression match {
// case pattern1 => expression1
// case pattern2 => expression2
// ...
// case patternN => expressionN
// }
// remember?
// the library's flatten, which my_flatten re-implements for Option[Int]
val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
// Keeps the contents of every Some and skips every None, preserving
// the order of the elements.
def my_flatten(xs: List[Option[Int]]): List[Int] = xs match {
  case Some(value) :: tail => value :: my_flatten(tail)
  case None :: tail => my_flatten(tail)
  case Nil => Nil
}
// another example
// "small" for 0, 1 and 2; "big" for every other Int.
def get_me_a_string(n: Int): String =
  if (0 <= n && n <= 2) "small" else "big"
// 0 is covered by the first case
get_me_a_string(0)
// you can also have cases combined
// Maps an English month name to a remark about its season. Several
// months share a case via `|`. There is deliberately no default case:
// any other string ends in a MatchError.
def season(month: String): String = month match {
  case "December" => "It's winter"
  case "January" | "February" => "It's unfortunately winter"
  case "March" | "April" | "May" => "It's spring"
  case "June" | "July" | "August" => "It's summer"
  case "September" | "October" | "November" => "It's autumn"
}
println(season("November"))
// What happens if no case matches?
// (the match throws a scala.MatchError)
println(season("foobar"))
// Silly: fizz buzz
// "fizz buzz" for multiples of both 3 and 5, "fizz" for multiples of
// 3 only, "buzz" for multiples of 5 only, otherwise the number itself.
def fizz_buzz(n: Int) : String = {
  val by3 = n % 3 == 0
  val by5 = n % 5 == 0
  if (by3 && by5) "fizz buzz"
  else if (by3) "fizz"
  else if (by5) "buzz"
  else n.toString
}
// print the answers for 0 up to and including 20, one per line
for (n <- 0 to 20)
println(fizz_buzz(n))
// User-defined Datatypes
//========================
// An enumeration-like datatype: an abstract base class with one case
// object per colour.
// NOTE(review): the base class is not sealed, so matches over Colour
// are not checked for exhaustiveness by the compiler.
abstract class Colour
case object Red extends Colour
case object Green extends Colour
case object Blue extends Colour
// Only Green counts as the favourite colour.
def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case Red | Blue => false
}
// Green is the one colour answered with true
fav_colour(Green)
// ... a bit more useful: Roman Numerals
// The seven Roman digits, one case object each.
abstract class RomanDigit
case object I extends RomanDigit
case object V extends RomanDigit
case object X extends RomanDigit
case object L extends RomanDigit
case object C extends RomanDigit
case object D extends RomanDigit
case object M extends RomanDigit
// a numeral is simply the list of its digits, read left to right
type RomanNumeral = List[RomanDigit]
// a numeral is written as a list of digits, for example XI:
List(X,I)
// Values of the first ten numerals. The table is documentation only:
// names such as II or IV are not defined identifiers, so the rows
// cannot be evaluated as code.
//   I    -> 1
//   II   -> 2
//   III  -> 3
//   IV   -> 4
//   V    -> 5
//   VI   -> 6
//   VII  -> 7
//   VIII -> 8
//   IX   -> 9
//   X    -> 10
// Converts a numeral to its value by consuming digits from the left.
// A smaller digit directly in front of a larger one (CM, CD, XC, XL,
// IX, IV) is read as one subtractive pair; these two-digit cases are
// listed before the single digits so that they are tried first.
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
  case Nil => 0
  // subtractive pairs
  case C::M::rest => 900 + RomanNumeral2Int(rest)
  case C::D::rest => 400 + RomanNumeral2Int(rest)
  case X::C::rest => 90 + RomanNumeral2Int(rest)
  case X::L::rest => 40 + RomanNumeral2Int(rest)
  case I::X::rest => 9 + RomanNumeral2Int(rest)
  case I::V::rest => 4 + RomanNumeral2Int(rest)
  // single digits
  case M::rest => 1000 + RomanNumeral2Int(rest)
  case D::rest => 500 + RomanNumeral2Int(rest)
  case C::rest => 100 + RomanNumeral2Int(rest)
  case L::rest => 50 + RomanNumeral2Int(rest)
  case X::rest => 10 + RomanNumeral2Int(rest)
  case V::rest => 5 + RomanNumeral2Int(rest)
  case I::rest => 1 + RomanNumeral2Int(rest)
}
// sample conversions; the expected value is given after each call
RomanNumeral2Int(List(I,V)) // 4
RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I)) // 6
RomanNumeral2Int(List(I,X)) // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
// another example
//=================
// Once upon a time, in a completely fictional
// country, there were Persons...
// The kinds of persons in the fictional country. Clown is declared
// here because superior and the people list refer to it.
abstract class Person
case object King extends Person
case class Peer(deg: String, terr: String, succ: Int) extends Person
case class Knight(name: String) extends Person
case class Peasant(name: String) extends Person
case object Clown extends Person

// The form of address for a person. For a Peer the degree and the
// territory are used; the third field is ignored.
def title(p: Person): String = p match {
  case King => "His Majesty the King"
  case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
  case Knight(name) => s"Sir ${name}"
  case Peasant(name) => name
  // keeps title defined for every declared kind of Person
  case Clown => "The Clown"
}
// True when p1 ranks strictly above p2. The ranking encoded by the
// cases is King, Peer, Knight, Clown, Peasant (highest first);
// persons of the same kind are never superior to one another.
def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
  // nobody outranks themselves; without this case (King, King) would
  // fall into the next one and the relation would not be a strict
  // order, which a sortWith comparison has to be
  case (King, King) => false
  case (King, _) => true
  case (Peer(_,_,_), Knight(_)) => true
  case (Peer(_,_,_), Peasant(_)) => true
  case (Peer(_,_,_), Clown) => true
  case (Knight(_), Peasant(_)) => true
  case (Knight(_), Clown) => true
  case (Clown, Peasant(_)) => true
  case _ => false
}
// a sample list of persons in no particular order
val people = List(Knight("David"),
Peer("Duke", "Norfolk", 84),
Peasant("Christian"),
King,
Clown)
// sort so that superiors come first and print one person per line
println(people.sortWith(superior).mkString("\n"))
// "\\n" is an escaped backslash followed by n, so this prints the two
// characters \ and n rather than a line break
print("123\\n456")
// Tail recursion
//================
// Factorial on Long by plain recursion. The multiplication happens
// after the recursive call returns, so the call is not a tail call.
def fact(n: Long): Long = n match {
  case 0L => 1L
  case _ => n * fact(n - 1)
}
fact(10) //ok
// every pending multiplication keeps a stack frame alive
fact(10000) // produces a stackoverflow
// Factorial with an accumulator: the product built so far travels in
// acc, and the recursive call is the last thing the function does.
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, acc * n) else acc
// the accumulator is started at 1
factT(10, 1)
factT(100000, 1)
// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec
// Same accumulator-based factorial; the annotation turns a recursive
// call that is not in tail position into a compile error.
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n == 0) acc
  else {
    val remaining = n - 1
    val product = n * acc
    factT(remaining, product)
  }
}
// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions
// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.
import io.Source
import scala.util._
// gets the first 10K of a web-page
// Downloads at most the first 10000 characters of the page at `url`,
// decoded as ISO-8859-1. On any failure a message naming the URL is
// printed and the empty string is returned.
def get_page(url: String) : String = {
  Try {
    val source = Source.fromURL(url)("ISO-8859-1")
    // release the connection even if reading fails half-way
    try source.take(10000).mkString finally source.close()
  }.getOrElse { println(s" Problem with: $url"); "" }
}
// regex for URLs and emails
// matches a double-quoted http or https URL, quotes included
val http_pattern = """"https?://[^"]*"""".r
// matches lower-case addresses of the shape local@domain.tld, where
// the final part has 2 to 6 characters
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
//email_pattern.findAllIn
// ("foo bla christian@kcl.ac.uk 1234567").toList
// drops the first and last character from a string
// Strips one character from each end of s; strings shorter than two
// characters yield the empty string.
def unquote(s: String): String = s.slice(1, s.length - 1)
// Collects every quoted URL that http_pattern finds in the page,
// with the surrounding quotes removed.
def get_all_URLs(page: String): Set[String] =
  (for (quoted <- http_pattern.findAllIn(page)) yield unquote(quoted)).toSet
// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
// Harvests email addresses starting at url and follows the links on
// each page until the depth n is used up. Visited pages are not
// remembered, so the same page may be fetched several times.
def crawl(url: String, n: Int) : Set[String] = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val found_here = email_pattern.findAllIn(page).toSet
    // one level deeper for every link on this page
    val found_below = get_all_URLs(page).flatMap(link => crawl(link, n - 1))
    found_here ++ found_below
  }
  else Set()
}
// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
// depth 2: the start page and the pages it links to
crawl(startURL, 2)
// Sudoku
//========
// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search
// A puzzle written row by row, '.' marking an empty cell. stripMargin
// and replaceAll remove the margins and line breaks, which leaves one
// string holding the nine rows back to back.
val game0 = """.14.6.3..
|62...4..9
|.8..5.6..
|.6.2....3
|.7..1..5.
|5....9.6.
|..6.2..3.
|1..5...92
|..7.9.41.""".stripMargin.replaceAll("\\n", "")
// a board position as an (x, y) pair, i.e. (column, row)
type Pos = (Int, Int)
// the character that marks a cell which is not filled in yet
val EmptyValue = '.'
// used as the length of one row in the game string
val MaxValue = 9
// the characters that may be written into a cell
val allValues = "123456789".toList
// the row and column indices
val indexes = (0 to 8).toList
// Index of the first empty cell in the game string, or -1 if none.
def empty(game: String): Int = game.indexWhere(_ == EmptyValue)
// A game is finished once no empty cell is left.
def isDone(game: String): Boolean = empty(game) < 0
// The first empty cell as an (x, y) position, derived from its index
// in the game string.
def emptyPosition(game: String): (Int, Int) = {
  val index = empty(game)
  (index % MaxValue, index / MaxValue)
}
// The cells of row y, from left to right.
def get_row(game: String, y: Int): List[Char] =
  for (col <- indexes) yield game.charAt(y * MaxValue + col)
// The cells of column x, from top to bottom.
def get_col(game: String, x: Int): List[Char] =
  indexes.map { row =>
    val row_start = row * MaxValue
    game.charAt(x + row_start)
  }
// The nine cells of the 3x3 box containing pos, listed column by
// column within the box.
def get_box(game: String, pos: Pos): List[Char] = {
  // rounds a coordinate down to the start of its box
  def box_start(p: Int): Int = (p / 3) * 3
  val (left, top) = (box_start(pos._1), box_start(pos._2))
  val rows = (top until top + 3).toList
  for {
    x <- (left until left + 3).toList
    y <- rows
  } yield game(x + y * MaxValue)
}
//get_row(game0, 0)
//get_row(game0, 1)
//get_box(game0, (3,1))
// A copy of game in which the character at index pos is replaced by
// value; game itself is left untouched.
def update(game: String, pos: Int, value: Char): String = {
  val patched = game.updated(pos, value)
  patched
}
// All characters found in the column, the row and the box of pos,
// in that order (duplicates and empty markers included).
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  List(get_col(game, x), get_row(game, y), get_box(game, pos)).flatten
}
// The values that may still be placed at pos: all values minus those
// already present in its column, row and box.
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.diff(taken)
}
//candidates(game0, (0,0))
// Renders the game string as a board: a leading line break, then one
// row of MaxValue characters per line.
def pretty(game: String): String =
  game.grouped(MaxValue).mkString("\n", "\n", "")
// Exhaustive depth-first search: tries every candidate for the first
// empty cell and collects the solutions of all resulting games.
def search(game: String): List[String] =
  if (isDone(game)) List(game)
  else {
    val slot = empty(game)
    for {
      c <- candidates(game, emptyPosition(game))
      solution <- search(update(game, slot, c))
    } yield solution
  }
// further puzzles, in the same row-by-row notation with '.' for an
// empty cell
val game1 = """23.915...
|...2..54.
|6.7......
|..1.....9
|89.5.3.17
|5.....6..
|......9.5
|.16..7...
|...329..1""".stripMargin.replaceAll("\\n", "")
// game that is in the hard category
val game2 = """8........
|..36.....
|.7..9.2..
|.5...7...
|....457..
|...1...3.
|..1....68
|..85...1.
|.9....4..""".stripMargin.replaceAll("\\n", "")
// game with multiple solutions
val game3 = """.8...9743
|.5...8.1.
|.1.......
|8....5...
|...8.4...
|...3....6
|.......7.
|.3.5...8.
|9724...5.""".stripMargin.replaceAll("\\n", "")
// solve the first two puzzles and render every solution as a board
search(game0).map(pretty)
search(game1).map(pretty)
// for measuring time
/** Evaluates `code` `i` times and reports the average time of one run.
  *
  * @param i    number of runs; must be positive
  * @param code by-name expression; it is re-evaluated on every run
  * @return the average wall-clock time per run in seconds, followed by
  *         the suffix " secs"
  * @throws IllegalArgumentException if `i` is not positive
  */
def time_needed[T](i: Int, code: => T): String = {
  // an average over zero runs is undefined; fail with a clear message
  // rather than with a bare division-by-zero error
  require(i > 0, "time_needed: the number of runs must be positive")
  val start = System.nanoTime()
  for (_ <- 1 to i) code
  val end = System.nanoTime()
  // scale to seconds first so that the division by i happens in
  // floating point and does not truncate
  val average = (end - start) / 1.0e9 / i
  s"$average secs"
}
search(game2).map(pretty)
// number of different solutions found for game3
search(game3).distinct.length
// average time of one search, taken over three runs each
time_needed(3, search(game2))
time_needed(3, search(game3))