progs/lecture3.scala
author Christian Urban <christian.urban@kcl.ac.uk>
Sun, 15 Sep 2024 12:57:59 +0100
changeset 493 244df77507c2
parent 481 e03a0100ec46
permissions -rw-r--r--
updated

// Scala Lecture 3
//=================

// last week:
// higher-order functions
// maps

// - recursion
// - Sudoku
// - string interpolations
// - Pattern-Matching

// A Recursive Web Crawler / Email Harvester
//===========================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page
// url:     address of the page, read with the ISO-8859-1 encoding
// returns: at most the first 10000 characters of the page, or ""
//          (after printing a message) if opening or reading fails
def get_page(url: String) : String = {
  // Using closes the underlying stream as soon as the prefix has
  // been read, also when reading throws
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s"  Problem with: $url"); ""}
}

// regex for URLs and emails
// matches a double-quoted http:// or https:// link; the two
// surrounding quotes are part of the match
val http_pattern = """"https?://[^"]*"""".r
// matches lower-case email addresses; the three groups are the part
// before the @, the domain, and an ending of 2 to 6 characters
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//test case:
//email_pattern.findAllIn
//  ("foo bla christian@kcl.ac.uk 1234567").toList


// drops the first and last character from a string
// (strings with fewer than two characters come back as "")
def unquote(s: String) = s.slice(1, s.length - 1)

// Collects all quoted http(s) links that occur in page; the quotes
// are removed and duplicates collapse because the result is a Set.
def get_all_URLs(page: String): Set[String] = {
  val links = for (quoted <- http_pattern.findAllIn(page)) yield unquote(quoted)
  links.toSet
}

// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
// Prints every page that is visited while following links from url.
//
// url: the page to start from
// n:   how many levels of links are still followed; zero stops.
//      Negative values are treated like zero, so that a bad
//      argument cannot start a crawl without a depth limit.
def crawl(url: String, n: Int) : Unit = {
  if (n <= 0) ()
  else {
    println(s"  Visiting: $n $url")
    for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

// depth 2: the start page and the pages it links to are visited
crawl(startURL, 2)


// a primitive email harvester
// Collects the email addresses found on url and on the pages
// reachable from it, printing each page that is visited.
//
// url: the page to start from
// n:   how many levels of links are still followed; zero (or a
//      negative number) yields the empty set without any download
def emails(url: String, n: Int) : Set[String] = {
  if (n <= 0) Set()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    val new_emails = email_pattern.findAllIn(page).toSet
    // addresses of this page plus those of all linked pages
    new_emails ++ get_all_URLs(page).flatMap(u => emails(u, n - 1))
  }
}

// harvest from the start page and the pages it links to
emails(startURL, 2)



// Sudoku 
//========

// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search


// a puzzle is written row by row, '.' standing for an empty cell;
// margins and newlines are stripped, which leaves one flat string
// of 9 * 9 = 81 characters
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")

// a cell position; the functions below use it as (column, row),
// both counted from zero
type Pos = (Int, Int)
// the character that marks an unfilled cell
val EmptyValue = '.'
// number of cells in a row of the flat game string
val MaxValue = 9

// Renders a game with one row per line, preceded by a newline.
def pretty(game: String): String = {
  val rows = game.grouped(MaxValue)
  rows.mkString("\n", "\n", "")
}

// try the pretty-printer on the first puzzle
pretty(game0)


// the characters a filled cell can contain
val allValues = "123456789".toList
// the zero-based row and column numbers
val indexes = (0 to 8).toList

// index of the first unfilled cell in the flat string, -1 if none
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// a game is finished when no unfilled cell can be found any more
def isDone(game: String) = empty(game) < 0
// Position, as (column, row), of the first unfilled cell.
// Only meaningful when the game still has an unfilled cell.
def emptyPosition(game: String) : Pos = {
  val idx = empty(game)
  val column = idx % MaxValue
  val row = idx / MaxValue
  (column, row)
}

// the nine cells of row y, from left to right
def get_row(game: String, y: Int) =
  for (col <- indexes) yield game(y * MaxValue + col)
// the nine cells of column x, from top to bottom
def get_col(game: String, x: Int) =
  for (row <- indexes) yield game(x + row * MaxValue)

//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)

// The nine cells of the 3x3 box that contains pos, listed column
// by column.
def get_box(game: String, pos: Pos): List[Char] = {
    val (px, py) = pos
    // integer division rounds down to the box's first column / row
    val left = (px / 3) * 3
    val top = (py / 3) * 3
    for {
      x <- (left until left + 3).toList
      y <- (top until top + 3).toList
    } yield game(x + y * MaxValue)
}


//get_box(game0, (3, 1))


// this is not mutable!!
// Returns a copy of game in which the cell at index pos (an index
// into the flat string, not a Pos) holds value; game itself is
// left unchanged.
def update(game: String, pos: Int, value: Char): String = 
  game.updated(pos, value)

// All characters in the column, row and box of pos (in that
// order); the list may contain duplicates and the empty marker.
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (x, y) = pos
  List(get_col(game, x), get_row(game, y), get_box(game, pos)).flatten
}

// The values that do not yet occur in the column, row or box of
// pos, in ascending order.
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.filterNot(v => taken.contains(v))
}

//candidates(game0, (0,0))


// Exhaustive depth-first search: returns every completed game that
// can be reached from game by filling the unfilled cells.
//
// game:    the (partially filled) puzzle as a flat string
// returns: all solutions; the empty list if there is none
def search(game: String): List[String] = {
  // .par is not part of the standard collections; the import is
  // local so that the function does not rely on an import further
  // down in the file (needs the scala-parallel-collections jar)
  import scala.collection.parallel.CollectionConverters._
  if (isDone(game)) List(game)
  else {
    val idx = empty(game)
    val cs = candidates(game, emptyPosition(game))
    // try every candidate for the first unfilled cell, the branches
    // being explored in parallel
    cs.par.flatMap(c => search(update(game, idx, c))).toList
  }
}

// show the first puzzle and then all of its solutions
pretty(game0)
search(game0).map(pretty)

// a second puzzle, written in the same format as game0
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")

search(game1).map(pretty)

// a game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")

search(game2).map(pretty)

// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")

// print each solution on its own
search(game3).map(pretty).foreach(println)

// for measuring time
// Evaluates code i times and reports the total elapsed wall-clock
// time as a string of the form "<seconds> secs".
def time_needed[T](i: Int, code: => T) = {
  val t0 = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - t0
  s"${elapsed / 1.0e9} secs"
}

// time two complete searches of the hard puzzle
time_needed(2, search(game2))


// concurrency 
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar 
// import scala.collection.parallel.CollectionConverters._




// String Interpolations
//=======================

// third power of n
def cube(n: Int) : Int = {
  val square = n * n
  square * n
}

val n = 3
// plain string concatenation...
println("The cube of " + n + " is " + cube(n) + ".")

// ...and the same output with the s-interpolator
println(s"The cube of $n is ${cube(n)}.")

// or even

// arbitrary expressions are allowed inside ${...}
println(s"The cube of $n is ${n * n * n}.")

// helpful for debugging purposes
//
//     "The most effective debugging tool is still careful 
//          thought, coupled with judiciously placed print 
//                                             statements."
//       — Brian W. Kernighan, in Unix for Beginners (1979)


// Euclid's algorithm with a trace: every call prints its two
// arguments before doing any work.
def gcd_db(a: Int, b: Int) : Int = {
  println(s"Function called with $a and $b.")
  b match {
    case 0 => a
    case _ => gcd_db(b, a % b)
  }
}

// prints one line per recursive call
gcd_db(48, 18)




// Recursion Again ;o)
//====================


// another well-known example: Towers of Hanoi
//=============================================

// prints the instruction for moving the top disc of one peg
def move(from: Char, to: Char) = {
  val instruction = s"Move disc from $from to $to!"
  println(instruction)
}

// Prints the moves that transfer n discs from peg from to peg to,
// using via as the spare peg.
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    // park the n - 1 smaller discs on the spare peg...
    hanoi(n - 1, from, to, via)
    // ...move the largest disc...
    move(from, to)
    // ...and put the smaller discs on top of it
    hanoi(n - 1, via, from, to)
  }
}

// four discs from peg A to peg C, with B as the spare peg
hanoi(4, 'A', 'B', 'C')



// Pattern Matching
//==================

// A powerful tool which has even landed in Java during 
// the last few years (https://inside.java/2021/06/13/podcast-017/).
// ...Scala has had it for many years and the concept is
// older than your friendly lecturer, that is, stone-old  ;o)

// The general schema:
//
//    expression match {
//       case pattern1 => expression1
//       case pattern2 => expression2
//       ...
//       case patternN => expressionN
//    }


// recall
// length of a list, written with if-then-else
def len(xs: List[Int]) : Int = {
    if (xs.isEmpty) 0
    else len(xs.tail) + 1
}

// the same function written with pattern-matching
def len(xs: List[Int]) : Int = xs match {
    case _ :: rest => 1 + len(rest)
    case Nil => 0
}


// applies f to every element of lst, keeping the order
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] =
  lst match {
    case head :: rest => f(head) :: my_map_int(rest, f)
    case Nil => Nil
  }

// applies f to the value inside opt, if there is one
def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] =
  opt match {
    case Some(value) => Some(f(value))
    case None => None
  }

// None stays None, the 8 inside Some is squared
my_map_option(None, x => x * x)
my_map_option(Some(8), x => x * x)


// you can also have cases combined
// Maps an English month name (capitalised) to a sentence about its
// season; anything else is answered with "Wrong month".
def season(month: String) : String = month match {
  case "January" | "February" => "It's unfortunately winter"
  case "March" | "April" | "May" => "It's spring"
  case "June" | "July" | "August" => "It's summer"
  case "September" | "October" | "November" => "It's autumn"
  case "December" => "It's winter"
  case _ => "Wrong month"
}

// pattern-match on integers

// Fibonacci numbers with fib(0) = fib(1) = 1
def fib(n: Int) : Int = n match {
  case 0 | 1 => 1
  case k => fib(k - 2) + fib(k - 1)
}

// evaluates to 89
fib(10)

// pattern-match on results

// Silly: fizz buzz
// "fizz" for multiples of 3, "buzz" for multiples of 5, both words
// for multiples of 15, otherwise the number itself
def fizz_buzz(n: Int) : String = {
  val byThree = n % 3 == 0
  val byFive = n % 5 == 0
  (byThree, byFive) match {
    case (true, true) => "fizz buzz"
    case (true, false) => "fizz"
    case (false, true) => "buzz"
    case (false, false) => n.toString
  }
}

// print the answers for the numbers 1 to 20, one per line
for (n <- 1 to 20) 
 println(fizz_buzz(n))

// more interesting patterns for lists - calculate the deltas between 
// elements

// Replaces every element that has a successor by the difference
// "element - successor"; the last element is kept as it is, so
// the result is as long as the input.
def delta(xs: List[Int]) : List[Int] = xs match {
  case first :: (rest @ (second :: _)) => (first - second) :: delta(rest)
  // empty and one-element lists are returned unchanged
  case other => other
}

// evaluates to List(3, -1, 6, -3, -5, 10)
delta(List(10, 7, 8, 2, 5, 10))


// guards in pattern-matching

// Describes a list; the cases are tried from top to bottom, so a
// list that fits several descriptions gets the first one.
def foo(xs: List[Int]) : String = xs match {
  case Nil => s"this list is empty"
  case first :: _ if first % 2 == 0 =>
    s"the first elemnt is even"
  case first :: second :: _ if first == second =>
    s"this has two elemnts that are the same"
  case hd :: tl => s"this list is standard $hd::$tl"
}

foo(Nil)             // empty
foo(List(1,2,3))     // standard
foo(List(1,2))       // standard
foo(List(1,1,2,3))   // two equal elements at the front
foo(List(2,2,2,3))   // even first element (this case comes first)


// Trees

// binary trees: a Leaf holds a number, a Node holds a label and
// two subtrees
abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree 

// a single leaf...
val lf = Leaf(20)
// ...and a node with two leaves as subtrees
val tr = Node("foo", Leaf(10), Leaf(23))

// both can be put into a list of Trees
val lst : List[Tree] = List(lf, tr)


// colours as case objects: one value per colour
abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour
case object Yellow extends Colour


// true for Green only
def fav_colour(c: Colour) : Boolean = c == Green

// evaluates to false
fav_colour(Blue)

// chess pieces as an enum, together with their numeric values
enum ChessPiece:
  case Queen, Rook, Bishop, Knight, Pawn
  def value = this match
    case Pawn            => 1
    case Bishop | Knight => 3
    case Rook            => 5
    case Queen           => 9



// ... a tiny bit more useful: Roman Numerals

// the seven Roman digits; the class is sealed, so no further
// digits can be added outside this file
sealed abstract class RomanDigit 
case object I extends RomanDigit 
case object V extends RomanDigit 
case object X extends RomanDigit 
case object L extends RomanDigit 
case object C extends RomanDigit 
case object D extends RomanDigit 
case object M extends RomanDigit 

// a Roman numeral is a list of digits, written left to right
type RomanNumeral = List[RomanDigit] 

// NOTE(review): A is not one of the RomanDigit objects declared
// above - presumably this line is meant to show what happens with
// a letter that is not a digit; confirm
List(X,I,M,A)

/*
I    -> 1
II   -> 2
III  -> 3
IV   -> 4
V    -> 5
VI   -> 6
VII  -> 7
VIII -> 8
IX   -> 9
X    -> 10
*/

// Converts a Roman numeral into its value by reading it from the
// left. The subtractive pairs (CM, CD, XC, XL, IX, IV) are listed
// before the single-digit case for their first digit, so they are
// recognised first; the order of the cases therefore matters.
// The function does not check that the numeral is well-formed.
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { 
  case Nil => 0
  case M::r    => 1000 + RomanNumeral2Int(r)  
  case C::M::r => 900 + RomanNumeral2Int(r)
  case D::r    => 500 + RomanNumeral2Int(r)
  case C::D::r => 400 + RomanNumeral2Int(r)
  case C::r    => 100 + RomanNumeral2Int(r)
  case X::C::r => 90 + RomanNumeral2Int(r)
  case L::r    => 50 + RomanNumeral2Int(r)
  case X::L::r => 40 + RomanNumeral2Int(r)
  case X::r    => 10 + RomanNumeral2Int(r)
  case I::X::r => 9 + RomanNumeral2Int(r)
  case V::r    => 5 + RomanNumeral2Int(r)
  case I::V::r => 4 + RomanNumeral2Int(r)
  case I::r    => 1 + RomanNumeral2Int(r)
}

// some examples, each with its expected value
RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017


// Regular expressions as a datatype. The class is sealed (all
// cases are listed right here), which lets the compiler warn
// about pattern-matches that forget one of them.
sealed abstract class Rexp
case object ZERO extends Rexp                      // matches nothing
case object ONE extends Rexp                       // matches the empty string
case class CHAR(c: Char) extends Rexp              // matches a character c
case class ALT(r1: Rexp, r2: Rexp) extends Rexp    // alternative
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp    // sequence
case class STAR(r: Rexp) extends Rexp              // star

// Height of a regular expression seen as a tree: 1 for the basic
// cases, otherwise one more than the deepest component.
def depth(r: Rexp) : Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case ALT(left, right) => 1 + math.max(depth(left), depth(right))
  case SEQ(left, right) => 1 + math.max(depth(left), depth(right))
  case STAR(inner) => 1 + depth(inner)
}





// expressions (essentially trees)

// Arithmetic expressions. The class is sealed (all cases are
// listed right here), which lets the compiler warn about
// pattern-matches that forget one of them.
sealed abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp     // addition
case class Times(e1: Exp, e2: Exp) extends Exp    // multiplication

// Renders an expression in infix notation; every Plus and Times
// is put into parentheses.
def string(e: Exp) : String = e match {
  case N(value) => value.toString
  case Plus(lhs, rhs) =>
    val (l, r) = (string(lhs), string(rhs))
    s"($l + $r)"
  case Times(lhs, rhs) =>
    val (l, r) = (string(lhs), string(rhs))
    s"($l * $r)"
}

// the expression 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
// the toString that case classes come with...
e.toString
// ...versus the infix rendering
println(string(e))

// Computes the value of an expression.
def eval(e: Exp) : Int = e match {
  case N(value) => value
  case Plus(lhs, rhs) =>
    val l = eval(lhs)
    val r = eval(rhs)
    l + r
  case Times(lhs, rhs) =>
    val l = eval(lhs)
    val r = eval(rhs)
    l * r
}

// 9 + 3 * 4, prints 21
println(eval(e))

// simplification rules:
// e + 0, 0 + e => e 
// e * 0, 0 * e => 0
// e * 1, 1 * e => e
//
// (....9 ....)

// Simplifies an expression bottom-up: additions of 0 and
// multiplications by 0 or 1 are removed.
def simp(e: Exp) : Exp = {
  // builds a sum from two already simplified expressions
  def plus(l: Exp, r: Exp) : Exp = (l, r) match {
    case (N(0), rs) => rs
    case (ls, N(0)) => ls
    case (ls, rs) => Plus(ls, rs)
  }
  // builds a product from two already simplified expressions
  def times(l: Exp, r: Exp) : Exp = (l, r) match {
    case (N(0), _) => N(0)
    case (_, N(0)) => N(0)
    case (N(1), rs) => rs
    case (ls, N(1)) => ls
    case (ls, rs) => Times(ls, rs)
  }
  e match {
    case N(n) => N(n)
    case Plus(e1, e2) => plus(simp(e1), simp(e2))
    case Times(e1, e2) => times(simp(e1), simp(e2))
  }
}


// (0 + 1) * (0 + 9) simplifies to 9
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
println(string(e2))
println(string(simp(e2)))



// String interpolations as patterns

val date = "2019-11-26"
// an s-interpolator on the left-hand side acts as a pattern: year,
// month and day are bound to the pieces between the dashes
val s"$year-$month-$day" = date

// Parses a date written as year-month-day, day/month/year or
// day.month.year.
//
// date:    the string to parse
// returns: Some((day, month, year)) if the string has one of the
//          three shapes and all three fields are integers;
//          None otherwise (instead of a NumberFormatException)
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // step 1: split the string into its (day, month, year) fields
  val fields: Option[(String, String, String)] = date match {
    case s"$year-$month-$day" => Some((day, month, year))
    case s"$day/$month/$year" => Some((day, month, year))
    case s"$day.$month.$year" => Some((day, month, year))
    case _ => None
  }
  // step 2: convert the fields; toIntOption yields None for
  // anything that is not a number
  fields.flatMap { case (d, m, y) =>
    for {
      day <- d.toIntOption
      month <- m.toIntOption
      year <- y.toIntOption
    } yield (day, month, year)
  }
}

// all three notations give Some((26,11,2019))
parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")




// Map type (upper-case)
//=======================

// Note the difference between map and Map

// a Map from Ints to Strings
val m = Map(1 -> "one", 2 -> "two", 10 -> "many")

// the same Map built from a list of pairs
List((1, "one"), (2, "two"), (10, "many")).toMap

// get returns an Option: Some("one") and None
m.get(1)
m.get(4)

// getOrElse returns the value, or the given default for a missing key
m.getOrElse(1, "")
m.getOrElse(4, "")

// adding a binding for an existing key gives a new Map in which
// the key is bound to the new value; m itself is unchanged
val new_m = m + (10 -> "ten")

new_m.get(10)

// for-comprehensions work on Maps too: upper-case every value
val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)



// groupBy on a List: the result is a Map from each key to the
// list of elements that produce this key
val lst = List("one", "two", "three", "four", "five")
lst.groupBy(_.head)

lst.groupBy(_.length)

// the words with three letters, wrapped in an Option
lst.groupBy(_.length).get(3)

val grps = lst.groupBy(_.length)
// the word lengths that occur
grps.keySet




// Tail recursion
//================

// factorial; not tail-recursive, because the multiplication
// happens after the recursive call has returned
def fact(n: BigInt): BigInt = {
  if (n != 0) n * fact(n - 1)
  else 1
}

// every recursive call needs its own stack frame
fact(10)              //ok
fact(10000)           // produces a stackoverflow


// factorial with an accumulator: acc carries the product computed
// so far, and the recursive call is the last thing that happens
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n != 0) factT(n - 1, n * acc)
  else acc
}

// the accumulator starts at 1; large arguments are fine here
factT(10, 1)
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

// with the annotation the compiler rejects the definition should
// it not be tail-recursive
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  val finished = n == 0
  if (finished) acc
  else factT(n - 1, acc * n)
}



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions

// not tail-recursive: 1 is added after the recursive call returns
def length(xs: List[Int]) : Int = {
  if (xs.isEmpty) 0
  else 1 + length(xs.tail)
}

// tail-recursive version: acc counts the elements seen so far
@tailrec
def lengthT(xs: List[Int], acc : Int) : Int = {
  if (xs.isEmpty) acc
  else lengthT(xs.tail, acc + 1)
}

// a list with ten million elements, counted from accumulator 0
lengthT(List.fill(10000000)(1), 0)







// Aside: concurrency 
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar 

// a sequential loop
for (n <- (1 to 10)) println(n)

import scala.collection.parallel.CollectionConverters._

// the same loop over a parallel collection; .par is provided by
// the import above
for (n <- (1 to 10).par) println(n)


// for measuring time
// Evaluates code exactly n times and returns the total elapsed
// wall-clock time in seconds.
//
// n:    number of evaluations (nothing is run for n <= 0)
// code: by-name argument, re-evaluated in every round
def time_needed[T](n: Int, code: => T) = {
  val start = System.nanoTime()
  // 1 to n, so that there are n rounds and not n + 1
  for (i <- (1 to n)) code
  val end = System.nanoTime()
  (end - start) / 1.0e9
}

// the numbers from one to ten million as a list of Longs
val list = (1L to 10_000_000L).toList
// adding 42 to every element: sequentially and in parallel
time_needed(10, for (n <- list) yield n + 42)
time_needed(10, for (n <- list.par) yield n + 42)

// ...but par does not make everything faster

list.sum
list.par.sum

// summing the list: sequentially and in parallel
time_needed(10, list.sum)
time_needed(10, list.par.sum)


// Mutable vs Immutable
//======================
//
// Remember:
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)

// But what the heck....let's try to count to 100,000 in parallel
//
// requires
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar

import scala.collection.parallel.CollectionConverters._

// Counts with a mutable variable inside a parallel loop and prints
// the result next to the number of iterations.
def test() = {
  var cnt = 0

  // all iterations of the parallel loop increment the same var,
  // without any synchronisation
  // NOTE(review): presumably the printed count can come out lower
  // than 100000 because of lost updates - that seems to be the
  // point of this demonstration
  for(i <- (1 to 100_000).par) cnt += 1

  println(s"Should be 100000: $cnt")
}

// run the experiment
test()

// Or
// Q: How many elements are in the intersection of
//    two sets?
// A: IMPROPER WAY (mutable counter)

// Counts the elements of A that also occur in B, using a mutable
// counter.
//
// A, B:    the two sets
// returns: the size of their intersection
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var count = 0
  // deliberately sequential: count += 1 is not atomic, so looping
  // over A.par would lose increments and return a wrong number
  for (x <- A; if B contains x) count += 1 
  count
}

// the numbers 0..999 and the multiples of four among them
val A = (0 to 999).toSet
val B = (0 to 999 by 4).toSet

// B is a subset of A with 250 elements, so 250 is the expected answer
count_intersection(A, B)

// but do not try to add .par to the for-loop above


// proper parallel version
// counts in parallel without any mutable state: count combines
// the partial results itself
def count_intersection2(A: Set[Int], B: Set[Int]) : Int = {
  val parallelA = A.par
  parallelA.count(B.contains)
}

// same question as before, answered by the parallel version
count_intersection2(A, B)