// Scala Lecture 3
//=================
// last week:
// higher-order functions
// maps
// - recursion
// - Sudoku
// - string interpolations
// - Pattern-Matching
// A Recursive Web Crawler / Email Harvester
//===========================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using another regex.
import io.Source
import scala.util._
// gets the first 10K of a web-page
// Downloads at most the first 10,000 characters of the page at `url`,
// decoded as ISO-8859-1. Any non-fatal failure (malformed URL, network
// problem, ...) is reported on stdout and yields the empty string.
def get_page(url: String) : String = {
  // Using closes the underlying stream once the prefix has been read;
  // previously the Source was opened but never closed.
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s"  Problem with: $url"); ""}
}
// regex for URLs and emails
// matches a double-quoted http:// or https:// link; the match
// includes the surrounding double quotes
val http_pattern = """"https?://[^"]*"""".r
// matches lower-case email addresses; the three groups are the part
// before the @, the domain name, and the final suffix (2 to 6
// characters drawn from a-z and '.')
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
//test case:
//email_pattern.findAllIn
//  ("foo bla christian@kcl.ac.uk 1234567").toList
// drops the first and last character from a string
// keeps everything between the first and the last character of s;
// strings shorter than two characters give ""
def unquote(s: String) = s.slice(1, s.length - 1)
// collects all quoted http(s) links found in page, with the quotes
// removed; duplicates collapse because the result is a Set
def get_all_URLs(page: String): Set[String] = {
  val links = for (quoted <- http_pattern.findAllIn(page)) yield unquote(quoted)
  links.toSet
}
// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
// prints the visited url, then follows every link on that page with
// the depth budget n reduced by one; stops when the budget is 0
def crawl(url: String, n: Int) : Unit = n match {
  case 0 => ()
  case _ =>
    println(s"  Visiting: $n $url")
    get_all_URLs(get_page(url)).foreach(link => crawl(link, n - 1))
}
// some starting URLs for the crawler
// page from which the crawl starts
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
// visit the start page and the pages it links to (this fetches real
// pages over the network)
crawl(startURL, 2)
// a primitive email harvester
// returns the email addresses found on the page at url together with
// those found on linked pages, following links up to depth n
def emails(url: String, n: Int) : Set[String] = n match {
  case 0 => Set()
  case _ =>
    println(s"  Visiting: $n $url")
    val content = get_page(url)
    // addresses on this page first, then the ones reachable via links
    val found = email_pattern.findAllIn(content).toSet
    val deeper = get_all_URLs(content).flatMap(link => emails(link, n - 1))
    found ++ deeper
}
// harvest addresses from the start page and the pages it links to
emails(startURL, 2)
// Sudoku 
//========
// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
// depth-first search
// a puzzle is a single 81-character string, written row by row
// (the line breaks are removed); '.' marks a field still to be filled
val game0 = """.14.6.3..
              |62...4..9
              |.8..5.6..
              |.6.2....3
              |.7..1..5.
              |5....9.6.
              |..6.2..3.
              |1..5...92
              |..7.9.41.""".stripMargin.replaceAll("\\n", "")
// a position on the board as (column, row)
type Pos = (Int, Int)
// character used for a field that has no value yet
val EmptyValue = '.'
// number of fields per row
val MaxValue = 9
// renders a game for display: a leading newline, then the game
// string cut into rows of MaxValue characters, one row per line
def pretty(game: String): String = 
  game.grouped(MaxValue).mkString("\n", "\n", "")
// show the first puzzle row by row
pretty(game0)
// the characters '1' to '9' that may be placed into a field
val allValues = ('1' to '9').toList
// the indexes 0 to 8 used to walk along a row or a column
val indexes = List.range(0, 9)
// index of the first empty field in game, or -1 if there is none
def empty(game: String) = game.indexWhere(_ == EmptyValue)
// a game is done when it contains no empty field any more
def isDone(game: String) = game.forall(_ != EmptyValue)
// (column, row) of the first empty field; only meaningful when the
// game still has an empty field
def emptyPosition(game: String) : Pos = {
  val idx = empty(game)
  val column = idx % MaxValue
  val row = idx / MaxValue
  (column, row)
}
// the characters of row y, from left to right
def get_row(game: String, y: Int) = 
  for (col <- indexes) yield game(y * MaxValue + col)
// the characters of column x, from top to bottom
def get_col(game: String, x: Int) = 
  for (row <- indexes) yield game(x + row * MaxValue)
//get_row(game0, 0)
//get_row(game0, 1)
//get_col(game0, 0)
// the nine characters of the 3x3 box that contains pos
def get_box(game: String, pos: Pos): List[Char] = {
    val (col, row) = pos
    // top-left corner of the box: round each coordinate down to a
    // multiple of 3
    val left = (col / 3) * 3
    val top = (row / 3) * 3
    for {
      x <- List.range(left, left + 3)
      y <- List.range(top, top + 3)
    } yield game(x + y * MaxValue)
}
//get_box(game0, (3, 1))
// this is not mutable!!
// returns a new game in which the character at index pos is replaced
// by value; the game passed in is left untouched
def update(game: String, pos: Int, value: Char): String = 
  game.updated(pos, value)
// all characters already present in the column, the row and the box
// of pos (duplicates and '.' included)
def toAvoid(game: String, pos: Pos): List[Char] = {
  val (col, row) = pos
  List(get_col(game, col), get_row(game, row), get_box(game, pos)).flatten
}
// the values that can still be placed at pos, in ascending order
def candidates(game: String, pos: Pos): List[Char] = {
  val taken = toAvoid(game, pos)
  allValues.filterNot(taken.contains)
}
//candidates(game0, (0,0))
// exhaustive depth-first search: fills the first empty field with
// every candidate value and recurses; returns all completed games
// (note: .par needs scala.collection.parallel.CollectionConverters._
// to be imported before this definition)
def search(game: String): List[String] = {
  if (isDone(game)) List(game)
  else {
    val slot = empty(game)
    val cs = candidates(game, emptyPosition(game))
    cs.par.flatMap(c => search(update(game, slot, c))).toList
  }
}
// show the first puzzle and compute all of its solutions
pretty(game0)
search(game0).map(pretty)
// a second puzzle in the same 81-character encoding
val game1 = """23.915...
              |...2..54.
              |6.7......
              |..1.....9
              |89.5.3.17
              |5.....6..
              |......9.5
              |.16..7...
              |...329..1""".stripMargin.replaceAll("\\n", "")
search(game1).map(pretty)
// a game that is in the hard category
val game2 = """8........
              |..36.....
              |.7..9.2..
              |.5...7...
              |....457..
              |...1...3.
              |..1....68
              |..85...1.
              |.9....4..""".stripMargin.replaceAll("\\n", "")
search(game2).map(pretty)
// game with multiple solutions
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.replaceAll("\\n", "")
// print every solution that was found
search(game3).map(pretty).foreach(println)
// for measuring time
// evaluates code i times and reports the total elapsed wall-clock
// time as a string of the form "<seconds> secs"
def time_needed[T](i: Int, code: => T) = {
  val before = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - before
  s"${elapsed / 1.0e9} secs"
}
// time two runs of the solver on the hard puzzle
time_needed(2, search(game2))
// concurrency 
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar 
// import scala.collection.parallel.CollectionConverters._
// String Interpolations
//=======================
// n to the power of three (plain Int arithmetic)
def cube(n: Int) : Int = {
  val square = n * n
  square * n
}
val n = 3
// building the message by string concatenation...
println("The cube of " + n + " is " + cube(n) + ".")
// ...and with an s-interpolated string: $n inserts a value,
// ${...} inserts the result of an arbitrary expression
println(s"The cube of $n is ${cube(n)}.")
// or even
println(s"The cube of $n is ${n * n * n}.")
// helpful for debugging purposes
//
//     "The most effective debugging tool is still careful 
//          thought, coupled with judiciously placed print 
//                                             statements."
//       — Brian W. Kernighan, in Unix for Beginners (1979)
// greatest common divisor via Euclid's algorithm; prints its
// arguments on every call so the recursion can be followed
def gcd_db(a: Int, b: Int) : Int = {
  println(s"Function called with $a and $b.")
  b match {
    case 0 => a
    case _ => gcd_db(b, a % b)
  }
}
// prints the calls (48,18), (18,12), (12,6), (6,0) and returns 6
gcd_db(48, 18)
// Recursion Again ;o)
//====================
// another well-known example: Towers of Hanoi
//=============================================
// prints the instruction for moving one disc between two pegs
def move(from: Char, to: Char) = {
  val instruction = s"Move disc from $from to $to!"
  println(instruction)
}
// prints the moves that transfer n discs from peg `from` to peg `to`,
// using `via` as the spare peg
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    // park the n - 1 smaller discs on the spare peg...
    hanoi(n - 1, from, to, via)
    // ...move the largest disc...
    move(from, to)
    // ...and put the smaller discs on top of it
    hanoi(n - 1, via, from, to)
  }
}
// solve the puzzle for four discs (prints 15 moves)
hanoi(4, 'A', 'B', 'C')
// Pattern Matching
//==================
// A powerful tool which has even landed in Java during 
// the last few years (https://inside.java/2021/06/13/podcast-017/).
// ...Scala already has it for many years and the concept is
// older than your friendly lecturer, that is stone old  ;o)
// The general schema:
//
//    expression match {
//       case pattern1 => expression1
//       case pattern2 => expression2
//       ...
//       case patternN => expressionN
//    }
// recall
// length of a list, written with an explicit test and xs.tail
def len(xs: List[Int]) : Int = {
    if (xs.nonEmpty) len(xs.tail) + 1
    else 0
}
// the same function written with pattern matching: the cons pattern
// takes the list apart, so no emptiness test or .tail is needed
def len(xs: List[Int]) : Int = xs match {
    case _ :: rest => 1 + len(rest)
    case Nil => 0
}
// applies f to every element of lst, keeping the order
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = 
  lst match {
    case head :: rest => f(head) :: my_map_int(rest, f)
    case Nil => Nil
  }
// applies f to the value inside opt, if there is one
def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] = 
  opt match {
    case Some(value) => Some(f(value))
    case None => None
  }
// None stays None; Some(8) becomes Some(64)
my_map_option(None, x => x * x)
my_map_option(Some(8), x => x * x)
// you can also have cases combined
// maps an English month name to a remark about its season; anything
// that is not a month name gives "Wrong month"
def season(month: String) : String = {
  val spring = Set("March", "April", "May")
  val summer = Set("June", "July", "August")
  val autumn = Set("September", "October", "November")
  val lateWinter = Set("January", "February")
  month match {
    case m if spring(m) => "It's spring"
    case m if summer(m) => "It's summer"
    case m if autumn(m) => "It's autumn"
    case "December" => "It's winter"
    case m if lateWinter(m) => "It's unfortunately winter"
    case _ => "Wrong month"
  }
}
// pattern-match on integers
// Fibonacci numbers with fib(0) = fib(1) = 1; intended for n >= 0
def fib(n: Int) : Int = n match { 
  case 0 | 1 => 1
  case _ => fib(n - 2) + fib(n - 1)
}
// evaluates to 89
fib(10)
// pattern-match on results
// Silly: fizz buzz
// matches on the pair (divisible by 3, divisible by 5)
def fizz_buzz(n: Int) : String = (n % 3 == 0, n % 5 == 0) match {
  case (true, true) => "fizz buzz"
  case (true, false) => "fizz"
  case (false, true) => "buzz"
  case (false, false) => n.toString
}
// print the fizz-buzz answer for each of the numbers 1 to 20
for (n <- 1 to 20) 
 println(fizz_buzz(n))
// guards in pattern-matching
// Describes a list using guarded patterns. The cases are tried from
// top to bottom, so a list that starts with an even number is always
// reported by the second case, even if its first two elements are
// also equal.
def foo(xs: List[Int]) : String = xs match {
  case Nil => "this list is empty"
  // the tail is not needed here, so it is not bound (the original
  // bound it to xs, shadowing the parameter)
  case x :: _ if x % 2 == 0 
     => "the first element is even"
  case x :: y :: _ if x == y
     => "this has two elements that are the same"
  case hd :: tl => s"this list is standard $hd::$tl"
}
// empty list: first case
foo(Nil)
// odd head, first two elements differ: last case
foo(List(1,2,3))
foo(List(1,2))
// odd head, first two elements equal: third case
foo(List(1,1,2,3))
// even head: second case wins although the first two elements are equal
foo(List(2,2,2,3))
// Trees
// binary trees: a Leaf carries an Int, a Node carries a String label
// and two subtrees; sealed (like RomanDigit in this file) so that the
// compiler can check pattern matches on Tree for exhaustiveness
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree 
// a single leaf, a small tree, and a list holding both; the list is
// typed List[Tree], the common supertype of Leaf and Node
val lf = Leaf(20)
val tr = Node("foo", Leaf(10), Leaf(23))
val lst : List[Tree] = List(lf, tr)
// an enumeration of four colours as case objects; sealed (like
// RomanDigit in this file) so that the compiler knows all cases
sealed abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour
case object Yellow extends Colour
// true only for Green
def fav_colour(c: Colour) : Boolean = c == Green
// Blue is not Green, so this is false
fav_colour(Blue)
// ... a tiny bit more useful: Roman Numerals
// the seven Roman digits as case objects of a sealed class, so the
// set of digits is fixed by these definitions
sealed abstract class RomanDigit 
case object I extends RomanDigit 
case object V extends RomanDigit 
case object X extends RomanDigit 
case object L extends RomanDigit 
case object C extends RomanDigit 
case object D extends RomanDigit 
case object M extends RomanDigit 
// a Roman numeral is a list of digits, most significant digit first
type RomanNumeral = List[RomanDigit] 
// NOTE(review): A is not one of the RomanDigit objects defined above;
// presumably this line shows that such a list is not a RomanNumeral
// (or is a typo) - confirm
List(X,I,M,A)
/*
I    -> 1
II   -> 2
III  -> 3
IV   -> 4
V    -> 5
VI   -> 6
VII  -> 7
VIII -> 8
IX   -> 9
X    -> 10
*/
// converts a Roman numeral into its Int value by consuming digits
// from the front of the list
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { 
  case Nil => 0
  // subtractive pairs: a smaller digit in front of a larger one
  case C::M::rest => 900 + RomanNumeral2Int(rest)
  case C::D::rest => 400 + RomanNumeral2Int(rest)
  case X::C::rest => 90 + RomanNumeral2Int(rest)
  case X::L::rest => 40 + RomanNumeral2Int(rest)
  case I::X::rest => 9 + RomanNumeral2Int(rest)
  case I::V::rest => 4 + RomanNumeral2Int(rest)
  // single digits
  case M::rest => 1000 + RomanNumeral2Int(rest)
  case D::rest => 500 + RomanNumeral2Int(rest)
  case C::rest => 100 + RomanNumeral2Int(rest)
  case L::rest => 50 + RomanNumeral2Int(rest)
  case X::rest => 10 + RomanNumeral2Int(rest)
  case V::rest => 5 + RomanNumeral2Int(rest)
  case I::rest => 1 + RomanNumeral2Int(rest)
}
// sample conversions; the expected value is given next to each call
RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017
// regular expressions as a tree-shaped datatype; sealed (like
// RomanDigit in this file) so that the compiler can check pattern
// matches on Rexp for exhaustiveness
sealed abstract class Rexp
case object ZERO extends Rexp                      // matches nothing
case object ONE extends Rexp                       // matches the empty string
case class CHAR(c: Char) extends Rexp              // matches a character c
case class ALT(r1: Rexp, r2: Rexp) extends Rexp    // alternative
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp    // sequence
case class STAR(r: Rexp) extends Rexp              // star
// height of the tree behind a regular expression: 1 for the three
// leaf forms, otherwise one more than the deepest sub-expression
def depth(r: Rexp) : Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case ALT(left, right) => 1 + (depth(left) max depth(right))
  case SEQ(left, right) => 1 + (depth(left) max depth(right))
  case STAR(inner) => 1 + depth(inner)
}
// expressions (essentially trees)
// arithmetic expressions built from numbers, + and *; sealed (like
// RomanDigit in this file) so that the compiler can check pattern
// matches on Exp for exhaustiveness
sealed abstract class Exp
case class N(n: Int) extends Exp                  // for numbers
case class Plus(e1: Exp, e2: Exp) extends Exp
case class Times(e1: Exp, e2: Exp) extends Exp
// renders an expression as a fully parenthesised infix string
def string(e: Exp) : String = {
  def infix(l: Exp, op: String, r: Exp) = s"(${string(l)} $op ${string(r)})"
  e match {
    case N(n) => n.toString
    case Plus(e1, e2) => infix(e1, "+", e2)
    case Times(e1, e2) => infix(e1, "*", e2)
  }
}
// the expression 9 + (3 * 4)
val e = Plus(N(9), Times(N(3), N(4)))
// the toString generated for case classes shows the constructor tree...
e.toString
// ...while string gives the infix form "(9 + (3 * 4))"
println(string(e))
// computes the Int value of an expression
def eval(e: Exp) : Int = e match {
  case Times(l, r) => eval(l) * eval(r)
  case Plus(l, r) => eval(l) + eval(r)
  case N(value) => value
}
// prints 21, the value of 9 + (3 * 4)
println(eval(e))
// simplification rules:
// e + 0, 0 + e => e 
// e * 0, 0 * e => 0
// e * 1, 1 * e => e
//
// (....9 ....)
// simplifies an expression bottom-up: both operands are simplified
// first, then additions of 0 and multiplications by 0 or 1 are removed
def simp(e: Exp) : Exp = e match {
  case Plus(l, r) =>
    val sl = simp(l)
    val sr = simp(r)
    (sl, sr) match {
      case (N(0), _) => sr
      case (_, N(0)) => sl
      case _ => Plus(sl, sr)
    }
  case Times(l, r) =>
    val sl = simp(l)
    val sr = simp(r)
    (sl, sr) match {
      // the 0-rules have to be tried before the 1-rules
      case (N(0), _) | (_, N(0)) => N(0)
      case (N(1), _) => sr
      case (_, N(1)) => sl
      case _ => Times(sl, sr)
    }
  case N(n) => N(n)
}
// (0 + 1) * (0 + 9)
val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
// prints "((0 + 1) * (0 + 9))"
println(string(e2))
// both sums lose their 0, then the multiplication by 1 disappears:
// prints "9"
println(string(simp(e2)))
// String interpolations as patterns
val date = "2019-11-26"
// an s-interpolated string used as a pattern: binds year, month and
// day to the substrings "2019", "11" and "26"
val s"$year-$month-$day" = date
// Parses dates written as year-month-day, day/month/year or
// day.month.year and returns them as Some((day, month, year)).
// Returns None if the string has none of these shapes or if one of
// the three parts is not a number (the previous .toInt version threw
// a NumberFormatException in that case).
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // first split the string into its (day, month, year) parts...
  val fields: Option[(String, String, String)] = date match {
    case s"$year-$month-$day" => Some((day, month, year))
    case s"$day/$month/$year" => Some((day, month, year))
    case s"$day.$month.$year" => Some((day, month, year))
    case _ => None
  }
  // ...then convert the parts; a single failed conversion gives None
  fields.flatMap { case (d, m, y) =>
    for {
      dd <- d.toIntOption
      mm <- m.toIntOption
      yy <- y.toIntOption
    } yield (dd, mm, yy)
  }
}
// all three notations give Some((26, 11, 2019))
parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")
// Map type (upper-case)
//=======================
// Note the difference between map and Map
// a Map is built from key -> value pairs...
val m = Map(1 -> "one", 2 -> "two", 10 -> "many")
// ...or from a list of pairs
List((1, "one"), (2, "two"), (10, "many")).toMap
// get returns an Option: Some("one") for key 1, None for the missing key 4
m.get(1)
m.get(4)
// getOrElse returns the value itself, or the given default
m.getOrElse(1, "")
m.getOrElse(4, "")
// adding a pair for an existing key gives a new Map with the value
// replaced; m itself is unchanged
val new_m = m + (10 -> "ten")
new_m.get(10)
// a for-comprehension over a Map that yields pairs is again a Map
val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)
// groupBy on a List: the result is a Map from each key to the list
// of elements that have this key
val lst = List("one", "two", "three", "four", "five")
lst.groupBy(_.head)
lst.groupBy(_.length)
lst.groupBy(_.length).get(3)
val grps = lst.groupBy(_.length)
grps.keySet
// Tail recursion
//================
// factorial; the multiplication happens after the recursive call
// returns, so this function is not tail-recursive
def fact(n: BigInt): BigInt = 
  if (n != 0) n * fact(n - 1) else 1
fact(10)              // ok: 3628800
fact(10000)           // one stack frame per call: produces a stack overflow
// factorial with an accumulator: acc carries the product computed so
// far, and the recursive call is the last thing the function does
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc
// the accumulator starts at 1
factT(10, 1)
// prints the factorial of 100000
println(factT(100000, 1))
// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec
// with @tailrec the compiler rejects the definition if the recursive
// call is not in tail position
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n.signum == 0) acc else factT(n - 1, n * acc)
// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions
// length of a list; the addition happens after the recursive call,
// so this version is not tail-recursive
def length(xs: List[Int]) : Int = xs match {
  case _ :: rest => length(rest) + 1
  case Nil => 0
}
// tail-recursive length: acc counts the elements seen so far
@tailrec
def lengthT(xs: List[Int], acc : Int) : Int = xs match {
  case _ :: rest => lengthT(rest, acc + 1)
  case Nil => acc
}
// counts a list of ten million elements, starting from accumulator 0
lengthT(List.fill(10000000)(1), 0)
// Aside: concurrency 
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar 
// sequential loop: prints 1 to 10 in order
for (n <- (1 to 10)) println(n)
// this import adds the .par method to the standard collections
import scala.collection.parallel.CollectionConverters._
// parallel loop: the numbers may be printed in a different order
for (n <- (1 to 10).par) println(n)
// for measuring time
// Evaluates code exactly n times and returns the total elapsed
// wall-clock time in seconds. (The loop previously ran from 0 to n,
// i.e. n + 1 times.)
def time_needed[T](n: Int, code: => T) = {
  val start = System.nanoTime()
  for (i <- (1 to n)) code
  val end = System.nanoTime()
  (end - start) / 1.0e9
}
// ten million Longs for the timing experiments
val list = (1L to 10_000_000L).toList
// adding 42 to every element: sequential versus parallel
time_needed(10, for (n <- list) yield n + 42)
time_needed(10, for (n <- list.par) yield n + 42)
// ...but par does not make everything faster
list.sum
list.par.sum
// summing the list: sequential versus parallel
time_needed(10, list.sum)
time_needed(10, list.par.sum)
// Mutable vs Immutable
//======================
//
// Remember:
// - no vars, no ++i, no +=
// - no mutable data-structures (no Arrays, no ListBuffers)
// But what the heck....let's try to count to 100,000 in parallel
// 
// requires
// scala-cli --extra-jars scala-parallel-collections_3-1.0.4.jar
import scala.collection.parallel.CollectionConverters._
// increments one shared var from a parallel loop; the increments are
// not synchronised, so the printed count may be lower than 100000
def test() = {
  var cnt = 0
  (1 to 100_000).par.foreach(_ => cnt += 1)
  println(s"Should be 100000: $cnt")
}
// run the experiment and compare the printed count with 100000
test()
// Or
// Q: Count how many elements are in the intersections of 
//    two sets?
// A: IMPROPER WAY (mutable counter)
// Counts the elements of A that also occur in B using a mutable
// counter. The loop has to stay sequential: incrementing the shared
// var from a parallel loop (A.par) is a data race that can lose
// updates and return a count that is too small.
def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
  var count = 0
  for (x <- A; if B contains x) count += 1 
  count
}
// the numbers 0 to 999, and the multiples of 4 among them
val A = (0 to 999).toSet
val B = (0 to 999 by 4).toSet
// B is a subset of A with 250 elements, so the correct answer is 250
count_intersection(A, B)
// but do not try to add .par to the for-loop above
// proper parallel version
// counts the elements of A that are also in B with the parallel
// count operation, without any mutable state
def count_intersection2(A: Set[Int], B: Set[Int]) : Int = 
  A.par.count(B.contains)
// number of elements that A and B have in common
count_intersection2(A, B)