progs/lecture3.scala
author Christian Urban <christian.urban@kcl.ac.uk>
Tue, 13 Oct 2020 10:21:21 +0100
changeset 343 c8fcc0e0a57f
parent 335 7e00d2b13b04
child 364 f1a6fa599d26
permissions -rw-r--r--
updated

// Scala Lecture 3
//=================

// - last week
//
// option type 
// higher-order function


// uncurried addition: both arguments are supplied in one parameter list
def add(x: Int, y: Int) : Int = {
  val sum = x + y
  sum
}

// add with its first argument fixed to 5
def plus5(x: Int) : Int = add(x = 5, y = x)

plus5(6)   // add(5, 6) = 11

// curried addition: the arguments come in two separate parameter lists,
// so add2(3) on its own can be used as a function Int => Int
def add2(x: Int)(y: Int) : Int = x + y

// add2 with its first argument fixed to 3; the result type is Int because
// add2(3)(y) is fully applied (it is a number, not a function)
def plus3(y: Int) : Int = add2(3)(y)

// intended result: 3 + 9 = 12
plus3(9)

// partial application in both styles; each adds 3 to every element,
// giving List(4, 5, 6, 7, 8)
List(1,2,3,4,5).map(add2(3))
List(1,2,3,4,5).map(add(3, _))

// a position is a pair of coordinates
type Pos = (Int, Int)

// keeps a position only if both coordinates are below 5:
// Some(p) if p is acceptable, None otherwise.
// (The explicit else-branch matters: an `if` without `else` has type Unit,
// which would make the result type Any and break `.flatten` on the results.)
def test(p: Pos) : Option[Pos] =
  if (p._1 < 5 && p._2 < 5) Some(p) else None

// some sample positions
val l = List((1,2), (5,3), (2,5), (1,3))

// intended: keep only the positions accepted by test, i.e. List((1,2), (1,3))
// NOTE(review): flatten needs test to return an Option for every input
l.map(test).flatten

// naive quicksort with "On" function

// quicksort that orders the elements of xs by the key f(x);
// the head of the list serves as pivot
def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = xs match {
  case Nil | _ :: Nil => xs   // fewer than two elements: already sorted
  case pivot :: rest =>
    val key = f(pivot)
    // elements with a strictly smaller key go in front of the pivot,
    // all others (including equal keys) behind it
    val (smaller, others) = rest.partition(f(_) < key)
    sortOn(f, smaller) ::: pivot :: sortOn(f, others)
}

// ascending order: List(-3, 2, 10, 98, 99, 99, 99)
sortOn(identity, List(99,99,99,98,10,-3,2)) 
// sorting on the negated value gives descending order: List(99, 99, 99, 98, 10, 2, -3)
sortOn(n => - n, List(99,99,99,98,10,-3,2))




// Recursion Again ;o)
//====================


// A Web Crawler / Email Harvester
//=================================
//
// the idea is to look for links using the
// regular expression "https?://[^"]*" and for
// email addresses using yet another regex.

import io.Source
import scala.util._

// gets the first 10K of a web-page
// Fetches (at most) the first 10000 characters of the page at `url`,
// decoded as ISO-8859-1.
//
// Using(...) closes the underlying Source once the characters have been
// read (the plain Try version never closed it) and, like Try, turns any
// non-fatal exception into a Failure. On failure a message is printed and
// the empty string is returned, so callers never see an exception for an
// unreachable or malformed URL.
def get_page(url: String) : String = {
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); ""}
}

// regex for URLs and emails
// http_pattern:  a double-quoted http(s) link, quotes included in the match
// email_pattern: name@domain.tld, lower-case letters only
val http_pattern = """"https?://[^"]*"""".r
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r

//  val s = "foo bla christian@kcl.ac.uk 1234567"
//  email_pattern.findAllIn(s).toList

// removes one character from each end of a string
// (used to strip the quotes around a matched link)
def unquote(s: String) = s.slice(1, s.length - 1)

// all distinct links occurring (in double quotes) in a page
def get_all_URLs(page: String): Set[String] = {
  val quoted_links = http_pattern.findAllIn(page)
  (for (link <- quoted_links) yield unquote(link)).toSet
}

// a naive version of crawl - searches until a given depth,
// visits pages potentially more than once
// prints every page visited, following links depth-first;
// n is the remaining depth (nothing happens once it reaches 0)
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s"  Visiting: $n $url")
    val links = get_all_URLs(get_page(url))
    links.foreach(link => crawl(link, n - 1))
  }
}

// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""

// visits the start page and every page it links to (depth 2)
crawl(startURL, 2)

// a for-loop without yield is run only for its side effect:
// prints the numbers 1 to 6, one per line
for (x <- List(1,2,3,4,5,6)) println(x)

// a primitive email harvester
// Collects the email addresses found on the page at `url` together with
// those found on the pages it links to, down to depth n (n == 0 yields
// the empty set). Every visited page is announced on the console.
def emails(url: String, n: Int) : Set[String] = {
  if (n == 0) Set()
  else {
    println(s"  Visiting: $n $url")
    val page = get_page(url)
    // all matches of email_pattern on this page, duplicates removed
    val new_emails = email_pattern.findAllIn(page).toSet
    // the linked pages are processed via a parallel collection (.par)
    // NOTE(review): .par is not in the 2.13 standard library; per the note
    // further down in this file it needs the scala-parallel-collections jar
    // and `import scala.collection.parallel.CollectionConverters._` -- confirm
    new_emails ++ (for (u <- get_all_URLs(page).par) yield emails(u, n - 1)).flatten
  }
}

// harvests emails starting from startURL, following links to depth 3
emails(startURL, 3)


// if we want to explore the internet "deeper", then we
// first have to parallelise the request of webpages:
//
// scala -cp scala-parallel-collections_2.13-0.2.0.jar 
// import scala.collection.parallel.CollectionConverters._



// another well-known example
//============================

// reports a single move of the top disc
def move(from: Char, to: Char) = {
  val msg = s"Move disc from $from to $to!"
  println(msg)
}

// prints the moves that transfer n discs from peg `from` to peg `to`,
// using `via` as the spare peg
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    hanoi(n - 1, from, to, via)   // park the n-1 smaller discs on the spare peg
    move(from, to)                // move the largest disc to its destination
    hanoi(n - 1, via, from, to)   // bring the smaller discs back on top of it
  }
}

// four discs from peg A to peg C: prints 2^4 - 1 = 15 moves
hanoi(4, 'A', 'B', 'C')



// Jumping Towers
//================


// the first n non-empty suffixes of xs (xs itself, then its tail, and so on)
// for n = 1 => just xs

// xs, then xs without its first element, and so on: at most n lists,
// never an empty one
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)


moves(List(5,1,0), 1)   // List(List(5, 1, 0))
moves(List(5,1,0), 2)   // List(List(5, 1, 0), List(1, 0))
moves(List(5,1,0), 5)   // List(List(5, 1, 0), List(1, 0), List(0))

// checks whether a jump tour exists at all

// true if the end of the list can be passed: either the first number is
// larger than the number of remaining fields, or one of the fields within
// its reach admits a tour itself
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case step :: rest =>
    rest.length < step || moves(rest, step).exists(search)
}


search(List(5,3,2,5,1,1))                // true
search(List(3,5,1,0,0,0,1))              // true
search(List(3,5,1,0,0,0,0,1))            // false: every reachable field leads into a 0
search(List(3,5,1,0,0,0,1,1))            // true
search(List(3,5,1))                      // true
search(List(5,1,1))                      // true
search(Nil)                              // true
search(List(1))                          // true
search(List(5,1,1))                      // true
search(List(3,5,1,0,0,0,0,0,0,0,0,1))    // false

// generates *all* jump tours
//    if we are only interested in the shortest one, we could
//    short-circuit the calculation and only return List(x) in
//    case where xs.length < x, because no tour can be shorter
//    than 1
// 

// every tour through xs, each given as the list of numbers stepped on
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case step :: rest =>
    // tours that continue from one of the fields within reach
    val tours = for {
      next <- moves(rest, step)
      tour <- jumps(next)
    } yield step :: tour
    // if the step is long enough to leave the list, List(step) is a tour too
    if (rest.length < step) List(step) :: tours else tours
}

// minBy(_.length) picks a shortest tour (the first one found of that length)
jumps(List(5,3,2,5,1,1)).minBy(_.length)   // List(5, 5)
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
jumps(List(3,5,1))                         // List(List(3), List(3, 5), List(3, 5, 1), List(3, 1))
jumps(List(5,1,1))                         // List(List(5), List(5, 1, 1), List(5, 1))
jumps(Nil)                                 // Nil
jumps(List(1))                             // List(List(1))
jumps(List(5,1,2))
moves(List(1,2), 5)                        // List(List(1, 2), List(2))
jumps(List(1,5,1,2))
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))

// shortest tours only; note that minBy throws on an empty list,
// i.e. when no tour exists at all
jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
jumps(List(1,3,6,1,0,9)).minBy(_.length)
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)






// User-defined Datatypes
//========================

// binary trees: a leaf carries a number, an inner node a label and two subtrees.
// The base class is sealed (like Colour and RomanDigit below), so all cases
// live in this file and the compiler can check pattern matches for exhaustiveness.
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree 

// leaves and nodes can be mixed in one list because both are Trees
List(Leaf(20), Node("foo", Leaf(1), Leaf(2)))

// a closed set of colours
sealed abstract class Colour
case object Red extends Colour 
case object Green extends Colour 
case object Blue extends Colour
case object Yellow extends Colour


// true only for Green.
// The other colours are listed explicitly rather than via `case _`: Colour
// is sealed, so if a new colour is added the compiler warns that this match
// is no longer exhaustive instead of silently answering false.
def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case Red | Blue | Yellow => false
}

fav_colour(Green)   // true

// ... a tiny bit more useful: Roman Numerals

// the seven Roman digits; the values in the comments are the ones
// RomanNumeral2Int (below) assigns to a digit standing on its own
sealed abstract class RomanDigit 
case object I extends RomanDigit   // 1
case object V extends RomanDigit   // 5
case object X extends RomanDigit   // 10
case object L extends RomanDigit   // 50
case object C extends RomanDigit   // 100
case object D extends RomanDigit   // 500
case object M extends RomanDigit   // 1000

// a Roman numeral is a list of digits, most significant digit first
type RomanNumeral = List[RomanDigit] 

// any list of digits has the right type, even if it is not a proper Roman number
List(X,I,M,D)

/*
I    -> 1
II   -> 2
III  -> 3
IV   -> 4
V    -> 5
VI   -> 6
VII  -> 7
VIII -> 8
IX   -> 9
X    -> 10
*/

// Value of a Roman numeral. The numeral is read left to right; the
// two-digit subtractive forms (CM, CD, XC, XL, IX, IV) are recognised
// before the single digits, everything else is simply added up. Invalid
// numerals are not rejected.
def RomanNumeral2Int(rs: RomanNumeral): Int = {
  // total = value of the digits consumed so far
  def go(rest: RomanNumeral, total: Int): Int = rest match {
    case Nil         => total
    // subtractive pairs
    case C :: M :: r => go(r, total + 900)
    case C :: D :: r => go(r, total + 400)
    case X :: C :: r => go(r, total + 90)
    case X :: L :: r => go(r, total + 40)
    case I :: X :: r => go(r, total + 9)
    case I :: V :: r => go(r, total + 4)
    // single digits
    case M :: r      => go(r, total + 1000)
    case D :: r      => go(r, total + 500)
    case C :: r      => go(r, total + 100)
    case L :: r      => go(r, total + 50)
    case X :: r      => go(r, total + 10)
    case V :: r      => go(r, total + 5)
    case I :: r      => go(r, total + 1)
  }
  go(rs, 0)
}

// some conversions with their results
RomanNumeral2Int(List(I,V))             // 4
RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
RomanNumeral2Int(List(V,I))             // 6
RomanNumeral2Int(List(I,X))             // 9
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017


// String interpolations as patterns

val date = "2019-11-26"
// an s-interpolator on the left-hand side works as a pattern: this binds
// year = "2019", month = "11" and day = "26" (all three are Strings)
val s"$year-$month-$day" = date

// Parses a date written as year-month-day, day/month/year or day.month.year.
// Returns Some((day, month, year)), or None if the string has none of these
// shapes or if one of its three parts is not a number (toIntOption is used
// instead of toInt, which would throw a NumberFormatException for input
// such as "a-b-c").
def parse_date(date: String) : Option[(Int, Int, Int)]= {
  // converts the three textual parts; None as soon as one is not an Int
  def triple(day: String, month: String, year: String) : Option[(Int, Int, Int)] =
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)

  date match {
    case s"$year-$month-$day" => triple(day, month, year)
    case s"$day/$month/$year" => triple(day, month, year)
    case s"$day.$month.$year" => triple(day, month, year)
    case _ => None
  }
} 

// all three notations give Some((26, 11, 2019))
parse_date("2019-11-26")
parse_date("26/11/2019")
parse_date("26.11.2019")


// User-defined Datatypes and Pattern Matching
//=============================================




// Tail recursion
//================


// factorial on Long -- deliberately NOT tail-recursive: the multiplication
// happens after the recursive call returns
def fact(n: Long): Long = n match {
  case 0L => 1
  case _  => n * fact(n - 1)
}

// the same function on BigInt (no overflow of the result)
def factB(n: BigInt): BigInt = {
  if (n != 0) n * factB(n - 1)
  else 1
}

// factB is not tail-recursive, so this call nests 100000 deep
// NOTE(review): likely to exhaust the stack under default JVM settings -- confirm
factB(100000)

fact(10)              //ok
fact(10000)           // produces a stackoverflow

// factorial with an accumulator: acc carries the product computed so far,
// so the recursive call is the very last thing the function does
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc

// the accumulator starts at 1
factT(10, 1)   // 3628800
println(factT(100000, 1))

// there is a flag for ensuring a function is tail recursive
import scala.annotation.tailrec

// same function as before; with the annotation the compiler rejects the
// definition if the recursive call is not in tail position
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt = {
  if (n == 0) acc
  else factT(n - 1, n * acc)
}



// for tail-recursive functions the Scala compiler
// generates loop-like code, which does not need
// to allocate stack-space in each recursive
// call; Scala can do this only for tail-recursive
// functions

// tail recursive version that searches 
// for all solutions

// Works through a list of pending games and accumulates the finished ones
// in sols. A finished game is moved to sols; an unfinished one is replaced
// by its successor games, which are put in front of the remaining work.
// Both recursive calls are in tail position; @tailrec makes the compiler
// verify this, so the function cannot silently become stack-hungry.
// NOTE(review): isDone, candidates, emptyPosition, update and empty are not
// defined in this file; the code uses both emptyPosition(game) and
// empty(game) -- confirm that both helpers exist and are meant here.
@tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, empty(game), c)) ::: rest, sols)
    }
  }
}

// NOTE(review): game3 is only defined further down in this file and pretty
// is not defined in this file at all -- evaluate this line after both exist
searchT(List(game3), List()).map(pretty)


// tail recursive version that searches 
// for a single solution

// Like searchT, but stops at the first finished game: Some(game) for the
// first game found to be done, None once no pending games are left.
// The recursive call is in tail position; @tailrec makes the compiler
// verify this.
// NOTE(review): isDone, candidates, emptyPosition, update and empty are not
// defined in this file; the code uses both emptyPosition(game) and
// empty(game) -- confirm that both helpers exist and are meant here.
@tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, empty(game), c)) ::: rest)
    }
  }
}

// NOTE(review): game3 is only defined further down in this file; pretty and
// time_needed are not defined in this file -- evaluate after they exist
search1T(List(game3)).map(pretty)
time_needed(10, search1T(List(game3)))


// game with multiple solutions
// nine rows of nine characters, joined into a single string of length 81
val game3 = """.8...9743
              |.5...8.1.
              |.1.......
              |8....5...
              |...8.4...
              |...3....6
              |.......7.
              |.3.5...8.
              |9724...5.""".stripMargin.filter(_ != '\n')

// all solutions versus the first solution found, each passed through pretty
searchT(List(game3), Nil).map(pretty)
search1T(List(game3)).map(pretty)

// Moral: Whenever a recursive function is resource-critical
// (i.e. works with large recursion depth), then you need to
// write it in tail-recursive fashion.
// 
// Unfortunately, Scala, because of current limitations in
// the JVM, is not as clever as other functional languages. It can
// only optimise "self-tail calls". This excludes the cases of 
// multiple functions making tail calls to each other. Well,
// nothing is perfect. 










//************
// Either
// two successful results: Right carries the value, Left would carry the Exception
val either1 : Either[Exception,Int] = Right(1)
val either2: Either[Exception, Int] = Right(2)

// a for-comprehension over Either works on the Right values;
// both are Right here, so the result is Right(3)
for{
  one <- either1
  two <- either2
} yield one + two