progs/lecture2.scala
changeset 204 9b45dd24271b
parent 192 a112e0e2325c
child 212 4bda49ec24da
--- a/progs/lecture2.scala	Thu Nov 15 14:23:55 2018 +0000
+++ b/progs/lecture2.scala	Fri Nov 16 02:06:03 2018 +0000
@@ -1,11 +1,216 @@
 // Scala Lecture 2
 //=================
 
+// UNFINISHED BUSINESS from Lecture 1
+//====================================
+
+
+// runs `code` n times and returns the total elapsed time in seconds
+def time_needed[T](n: Int, code: => T) : Double = {
+  val start = System.nanoTime()
+  for (i <- (1 to n)) code   // 1 to n: exactly n runs (0 to n would be n + 1)
+  val end = System.nanoTime()
+  (end - start) / 1.0e9      // nanoseconds -> seconds
+}
+
+
+val list = (1 to 1000000).toList
+time_needed(10, for (n <- list) yield n + 42)
+time_needed(10, for (n <- list.par) yield n + 42)
+
+
+// Just for "Fun": Mutable vs Immutable
+//=======================================
+//
+// - no vars, no ++i, no +=
+// - no mutable data-structures (no Arrays, no ListBuffers)
+
+
+// Q: How many elements are in the intersection of two sets?
+
+def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
+  var count = 0
+  for (x <- A; if B contains x) count += 1 
+  count
+}
+
+val A = (1 to 1000).toSet
+val B = (1 to 1000 by 4).toSet
+
+count_intersection(A, B)
+
+// but do not try to add .par to the for-loop above
+
+
+// proper parallel version
+def count_intersection2(A: Set[Int], B: Set[Int]) : Int = 
+  A.par.count(x => B contains x)
+
+count_intersection2(A, B)
+
+
+val A = (1 to 1000000).toSet
+val B = (1 to 1000000 by 4).toSet
+
+time_needed(100, count_intersection(A, B))
+time_needed(100, count_intersection2(A, B))
+
+
+
+// For-Comprehensions Again
+//==========================
+
+// the first produces a result, while the second does not
+for (n <- List(1, 2, 3, 4, 5)) yield n * n
+
+
+for (n <- List(1, 2, 3, 4, 5)) println(n)
+
+
+
+// Higher-Order Functions
+//========================
+
+// functions can take functions as arguments
+
+def even(x: Int) : Boolean = x % 2 == 0
+def odd(x: Int) : Boolean = x % 2 != 0  // != 0, since -3 % 2 is -1 (not 1)
+
+val lst = (1 to 10).toList
+
+lst.filter(x => even(x))
+lst.filter(even(_))
+lst.filter(even)
+
+lst.count(even)
+
+lst.find(_ > 8)
+
+
+val ps = List((3, 0), (3, 2), (4, 2), (2, 0), (1, 1), (1, 0))
+
+ps.sortBy(_._1)
+ps.sortBy(_._2)
+
+ps.maxBy(_._1)
+ps.maxBy(_._2)
+
+
+
+// maps
+//=====
+
+def square(x: Int): Int = x * x
+
+val lst = (1 to 10).toList
+
+lst.map(square)
+
+// this is actually how for-yield is defined in Scala (in terms of map)
+
+lst.map(n => square(n))
+for (n <- lst) yield square(n)
+
+// this can be iterated
+
+lst.map(square).filter(_ > 4)
+
+lst.map(square).filter(_ > 4).map(square)
+
+
+// let's define our own functions
+// type of functions, for example f: Int => Int
+
+def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = {
+  if (lst == Nil) Nil
+  else f(lst.head) :: my_map_int(lst.tail, f)
+}
+
+my_map_int(lst, square)
+
+
+// same function using pattern matching: a kind
+// of switch statement on steroids (see more later on)
+
+def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = lst match {
+  case Nil => Nil
+  case x::xs => f(x)::my_map_int(xs, f)
+}
+
+
+// other function types
+//
+// f1: (Int, Int) => Int
+// f2: List[String] => Option[Int]
+// ... 
+
+
+def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
+  case Nil => 0
+  case x::xs => f(x) + sumOf(f, xs)
+}
+
+def sum_squares(lst: List[Int]) = sumOf(square, lst)
+def sum_cubes(lst: List[Int])   = sumOf(x => x * x * x, lst)
+
+sum_squares(lst)
+sum_cubes(lst)
+
+// let's try it with factorial
+def fact(n: Int) : Int = ???  // exercise: to be filled in; ??? compiles but throws until then
+
+def sum_fact(lst: List[Int]) = sumOf(fact, lst)
+sum_fact(lst)
+
+
+
+
+
+// Map type
+//==========
+
+// Note the difference between map and Map
+
+def factors(n: Int) : List[Int] =
+  ((1 until n).filter { divisor =>
+      n % divisor == 0
+    }).toList
+
+
+val ls = (1 to 10).toList  // never reassigned, so a val suffices
+
+val facs = ls.map(n => (n, factors(n)))
+
+facs.find(_._1 == 4)
+
+// works for lists of pairs
+facs.toMap
+
+
+facs.toMap.get(4)
+facs.toMap.getOrElse(4, Nil)
+
+val facsMap = facs.toMap
+
+val facsMap0 = facsMap + (0 -> List(1,2,3,4,5))
+facsMap0.get(0)
+
+val facsMap4 = facsMap + (1 -> List(1,2,3,4,5))
+facsMap.get(1)
+facsMap4.get(1)
+
+val ls = List("one", "two", "three", "four", "five")
+ls.groupBy(_.length)
+
+ls.groupBy(_.length).get(3)
+
+
 
 // Option type
 //=============
 
 //in Java if something unusually happens, you return null;
+//
 //in Scala you use Option
 //   - if the value is present, you use Some(value)
 //   - if no value is present, you use None
@@ -14,16 +219,7 @@
 List(7,2,3,4,5,6).find(_ < 4)
 List(5,6,7,8,9).find(_ < 4)
 
-
-// Values in types
-//
-// Boolean: 
-// Int: 
-// String: 
-//
-// Option[String]:
-//   
-
+// operations on options
 
 val lst = List(None, Some(1), Some(2), None, Some(3))
 
@@ -43,6 +239,7 @@
 // getOrElse is for setting a default value
 
 val lst = List(None, Some(1), Some(2), None, Some(3))
+
 for (x <- lst) yield x.getOrElse(0)
 
 
@@ -61,17 +258,24 @@
 
 Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None)
 
-// a function that turns strings into numbers
-Integer.parseInt("12u34")
 
-def get_me_an_int(s: String): Option[Int] = 
+// a function that turns strings into numbers (similar to .toInt)
+Integer.parseInt("1234")
+
+
+def get_me_an_int(s: String) : Option[Int] = 
  Try(Some(Integer.parseInt(s))).getOrElse(None)
 
-val lst = List("12345", "foo", "5432", "bar", "x21")
+val lst = List("12345", "foo", "5432", "bar", "x21", "456")
 for (x <- lst) yield get_me_an_int(x)
 
 // summing all the numbers
-val sum = lst.flatMap(get_me_an_int(_)).sum
+
+lst.map(get_me_an_int)
+lst.map(get_me_an_int).flatten.sum
+
+
+val sum = lst.flatMap(get_me_an_int).sum
 
 
 // This may not look any better than working with null in Java, but to
@@ -89,64 +293,8 @@
 // even Scala is not immune to problems like this:
 
 List(5,6,7,8,9).indexOf(7)
-
-
-
-
-
-// Type abbreviations
-//====================
-
-// some syntactic convenience
-type Pos = (int, Int)
-
-type Board = List[List[Int]]
-
-
-
-// Implicits
-//===========
-//
-// for example adding your own methods to Strings:
-// imagine you want to increment strings, like
-//
-//     "HAL".increment
-//
-// you can avoid ugly fudges, like a MyString, by
-// using implicit conversions
-
+List(5,6,7,8,9).indexOf(10)
 
-implicit class MyString(s: String) {
-  def increment = for (c <- s) yield (c + 1).toChar 
-}
-
-"HAL".increment
-
-
-// No return in Scala
-//====================
-
-//You should not use "return" in Scala:
-//
-// A return expression, when evaluated, abandons the 
-// current computation and returns to the caller of the 
-// function in which return appears."
-
-def sq1(x: Int): Int = x * x
-def sq2(x: Int): Int = return x * x
-
-def sumq(ls: List[Int]): Int = {
-  (for (x <- ls) yield (return x * x)).sum[Int]
-}
-
-sumq(List(1,2,3,4))
-
-
-// last expression in a function is the return statement
-def square(x: Int): Int = {
-  println(s"The argument is ${x}.")
-  x * x
-}
 
 
 
@@ -169,7 +317,7 @@
 
 
 
-// remember
+// remember?
 val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
 
 
@@ -178,6 +326,7 @@
 }
 
 
+
 def my_flatten(lst: List[Option[Int]]): List[Int] = lst match {
   case Nil => Nil
   case None::xs => my_flatten(xs)
@@ -200,7 +349,8 @@
   case "March" | "April" | "May" => "It's spring"
   case "June" | "July" | "August" => "It's summer"
   case "September" | "October" | "November" => "It's autumn"
-  case "December" | "January" | "February" => "It's winter"
+  case "December" => "It's winter"
+  case "January" | "February" => "It's unfortunately winter"
 }
  
 println(season("November"))
@@ -209,7 +359,7 @@
 
 println(season("foobar"))
 
-// fizz buzz
+// Silly: fizz buzz
 def fizz_buzz(n: Int) : String = (n % 3, n % 5) match {
   case (0, 0) => "fizz buzz"
   case (0, _) => "fizz"
@@ -224,162 +374,177 @@
 // User-defined Datatypes
 //========================
 
-abstract class Tree
-case class Node(elem: Int, left: Tree, right: Tree) extends Tree
-case class Leaf() extends Tree
 
+abstract class Colour
+case object Red extends Colour 
+case object Green extends Colour 
+case object Blue extends Colour
 
-def insert(tr: Tree, n: Int): Tree = tr match {
-  case Leaf() => Node(n, Leaf(), Leaf())
-  case Node(m, left, right) => 
-    if (n == m) Node(m, left, right) 
-    else if (n < m) Node(m, insert(left, n), right)
-    else Node(m, left, insert(right, n))
+def fav_colour(c: Colour) : Boolean = c match {
+  case Red   => false
+  case Green => true
+  case Blue  => false 
 }
 
+fav_colour(Green)
+
 
-val t1 = Node(4, Node(2, Leaf(), Leaf()), Node(7, Leaf(), Leaf()))
-insert(t1, 3)
+// ... a bit more useful: Roman Numerals
+
+abstract class RomanDigit 
+case object I extends RomanDigit 
+case object V extends RomanDigit 
+case object X extends RomanDigit 
+case object L extends RomanDigit 
+case object C extends RomanDigit 
+case object D extends RomanDigit 
+case object M extends RomanDigit 
+
+type RomanNumeral = List[RomanDigit] 
 
-def depth(tr: Tree): Int = tr match {
-  case Leaf() => 0
-  case Node(_, left, right) => 1 + List(depth(left), depth(right)).max
+def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { 
+  case Nil => 0
+  case M::r    => 1000 + RomanNumeral2Int(r)  
+  case C::M::r => 900 + RomanNumeral2Int(r)
+  case D::r    => 500 + RomanNumeral2Int(r)
+  case C::D::r => 400 + RomanNumeral2Int(r)
+  case C::r    => 100 + RomanNumeral2Int(r)
+  case X::C::r => 90 + RomanNumeral2Int(r)
+  case L::r    => 50 + RomanNumeral2Int(r)
+  case X::L::r => 40 + RomanNumeral2Int(r)
+  case X::r    => 10 + RomanNumeral2Int(r)
+  case I::X::r => 9 + RomanNumeral2Int(r)
+  case V::r    => 5 + RomanNumeral2Int(r)
+  case I::V::r => 4 + RomanNumeral2Int(r)
+  case I::r    => 1 + RomanNumeral2Int(r)
 }
 
+RomanNumeral2Int(List(I,V))             // 4
+RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
+RomanNumeral2Int(List(V,I))             // 6
+RomanNumeral2Int(List(I,X))             // 9
+RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
+RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017
+
 
-def balance(tr: Tree): Int = tr match {
-  case Leaf() => 0
-  case Node(_, left, right) => depth(left) - depth(right)
-}
+// another example
+//=================
 
-balance(insert(t1, 3))
+// Once upon a time, in a completely fictional country, there were Persons...
 
-// another example
 
 abstract class Person
-case class King() extends Person
+case object King extends Person
 case class Peer(deg: String, terr: String, succ: Int) extends Person
 case class Knight(name: String) extends Person
 case class Peasant(name: String) extends Person
-case class Clown() extends Person
+case object Clown extends Person
 
 def title(p: Person): String = p match {
-  case King() => "His Majesty the King"
+  case King => "His Majesty the King"
   case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
   case Knight(name) => s"Sir ${name}"
   case Peasant(name) => name
 }
 
 def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
-  case (King(), _) => true
+  case (King, _) => true
   case (Peer(_,_,_), Knight(_)) => true
   case (Peer(_,_,_), Peasant(_)) => true
-  case (Peer(_,_,_), Clown()) => true
+  case (Peer(_,_,_), Clown) => true
   case (Knight(_), Peasant(_)) => true
-  case (Knight(_), Clown()) => true
-  case (Clown(), Peasant(_)) => true
+  case (Knight(_), Clown) => true
+  case (Clown, Peasant(_)) => true
   case _ => false
 }
 
 val people = List(Knight("David"), 
                   Peer("Duke", "Norfolk", 84), 
                   Peasant("Christian"), 
-                  King(), 
-                  Clown())
+                  King, 
+                  Clown)
 
 println(people.sortWith(superior(_, _)).mkString(", "))
 
 
-
-// Higher-Order Functions
-//========================
-
-// functions can take functions as arguments
-
-val lst = (1 to 10).toList
-
-def even(x: Int): Boolean = x % 2 == 0
-def odd(x: Int): Boolean = x % 2 == 1
-
-lst.filter(x => even(x))
-lst.filter(even(_))
-lst.filter(even)
-
-lst.find(_ > 8)
-
-def square(x: Int): Int = x * x
-
-lst.map(square)
-
-lst.map(square).filter(_ > 4)
-
-lst.map(square).filter(_ > 4).map(square)
-
-// in my collatz.scala
-//(1 to bnd).map(i => (collatz(i), i)).maxBy(_._1)
+// Tail recursion
+//================
 
 
-// type of functions, for example f: Int => Int
-
-def my_map_int(lst: List[Int], f: Int => Int): List[Int] = lst match {
-  case Nil => Nil
-  case x::xs => f(x)::my_map_int(xs, f)
-}
+def fact(n: Long): Long = 
+  if (n == 0) 1 else n * fact(n - 1)
 
-my_map_int(lst, square)
+fact(10)              //ok
+fact(10000)           // produces a stackoverflow
 
-// other function types
-//
-// f1: (Int, Int) => Int
-// f2: List[String] => Option[Int]
-// ... 
-
+def factT(n: BigInt, acc: BigInt): BigInt =
+  if (n == 0) acc else factT(n - 1, n * acc)
 
-def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
-  case Nil => 0
-  case x::xs => f(x) + sumOf(f, xs)
-}
-
-def sum_squares(lst: List[Int]) = sumOf(square, lst)
-def sum_cubes(lst: List[Int])   = sumOf(x => x * x * x, lst)
-
-sum_squares(lst)
-sum_cubes(lst)
+factT(10, 1)
+factT(100000, 1)
 
-// lets try it factorial
-def fact(n: Int): Int = ...
-
-def sum_fact(lst: List[Int]) = sumOf(fact, lst)
-sum_fact(lst)
+// there is a flag for ensuring a function is tail recursive
+import scala.annotation.tailrec
 
-// Avoid being mutable
-//=====================
-
-// a student showed me...
-import scala.collection.mutable.ListBuffer
+@tailrec
+def factT(n: BigInt, acc: BigInt): BigInt =
+  if (n == 0) acc else factT(n - 1, n * acc)
 
 
 
-def collatz_max(bnd: Long): (Long, Long) = {
-  val colNos = ListBuffer[(Long, Long)]()
-  for (i <- (1L to bnd).toList) colNos += ((collatz(i), i))
-  colNos.max
-}
+// for tail-recursive functions the Scala compiler
+// generates loop-like code, which does not need
+// to allocate stack-space in each recursive
+// call; Scala can do this only for tail-recursive
+// functions
+
 
-def collatz_max(bnd: Long): (Long, Long) = {
-  (1L to bnd).map((i) => (collatz(i), i)).maxBy(_._1)
+// A Web Crawler 
+//===============
+//
+// the idea is to look for dead links using the
+// regular expression "https?://[^"]*"
+
+import io.Source
+import scala.util._
+
+// gets the first 10K characters of a web-page; prints a note and returns "" on failure
+def get_page(url: String) : String =
+  Try { val src = Source.fromURL(url)("ISO-8859-1")       // opens the underlying stream
+        try src.take(10000).mkString finally src.close()  // always release it again
+  }.getOrElse { println(s"  Problem with: $url"); ""}
 
-//views -> lazy collection
-def collatz_max(bnd: Long): (Long, Long) = {
-  (1L to bnd).view.map((i) => (collatz(i), i)).maxBy(_._1)
+// regex for URLs and emails
+val http_pattern = """"https?://[^"]*"""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+
+// drops the first and last character from a string
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String): Set[String] = 
+  http_pattern.findAllIn(page).map(unquote).toSet
+
+// naive version of crawl - follows links down to depth n and returns the
+// e-mail addresses found; visits pages potentially more than once
+def crawl(url: String, n: Int) : Set[String] = {
+  if (n <= 0) Set()   // <= 0: a negative depth stops at once instead of never reaching 0
+  else {
+    println(s"  Visiting: $n $url")
+    val page = get_page(url)
+    val new_emails = email_pattern.findAllIn(page).toSet
+    new_emails ++ (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten
+  }
+}
 
-// raises a GC exception
-(1 to 1000000000).filter(_ % 2 == 0).take(10).toList
-// ==> java.lang.OutOfMemoryError: GC overhead limit exceeded
+// some starting URLs for the crawler
+val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
 
-(1 to 1000000000).view.filter(_ % 2 == 0).take(10).toList
+crawl(startURL, 2)
+
+
+
+