--- a/progs/lecture2.scala Thu Nov 15 14:23:55 2018 +0000
+++ b/progs/lecture2.scala Fri Nov 16 02:06:03 2018 +0000
@@ -1,11 +1,216 @@
// Scala Lecture 2
//=================
+// UNFINISHED BUSINESS from Lecture 1
+//====================================
+
+
+// for measuring time
+def time_needed[T](n: Int, code: => T) = { // evaluates the by-name `code` n times; result: total elapsed seconds as a Double
+ val start = System.nanoTime()
+ for (_ <- 1 to n) code // exactly n runs ("0 to n" is inclusive and executed the code n + 1 times)
+ val end = System.nanoTime()
+ (end - start) / 1.0e9 // nanoseconds -> seconds
+}
+
+
+val list = (1 to 1000000).toList
+time_needed(10, for (n <- list) yield n + 42)
+time_needed(10, for (n <- list.par) yield n + 42)
+
+
+// Just for "Fun": Mutable vs Immutable
+//=======================================
+//
+// - no vars, no ++i, no +=
+// - no mutable data-structures (no Arrays, no ListBuffers)
+
+
+// Q: How many elements are in the intersection of two sets?
+
+def count_intersection(A: Set[Int], B: Set[Int]) : Int = { // number of elements of A that also occur in B
+ var count = 0 // mutable counter, updated by the loop below
+ for (x <- A; if B contains x) count += 1 // one increment per element of A that B contains
+ count
+}
+
+val A = (1 to 1000).toSet
+val B = (1 to 1000 by 4).toSet
+
+count_intersection(A, B)
+
+// but do not try to add .par to the for-loop above
+
+
+// proper parallel version
+def count_intersection2(A: Set[Int], B: Set[Int]) : Int = // counts, on a parallel view of A, the elements B contains
+ A.par.count(B.contains)
+
+count_intersection2(A, B)
+
+
+val A = (1 to 1000000).toSet
+val B = (1 to 1000000 by 4).toSet
+
+time_needed(100, count_intersection(A, B))
+time_needed(100, count_intersection2(A, B))
+
+
+
+// For-Comprehensions Again
+//==========================
+
+// the first produces a result, while the second does not
+for (n <- List(1, 2, 3, 4, 5)) yield n * n
+
+
+for (n <- List(1, 2, 3, 4, 5)) println(n)
+
+
+
+// Higher-Order Functions
+//========================
+
+// functions can take functions as arguments
+
+def even(x: Int) : Boolean = x % 2 == 0 // true when x is divisible by 2 (including 0 and negatives)
+def odd(x: Int) : Boolean = x % 2 != 0 // "== 1" missed negative odd numbers: -3 % 2 is -1 on the JVM
+
+val lst = (1 to 10).toList
+
+lst.filter(x => even(x))
+lst.filter(even(_))
+lst.filter(even)
+
+lst.count(even)
+
+lst.find(_ > 8)
+
+
+val ps = List((3, 0), (3, 2), (4, 2), (2, 0), (1, 1), (1, 0))
+
+ps.sortBy(_._1)
+ps.sortBy(_._2)
+
+ps.maxBy(_._1)
+ps.maxBy(_._2)
+
+
+
+// maps
+//=====
+
+def square(x: Int): Int = x * x
+
+val lst = (1 to 10).toList
+
+lst.map(square)
+
+// this is actually what a for-yield is translated to in Scala
+
+lst.map(n => square(n))
+for (n <- lst) yield square(n)
+
+// this can be iterated
+
+lst.map(square).filter(_ > 4)
+
+lst.map(square).filter(_ > 4).map(square)
+
+
+// let's define our own functions
+// type of functions, for example f: Int => Int
+
+def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = {
+ if (lst.isEmpty) Nil // nothing left to transform
+ else { val first = lst.head; val rest = lst.tail; f(first) :: my_map_int(rest, f) }
+}
+
+my_map_int(lst, square)
+
+
+// same function using pattern matching: a kind
+// of switch statement on steroids (see more later on)
+
+def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = lst match {
+ case head :: rest => f(head) :: my_map_int(rest, f) // transform the head, then recurse on the tail
+ case Nil => Nil // empty list: nothing to transform
+}
+
+
+// other function types
+//
+// f1: (Int, Int) => Int
+// f2: List[String] => Option[Int]
+// ...
+
+
+def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
+ case head :: rest => f(head) + sumOf(f, rest) // f of the head plus the sum over the remaining elements
+ case Nil => 0 // the empty sum
+}
+
+def sum_squares(lst: List[Int]) = sumOf(square, lst)
+def sum_cubes(lst: List[Int]) = sumOf(x => x * x * x, lst)
+
+sum_squares(lst)
+sum_cubes(lst)
+
+// let's try it with factorial
+def fact(n: Int) : Int = if (n <= 0) 1 else n * fact(n - 1) // factorial of n (1 for n <= 0); Int wraps around beyond 12!
+
+def sum_fact(lst: List[Int]) = sumOf(fact, lst)
+sum_fact(lst)
+
+
+
+
+
+// Map type
+//==========
+
+// Note the difference between map and Map
+
+def factors(n: Int) : List[Int] = { // all d with 1 <= d < n that divide n (n itself is not included)
+ val candidates = (1 until n).toList
+ candidates.filter(d => n % d == 0)
+}
+
+
+val ls = (1 to 10).toList // the numbers 1..10; a val suffices because ls is never reassigned
+
+val facs = ls.map(n => (n, factors(n)))
+
+facs.find(_._1 == 4)
+
+// works for lists of pairs
+facs.toMap
+
+
+facs.toMap.get(4)
+facs.toMap.getOrElse(4, Nil)
+
+val facsMap = facs.toMap
+
+val facsMap0 = facsMap + (0 -> List(1,2,3,4,5))
+facsMap0.get(0)
+
+val facsMap4 = facsMap + (1 -> List(1,2,3,4,5))
+facsMap.get(1)
+facsMap4.get(1)
+
+val ls = List("one", "two", "three", "four", "five")
+ls.groupBy(_.length)
+
+ls.groupBy(_.length).get(3)
+
+
// Option type
//=============
//in Java if something unusually happens, you return null;
+//
//in Scala you use Option
// - if the value is present, you use Some(value)
// - if no value is present, you use None
@@ -14,16 +219,7 @@
List(7,2,3,4,5,6).find(_ < 4)
List(5,6,7,8,9).find(_ < 4)
-
-// Values in types
-//
-// Boolean:
-// Int:
-// String:
-//
-// Option[String]:
-//
-
+// operations on options
val lst = List(None, Some(1), Some(2), None, Some(3))
@@ -43,6 +239,7 @@
// getOrElse is for setting a default value
val lst = List(None, Some(1), Some(2), None, Some(3))
+
for (x <- lst) yield x.getOrElse(0)
@@ -61,17 +258,24 @@
Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None)
-// a function that turns strings into numbers
-Integer.parseInt("12u34")
-def get_me_an_int(s: String): Option[Int] =
+// a function that turns strings into numbers (similar to .toInt)
+Integer.parseInt("1234")
+
+
+def get_me_an_int(s: String) : Option[Int] =
Try(Some(Integer.parseInt(s))).getOrElse(None)
-val lst = List("12345", "foo", "5432", "bar", "x21")
+val lst = List("12345", "foo", "5432", "bar", "x21", "456")
for (x <- lst) yield get_me_an_int(x)
// summing all the numbers
-val sum = lst.flatMap(get_me_an_int(_)).sum
+
+lst.map(get_me_an_int)
+lst.map(get_me_an_int).flatten.sum
+
+
+val sum = lst.flatMap(get_me_an_int).sum
// This may not look any better than working with null in Java, but to
@@ -89,64 +293,8 @@
// even Scala is not immune to problems like this:
List(5,6,7,8,9).indexOf(7)
-
-
-
-
-
-// Type abbreviations
-//====================
-
-// some syntactic convenience
-type Pos = (int, Int)
-
-type Board = List[List[Int]]
-
-
-
-// Implicits
-//===========
-//
-// for example adding your own methods to Strings:
-// imagine you want to increment strings, like
-//
-// "HAL".increment
-//
-// you can avoid ugly fudges, like a MyString, by
-// using implicit conversions
-
+List(5,6,7,8,9).indexOf(10)
-implicit class MyString(s: String) {
- def increment = for (c <- s) yield (c + 1).toChar
-}
-
-"HAL".increment
-
-
-// No return in Scala
-//====================
-
-//You should not use "return" in Scala:
-//
-// A return expression, when evaluated, abandons the
-// current computation and returns to the caller of the
-// function in which return appears."
-
-def sq1(x: Int): Int = x * x
-def sq2(x: Int): Int = return x * x
-
-def sumq(ls: List[Int]): Int = {
- (for (x <- ls) yield (return x * x)).sum[Int]
-}
-
-sumq(List(1,2,3,4))
-
-
-// last expression in a function is the return statement
-def square(x: Int): Int = {
- println(s"The argument is ${x}.")
- x * x
-}
@@ -169,7 +317,7 @@
-// remember
+// remember?
val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
@@ -178,6 +326,7 @@
}
+
def my_flatten(lst: List[Option[Int]]): List[Int] = lst match {
case Nil => Nil
case None::xs => my_flatten(xs)
@@ -200,7 +349,8 @@
case "March" | "April" | "May" => "It's spring"
case "June" | "July" | "August" => "It's summer"
case "September" | "October" | "November" => "It's autumn"
- case "December" | "January" | "February" => "It's winter"
+ case "December" => "It's winter"
+ case "January" | "February" => "It's unfortunately winter"
}
println(season("November"))
@@ -209,7 +359,7 @@
println(season("foobar"))
-// fizz buzz
+// Silly: fizz buzz
def fizz_buzz(n: Int) : String = (n % 3, n % 5) match {
case (0, 0) => "fizz buzz"
case (0, _) => "fizz"
@@ -224,162 +374,177 @@
// User-defined Datatypes
//========================
-abstract class Tree
-case class Node(elem: Int, left: Tree, right: Tree) extends Tree
-case class Leaf() extends Tree
+abstract class Colour
+case object Red extends Colour
+case object Green extends Colour
+case object Blue extends Colour
-def insert(tr: Tree, n: Int): Tree = tr match {
- case Leaf() => Node(n, Leaf(), Leaf())
- case Node(m, left, right) =>
- if (n == m) Node(m, left, right)
- else if (n < m) Node(m, insert(left, n), right)
- else Node(m, left, insert(right, n))
+def fav_colour(c: Colour) : Boolean = c match {
+ // Green is the only favourite colour
+ case Green => true
+ case Red | Blue => false
}
+fav_colour(Green)
+
-val t1 = Node(4, Node(2, Leaf(), Leaf()), Node(7, Leaf(), Leaf()))
-insert(t1, 3)
+// ... a bit more useful: Roman Numerals
+
+abstract class RomanDigit
+case object I extends RomanDigit
+case object V extends RomanDigit
+case object X extends RomanDigit
+case object L extends RomanDigit
+case object C extends RomanDigit
+case object D extends RomanDigit
+case object M extends RomanDigit
+
+type RomanNumeral = List[RomanDigit]
-def depth(tr: Tree): Int = tr match {
- case Leaf() => 0
- case Node(_, left, right) => 1 + List(depth(left), depth(right)).max
+def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { // sums the digit values from left to right; does not validate the numeral
+ case Nil => 0
+ case M::r => 1000 + RomanNumeral2Int(r)
+ case C::M::r => 900 + RomanNumeral2Int(r) // subtractive pair CM; must come before the plain C case
+ case D::r => 500 + RomanNumeral2Int(r)
+ case C::D::r => 400 + RomanNumeral2Int(r) // subtractive pair CD; must come before the plain C case
+ case C::r => 100 + RomanNumeral2Int(r)
+ case X::C::r => 90 + RomanNumeral2Int(r) // subtractive pair XC; must come before the plain X case
+ case L::r => 50 + RomanNumeral2Int(r)
+ case X::L::r => 40 + RomanNumeral2Int(r) // subtractive pair XL; must come before the plain X case
+ case X::r => 10 + RomanNumeral2Int(r)
+ case I::X::r => 9 + RomanNumeral2Int(r) // subtractive pair IX; must come before the plain I case
+ case V::r => 5 + RomanNumeral2Int(r)
+ case I::V::r => 4 + RomanNumeral2Int(r) // subtractive pair IV; must come before the plain I case
+ case I::r => 1 + RomanNumeral2Int(r)
}
+RomanNumeral2Int(List(I,V)) // 4
+RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)
+RomanNumeral2Int(List(V,I)) // 6
+RomanNumeral2Int(List(I,X)) // 9
+RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
+RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
+
-def balance(tr: Tree): Int = tr match {
- case Leaf() => 0
- case Node(_, left, right) => depth(left) - depth(right)
-}
+// another example
+//=================
-balance(insert(t1, 3))
+// Once upon a time, in a completely fictional country there were Persons...
-// another example
abstract class Person
-case class King() extends Person
+case object King extends Person
case class Peer(deg: String, terr: String, succ: Int) extends Person
case class Knight(name: String) extends Person
case class Peasant(name: String) extends Person
-case class Clown() extends Person
+case object Clown extends Person
def title(p: Person): String = p match {
- case King() => "His Majesty the King"
+ case King => "His Majesty the King"
case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
case Knight(name) => s"Sir ${name}"
case Peasant(name) => name
}
def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
- case (King(), _) => true
+ case (King, _) => true
case (Peer(_,_,_), Knight(_)) => true
case (Peer(_,_,_), Peasant(_)) => true
- case (Peer(_,_,_), Clown()) => true
+ case (Peer(_,_,_), Clown) => true
case (Knight(_), Peasant(_)) => true
- case (Knight(_), Clown()) => true
- case (Clown(), Peasant(_)) => true
+ case (Knight(_), Clown) => true
+ case (Clown, Peasant(_)) => true
case _ => false
}
val people = List(Knight("David"),
Peer("Duke", "Norfolk", 84),
Peasant("Christian"),
- King(),
- Clown())
+ King,
+ Clown)
println(people.sortWith(superior(_, _)).mkString(", "))
-
-// Higher-Order Functions
-//========================
-
-// functions can take functions as arguments
-
-val lst = (1 to 10).toList
-
-def even(x: Int): Boolean = x % 2 == 0
-def odd(x: Int): Boolean = x % 2 == 1
-
-lst.filter(x => even(x))
-lst.filter(even(_))
-lst.filter(even)
-
-lst.find(_ > 8)
-
-def square(x: Int): Int = x * x
-
-lst.map(square)
-
-lst.map(square).filter(_ > 4)
-
-lst.map(square).filter(_ > 4).map(square)
-
-// in my collatz.scala
-//(1 to bnd).map(i => (collatz(i), i)).maxBy(_._1)
+// Tail recursion
+//================
-// type of functions, for example f: Int => Int
-
-def my_map_int(lst: List[Int], f: Int => Int): List[Int] = lst match {
- case Nil => Nil
- case x::xs => f(x)::my_map_int(xs, f)
-}
+def fact(n: Long): Long = // plain recursion: the multiplication happens only after the recursive call has returned
+ if (n == 0) 1 else n * fact(n - 1)
-my_map_int(lst, square)
+fact(10) //ok
+fact(10000) // produces a stackoverflow
-// other function types
-//
-// f1: (Int, Int) => Int
-// f2: List[String] => Option[Int]
-// ...
-
+def factT(n: BigInt, acc: BigInt): BigInt =
+ if (n == 0) acc else factT(n - 1, n * acc)
-def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
- case Nil => 0
- case x::xs => f(x) + sumOf(f, xs)
-}
-
-def sum_squares(lst: List[Int]) = sumOf(square, lst)
-def sum_cubes(lst: List[Int]) = sumOf(x => x * x * x, lst)
-
-sum_squares(lst)
-sum_cubes(lst)
+factT(10, 1)
+factT(100000, 1)
-// lets try it factorial
-def fact(n: Int): Int = ...
-
-def sum_fact(lst: List[Int]) = sumOf(fact, lst)
-sum_fact(lst)
+// there is a flag for ensuring a function is tail recursive
+import scala.annotation.tailrec
-// Avoid being mutable
-//=====================
-
-// a student showed me...
-import scala.collection.mutable.ListBuffer
+@tailrec
+def factT(n: BigInt, acc: BigInt): BigInt = // acc carries the product accumulated so far and is returned once n reaches 0
+ if (n == 0) acc else factT(n - 1, n * acc)
-def collatz_max(bnd: Long): (Long, Long) = {
- val colNos = ListBuffer[(Long, Long)]()
- for (i <- (1L to bnd).toList) colNos += ((collatz(i), i))
- colNos.max
-}
+// for tail-recursive functions the Scala compiler
+// generates loop-like code, which does not need
+// to allocate stack-space in each recursive
+// call; Scala can do this only for tail-recursive
+// functions
+
-def collatz_max(bnd: Long): (Long, Long) = {
- (1L to bnd).map((i) => (collatz(i), i)).maxBy(_._1)
+// A Web Crawler
+//===============
+//
+// the idea is to look for dead links using the
+// regular expression "https?://[^"]*"
+
+import io.Source
+import scala.util._
+
+// gets the first 10K of a web-page
+def get_page(url: String) : String = { // first 10000 characters of the page at `url`; "" (plus a console note) on any non-fatal failure; the source is always closed
+ Try { val src = Source.fromURL(url)("ISO-8859-1"); try src.take(10000).mkString finally src.close() }.
+ getOrElse { println(s" Problem with: $url"); ""}
}
-//views -> lazy collection
-def collatz_max(bnd: Long): (Long, Long) = {
- (1L to bnd).view.map((i) => (collatz(i), i)).maxBy(_._1)
+// regex for URLs and emails
+val http_pattern = """"https?://[^"]*"""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+
+// drops the first and last character from a string
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String): Set[String] = // every match of http_pattern in `page`, with the surrounding quotes removed
+ (for (quoted <- http_pattern.findAllIn(page)) yield unquote(quoted)).toSet
+
+// naive version of crawl - searches until a given depth,
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Set[String] = { // e-mail addresses found on `url` and on pages reachable through at most n - 1 links
+ if (n == 0) Set() // depth budget used up: nothing is fetched
+ else {
+ println(s" Visiting: $n $url")
+ val page = get_page(url)
+ val new_emails = email_pattern.findAllIn(page).toSet
+ new_emails ++ (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten // crawl every linked URL in parallel with one level less
+ }
}
-// raises a GC exception
-(1 to 1000000000).filter(_ % 2 == 0).take(10).toList
-// ==> java.lang.OutOfMemoryError: GC overhead limit exceeded
+// some starting URLs for the crawler
+val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
-(1 to 1000000000).view.filter(_ % 2 == 0).take(10).toList
+crawl(startURL, 2)
+
+
+
+