diff -r e0420c7b8a19 -r 1b04ea68dca6 progs/lecture2.scala --- a/progs/lecture2.scala Thu Nov 15 14:23:55 2018 +0000 +++ b/progs/lecture2.scala Fri Nov 16 02:06:03 2018 +0000 @@ -1,11 +1,216 @@ // Scala Lecture 2 //================= +// UNFINISHED BUSINESS from Lecture 1 +//==================================== + + +// for measuring time +def time_needed[T](n: Int, code: => T) = { + val start = System.nanoTime() + for (i <- (0 to n)) code + val end = System.nanoTime() + (end - start) / 1.0e9 +} + + +val list = (1 to 1000000).toList +time_needed(10, for (n <- list) yield n + 42) +time_needed(10, for (n <- list.par) yield n + 42) + + +// Just for "Fun": Mutable vs Immutable +//======================================= +// +// - no vars, no ++i, no += +// - no mutable data-structures (no Arrays, no ListBuffers) + + +// Q: Count how many elements are in the intersections of two sets? + +def count_intersection(A: Set[Int], B: Set[Int]) : Int = { + var count = 0 + for (x <- A; if B contains x) count += 1 + count +} + +val A = (1 to 1000).toSet +val B = (1 to 1000 by 4).toSet + +count_intersection(A, B) + +// but do not try to add .par to the for-loop above + + +//propper parallel version +def count_intersection2(A: Set[Int], B: Set[Int]) : Int = + A.par.count(x => B contains x) + +count_intersection2(A, B) + + +val A = (1 to 1000000).toSet +val B = (1 to 1000000 by 4).toSet + +time_needed(100, count_intersection(A, B)) +time_needed(100, count_intersection2(A, B)) + + + +// For-Comprehensions Again +//========================== + +// the first produces a result, while the second does not +for (n <- List(1, 2, 3, 4, 5)) yield n * n + + +for (n <- List(1, 2, 3, 4, 5)) println(n) + + + +// Higher-Order Functions +//======================== + +// functions can take functions as arguments + +def even(x: Int) : Boolean = x % 2 == 0 +def odd(x: Int) : Boolean = x % 2 == 1 + +val lst = (1 to 10).toList + +lst.filter(x => even(x)) +lst.filter(even(_)) +lst.filter(even) + +lst.count(even) + +lst.find(_ > 8) + + +val ps = List((3, 0), (3, 2), (4, 2), (2, 0), (1, 1), (1, 0)) + +ps.sortBy(_._1) +ps.sortBy(_._2) + +ps.maxBy(_._1) +ps.maxBy(_._2) + + + +// maps +//===== + +def square(x: Int): Int = x * x + +val lst = (1 to 10).toList + +lst.map(square) + +// this is actually what for is defined at in Scala + +lst.map(n => square(n)) +for (n <- lst) yield square(n) + +// this can be iterated + +lst.map(square).filter(_ > 4) + +lst.map(square).filter(_ > 4).map(square) + + +// lets define our own functions +// type of functions, for example f: Int => Int + +def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = { + if (lst == Nil) Nil + else f(lst.head) :: my_map_int(lst.tail, f) +} + +my_map_int(lst, square) + + +// same function using pattern matching: a kind +// of switch statement on steroids (see more later on) + +def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = lst match { + case Nil => Nil + case x::xs => f(x)::my_map_int(xs, f) +} + + +// other function types +// +// f1: (Int, Int) => Int +// f2: List[String] => Option[Int] +// ... + + +def sumOf(f: Int => Int, lst: List[Int]): Int = lst match { + case Nil => 0 + case x::xs => f(x) + sumOf(f, xs) +} + +def sum_squares(lst: List[Int]) = sumOf(square, lst) +def sum_cubes(lst: List[Int]) = sumOf(x => x * x * x, lst) + +sum_squares(lst) +sum_cubes(lst) + +// lets try it factorial +def fact(n: Int) : Int = ... + +def sum_fact(lst: List[Int]) = sumOf(fact, lst) +sum_fact(lst) + + + + + +// Map type +//========== + +// Note the difference between map and Map + +def factors(n: Int) : List[Int] = + ((1 until n).filter { divisor => + n % divisor == 0 + }).toList + + +var ls = (1 to 10).toList + +val facs = ls.map(n => (n, factors(n))) + +facs.find(_._1 == 4) + +// works for lists of pairs +facs.toMap + + +facs.toMap.get(4) +facs.toMap.getOrElse(4, Nil) + +val facsMap = facs.toMap + +val facsMap0 = facsMap + (0 -> List(1,2,3,4,5)) +facsMap0.get(0) + +val facsMap4 = facsMap + (1 -> List(1,2,3,4,5)) +facsMap.get(1) +facsMap4.get(1) + +val ls = List("one", "two", "three", "four", "five") +ls.groupBy(_.length) + +ls.groupBy(_.length).get(3) + + // Option type //============= //in Java if something unusually happens, you return null; +// //in Scala you use Option // - if the value is present, you use Some(value) // - if no value is present, you use None @@ -14,16 +219,7 @@ List(7,2,3,4,5,6).find(_ < 4) List(5,6,7,8,9).find(_ < 4) - -// Values in types -// -// Boolean: -// Int: -// String: -// -// Option[String]: -// - +// operations on options val lst = List(None, Some(1), Some(2), None, Some(3)) @@ -43,6 +239,7 @@ // getOrElse is for setting a default value val lst = List(None, Some(1), Some(2), None, Some(3)) + for (x <- lst) yield x.getOrElse(0) @@ -61,17 +258,24 @@ Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None) -// a function that turns strings into numbers -Integer.parseInt("12u34") -def get_me_an_int(s: String): Option[Int] = +// a function that turns strings into numbers (similar to .toInt) +Integer.parseInt("1234") + + +def get_me_an_int(s: String) : Option[Int] = Try(Some(Integer.parseInt(s))).getOrElse(None) -val lst = List("12345", "foo", "5432", "bar", "x21") +val lst = List("12345", "foo", "5432", "bar", "x21", "456") for (x <- lst) yield get_me_an_int(x) // summing all the numbers -val sum = lst.flatMap(get_me_an_int(_)).sum + +lst.map(get_me_an_int) +lst.map(get_me_an_int).flatten.sum + + +val sum = lst.flatMap(get_me_an_int).sum // This may not look any better than working with null in Java, but to @@ -89,64 +293,8 @@ // even Scala is not immune to problems like this: List(5,6,7,8,9).indexOf(7) - - - - - -// Type abbreviations -//==================== - -// some syntactic convenience -type Pos = (int, Int) - -type Board = List[List[Int]] - - - -// Implicits -//=========== -// -// for example adding your own methods to Strings: -// imagine you want to increment strings, like -// -// "HAL".increment -// -// you can avoid ugly fudges, like a MyString, by -// using implicit conversions - +List(5,6,7,8,9).indexOf(10) -implicit class MyString(s: String) { - def increment = for (c <- s) yield (c + 1).toChar -} - -"HAL".increment - - -// No return in Scala -//==================== - -//You should not use "return" in Scala: -// -// A return expression, when evaluated, abandons the -// current computation and returns to the caller of the -// function in which return appears." - -def sq1(x: Int): Int = x * x -def sq2(x: Int): Int = return x * x - -def sumq(ls: List[Int]): Int = { - (for (x <- ls) yield (return x * x)).sum[Int] -} - -sumq(List(1,2,3,4)) - - -// last expression in a function is the return statement -def square(x: Int): Int = { - println(s"The argument is ${x}.") - x * x -} @@ -169,7 +317,7 @@ -// remember +// remember? val lst = List(None, Some(1), Some(2), None, Some(3)).flatten @@ -178,6 +326,7 @@ } + def my_flatten(lst: List[Option[Int]]): List[Int] = lst match { case Nil => Nil case None::xs => my_flatten(xs) @@ -200,7 +349,8 @@ case "March" | "April" | "May" => "It's spring" case "June" | "July" | "August" => "It's summer" case "September" | "October" | "November" => "It's autumn" - case "December" | "January" | "February" => "It's winter" + case "December" => "It's winter" + case "January" | "February" => "It's unfortunately winter" } println(season("November")) @@ -209,7 +359,7 @@ println(season("foobar")) -// fizz buzz +// Silly: fizz buzz def fizz_buzz(n: Int) : String = (n % 3, n % 5) match { case (0, 0) => "fizz buzz" case (0, _) => "fizz" @@ -224,162 +374,177 @@ // User-defined Datatypes //======================== -abstract class Tree -case class Node(elem: Int, left: Tree, right: Tree) extends Tree -case class Leaf() extends Tree +abstract class Colour +case object Red extends Colour +case object Green extends Colour +case object Blue extends Colour -def insert(tr: Tree, n: Int): Tree = tr match { - case Leaf() => Node(n, Leaf(), Leaf()) - case Node(m, left, right) => - if (n == m) Node(m, left, right) - else if (n < m) Node(m, insert(left, n), right) - else Node(m, left, insert(right, n)) +def fav_colour(c: Colour) : Boolean = c match { + case Red => false + case Green => true + case Blue => false } +fav_colour(Green) + -val t1 = Node(4, Node(2, Leaf(), Leaf()), Node(7, Leaf(), Leaf())) -insert(t1, 3) +// ... a bit more useful: Roman Numerals + +abstract class RomanDigit +case object I extends RomanDigit +case object V extends RomanDigit +case object X extends RomanDigit +case object L extends RomanDigit +case object C extends RomanDigit +case object D extends RomanDigit +case object M extends RomanDigit + +type RomanNumeral = List[RomanDigit] -def depth(tr: Tree): Int = tr match { - case Leaf() => 0 - case Node(_, left, right) => 1 + List(depth(left), depth(right)).max +def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { + case Nil => 0 + case M::r => 1000 + RomanNumeral2Int(r) + case C::M::r => 900 + RomanNumeral2Int(r) + case D::r => 500 + RomanNumeral2Int(r) + case C::D::r => 400 + RomanNumeral2Int(r) + case C::r => 100 + RomanNumeral2Int(r) + case X::C::r => 90 + RomanNumeral2Int(r) + case L::r => 50 + RomanNumeral2Int(r) + case X::L::r => 40 + RomanNumeral2Int(r) + case X::r => 10 + RomanNumeral2Int(r) + case I::X::r => 9 + RomanNumeral2Int(r) + case V::r => 5 + RomanNumeral2Int(r) + case I::V::r => 4 + RomanNumeral2Int(r) + case I::r => 1 + RomanNumeral2Int(r) } +RomanNumeral2Int(List(I,V)) // 4 +RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) +RomanNumeral2Int(List(V,I)) // 6 +RomanNumeral2Int(List(I,X)) // 9 +RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 +RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 + -def balance(tr: Tree): Int = tr match { - case Leaf() => 0 - case Node(_, left, right) => depth(left) - depth(right) -} +// another example +//================= -balance(insert(t1, 3)) +// Once upon a time, in a complete fictional country there were Persons... -// another example abstract class Person -case class King() extends Person +case object King extends Person case class Peer(deg: String, terr: String, succ: Int) extends Person case class Knight(name: String) extends Person case class Peasant(name: String) extends Person -case class Clown() extends Person +case object Clown extends Person def title(p: Person): String = p match { - case King() => "His Majesty the King" + case King => "His Majesty the King" case Peer(deg, terr, _) => s"The ${deg} of ${terr}" case Knight(name) => s"Sir ${name}" case Peasant(name) => name } def superior(p1: Person, p2: Person): Boolean = (p1, p2) match { - case (King(), _) => true + case (King, _) => true case (Peer(_,_,_), Knight(_)) => true case (Peer(_,_,_), Peasant(_)) => true - case (Peer(_,_,_), Clown()) => true + case (Peer(_,_,_), Clown) => true case (Knight(_), Peasant(_)) => true - case (Knight(_), Clown()) => true - case (Clown(), Peasant(_)) => true + case (Knight(_), Clown) => true + case (Clown, Peasant(_)) => true case _ => false } val people = List(Knight("David"), Peer("Duke", "Norfolk", 84), Peasant("Christian"), - King(), - Clown()) + King, + Clown) println(people.sortWith(superior(_, _)).mkString(", ")) - -// Higher-Order Functions -//======================== - -// functions can take functions as arguments - -val lst = (1 to 10).toList - -def even(x: Int): Boolean = x % 2 == 0 -def odd(x: Int): Boolean = x % 2 == 1 - -lst.filter(x => even(x)) -lst.filter(even(_)) -lst.filter(even) - -lst.find(_ > 8) - -def square(x: Int): Int = x * x - -lst.map(square) - -lst.map(square).filter(_ > 4) - -lst.map(square).filter(_ > 4).map(square) - -// in my collatz.scala -//(1 to bnd).map(i => (collatz(i), i)).maxBy(_._1) +// Tail recursion +//================ -// type of functions, for example f: Int => Int - -def my_map_int(lst: List[Int], f: Int => Int): List[Int] = lst match { - case Nil => Nil - case x::xs => f(x)::my_map_int(xs, f) -} +def fact(n: Long): Long = + if (n == 0) 1 else n * fact(n - 1) -my_map_int(lst, square) +fact(10) //ok +fact(10000) // produces a stackoverflow -// other function types -// -// f1: (Int, Int) => Int -// f2: List[String] => Option[Int] -// ... - +def factT(n: BigInt, acc: BigInt): BigInt = + if (n == 0) acc else factT(n - 1, n * acc) -def sumOf(f: Int => Int, lst: List[Int]): Int = lst match { - case Nil => 0 - case x::xs => f(x) + sumOf(f, xs) -} - -def sum_squares(lst: List[Int]) = sumOf(square, lst) -def sum_cubes(lst: List[Int]) = sumOf(x => x * x * x, lst) - -sum_squares(lst) -sum_cubes(lst) +factT(10, 1) +factT(100000, 1) -// lets try it factorial -def fact(n: Int): Int = ... - -def sum_fact(lst: List[Int]) = sumOf(fact, lst) -sum_fact(lst) +// there is a flag for ensuring a function is tail recursive +import scala.annotation.tailrec -// Avoid being mutable -//===================== - -// a student showed me... -import scala.collection.mutable.ListBuffer +@tailrec +def factT(n: BigInt, acc: BigInt): BigInt = + if (n == 0) acc else factT(n - 1, n * acc) -def collatz_max(bnd: Long): (Long, Long) = { - val colNos = ListBuffer[(Long, Long)]() - for (i <- (1L to bnd).toList) colNos += ((collatz(i), i)) - colNos.max -} +// for tail-recursive functions the Scala compiler +// generates loop-like code, which does not need +// to allocate stack-space in each recursive +// call; Scala can do this only for tail-recursive +// functions + -def collatz_max(bnd: Long): (Long, Long) = { - (1L to bnd).map((i) => (collatz(i), i)).maxBy(_._1) +// A Web Crawler +//=============== +// +// the idea is to look for dead links using the +// regular expression "https?://[^"]*" + +import io.Source +import scala.util._ + +// gets the first 10K of a web-page +def get_page(url: String) : String = { + Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). + getOrElse { println(s" Problem with: $url"); ""} } -//views -> lazy collection -def collatz_max(bnd: Long): (Long, Long) = { - (1L to bnd).view.map((i) => (collatz(i), i)).maxBy(_._1) +// regex for URLs and emails +val http_pattern = """"https?://[^"]*"""".r +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + + +// drops the first and last character from a string +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String): Set[String] = + http_pattern.findAllIn(page).map(unquote).toSet + +// naive version of crawl - searches until a given depth, +// visits pages potentially more than once +def crawl(url: String, n: Int) : Set[String] = { + if (n == 0) Set() + else { + println(s" Visiting: $n $url") + val page = get_page(url) + val new_emails = email_pattern.findAllIn(page).toSet + new_emails ++ (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten + } } -// raises a GC exception -(1 to 1000000000).filter(_ % 2 == 0).take(10).toList -// ==> java.lang.OutOfMemoryError: GC overhead limit exceeded +// some starting URLs for the crawler +val startURL = """https://nms.kcl.ac.uk/christian.urban/""" -(1 to 1000000000).view.filter(_ % 2 == 0).take(10).toList +crawl(startURL, 2) + + + +