progs/lecture3.scala
changeset 217 e689375abcc1
parent 194 060b081523de
child 218 22705d22c105
--- a/progs/lecture3.scala	Thu Nov 22 23:00:57 2018 +0000
+++ b/progs/lecture3.scala	Fri Nov 23 01:52:37 2018 +0000
@@ -1,238 +1,154 @@
 // Scala Lecture 3
 //=================
 
-// Pattern Matching
-//==================
+
+// A Web Crawler / Email Harvester
+//=================================
+//
+// the idea is to look for links using the
+// regular expression "https?://[^"]*" and for
+// email addresses using another regex.
+
+import io.Source
+import scala.util._
 
-// A powerful tool which is supposed to come to Java in a few years
-// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala already
-// has it for many years. Other functional languages have it already for
-// decades. I think I would be really upset if a programming language 
-// I have to use does not have pattern matching....its is just so 
-// useful. ;o)
+// gets the first 10K of a web-page
+def get_page(url: String) : String = {
+  Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString).
+    getOrElse { println(s"  Problem with: $url"); ""}
+}
 
-// The general schema:
-//
-//    expression match {
-//       case pattern1 => expression1
-//       case pattern2 => expression2
-//       ...
-//       case patternN => expressionN
-//    }
+// regex for URLs and emails
+val http_pattern = """"https?://[^"]*"""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+//email_pattern.findAllIn
+//  ("foo bla christian@kcl.ac.uk 1234567").toList
 
 
-// remember
-val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
+// drops the first and last character from a string
+def unquote(s: String) = s.drop(1).dropRight(1)
 
+def get_all_URLs(page: String): Set[String] = 
+  http_pattern.findAllIn(page).map(unquote).toSet
 
-def my_flatten(xs: List[Option[Int]]): List[Int] = {
-  if (xs == Nil) Nil
-  else if (xs.head == None) my_flatten(xs.tail)
-  else xs.head.get :: my_flatten(xs.tail)
+// naive version of crawl - searches until a given depth,
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Set[String] = {
+  if (n == 0) Set()
+  else {
+    println(s"  Visiting: $n $url")
+    val page = get_page(url)
+    val new_emails = email_pattern.findAllIn(page).toSet
+    new_emails ++ 
+      (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten
+  }
 }
 
+// some starting URLs for the crawler
+val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
+
+crawl(startURL, 2)
+
 
 
-val lst = List(None, Some(1), Some(2), None, Some(3))
+// User-defined Datatypes and Pattern Matching
+//============================================
+
 
-def my_flatten(lst: List[Option[Int]]): List[Int] = lst match {
-  case Nil => Nil
-  case None::xs => my_flatten(xs)
-  case Some(n)::xs => n::my_flatten(xs)
-}
-
-my_flatten(lst)
-
-Nil == List()
+abstract class Exp
+case class N(n: Int) extends Exp
+case class Plus(e1: Exp, e2: Exp) extends Exp
+case class Times(e1: Exp, e2: Exp) extends Exp
 
 
-// another example including a catch-all pattern
-def get_me_a_string(n: Int): String = n match {
-  case 0 => "zero"
-  case 1 => "one"
-  case 2 => "two"
-  case _ => "many"
-}
 
-get_me_a_string(10)
-
-// you can also have cases combined
-def season(month: String) = month match {
-  case "March" | "April" | "May" => "It's spring"
-  case "June" | "July" | "August" => "It's summer"
-  case "September" | "October" | "November" => "It's autumn"
-  case "December" | "January" | "February" => "It's winter"
-}
- 
-println(season("November"))
-
-// What happens if no case matches?
-
-println(season("foobar"))
-
-
-// we can also match more complicated pattern
-//
-// let's look at the Collatz function on binary strings
-
-// adding two binary strings in a very, very lazy manner
-
-def badd(s1: String, s2: String) : String = 
-  (BigInt(s1, 2) + BigInt(s2, 2)).toString(2)
-
-
-"111".dropRight(1)
-"111".last
-
-def bcollatz(s: String) : Long = (s.dropRight(1), s.last) match {
-  case ("", '1') => 1                               // we reached 1
-  case (rest, '0') => 1 + bcollatz(rest)            
-                                  // even number => divide by two
-  case (rest, '1') => 1 + bcollatz(badd(s + '1', s))
-                                  // odd number => s + '1' is 2 * s + 1
-                                  // add another s gives 3 * s + 1  
-} 
-
-bcollatz(6.toBinaryString)
-bcollatz(837799.toBinaryString)
-bcollatz(100000000000000000L.toBinaryString)
-bcollatz(BigInt("1000000000000000000000000000000000000000000000000000000000000000000000000000").toString(2))
-
+// string of an Exp
+// eval of an Exp
+// simp an Exp
+// Tokens
+// Reverse Polish Notation
+// compute RP
+// transform RP into Exp
+// process RP string and generate Exp
 
 
 
-// User-defined Datatypes
-//========================
+def string(e: Exp) : String = e match {
+  case N(n) => n.toString
+  case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")"
+  case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")"
+}
 
-abstract class Colour
-case object Red extends Colour 
-case object Green extends Colour 
-case object Blue extends Colour
+val e = Plus(N(9), Times(N(3), N(4)))
+
+println(string(e))
 
-def fav_colour(c: Colour) : Boolean = c match {
-  case Red   => false
-  case Green => true
-  case Blue  => false 
+def eval(e: Exp) : Int = e match {
+  case N(n) => n
+  case Plus(e1, e2) => eval(e1) + eval(e2)
+  case Times(e1, e2) => eval(e1) * eval(e2)
 }
 
-fav_colour(Green)
-
-
-// actually colors can be written with "object",
-// because they do not take any arguments
-
-abstract class Day
-case object Monday extends Day 
-case object Tuesday extends Day 
-case object Wednesday extends Day
-case object Thursday extends Day 
-case object Friday extends Day 
-case object Saturday extends Day
-case object Sunday extends Day 
+eval(e)
 
-abstract class Suit
-case object Spades extends Suit
-case object Hearts extends Suit
-case object Diamonds extends Suit
-case object Clubs extends Suit
-
-//define function for colour of suits
-
-abstract class Rank
-case class Ace extends Rank
-case class King extends Rank
-case class Queen extends Rank
-case class Jack extends Rank
-case class Num(n: Int) extends Rank
-
-//define functions for beats
-//beats Ace _ => true
-//beats _ Acs => false
+def simp(e: Exp) : Exp = e match {
+  case N(n) => N(n)
+  case Plus(e1, e2) => (simp(e1), simp(e2)) match {
+    case (N(0), e2s) => e2s
+    case (e1s, N(0)) => e1s
+    case (e1s, e2s) => Plus(e1s, e2s) 
+  }
+  case Times(e1, e2) => (simp(e1), simp(e2)) match {
+    case (N(0), e2s) => N(0)
+    case (e1s, N(0)) => N(0)
+    case (N(1), e2s) => e2s
+    case (e1s, N(1)) => e1s
+    case (e1s, e2s) => Times(e1s, e2s) 
+  }
+}
 
 
-// ... a bit more useful: Roman Numerals
-
-abstract class RomanDigit 
-case object I extends RomanDigit 
-case object V extends RomanDigit 
-case object X extends RomanDigit 
-case object L extends RomanDigit 
-case object C extends RomanDigit 
-case object D extends RomanDigit 
-case object M extends RomanDigit 
-
-type RomanNumeral = List[RomanDigit] 
+val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
+println(string(e2))
+println(string(simp(e2)))
 
-def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { 
-  case Nil => 0
-  case M::r    => 1000 + RomanNumeral2Int(r)  
-  case C::M::r => 900 + RomanNumeral2Int(r)
-  case D::r    => 500 + RomanNumeral2Int(r)
-  case C::D::r => 400 + RomanNumeral2Int(r)
-  case C::r    => 100 + RomanNumeral2Int(r)
-  case X::C::r => 90 + RomanNumeral2Int(r)
-  case L::r    => 50 + RomanNumeral2Int(r)
-  case X::L::r => 40 + RomanNumeral2Int(r)
-  case X::r    => 10 + RomanNumeral2Int(r)
-  case I::X::r => 9 + RomanNumeral2Int(r)
-  case V::r    => 5 + RomanNumeral2Int(r)
-  case I::V::r => 4 + RomanNumeral2Int(r)
-  case I::r    => 1 + RomanNumeral2Int(r)
+// Token and Reverse Polish Notation
+abstract class Token
+case class T(n: Int) extends Token
+case object PL extends Token
+case object TI extends Token
+
+def rp(e: Exp) : List[Token] = e match {
+  case N(n) => List(T(n))
+  case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
+  case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
 }
 
-RomanNumeral2Int(List(I,V))             // 4
-RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)
-RomanNumeral2Int(List(V,I))             // 6
-RomanNumeral2Int(List(I,X))             // 9
-RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
-RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017
+def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match {
+  case (Nil, st) => st.head
+  case (T(n)::rest, st) => comp(rest, n::st)
+  case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
+  case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
+}
 
+def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
+  case (Nil, st) => st.head
+  case (T(n)::rest, st) => exp(rest, N(n)::st)
+  case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st)
+  case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st)
+}
+
+exp(rp(e2), Nil)   // round trip: rp turns e2 into RPN tokens, exp rebuilds the Exp from them
+
+def proc(s: String) : Token = s match {   // maps one word of an RPN string to its Token
+  case "+" => PL
+  case "*" => TI
+  case n => T(n.toInt)                     // any other word must be an integer literal; toInt throws otherwise
+}
 
 
-// another example
-//=================
-
-// Once upon a time, in a complete fictional country there were Persons...
-
-abstract class Person
-case object King extends Person
-case class Peer(deg: String, terr: String, succ: Int) extends Person
-case class Knight(name: String) extends Person
-case class Peasant(name: String) extends Person
-case object Clown extends Person
-
-def title(p: Person): String = p match {
-  case King => "His Majesty the King"
-  case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
-  case Knight(name) => s"Sir ${name}"
-  case Peasant(name) => name
-  case Clown => "My name is Boris Johnson"
-
-}
-
-title(Clown)
-
-
-
-def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
-  case (King, _) => true
-  case (Peer(_,_,_), Knight(_)) => true
-  case (Peer(_,_,_), Peasant(_)) => true
-  case (Peer(_,_,_), Clown) => true
-  case (Knight(_), Peasant(_)) => true
-  case (Knight(_), Clown) => true
-  case (Clown, Peasant(_)) => true
-  case _ => false
-}
-
-val people = List(Knight("David"), 
-                  Peer("Duke", "Norfolk", 84), 
-                  Peasant("Christian"), 
-                  King, 
-                  Clown)
-
-println(people.sortWith(superior(_, _)).mkString(", "))
-
+string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))
 
 
 
@@ -269,7 +185,96 @@
 
 
 
-// sudoku again
+// Jumping Towers
+//================
+
+
+// the first n suffixes (tails) of xs, longest first
+// for n = 1 and non-empty xs, the result is just List(xs)
+
+def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
+  case (Nil, _) => Nil
+  case (xs, 0) => Nil
+  case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
+}
+
+
+moves(List(5,1,0), 1)
+moves(List(5,1,0), 2)
+moves(List(5,1,0), 5)
+
+// checks whether a jump tour exists at all
+// in the second case it needs to be < instead of <=
+
+def search(xs: List[Int]) : Boolean = xs match {
+  case Nil => true
+  case (x::xs) =>
+    if (xs.length < x) true else moves(xs, x).exists(search(_))
+}
+
+
+search(List(5,3,2,5,1,1))
+search(List(3,5,1,0,0,0,1))
+search(List(3,5,1,0,0,0,0,1))
+search(List(3,5,1,0,0,0,1,1))
+search(List(3,5,1))
+search(List(5,1,1))
+search(Nil)
+search(List(1))
+search(List(5,1,1))
+search(List(3,5,1,0,0,0,0,0,0,0,0,1))
+
+// generates *all* jump tours
+//    if we are only interested in the shortest one, we could
+//    short-circuit the calculation and only return List(x) in
+//    the case where xs.length < x, because no tour can be shorter
+//    than 1
+// 
+
+def jumps(xs: List[Int]) : List[List[Int]] = xs match {
+  case Nil => Nil
+  case (x::xs) => {
+    val children = moves(xs, x)
+    val results = children.flatMap((cs) => jumps(cs).map(x :: _))
+    if (xs.length < x) List(x) :: results else results
+  }
+}
+
+
+
+jumps(List(5,3,2,5,1,1))
+jumps(List(3,5,1,2,1,2,1))
+jumps(List(3,5,1,2,3,4,1))
+jumps(List(3,5,1,0,0,0,1))
+jumps(List(3,5,1))
+jumps(List(5,1,1))
+jumps(Nil)
+jumps(List(1))
+jumps(List(5,1,2))
+moves(List(1,2), 5)
+jumps(List(1,5,1,2))
+jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))
+
+jumps(List(5,3,2,5,1,1)).minBy(_.length)
+jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
+jumps(List(1,3,6,1,0,9)).minBy(_.length)
+jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)
+
+
+
+
+
+
+
+
+
+// Sudoku 
+//========
+
+// THE POINT OF THIS CODE IS NOT TO BE SUPER
+// EFFICIENT AND FAST, just explaining exhaustive
+// depth-first search
+
 
 val game0 = """.14.6.3..
               |62...4..9
@@ -308,6 +313,11 @@
     (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
 }
 
+//get_row(game0, 0)
+//get_row(game0, 1)
+//get_box(game0, (3,1))
+
+
 // this is not mutable!!
 def update(game: String, pos: Int, value: Char): String = 
   game.updated(pos, value)
@@ -333,6 +343,58 @@
   }
 }
 
+search(game0).map(pretty)
+
+val game1 = """23.915...
+              |...2..54.
+              |6.7......
+              |..1.....9
+              |89.5.3.17
+              |5.....6..
+              |......9.5
+              |.16..7...
+              |...329..1""".stripMargin.replaceAll("\\n", "")
+
+
+// game that is in the hard category
+val game2 = """8........
+              |..36.....
+              |.7..9.2..
+              |.5...7...
+              |....457..
+              |...1...3.
+              |..1....68
+              |..85...1.
+              |.9....4..""".stripMargin.replaceAll("\\n", "")
+
+// game with multiple solutions
+val game3 = """.8...9743
+              |.5...8.1.
+              |.1.......
+              |8....5...
+              |...8.4...
+              |...3....6
+              |.......7.
+              |.3.5...8.
+              |9724...5.""".stripMargin.replaceAll("\\n", "")
+
+
+
+
+search(game1).map(pretty)
+search(game3).map(pretty)
+search(game2).map(pretty)
+
+// for measuring time
+def time_needed[T](i: Int, code: => T) = {
+  val start = System.nanoTime()
+  for (j <- 1 to i) code
+  val end = System.nanoTime()
+  ((end - start) / 1.0e9) + " secs"
+}
+
+time_needed(1, search(game2))
+
 // tail recursive version that searches 
 // for all solutions
 
@@ -365,6 +427,8 @@
 }
 
 search1T(List(game3)).map(pretty)
+time_needed(10, search1T(List(game3)))
+
 
 // game with multiple solutions
 val game3 = """.8...9743
@@ -419,7 +483,6 @@
   case x::xs => 1 + length(xs)
 }
 length(List("1", "2", "3", "4"))
-length(List(King, Knight("foo"), Clown))
 length(List(1, 2, 3, 4))
 
 def map[A, B](lst: List[A], f: A => B): List[B] = lst match {
@@ -430,9 +493,6 @@
 map_int_list(List(1, 2, 3, 4), square)
 
 
-// Remember?
-def first[A, B](xs: List[A], f: A => Option[B]): Option[B] = ...
-
 
 
 
@@ -462,95 +522,5 @@
 
 
 
-// Regular expressions - the power of DSLs in Scala
-//==================================================
-
-abstract class Rexp
-case object ZERO extends Rexp                       // nothing
-case object ONE extends Rexp                        // the empty string
-case class CHAR(c: Char) extends Rexp               // a character c
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp     // alternative  r1 + r2
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp     // sequence     r1 o r2  
-case class STAR(r: Rexp) extends Rexp               // star         r*
-
-
-
-// (ab)*
-val r0 = STAR(SEQ(CHAR('a'), CHAR('b')))
-
-
-// some convenience for typing in regular expressions
-import scala.language.implicitConversions    
-import scala.language.reflectiveCalls 
-
-def charlist2rexp(s: List[Char]): Rexp = s match {
-  case Nil => ONE
-  case c::Nil => CHAR(c)
-  case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
 
 
-val r1 = STAR("ab")
-val r2 = STAR(ALT("ab"))
-val r3 = STAR(ALT("ab", "baa baa black sheep"))
-
-implicit def RexpOps (r: Rexp) = new {
-  def | (s: Rexp) = ALT(r, s)
-  def % = STAR(r)
-  def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps (s: String) = new {
-  def | (r: Rexp) = ALT(s, r)
-  def | (r: String) = ALT(s, r)
-  def % = STAR(s)
-  def ~ (r: Rexp) = SEQ(s, r)
-  def ~ (r: String) = SEQ(s, r)
-}
-
-//example regular expressions
-val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-val sign = "+" | "-" | ""
-val number = sign ~ digit ~ digit.% 
-
-
-
-
-
-// The End
-//=========
-
-// A function should do one thing, and only one thing.
-
-// Make your variables immutable, unless there's a good 
-// reason not to.
-
-// You can be productive on Day 1, but the language is deep.
-//
-// http://scalapuzzlers.com
-//
-// http://www.latkin.org/blog/2017/05/02/when-the-scala-compiler-doesnt-help/
-
-List(1, 2, 3) contains "your mom"
-
-// I like best about Scala that it lets me often write
-// concise, readable code.
-
-
-
-// You can define your own while loop
-
-
-def my_while(condition: => Boolean)(block: => Unit): Unit = 
-  if (condition) { block ; my_while(condition) { block } } else { }
-
-
-var x = 10
-my_while (x > 0) { 
-  println(s"$x") ; x = x - 1 
-}
-
-
-`symbol
-`symbol`