--- a/progs/lecture3.scala Thu Nov 22 23:00:57 2018 +0000
+++ b/progs/lecture3.scala Fri Nov 23 01:52:37 2018 +0000
@@ -1,238 +1,154 @@
// Scala Lecture 3
//=================
-// Pattern Matching
-//==================
+
+// A Web Crawler / Email Harvester
+//=================================
+//
+// the idea is to look for links using the
+// regular expression "https?://[^"]*" and for
+// email addresses using another regex.
+
+import io.Source
+import scala.util._
-// A powerful tool which is supposed to come to Java in a few years
-// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala already
-// has it for many years. Other functional languages have it already for
-// decades. I think I would be really upset if a programming language
-// I have to use does not have pattern matching....its is just so
-// useful. ;o)
+// gets the first 10K of a web-page
+def get_page(url: String) : String = {
+ Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString).
+ getOrElse { println(s" Problem with: $url"); ""}
+}
-// The general schema:
-//
-// expression match {
-// case pattern1 => expression1
-// case pattern2 => expression2
-// ...
-// case patternN => expressionN
-// }
+// regex for URLs and emails
+val http_pattern = """"https?://[^"]*"""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+//email_pattern.findAllIn
+// ("foo bla christian@kcl.ac.uk 1234567").toList
-// remember
-val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
+// drops the first and last character from a string
+def unquote(s: String) = s.drop(1).dropRight(1)
+def get_all_URLs(page: String): Set[String] =
+ http_pattern.findAllIn(page).map(unquote).toSet
-def my_flatten(xs: List[Option[Int]]): List[Int] = {
- if (xs == Nil) Nil
- else if (xs.head == None) my_flatten(xs.tail)
- else xs.head.get :: my_flatten(xs.tail)
+// naive version of crawl - searches until a given depth,
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Set[String] = {
+ if (n == 0) Set()
+ else {
+ println(s" Visiting: $n $url")
+ val page = get_page(url)
+ val new_emails = email_pattern.findAllIn(page).toSet
+ new_emails ++
+ (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten
+ }
}
+// some starting URLs for the crawler
+val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
+
+crawl(startURL, 2)
+
-val lst = List(None, Some(1), Some(2), None, Some(3))
+// User-defined Datatypes and Pattern Matching
+//============================================
+
-def my_flatten(lst: List[Option[Int]]): List[Int] = lst match {
- case Nil => Nil
- case None::xs => my_flatten(xs)
- case Some(n)::xs => n::my_flatten(xs)
-}
-
-my_flatten(lst)
-
-Nil == List()
+abstract class Exp
+case class N(n: Int) extends Exp
+case class Plus(e1: Exp, e2: Exp) extends Exp
+case class Times(e1: Exp, e2: Exp) extends Exp
-// another example including a catch-all pattern
-def get_me_a_string(n: Int): String = n match {
- case 0 => "zero"
- case 1 => "one"
- case 2 => "two"
- case _ => "many"
-}
-get_me_a_string(10)
-
-// you can also have cases combined
-def season(month: String) = month match {
- case "March" | "April" | "May" => "It's spring"
- case "June" | "July" | "August" => "It's summer"
- case "September" | "October" | "November" => "It's autumn"
- case "December" | "January" | "February" => "It's winter"
-}
-
-println(season("November"))
-
-// What happens if no case matches?
-
-println(season("foobar"))
-
-
-// we can also match more complicated pattern
-//
-// let's look at the Collatz function on binary strings
-
-// adding two binary strings in a very, very lazy manner
-
-def badd(s1: String, s2: String) : String =
- (BigInt(s1, 2) + BigInt(s2, 2)).toString(2)
-
-
-"111".dropRight(1)
-"111".last
-
-def bcollatz(s: String) : Long = (s.dropRight(1), s.last) match {
- case ("", '1') => 1 // we reached 1
- case (rest, '0') => 1 + bcollatz(rest)
- // even number => divide by two
- case (rest, '1') => 1 + bcollatz(badd(s + '1', s))
- // odd number => s + '1' is 2 * s + 1
- // add another s gives 3 * s + 1
-}
-
-bcollatz(6.toBinaryString)
-bcollatz(837799.toBinaryString)
-bcollatz(100000000000000000L.toBinaryString)
-bcollatz(BigInt("1000000000000000000000000000000000000000000000000000000000000000000000000000").toString(2))
-
+// string of an Exp
+// eval of an Exp
+// simp an Exp
+// Tokens
+// Reverse Polish Notation
+// compute RP
+// transform RP into Exp
+// process RP string and generate Exp
-// User-defined Datatypes
-//========================
+def string(e: Exp) : String = e match {
+ case N(n) => n.toString
+ case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")"
+ case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")"
+}
-abstract class Colour
-case object Red extends Colour
-case object Green extends Colour
-case object Blue extends Colour
+val e = Plus(N(9), Times(N(3), N(4)))
+
+println(string(e))
-def fav_colour(c: Colour) : Boolean = c match {
- case Red => false
- case Green => true
- case Blue => false
+def eval(e: Exp) : Int = e match {
+ case N(n) => n
+ case Plus(e1, e2) => eval(e1) + eval(e2)
+ case Times(e1, e2) => eval(e1) * eval(e2)
}
-fav_colour(Green)
-
-
-// actually colors can be written with "object",
-// because they do not take any arguments
-
-abstract class Day
-case object Monday extends Day
-case object Tuesday extends Day
-case object Wednesday extends Day
-case object Thursday extends Day
-case object Friday extends Day
-case object Saturday extends Day
-case object Sunday extends Day
+eval(e)
-abstract class Suit
-case object Spades extends Suit
-case object Hearts extends Suit
-case object Diamonds extends Suit
-case object Clubs extends Suit
-
-//define function for colour of suits
-
-abstract class Rank
-case class Ace extends Rank
-case class King extends Rank
-case class Queen extends Rank
-case class Jack extends Rank
-case class Num(n: Int) extends Rank
-
-//define functions for beats
-//beats Ace _ => true
-//beats _ Acs => false
+def simp(e: Exp) : Exp = e match {
+ case N(n) => N(n)
+ case Plus(e1, e2) => (simp(e1), simp(e2)) match {
+ case (N(0), e2s) => e2s
+ case (e1s, N(0)) => e1s
+ case (e1s, e2s) => Plus(e1s, e2s)
+ }
+ case Times(e1, e2) => (simp(e1), simp(e2)) match {
+ case (N(0), e2s) => N(0)
+ case (e1s, N(0)) => N(0)
+ case (N(1), e2s) => e2s
+ case (e1s, N(1)) => e1s
+ case (e1s, e2s) => Times(e1s, e2s)
+ }
+}
-// ... a bit more useful: Roman Numerals
-
-abstract class RomanDigit
-case object I extends RomanDigit
-case object V extends RomanDigit
-case object X extends RomanDigit
-case object L extends RomanDigit
-case object C extends RomanDigit
-case object D extends RomanDigit
-case object M extends RomanDigit
-
-type RomanNumeral = List[RomanDigit]
+val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
+println(string(e2))
+println(string(simp(e2)))
-def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
- case Nil => 0
- case M::r => 1000 + RomanNumeral2Int(r)
- case C::M::r => 900 + RomanNumeral2Int(r)
- case D::r => 500 + RomanNumeral2Int(r)
- case C::D::r => 400 + RomanNumeral2Int(r)
- case C::r => 100 + RomanNumeral2Int(r)
- case X::C::r => 90 + RomanNumeral2Int(r)
- case L::r => 50 + RomanNumeral2Int(r)
- case X::L::r => 40 + RomanNumeral2Int(r)
- case X::r => 10 + RomanNumeral2Int(r)
- case I::X::r => 9 + RomanNumeral2Int(r)
- case V::r => 5 + RomanNumeral2Int(r)
- case I::V::r => 4 + RomanNumeral2Int(r)
- case I::r => 1 + RomanNumeral2Int(r)
+// Token and Reverse Polish Notation
+abstract class Token
+case class T(n: Int) extends Token
+case object PL extends Token
+case object TI extends Token
+
+def rp(e: Exp) : List[Token] = e match {
+ case N(n) => List(T(n))
+ case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
+ case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
}
-RomanNumeral2Int(List(I,V)) // 4
-RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)
-RomanNumeral2Int(List(V,I)) // 6
-RomanNumeral2Int(List(I,X)) // 9
-RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
-RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
+def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match {
+ case (Nil, st) => st.head
+ case (T(n)::rest, st) => comp(rest, n::st)
+ case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
+ case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
+}
+def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
+ case (Nil, st) => st.head
+ case (T(n)::rest, st) => exp(rest, N(n)::st)
+ case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st)
+ case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st)
+}
+
+exp(rp(e2), Nil) // rp gives the reverse Polish tokens of e2; exp rebuilds the Exp from them
+
+def proc(s: String) = s match {
+ case "+" => PL
+ case "*" => TI
+ case n => T(n.toInt)
+}
-// another example
-//=================
-
-// Once upon a time, in a complete fictional country there were Persons...
-
-abstract class Person
-case object King extends Person
-case class Peer(deg: String, terr: String, succ: Int) extends Person
-case class Knight(name: String) extends Person
-case class Peasant(name: String) extends Person
-case object Clown extends Person
-
-def title(p: Person): String = p match {
- case King => "His Majesty the King"
- case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
- case Knight(name) => s"Sir ${name}"
- case Peasant(name) => name
- case Clown => "My name is Boris Johnson"
-
-}
-
-title(Clown)
-
-
-
-def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
- case (King, _) => true
- case (Peer(_,_,_), Knight(_)) => true
- case (Peer(_,_,_), Peasant(_)) => true
- case (Peer(_,_,_), Clown) => true
- case (Knight(_), Peasant(_)) => true
- case (Knight(_), Clown) => true
- case (Clown, Peasant(_)) => true
- case _ => false
-}
-
-val people = List(Knight("David"),
- Peer("Duke", "Norfolk", 84),
- Peasant("Christian"),
- King,
- Clown)
-
-println(people.sortWith(superior(_, _)).mkString(", "))
-
+string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))
@@ -269,7 +185,96 @@
-// sudoku again
+// Jumping Towers
+//================
+
+
+// the first n suffixes of xs (xs itself, then its successive tails)
+// e.g. for n = 1 and non-empty xs the result is just List(xs)
+
+def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
+ case (Nil, _) => Nil
+ case (xs, 0) => Nil
+ case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
+}
+
+
+moves(List(5,1,0), 1)
+moves(List(5,1,0), 2)
+moves(List(5,1,0), 5)
+
+// checks whether a jump tour exists at all
+// in the second case it needs to be < instead of <=
+
+def search(xs: List[Int]) : Boolean = xs match {
+ case Nil => true
+ case (x::xs) =>
+ if (xs.length < x) true else moves(xs, x).exists(search(_))
+}
+
+
+search(List(5,3,2,5,1,1))
+search(List(3,5,1,0,0,0,1))
+search(List(3,5,1,0,0,0,0,1))
+search(List(3,5,1,0,0,0,1,1))
+search(List(3,5,1))
+search(List(5,1,1))
+search(Nil)
+search(List(1))
+search(List(5,1,1))
+search(List(3,5,1,0,0,0,0,0,0,0,0,1))
+
+// generates *all* jump tours
+// if we are only interested in the shortest one, we could
+// short-circuit the calculation and only return List(x) in
+// the case where xs.length < x, because no tour can be shorter
+// than 1
+//
+
+def jumps(xs: List[Int]) : List[List[Int]] = xs match {
+ case Nil => Nil
+ case (x::xs) => {
+ val children = moves(xs, x)
+ val results = children.flatMap((cs) => jumps(cs).map(x :: _))
+ if (xs.length < x) List(x) :: results else results
+ }
+}
+
+
+
+jumps(List(5,3,2,5,1,1))
+jumps(List(3,5,1,2,1,2,1))
+jumps(List(3,5,1,2,3,4,1))
+jumps(List(3,5,1,0,0,0,1))
+jumps(List(3,5,1))
+jumps(List(5,1,1))
+jumps(Nil)
+jumps(List(1))
+jumps(List(5,1,2))
+moves(List(1,2), 5)
+jumps(List(1,5,1,2))
+jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))
+
+jumps(List(5,3,2,5,1,1)).minBy(_.length)
+jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
+jumps(List(1,3,6,1,0,9)).minBy(_.length)
+jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)
+
+
+
+
+
+
+
+
+
+// Sudoku
+//========
+
+// THE POINT OF THIS CODE IS NOT TO BE SUPER
+// EFFICIENT AND FAST, just explaining exhaustive
+// depth-first search
+
val game0 = """.14.6.3..
|62...4..9
@@ -308,6 +313,11 @@
(x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
}
+//get_row(game0, 0)
+//get_row(game0, 1)
+//get_box(game0, (3,1))
+
+
// this is not mutable!!
def update(game: String, pos: Int, value: Char): String =
game.updated(pos, value)
@@ -333,6 +343,58 @@
}
}
+search(game0).map(pretty)
+
+val game1 = """23.915...
+ |...2..54.
+ |6.7......
+ |..1.....9
+ |89.5.3.17
+ |5.....6..
+ |......9.5
+ |.16..7...
+ |...329..1""".stripMargin.replaceAll("\\n", "")
+
+
+// game that is in the hard category
+val game2 = """8........
+ |..36.....
+ |.7..9.2..
+ |.5...7...
+ |....457..
+ |...1...3.
+ |..1....68
+ |..85...1.
+ |.9....4..""".stripMargin.replaceAll("\\n", "")
+
+// game with multiple solutions
+val game3 = """.8...9743
+ |.5...8.1.
+ |.1.......
+ |8....5...
+ |...8.4...
+ |...3....6
+ |.......7.
+ |.3.5...8.
+ |9724...5.""".stripMargin.replaceAll("\\n", "")
+
+
+
+
+search(game1).map(pretty)
+search(game3).map(pretty)
+search(game2).map(pretty)
+
+// for measuring time
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ ((end - start) / 1.0e9) + " secs"
+}
+
+time_needed(1, search(game2))
+
// tail recursive version that searches
// for all solutions
@@ -365,6 +427,8 @@
}
search1T(List(game3)).map(pretty)
+time_needed(10, search1T(List(game3)))
+
// game with multiple solutions
val game3 = """.8...9743
@@ -419,7 +483,6 @@
case x::xs => 1 + length(xs)
}
length(List("1", "2", "3", "4"))
-length(List(King, Knight("foo"), Clown))
length(List(1, 2, 3, 4))
def map[A, B](lst: List[A], f: A => B): List[B] = lst match {
@@ -430,9 +493,6 @@
map_int_list(List(1, 2, 3, 4), square)
-// Remember?
-def first[A, B](xs: List[A], f: A => Option[B]): Option[B] = ...
-
@@ -462,95 +522,5 @@
-// Regular expressions - the power of DSLs in Scala
-//==================================================
-
-abstract class Rexp
-case object ZERO extends Rexp // nothing
-case object ONE extends Rexp // the empty string
-case class CHAR(c: Char) extends Rexp // a character c
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative r1 + r2
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence r1 o r2
-case class STAR(r: Rexp) extends Rexp // star r*
-
-
-
-// (ab)*
-val r0 = STAR(SEQ(CHAR('a'), CHAR('b')))
-
-
-// some convenience for typing in regular expressions
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-def charlist2rexp(s: List[Char]): Rexp = s match {
- case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
-val r1 = STAR("ab")
-val r2 = STAR(ALT("ab"))
-val r3 = STAR(ALT("ab", "baa baa black sheep"))
-
-implicit def RexpOps (r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps (s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-//example regular expressions
-val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-val sign = "+" | "-" | ""
-val number = sign ~ digit ~ digit.%
-
-
-
-
-
-// The End
-//=========
-
-// A function should do one thing, and only one thing.
-
-// Make your variables immutable, unless there's a good
-// reason not to.
-
-// You can be productive on Day 1, but the language is deep.
-//
-// http://scalapuzzlers.com
-//
-// http://www.latkin.org/blog/2017/05/02/when-the-scala-compiler-doesnt-help/
-
-List(1, 2, 3) contains "your mom"
-
-// I like best about Scala that it lets me often write
-// concise, readable code.
-
-
-
-// You can define your own while loop
-
-
-def my_while(condition: => Boolean)(block: => Unit): Unit =
- if (condition) { block ; my_while(condition) { block } } else { }
-
-
-var x = 10
-my_while (x > 0) {
- println(s"$x") ; x = x - 1
-}
-
-
-`symbol
-`symbol`