diff -r 8c868feb917b -r e689375abcc1 progs/lecture3.scala --- a/progs/lecture3.scala Thu Nov 22 23:00:57 2018 +0000 +++ b/progs/lecture3.scala Fri Nov 23 01:52:37 2018 +0000 @@ -1,238 +1,154 @@ // Scala Lecture 3 //================= -// Pattern Matching -//================== + +// A Web Crawler / Email Harvester +//================================= +// +// the idea is to look for links using the +// regular expression "https?://[^"]*" and for +// email addresses using another regex. + +import io.Source +import scala.util._ -// A powerful tool which is supposed to come to Java in a few years -// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala already -// has it for many years. Other functional languages have it already for -// decades. I think I would be really upset if a programming language -// I have to use does not have pattern matching....its is just so -// useful. ;o) +// gets the first 10K of a web-page +def get_page(url: String) : String = { + Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). + getOrElse { println(s" Problem with: $url"); ""} +} -// The general schema: -// -// expression match { -// case pattern1 => expression1 -// case pattern2 => expression2 -// ... -// case patternN => expressionN -// } +// regex for URLs and emails +val http_pattern = """"https?://[^"]*"""".r +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +//email_pattern.findAllIn +// ("foo bla christian@kcl.ac.uk 1234567").toList -// remember -val lst = List(None, Some(1), Some(2), None, Some(3)).flatten +// drops the first and last character from a string +def unquote(s: String) = s.drop(1).dropRight(1) +def get_all_URLs(page: String): Set[String] = + http_pattern.findAllIn(page).map(unquote).toSet -def my_flatten(xs: List[Option[Int]]): List[Int] = { - if (xs == Nil) Nil - else if (xs.head == None) my_flatten(xs.tail) - else xs.head.get :: my_flatten(xs.tail) +// naive version of crawl - searches until a given depth, +// visits pages potentially more than once +def crawl(url: String, n: Int) : Set[String] = { + if (n == 0) Set() + else { + println(s" Visiting: $n $url") + val page = get_page(url) + val new_emails = email_pattern.findAllIn(page).toSet + new_emails ++ + (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten + } } +// some starting URLs for the crawler +val startURL = """https://nms.kcl.ac.uk/christian.urban/""" + +crawl(startURL, 2) + -val lst = List(None, Some(1), Some(2), None, Some(3)) +// User-defined Datatypes and Pattern Matching +//============================================ + -def my_flatten(lst: List[Option[Int]]): List[Int] = lst match { - case Nil => Nil - case None::xs => my_flatten(xs) - case Some(n)::xs => n::my_flatten(xs) -} - -my_flatten(lst) - -Nil == List() +abstract class Exp +case class N(n: Int) extends Exp +case class Plus(e1: Exp, e2: Exp) extends Exp +case class Times(e1: Exp, e2: Exp) extends Exp -// another example including a catch-all pattern -def get_me_a_string(n: Int): String = n match { - case 0 => "zero" - case 1 => "one" - case 2 => "two" - case _ => "many" -} -get_me_a_string(10) - -// you can also have cases combined -def season(month: String) = month match { - case "March" | "April" | "May" => "It's spring" - case "June" | "July" | "August" => "It's summer" - case "September" | "October" | "November" => "It's autumn" - case "December" | "January" | "February" => "It's winter" -} - -println(season("November")) - -// What happens if no case matches? - -println(season("foobar")) - - -// we can also match more complicated pattern -// -// let's look at the Collatz function on binary strings - -// adding two binary strings in a very, very lazy manner - -def badd(s1: String, s2: String) : String = - (BigInt(s1, 2) + BigInt(s2, 2)).toString(2) - - -"111".dropRight(1) -"111".last - -def bcollatz(s: String) : Long = (s.dropRight(1), s.last) match { - case ("", '1') => 1 // we reached 1 - case (rest, '0') => 1 + bcollatz(rest) - // even number => divide by two - case (rest, '1') => 1 + bcollatz(badd(s + '1', s)) - // odd number => s + '1' is 2 * s + 1 - // add another s gives 3 * s + 1 -} - -bcollatz(6.toBinaryString) -bcollatz(837799.toBinaryString) -bcollatz(100000000000000000L.toBinaryString) -bcollatz(BigInt("1000000000000000000000000000000000000000000000000000000000000000000000000000").toString(2)) - +// string of an Exp +// eval of an Exp +// simp an Exp +// Tokens +// Reverse Polish Notation +// compute RP +// transform RP into Exp +// process RP string and generate Exp -// User-defined Datatypes -//======================== +def string(e: Exp) : String = e match { + case N(n) => n.toString + case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" + case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" +} -abstract class Colour -case object Red extends Colour -case object Green extends Colour -case object Blue extends Colour +val e = Plus(N(9), Times(N(3), N(4))) + +println(string(e)) -def fav_colour(c: Colour) : Boolean = c match { - case Red => false - case Green => true - case Blue => false +def eval(e: Exp) : Int = e match { + case N(n) => n + case Plus(e1, e2) => eval(e1) + eval(e2) + case Times(e1, e2) => eval(e1) * eval(e2) } -fav_colour(Green) - - -// actually colors can be written with "object", -// because they do not take any arguments - -abstract class Day -case object Monday extends Day -case object Tuesday extends Day -case object Wednesday extends Day -case object Thursday extends Day -case object Friday extends Day -case object Saturday extends Day -case object Sunday extends Day +eval(e) -abstract class Suit -case object Spades extends Suit -case object Hearts extends Suit -case object Diamonds extends Suit -case object Clubs extends Suit - -//define function for colour of suits - -abstract class Rank -case class Ace extends Rank -case class King extends Rank -case class Queen extends Rank -case class Jack extends Rank -case class Num(n: Int) extends Rank - -//define functions for beats -//beats Ace _ => true -//beats _ Acs => false +def simp(e: Exp) : Exp = e match { + case N(n) => N(n) + case Plus(e1, e2) => (simp(e1), simp(e2)) match { + case (N(0), e2s) => e2s + case (e1s, N(0)) => e1s + case (e1s, e2s) => Plus(e1s, e2s) + } + case Times(e1, e2) => (simp(e1), simp(e2)) match { + case (N(0), e2s) => N(0) + case (e1s, N(0)) => N(0) + case (N(1), e2s) => e2s + case (e1s, N(1)) => e1s + case (e1s, e2s) => Times(e1s, e2s) + } +} -// ... a bit more useful: Roman Numerals - -abstract class RomanDigit -case object I extends RomanDigit -case object V extends RomanDigit -case object X extends RomanDigit -case object L extends RomanDigit -case object C extends RomanDigit -case object D extends RomanDigit -case object M extends RomanDigit - -type RomanNumeral = List[RomanDigit] +val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) +println(string(e2)) +println(string(simp(e2))) -def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { - case Nil => 0 - case M::r => 1000 + RomanNumeral2Int(r) - case C::M::r => 900 + RomanNumeral2Int(r) - case D::r => 500 + RomanNumeral2Int(r) - case C::D::r => 400 + RomanNumeral2Int(r) - case C::r => 100 + RomanNumeral2Int(r) - case X::C::r => 90 + RomanNumeral2Int(r) - case L::r => 50 + RomanNumeral2Int(r) - case X::L::r => 40 + RomanNumeral2Int(r) - case X::r => 10 + RomanNumeral2Int(r) - case I::X::r => 9 + RomanNumeral2Int(r) - case V::r => 5 + RomanNumeral2Int(r) - case I::V::r => 4 + RomanNumeral2Int(r) - case I::r => 1 + RomanNumeral2Int(r) +// Token and Reverse Polish Notation +abstract class Token +case class T(n: Int) extends Token +case object PL extends Token +case object TI extends Token + +def rp(e: Exp) : List[Token] = e match { + case N(n) => List(T(n)) + case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) + case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) } -RomanNumeral2Int(List(I,V)) // 4 -RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) -RomanNumeral2Int(List(V,I)) // 6 -RomanNumeral2Int(List(I,X)) // 9 -RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 -RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 +def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match { + case (Nil, st) => st.head + case (T(n)::rest, st) => comp(rest, n::st) + case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) + case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) +} +def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match { + case (Nil, st) => st.head + case (T(n)::rest, st) => exp(rest, N(n)::st) + case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st) + case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st) +} + +exp(toks(e2), Nil) + +def proc(s: String) = s match { + case "+" => PL + case "*" => TI + case n => T(n.toInt) +} -// another example -//================= - -// Once upon a time, in a complete fictional country there were Persons... - -abstract class Person -case object King extends Person -case class Peer(deg: String, terr: String, succ: Int) extends Person -case class Knight(name: String) extends Person -case class Peasant(name: String) extends Person -case object Clown extends Person - -def title(p: Person): String = p match { - case King => "His Majesty the King" - case Peer(deg, terr, _) => s"The ${deg} of ${terr}" - case Knight(name) => s"Sir ${name}" - case Peasant(name) => name - case Clown => "My name is Boris Johnson" - -} - -title(Clown) - - - -def superior(p1: Person, p2: Person): Boolean = (p1, p2) match { - case (King, _) => true - case (Peer(_,_,_), Knight(_)) => true - case (Peer(_,_,_), Peasant(_)) => true - case (Peer(_,_,_), Clown) => true - case (Knight(_), Peasant(_)) => true - case (Knight(_), Clown) => true - case (Clown, Peasant(_)) => true - case _ => false -} - -val people = List(Knight("David"), - Peer("Duke", "Norfolk", 84), - Peasant("Christian"), - King, - Clown) - -println(people.sortWith(superior(_, _)).mkString(", ")) - +string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)) @@ -269,7 +185,96 @@ -// sudoku again +// Jumping Towers +//================ + + +// the first n prefixes of xs +// for 1 => include xs + +def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match { + case (Nil, _) => Nil + case (xs, 0) => Nil + case (x::xs, n) => (x::xs) :: moves(xs, n - 1) +} + + +moves(List(5,1,0), 1) +moves(List(5,1,0), 2) +moves(List(5,1,0), 5) + +// checks whether a jump tour exists at all +// in the second case it needs to be < instead of <= + +def search(xs: List[Int]) : Boolean = xs match { + case Nil => true + case (x::xs) => + if (xs.length < x) true else moves(xs, x).exists(search(_)) +} + + +search(List(5,3,2,5,1,1)) +search(List(3,5,1,0,0,0,1)) +search(List(3,5,1,0,0,0,0,1)) +search(List(3,5,1,0,0,0,1,1)) +search(List(3,5,1)) +search(List(5,1,1)) +search(Nil) +search(List(1)) +search(List(5,1,1)) +search(List(3,5,1,0,0,0,0,0,0,0,0,1)) + +// generates *all* jump tours +// if we are only interested in the shortes one, we could +// shortcircut the calculation and only return List(x) in +// case where xs.length < x, because no tour can be shorter +// than 1 +// + +def jumps(xs: List[Int]) : List[List[Int]] = xs match { + case Nil => Nil + case (x::xs) => { + val children = moves(xs, x) + val results = children.flatMap((cs) => jumps(cs).map(x :: _)) + if (xs.length < x) List(x) :: results else results + } +} + + + +jumps(List(5,3,2,5,1,1)) +jumps(List(3,5,1,2,1,2,1)) +jumps(List(3,5,1,2,3,4,1)) +jumps(List(3,5,1,0,0,0,1)) +jumps(List(3,5,1)) +jumps(List(5,1,1)) +jumps(Nil) +jumps(List(1)) +jumps(List(5,1,2)) +moves(List(1,2), 5) +jumps(List(1,5,1,2)) +jumps(List(3,5,1,0,0,0,0,0,0,0,0,1)) + +jumps(List(5,3,2,5,1,1)).minBy(_.length) +jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length) +jumps(List(1,3,6,1,0,9)).minBy(_.length) +jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length) + + + + + + + + + +// Sudoku +//======== + +// THE POINT OF THIS CODE IS NOT TO BE SUPER +// EFFICIENT AND FAST, just explaining exhaustive +// depth-first search + val game0 = """.14.6.3.. |62...4..9 @@ -308,6 +313,11 @@ (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue))) } +//get_row(game0, 0) +//get_row(game0, 1) +//get_box(game0, (3,1)) + + // this is not mutable!! def update(game: String, pos: Int, value: Char): String = game.updated(pos, value) @@ -333,6 +343,58 @@ } } +search(game0).map(pretty) + +val game1 = """23.915... + |...2..54. + |6.7...... + |..1.....9 + |89.5.3.17 + |5.....6.. + |......9.5 + |.16..7... + |...329..1""".stripMargin.replaceAll("\\n", "") + + +// game that is in the hard category +val game2 = """8........ + |..36..... + |.7..9.2.. + |.5...7... + |....457.. + |...1...3. + |..1....68 + |..85...1. + |.9....4..""".stripMargin.replaceAll("\\n", "") + +// game with multiple solutions +val game3 = """.8...9743 + |.5...8.1. + |.1....... + |8....5... + |...8.4... + |...3....6 + |.......7. + |.3.5...8. + |9724...5.""".stripMargin.replaceAll("\\n", "") + + + + +search(game1).map(pretty) +search(game3).map(pretty) +search(game2).map(pretty) + +// for measuring time +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + ((end - start) / 1.0e9) + " secs" +} + +time_needed(1, search(game2)) + // tail recursive version that searches // for all solutions @@ -365,6 +427,8 @@ } search1T(List(game3)).map(pretty) +time_needed(10, search1T(List(game3))) + // game with multiple solutions val game3 = """.8...9743 @@ -419,7 +483,6 @@ case x::xs => 1 + length(xs) } length(List("1", "2", "3", "4")) -length(List(King, Knight("foo"), Clown)) length(List(1, 2, 3, 4)) def map[A, B](lst: List[A], f: A => B): List[B] = lst match { @@ -430,9 +493,6 @@ map_int_list(List(1, 2, 3, 4), square) -// Remember? -def first[A, B](xs: List[A], f: A => Option[B]): Option[B] = ... - @@ -462,95 +522,5 @@ -// Regular expressions - the power of DSLs in Scala -//================================================== - -abstract class Rexp -case object ZERO extends Rexp // nothing -case object ONE extends Rexp // the empty string -case class CHAR(c: Char) extends Rexp // a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative r1 + r2 -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence r1 o r2 -case class STAR(r: Rexp) extends Rexp // star r* - - - -// (ab)* -val r0 = STAR(SEQ(CHAR('a'), CHAR('b'))) - - -// some convenience for typing in regular expressions -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -def charlist2rexp(s: List[Char]): Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) -val r1 = STAR("ab") -val r2 = STAR(ALT("ab")) -val r3 = STAR(ALT("ab", "baa baa black sheep")) - -implicit def RexpOps (r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - -//example regular expressions -val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -val sign = "+" | "-" | "" -val number = sign ~ digit ~ digit.% - - - - - -// The End -//========= - -// A function should do one thing, and only one thing. - -// Make your variables immutable, unless there's a good -// reason not to. - -// You can be productive on Day 1, but the language is deep. -// -// http://scalapuzzlers.com -// -// http://www.latkin.org/blog/2017/05/02/when-the-scala-compiler-doesnt-help/ - -List(1, 2, 3) contains "your mom" - -// I like best about Scala that it lets me often write -// concise, readable code. - - - -// You can define your own while loop - - -def my_while(condition: => Boolean)(block: => Unit): Unit = - if (condition) { block ; my_while(condition) { block } } else { } - - -var x = 10 -my_while (x > 0) { - println(s"$x") ; x = x - 1 -} - - -`symbol -`symbol`