1 // Scala Lecture 3  | 
     1 // Scala Lecture 3  | 
     2 //=================  | 
     2 //=================  | 
     3   | 
     3   | 
     4 // Pattern Matching  | 
     4   | 
     5 //==================  | 
     5 // A Web Crawler / Email Harvester  | 
     6   | 
     6 //=================================  | 
     7 // A powerful tool which is supposed to come to Java in a few years  | 
         | 
     8 // time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala already  | 
         | 
     9 // has had it for many years. Other functional languages have had it for  | 
         | 
    10 // decades. I think I would be really upset if a programming language   | 
         | 
    11 // I have to use does not have pattern matching....it is just so   | 
         | 
    12 // useful. ;o)  | 
         | 
    13   | 
         | 
    14 // The general schema:  | 
         | 
    15 //  | 
     7 //  | 
    16 //    expression match { | 
     8 // the idea is to look for links using the  | 
    17 //       case pattern1 => expression1  | 
     9 // regular expression "https?://[^"]*" and for  | 
    18 //       case pattern2 => expression2  | 
    10 // email addresses using another regex.  | 
    19 //       ...  | 
    11   | 
    20 //       case patternN => expressionN  | 
    12 import io.Source  | 
    21 //    }  | 
    13 import scala.util._  | 
    22   | 
    14   | 
    23   | 
    15 // gets the first 10K of a web-page  | 
    24 // remember  | 
    16 def get_page(url: String) : String = { | 
    25 val lst = List(None, Some(1), Some(2), None, Some(3)).flatten  | 
    17   Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). | 
    26   | 
    18     getOrElse { println(s"  Problem with: $url"); ""} | 
    27   | 
    19 }  | 
    28 def my_flatten(xs: List[Option[Int]]): List[Int] = { | 
    20   | 
    29   if (xs == Nil) Nil  | 
    21 // regex for URLs and emails  | 
    30   else if (xs.head == None) my_flatten(xs.tail)  | 
    22 val http_pattern = """"https?://[^"]*"""".r  | 
    31   else xs.head.get :: my_flatten(xs.tail)  | 
    23 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r | 
    32 }  | 
    24   | 
    33   | 
    25 //email_pattern.findAllIn  | 
    34   | 
    26 //  ("foo bla christian@kcl.ac.uk 1234567").toList | 
    35   | 
    27   | 
    36 val lst = List(None, Some(1), Some(2), None, Some(3))  | 
    28   | 
    37   | 
    29 // drops the first and last character from a string  | 
    38 def my_flatten(lst: List[Option[Int]]): List[Int] = lst match { | 
    30 def unquote(s: String) = s.drop(1).dropRight(1)  | 
    39   case Nil => Nil  | 
    31   | 
    40   case None::xs => my_flatten(xs)  | 
    32 def get_all_URLs(page: String): Set[String] =   | 
    41   case Some(n)::xs => n::my_flatten(xs)  | 
    33   http_pattern.findAllIn(page).map(unquote).toSet  | 
    42 }  | 
    34   | 
    43   | 
    35 // naive version of crawl - searches until a given depth,  | 
    44 my_flatten(lst)  | 
    36 // visits pages potentially more than once  | 
    45   | 
    37 def crawl(url: String, n: Int) : Set[String] = { | 
    46 Nil == List()  | 
    38   if (n == 0) Set()  | 
    47   | 
    39   else { | 
    48   | 
    40     println(s"  Visiting: $n $url")  | 
    49 // another example including a catch-all pattern  | 
    41     val page = get_page(url)  | 
    50 def get_me_a_string(n: Int): String = n match { | 
    42     val new_emails = email_pattern.findAllIn(page).toSet  | 
    51   case 0 => "zero"  | 
    43     new_emails ++   | 
    52   case 1 => "one"  | 
    44       (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten  | 
    53   case 2 => "two"  | 
    45   }  | 
    54   case _ => "many"  | 
    46 }  | 
    55 }  | 
    47   | 
    56   | 
    48 // some starting URLs for the crawler  | 
    57 get_me_a_string(10)  | 
    49 val startURL = """https://nms.kcl.ac.uk/christian.urban/"""  | 
    58   | 
    50   | 
    59 // you can also have cases combined  | 
    51 crawl(startURL, 2)  | 
    60 def season(month: String) = month match { | 
    52   | 
    61   case "March" | "April" | "May" => "It's spring"  | 
    53   | 
    62   case "June" | "July" | "August" => "It's summer"  | 
    54   | 
    63   case "September" | "October" | "November" => "It's autumn"  | 
    55 // User-defined Datatypes and Pattern Matching  | 
    64   case "December" | "January" | "February" => "It's winter"  | 
    56 //============================================  | 
    65 }  | 
    57   | 
    66    | 
    58   | 
    67 println(season("November")) | 
    59 abstract class Exp  | 
    68   | 
    60 case class N(n: Int) extends Exp  | 
    69 // What happens if no case matches?  | 
    61 case class Plus(e1: Exp, e2: Exp) extends Exp  | 
    70   | 
    62 case class Times(e1: Exp, e2: Exp) extends Exp  | 
    71 println(season("foobar")) | 
    63   | 
    72   | 
    64   | 
    73   | 
    65   | 
    74 // we can also match more complicated patterns  | 
    66 // string of an Exp  | 
    75 //  | 
    67 // eval of an Exp  | 
    76 // let's look at the Collatz function on binary strings  | 
    68 // simp an Exp  | 
    77   | 
    69 // Tokens  | 
    78 // adding two binary strings in a very, very lazy manner  | 
    70 // Reverse Polish Notation  | 
    79   | 
    71 // compute RP  | 
    80 def badd(s1: String, s2: String) : String =   | 
    72 // transform RP into Exp  | 
    81   (BigInt(s1, 2) + BigInt(s2, 2)).toString(2)  | 
    73 // process RP string and generate Exp  | 
    82   | 
    74   | 
    83   | 
    75   | 
    84 "111".dropRight(1)  | 
    76   | 
    85 "111".last  | 
    77 def string(e: Exp) : String = e match { | 
    86   | 
    78   case N(n) => n.toString  | 
    87 def bcollatz(s: String) : Long = (s.dropRight(1), s.last) match { | 
    79   case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" | 
    88   case ("", '1') => 1                               // we reached 1 | 
    80   case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" | 
    89   case (rest, '0') => 1 + bcollatz(rest)              | 
    81 }  | 
    90                                   // even number => divide by two  | 
    82   | 
    91   case (rest, '1') => 1 + bcollatz(badd(s + '1', s))  | 
    83 val e = Plus(N(9), Times(N(3), N(4)))  | 
    92                                   // odd number => s + '1' is 2 * s + 1  | 
    84   | 
    93                                   // add another s gives 3 * s + 1    | 
    85 println(string(e))  | 
    94 }   | 
    86   | 
    95   | 
    87 def eval(e: Exp) : Int = e match { | 
    96 bcollatz(6.toBinaryString)  | 
    88   case N(n) => n  | 
    97 bcollatz(837799.toBinaryString)  | 
    89   case Plus(e1, e2) => eval(e1) + eval(e2)  | 
    98 bcollatz(100000000000000000L.toBinaryString)  | 
    90   case Times(e1, e2) => eval(e1) * eval(e2)  | 
    99 bcollatz(BigInt("1000000000000000000000000000000000000000000000000000000000000000000000000000").toString(2)) | 
    91 }  | 
   100   | 
    92   | 
   101   | 
    93 eval(e)  | 
   102   | 
    94   | 
   103   | 
    95 def simp(e: Exp) : Exp = e match { | 
   104 // User-defined Datatypes  | 
    96   case N(n) => N(n)  | 
   105 //========================  | 
    97   case Plus(e1, e2) => (simp(e1), simp(e2)) match { | 
   106   | 
    98     case (N(0), e2s) => e2s  | 
   107 abstract class Colour  | 
    99     case (e1s, N(0)) => e1s  | 
   108 case object Red extends Colour   | 
   100     case (e1s, e2s) => Plus(e1s, e2s)   | 
   109 case object Green extends Colour   | 
   101   }  | 
   110 case object Blue extends Colour  | 
   102   case Times(e1, e2) => (simp(e1), simp(e2)) match { | 
   111   | 
   103     case (N(0), e2s) => N(0)  | 
   112 def fav_colour(c: Colour) : Boolean = c match { | 
   104     case (e1s, N(0)) => N(0)  | 
   113   case Red   => false  | 
   105     case (N(1), e2s) => e2s  | 
   114   case Green => true  | 
   106     case (e1s, N(1)) => e1s  | 
   115   case Blue  => false   | 
   107     case (e1s, e2s) => Times(e1s, e2s)   | 
   116 }  | 
   108   }  | 
   117   | 
   109 }  | 
   118 fav_colour(Green)  | 
   110   | 
   119   | 
   111   | 
   120   | 
   112 val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))  | 
   121 // actually colors can be written with "object",  | 
   113 println(string(e2))  | 
   122 // because they do not take any arguments  | 
   114 println(string(simp(e2)))  | 
   123   | 
   115   | 
   124 abstract class Day  | 
   116 // Token and Reverse Polish Notation  | 
   125 case object Monday extends Day   | 
   117 abstract class Token  | 
   126 case object Tuesday extends Day   | 
   118 case class T(n: Int) extends Token  | 
   127 case object Wednesday extends Day  | 
   119 case object PL extends Token  | 
   128 case object Thursday extends Day   | 
   120 case object TI extends Token  | 
   129 case object Friday extends Day   | 
   121   | 
   130 case object Saturday extends Day  | 
   122 def rp(e: Exp) : List[Token] = e match { | 
   131 case object Sunday extends Day   | 
   123   case N(n) => List(T(n))  | 
   132   | 
   124   case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)  | 
   133 abstract class Suit  | 
   125   case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)  | 
   134 case object Spades extends Suit  | 
   126 }  | 
   135 case object Hearts extends Suit  | 
   127   | 
   136 case object Diamonds extends Suit  | 
   128 def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match { | 
   137 case object Clubs extends Suit  | 
   129   case (Nil, st) => st.head  | 
   138   | 
   130   case (T(n)::rest, st) => comp(rest, n::st)  | 
   139 //define function for colour of suits  | 
   131   case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)  | 
   140   | 
   132   case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)  | 
   141 abstract class Rank  | 
   133 }  | 
   142 case class Ace extends Rank  | 
   134   | 
   143 case class King extends Rank  | 
   135 def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match { | 
   144 case class Queen extends Rank  | 
   136   case (Nil, st) => st.head  | 
   145 case class Jack extends Rank  | 
   137   case (T(n)::rest, st) => exp(rest, N(n)::st)  | 
   146 case class Num(n: Int) extends Rank  | 
   138   case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st)  | 
   147   | 
   139   case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st)  | 
   148 //define functions for beats  | 
   140 }  | 
   149 //beats Ace _ => true  | 
   141   | 
   150 //beats _ Ace => false  | 
   142 exp(toks(e2), Nil)  | 
   151   | 
   143   | 
   152   | 
   144 def proc(s: String) = s match { | 
   153 // ... a bit more useful: Roman Numerals  | 
   145   case "+" => PL  | 
   154   | 
   146   case "*" => TI  | 
   155 abstract class RomanDigit   | 
   147   case n => T(n.toInt)  | 
   156 case object I extends RomanDigit   | 
   148 }  | 
   157 case object V extends RomanDigit   | 
   149   | 
   158 case object X extends RomanDigit   | 
   150   | 
   159 case object L extends RomanDigit   | 
   151 string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)) | 
   160 case object C extends RomanDigit   | 
         | 
   161 case object D extends RomanDigit   | 
         | 
   162 case object M extends RomanDigit   | 
         | 
   163   | 
         | 
   164 type RomanNumeral = List[RomanDigit]   | 
         | 
   165   | 
         | 
   166 def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {  | 
         | 
   167   case Nil => 0  | 
         | 
   168   case M::r    => 1000 + RomanNumeral2Int(r)    | 
         | 
   169   case C::M::r => 900 + RomanNumeral2Int(r)  | 
         | 
   170   case D::r    => 500 + RomanNumeral2Int(r)  | 
         | 
   171   case C::D::r => 400 + RomanNumeral2Int(r)  | 
         | 
   172   case C::r    => 100 + RomanNumeral2Int(r)  | 
         | 
   173   case X::C::r => 90 + RomanNumeral2Int(r)  | 
         | 
   174   case L::r    => 50 + RomanNumeral2Int(r)  | 
         | 
   175   case X::L::r => 40 + RomanNumeral2Int(r)  | 
         | 
   176   case X::r    => 10 + RomanNumeral2Int(r)  | 
         | 
   177   case I::X::r => 9 + RomanNumeral2Int(r)  | 
         | 
   178   case V::r    => 5 + RomanNumeral2Int(r)  | 
         | 
   179   case I::V::r => 4 + RomanNumeral2Int(r)  | 
         | 
   180   case I::r    => 1 + RomanNumeral2Int(r)  | 
         | 
   181 }  | 
         | 
   182   | 
         | 
   183 RomanNumeral2Int(List(I,V))             // 4  | 
         | 
   184 RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number)  | 
         | 
   185 RomanNumeral2Int(List(V,I))             // 6  | 
         | 
   186 RomanNumeral2Int(List(I,X))             // 9  | 
         | 
   187 RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979  | 
         | 
   188 RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017  | 
         | 
   189   | 
         | 
   190   | 
         | 
   191   | 
         | 
   192 // another example  | 
         | 
   193 //=================  | 
         | 
   194   | 
         | 
   195 // Once upon a time, in a completely fictional country there were Persons...  | 
         | 
   196   | 
         | 
   197 abstract class Person  | 
         | 
   198 case object King extends Person  | 
         | 
   199 case class Peer(deg: String, terr: String, succ: Int) extends Person  | 
         | 
   200 case class Knight(name: String) extends Person  | 
         | 
   201 case class Peasant(name: String) extends Person  | 
         | 
   202 case object Clown extends Person  | 
         | 
   203   | 
         | 
   204 def title(p: Person): String = p match { | 
         | 
   205   case King => "His Majesty the King"  | 
         | 
   206   case Peer(deg, terr, _) => s"The ${deg} of ${terr}" | 
         | 
   207   case Knight(name) => s"Sir ${name}" | 
         | 
   208   case Peasant(name) => name  | 
         | 
   209   case Clown => "My name is Boris Johnson"  | 
         | 
   210   | 
         | 
   211 }  | 
         | 
   212   | 
         | 
   213 title(Clown)  | 
         | 
   214   | 
         | 
   215   | 
         | 
   216   | 
         | 
   217 def superior(p1: Person, p2: Person): Boolean = (p1, p2) match { | 
         | 
   218   case (King, _) => true  | 
         | 
   219   case (Peer(_,_,_), Knight(_)) => true  | 
         | 
   220   case (Peer(_,_,_), Peasant(_)) => true  | 
         | 
   221   case (Peer(_,_,_), Clown) => true  | 
         | 
   222   case (Knight(_), Peasant(_)) => true  | 
         | 
   223   case (Knight(_), Clown) => true  | 
         | 
   224   case (Clown, Peasant(_)) => true  | 
         | 
   225   case _ => false  | 
         | 
   226 }  | 
         | 
   227   | 
         | 
   228 val people = List(Knight("David"),  | 
         | 
   229                   Peer("Duke", "Norfolk", 84),  | 
         | 
   230                   Peasant("Christian"),  | 
         | 
   231                   King,   | 
         | 
   232                   Clown)  | 
         | 
   233   | 
         | 
   234 println(people.sortWith(superior(_, _)).mkString(", ")) | 
         | 
   235   | 
         | 
   236   | 
   152   | 
   237   | 
   153   | 
   238   | 
   154   | 
   239 // Tail recursion  | 
   155 // Tail recursion  | 
   240 //================  | 
   156 //================  | 
   267 // call; Scala can do this only for tail-recursive  | 
   183 // call; Scala can do this only for tail-recursive  | 
   268 // functions  | 
   184 // functions  | 
   269   | 
   185   | 
   270   | 
   186   | 
   271   | 
   187   | 
   272 // sudoku again  | 
   188 // Jumping Towers  | 
         | 
   189 //================  | 
         | 
   190   | 
         | 
   191   | 
         | 
   192 // the first n suffixes of xs (xs itself, then its successive tails)  | 
         | 
   193 // for 1 => include xs  | 
         | 
   194   | 
         | 
   195 def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match { | 
         | 
   196   case (Nil, _) => Nil  | 
         | 
   197   case (xs, 0) => Nil  | 
         | 
   198   case (x::xs, n) => (x::xs) :: moves(xs, n - 1)  | 
         | 
   199 }  | 
         | 
   200   | 
         | 
   201   | 
         | 
   202 moves(List(5,1,0), 1)  | 
         | 
   203 moves(List(5,1,0), 2)  | 
         | 
   204 moves(List(5,1,0), 5)  | 
         | 
   205   | 
         | 
   206 // checks whether a jump tour exists at all  | 
         | 
   207 // in the second case it needs to be < instead of <=  | 
         | 
   208   | 
         | 
   209 def search(xs: List[Int]) : Boolean = xs match { | 
         | 
   210   case Nil => true  | 
         | 
   211   case (x::xs) =>  | 
         | 
   212     if (xs.length < x) true else moves(xs, x).exists(search(_))  | 
         | 
   213 }  | 
         | 
   214   | 
         | 
   215   | 
         | 
   216 search(List(5,3,2,5,1,1))  | 
         | 
   217 search(List(3,5,1,0,0,0,1))  | 
         | 
   218 search(List(3,5,1,0,0,0,0,1))  | 
         | 
   219 search(List(3,5,1,0,0,0,1,1))  | 
         | 
   220 search(List(3,5,1))  | 
         | 
   221 search(List(5,1,1))  | 
         | 
   222 search(Nil)  | 
         | 
   223 search(List(1))  | 
         | 
   224 search(List(5,1,1))  | 
         | 
   225 search(List(3,5,1,0,0,0,0,0,0,0,0,1))  | 
         | 
   226   | 
         | 
   227 // generates *all* jump tours  | 
         | 
   228 //    if we are only interested in the shortest one, we could  | 
         | 
   229 //    short-circuit the calculation and only return List(x) in  | 
         | 
   230 //    the case where xs.length < x, because no tour can be shorter  | 
         | 
   231 //    than 1  | 
         | 
   232 //   | 
         | 
   233   | 
         | 
   234 def jumps(xs: List[Int]) : List[List[Int]] = xs match { | 
         | 
   235   case Nil => Nil  | 
         | 
   236   case (x::xs) => { | 
         | 
   237     val children = moves(xs, x)  | 
         | 
   238     val results = children.flatMap((cs) => jumps(cs).map(x :: _))  | 
         | 
   239     if (xs.length < x) List(x) :: results else results  | 
         | 
   240   }  | 
         | 
   241 }  | 
         | 
   242   | 
         | 
   243   | 
         | 
   244   | 
         | 
   245 jumps(List(5,3,2,5,1,1))  | 
         | 
   246 jumps(List(3,5,1,2,1,2,1))  | 
         | 
   247 jumps(List(3,5,1,2,3,4,1))  | 
         | 
   248 jumps(List(3,5,1,0,0,0,1))  | 
         | 
   249 jumps(List(3,5,1))  | 
         | 
   250 jumps(List(5,1,1))  | 
         | 
   251 jumps(Nil)  | 
         | 
   252 jumps(List(1))  | 
         | 
   253 jumps(List(5,1,2))  | 
         | 
   254 moves(List(1,2), 5)  | 
         | 
   255 jumps(List(1,5,1,2))  | 
         | 
   256 jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))  | 
         | 
   257   | 
         | 
   258 jumps(List(5,3,2,5,1,1)).minBy(_.length)  | 
         | 
   259 jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)  | 
         | 
   260 jumps(List(1,3,6,1,0,9)).minBy(_.length)  | 
         | 
   261 jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)  | 
         | 
   262   | 
         | 
   263   | 
         | 
   264   | 
         | 
   265   | 
         | 
   266   | 
         | 
   267   | 
         | 
   268   | 
         | 
   269   | 
         | 
   270   | 
         | 
   271 // Sudoku   | 
         | 
   272 //========  | 
         | 
   273   | 
         | 
   274 // THE POINT OF THIS CODE IS NOT TO BE SUPER  | 
         | 
   275 // EFFICIENT AND FAST, just explaining exhaustive  | 
         | 
   276 // depth-first search  | 
         | 
   277   | 
   273   | 
   278   | 
   274 val game0 = """.14.6.3..  | 
   279 val game0 = """.14.6.3..  | 
   275               |62...4..9  | 
   280               |62...4..9  | 
   276               |.8..5.6..  | 
   281               |.8..5.6..  | 
   277               |.6.2....3  | 
   282               |.6.2....3  |