|      1 // Scala Lecture 3 |      1 // Scala Lecture 3 | 
|      2 //================= |      2 //================= | 
|      3  |      3  | 
|      4 // Pattern Matching |      4  | 
|      5 //================== |      5 // A Web Crawler / Email Harvester | 
|      6  |      6 //================================= | 
|      7 // A powerful tool which is supposed to come to Java in a few years |         | 
|      8 // time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala has already |         | 
|      9 // had it for many years. Other functional languages have had it for |         | 
|     10 // decades. I think I would be really upset if a programming language  |         | 
|     11 // I have to use does not have pattern matching....it is just so  |         | 
|     12 // useful. ;o) |         | 
|     13  |         | 
|     14 // The general schema: |         | 
|     15 // |      7 // | 
|     16 //    expression match { |      8 // the idea is to look for links using the | 
|     17 //       case pattern1 => expression1 |      9 // regular expression "https?://[^"]*" and for | 
|     18 //       case pattern2 => expression2 |     10 // email addresses using another regex. | 
|     19 //       ... |     11  | 
|     20 //       case patternN => expressionN |     12 import io.Source | 
|     21 //    } |     13 import scala.util._ | 
|     22  |     14  | 
|     23  |     15 // gets the first 10K of a web-page | 
|     24 // remember |     16 def get_page(url: String) : String = { | 
|     25 val lst = List(None, Some(1), Some(2), None, Some(3)).flatten |     17   Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). | 
|     26  |     18     getOrElse { println(s"  Problem with: $url"); ""} | 
|     27  |     19 } | 
|     28 def my_flatten(xs: List[Option[Int]]): List[Int] = { |     20  | 
|     29   if (xs == Nil) Nil |     21 // regex for URLs and emails | 
|     30   else if (xs.head == None) my_flatten(xs.tail) |     22 val http_pattern = """"https?://[^"]*"""".r | 
|     31   else xs.head.get :: my_flatten(xs.tail) |     23 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r | 
|     32 } |     24  | 
|     33  |     25 //email_pattern.findAllIn | 
|     34  |     26 //  ("foo bla christian@kcl.ac.uk 1234567").toList | 
|     35  |     27  | 
|     36 val lst = List(None, Some(1), Some(2), None, Some(3)) |     28  | 
|     37  |     29 // drops the first and last character from a string | 
|     38 def my_flatten(lst: List[Option[Int]]): List[Int] = lst match { |     30 def unquote(s: String) = s.drop(1).dropRight(1) | 
|     39   case Nil => Nil |     31  | 
|     40   case None::xs => my_flatten(xs) |     32 def get_all_URLs(page: String): Set[String] =  | 
|     41   case Some(n)::xs => n::my_flatten(xs) |     33   http_pattern.findAllIn(page).map(unquote).toSet | 
|     42 } |     34  | 
|     43  |     35 // naive version of crawl - searches until a given depth, | 
|     44 my_flatten(lst) |     36 // visits pages potentially more than once | 
|     45  |     37 def crawl(url: String, n: Int) : Set[String] = { | 
|     46 Nil == List() |     38   if (n == 0) Set() | 
|     47  |     39   else { | 
|     48  |     40     println(s"  Visiting: $n $url") | 
|     49 // another example including a catch-all pattern |     41     val page = get_page(url) | 
|     50 def get_me_a_string(n: Int): String = n match { |     42     val new_emails = email_pattern.findAllIn(page).toSet | 
|     51   case 0 => "zero" |     43     new_emails ++  | 
|     52   case 1 => "one" |     44       (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten | 
|     53   case 2 => "two" |     45   } | 
|     54   case _ => "many" |     46 } | 
|     55 } |     47  | 
|     56  |     48 // some starting URLs for the crawler | 
|     57 get_me_a_string(10) |     49 val startURL = """https://nms.kcl.ac.uk/christian.urban/""" | 
|     58  |     50  | 
|     59 // you can also have cases combined |     51 crawl(startURL, 2) | 
|     60 def season(month: String) = month match { |     52  | 
|     61   case "March" | "April" | "May" => "It's spring" |     53  | 
|     62   case "June" | "July" | "August" => "It's summer" |     54  | 
|     63   case "September" | "October" | "November" => "It's autumn" |     55 // User-defined Datatypes and Pattern Matching | 
|     64   case "December" | "January" | "February" => "It's winter" |     56 //============================================ | 
|     65 } |     57  | 
|     66   |     58  | 
|     67 println(season("November")) |     59 abstract class Exp | 
|     68  |     60 case class N(n: Int) extends Exp | 
|     69 // What happens if no case matches? |     61 case class Plus(e1: Exp, e2: Exp) extends Exp | 
|     70  |     62 case class Times(e1: Exp, e2: Exp) extends Exp | 
|     71 println(season("foobar")) |     63  | 
|     72  |     64  | 
|     73  |     65  | 
|     74 // we can also match more complicated pattern |     66 // string of an Exp | 
|     75 // |     67 // eval of an Exp | 
|     76 // let's look at the Collatz function on binary strings |     68 // simp an Exp | 
|     77  |     69 // Tokens | 
|     78 // adding two binary strings in a very, very lazy manner |     70 // Reverse Polish Notation | 
|     79  |     71 // compute RP | 
|     80 def badd(s1: String, s2: String) : String =  |     72 // transform RP into Exp | 
|     81   (BigInt(s1, 2) + BigInt(s2, 2)).toString(2) |     73 // process RP string and generate Exp | 
|     82  |     74  | 
|     83  |     75  | 
|     84 "111".dropRight(1) |     76  | 
|     85 "111".last |     77 def string(e: Exp) : String = e match { | 
|     86  |     78   case N(n) => n.toString | 
|     87 def bcollatz(s: String) : Long = (s.dropRight(1), s.last) match { |     79   case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" | 
|     88   case ("", '1') => 1                               // we reached 1 |     80   case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" | 
|     89   case (rest, '0') => 1 + bcollatz(rest)             |     81 } | 
|     90                                   // even number => divide by two |     82  | 
|     91   case (rest, '1') => 1 + bcollatz(badd(s + '1', s)) |     83 val e = Plus(N(9), Times(N(3), N(4))) | 
|     92                                   // odd number => s + '1' is 2 * s + 1 |     84  | 
|     93                                   // add another s gives 3 * s + 1   |     85 println(string(e)) | 
|     94 }  |     86  | 
|     95  |     87 def eval(e: Exp) : Int = e match { | 
|     96 bcollatz(6.toBinaryString) |     88   case N(n) => n | 
|     97 bcollatz(837799.toBinaryString) |     89   case Plus(e1, e2) => eval(e1) + eval(e2) | 
|     98 bcollatz(100000000000000000L.toBinaryString) |     90   case Times(e1, e2) => eval(e1) * eval(e2) | 
|     99 bcollatz(BigInt("1000000000000000000000000000000000000000000000000000000000000000000000000000").toString(2)) |     91 } | 
|    100  |     92  | 
|    101  |     93 eval(e) | 
|    102  |     94  | 
|    103  |     95 def simp(e: Exp) : Exp = e match { | 
|    104 // User-defined Datatypes |     96   case N(n) => N(n) | 
|    105 //======================== |     97   case Plus(e1, e2) => (simp(e1), simp(e2)) match { | 
|    106  |     98     case (N(0), e2s) => e2s | 
|    107 abstract class Colour |     99     case (e1s, N(0)) => e1s | 
|    108 case object Red extends Colour  |    100     case (e1s, e2s) => Plus(e1s, e2s)  | 
|    109 case object Green extends Colour  |    101   } | 
|    110 case object Blue extends Colour |    102   case Times(e1, e2) => (simp(e1), simp(e2)) match { | 
|    111  |    103     case (N(0), e2s) => N(0) | 
|    112 def fav_colour(c: Colour) : Boolean = c match { |    104     case (e1s, N(0)) => N(0) | 
|    113   case Red   => false |    105     case (N(1), e2s) => e2s | 
|    114   case Green => true |    106     case (e1s, N(1)) => e1s | 
|    115   case Blue  => false  |    107     case (e1s, e2s) => Times(e1s, e2s)  | 
|    116 } |    108   } | 
|    117  |    109 } | 
|    118 fav_colour(Green) |    110  | 
|    119  |    111  | 
|    120  |    112 val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) | 
|    121 // actually colors can be written with "object", |    113 println(string(e2)) | 
|    122 // because they do not take any arguments |    114 println(string(simp(e2))) | 
|    123  |    115  | 
|    124 abstract class Day |    116 // Token and Reverse Polish Notation | 
|    125 case object Monday extends Day  |    117 abstract class Token | 
|    126 case object Tuesday extends Day  |    118 case class T(n: Int) extends Token | 
|    127 case object Wednesday extends Day |    119 case object PL extends Token | 
|    128 case object Thursday extends Day  |    120 case object TI extends Token | 
|    129 case object Friday extends Day  |    121  | 
|    130 case object Saturday extends Day |    122 def rp(e: Exp) : List[Token] = e match { | 
|    131 case object Sunday extends Day  |    123   case N(n) => List(T(n)) | 
|    132  |    124   case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) | 
|    133 abstract class Suit |    125   case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) | 
|    134 case object Spades extends Suit |    126 } | 
|    135 case object Hearts extends Suit |    127  | 
|    136 case object Diamonds extends Suit |    128 def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match { | 
|    137 case object Clubs extends Suit |    129   case (Nil, st) => st.head | 
|    138  |    130   case (T(n)::rest, st) => comp(rest, n::st) | 
|    139 //define function for colour of suits |    131   case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) | 
|    140  |    132   case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) | 
|    141 abstract class Rank |    133 } | 
|    142 case class Ace extends Rank |    134  | 
|    143 case class King extends Rank |    135 def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match { | 
|    144 case class Queen extends Rank |    136   case (Nil, st) => st.head | 
|    145 case class Jack extends Rank |    137   case (T(n)::rest, st) => exp(rest, N(n)::st) | 
|    146 case class Num(n: Int) extends Rank |    138   case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st) | 
|    147  |    139   case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st) | 
|    148 //define functions for beats |    140 } | 
|    149 //beats Ace _ => true |    141  | 
|    150 //beats _ Ace => false |    142 exp(toks(e2), Nil) | 
|    151  |    143  | 
|    152  |    144 def proc(s: String) = s match { | 
|    153 // ... a bit more useful: Roman Numerals |    145   case "+" => PL | 
|    154  |    146   case "*" => TI | 
|    155 abstract class RomanDigit  |    147   case n => T(n.toInt) | 
|    156 case object I extends RomanDigit  |    148 } | 
|    157 case object V extends RomanDigit  |    149  | 
|    158 case object X extends RomanDigit  |    150  | 
|    159 case object L extends RomanDigit  |    151 string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)) | 
|    160 case object C extends RomanDigit  |         | 
|    161 case object D extends RomanDigit  |         | 
|    162 case object M extends RomanDigit  |         | 
|    163  |         | 
|    164 type RomanNumeral = List[RomanDigit]  |         | 
|    165  |         | 
|    166 def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {  |         | 
|    167   case Nil => 0 |         | 
|    168   case M::r    => 1000 + RomanNumeral2Int(r)   |         | 
|    169   case C::M::r => 900 + RomanNumeral2Int(r) |         | 
|    170   case D::r    => 500 + RomanNumeral2Int(r) |         | 
|    171   case C::D::r => 400 + RomanNumeral2Int(r) |         | 
|    172   case C::r    => 100 + RomanNumeral2Int(r) |         | 
|    173   case X::C::r => 90 + RomanNumeral2Int(r) |         | 
|    174   case L::r    => 50 + RomanNumeral2Int(r) |         | 
|    175   case X::L::r => 40 + RomanNumeral2Int(r) |         | 
|    176   case X::r    => 10 + RomanNumeral2Int(r) |         | 
|    177   case I::X::r => 9 + RomanNumeral2Int(r) |         | 
|    178   case V::r    => 5 + RomanNumeral2Int(r) |         | 
|    179   case I::V::r => 4 + RomanNumeral2Int(r) |         | 
|    180   case I::r    => 1 + RomanNumeral2Int(r) |         | 
|    181 } |         | 
|    182  |         | 
|    183 RomanNumeral2Int(List(I,V))             // 4 |         | 
|    184 RomanNumeral2Int(List(I,I,I,I))         // 4 (invalid Roman number) |         | 
|    185 RomanNumeral2Int(List(V,I))             // 6 |         | 
|    186 RomanNumeral2Int(List(I,X))             // 9 |         | 
|    187 RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 |         | 
|    188 RomanNumeral2Int(List(M,M,X,V,I,I))     // 2017 |         | 
|    189  |         | 
|    190  |         | 
|    191  |         | 
|    192 // another example |         | 
|    193 //================= |         | 
|    194  |         | 
|    195 // Once upon a time, in a complete fictional country there were Persons... |         | 
|    196  |         | 
|    197 abstract class Person |         | 
|    198 case object King extends Person |         | 
|    199 case class Peer(deg: String, terr: String, succ: Int) extends Person |         | 
|    200 case class Knight(name: String) extends Person |         | 
|    201 case class Peasant(name: String) extends Person |         | 
|    202 case object Clown extends Person |         | 
|    203  |         | 
|    204 def title(p: Person): String = p match { |         | 
|    205   case King => "His Majesty the King" |         | 
|    206   case Peer(deg, terr, _) => s"The ${deg} of ${terr}" |         | 
|    207   case Knight(name) => s"Sir ${name}" |         | 
|    208   case Peasant(name) => name |         | 
|    209   case Clown => "My name is Boris Johnson" |         | 
|    210  |         | 
|    211 } |         | 
|    212  |         | 
|    213 title(Clown) |         | 
|    214  |         | 
|    215  |         | 
|    216  |         | 
|    217 def superior(p1: Person, p2: Person): Boolean = (p1, p2) match { |         | 
|    218   case (King, _) => true |         | 
|    219   case (Peer(_,_,_), Knight(_)) => true |         | 
|    220   case (Peer(_,_,_), Peasant(_)) => true |         | 
|    221   case (Peer(_,_,_), Clown) => true |         | 
|    222   case (Knight(_), Peasant(_)) => true |         | 
|    223   case (Knight(_), Clown) => true |         | 
|    224   case (Clown, Peasant(_)) => true |         | 
|    225   case _ => false |         | 
|    226 } |         | 
|    227  |         | 
|    228 val people = List(Knight("David"),  |         | 
|    229                   Peer("Duke", "Norfolk", 84),  |         | 
|    230                   Peasant("Christian"),  |         | 
|    231                   King,  |         | 
|    232                   Clown) |         | 
|    233  |         | 
|    234 println(people.sortWith(superior(_, _)).mkString(", ")) |         | 
|    235  |         | 
|    236  |    152  | 
|    237  |    153  | 
|    238  |    154  | 
|    239 // Tail recursion |    155 // Tail recursion | 
|    240 //================ |    156 //================ | 
|    267 // call; Scala can do this only for tail-recursive |    183 // call; Scala can do this only for tail-recursive | 
|    268 // functions |    184 // functions | 
|    269  |    185  | 
|    270  |    186  | 
|    271  |    187  | 
|    272 // sudoku again |    188 // Jumping Towers | 
|         |    189 //================ | 
|         |    190  | 
|         |    191  | 
|         |    192 // the first n suffixes of xs | 
|         |    193 // for 1 => include xs | 
|         |    194  | 
|         |    195 def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match { | 
|         |    196   case (Nil, _) => Nil | 
|         |    197   case (xs, 0) => Nil | 
|         |    198   case (x::xs, n) => (x::xs) :: moves(xs, n - 1) | 
|         |    199 } | 
|         |    200  | 
|         |    201  | 
|         |    202 moves(List(5,1,0), 1) | 
|         |    203 moves(List(5,1,0), 2) | 
|         |    204 moves(List(5,1,0), 5) | 
|         |    205  | 
|         |    206 // checks whether a jump tour exists at all | 
|         |    207 // in the second case it needs to be < instead of <= | 
|         |    208  | 
|         |    209 def search(xs: List[Int]) : Boolean = xs match { | 
|         |    210   case Nil => true | 
|         |    211   case (x::xs) => | 
|         |    212     if (xs.length < x) true else moves(xs, x).exists(search(_)) | 
|         |    213 } | 
|         |    214  | 
|         |    215  | 
|         |    216 search(List(5,3,2,5,1,1)) | 
|         |    217 search(List(3,5,1,0,0,0,1)) | 
|         |    218 search(List(3,5,1,0,0,0,0,1)) | 
|         |    219 search(List(3,5,1,0,0,0,1,1)) | 
|         |    220 search(List(3,5,1)) | 
|         |    221 search(List(5,1,1)) | 
|         |    222 search(Nil) | 
|         |    223 search(List(1)) | 
|         |    224 search(List(5,1,1)) | 
|         |    225 search(List(3,5,1,0,0,0,0,0,0,0,0,1)) | 
|         |    226  | 
|         |    227 // generates *all* jump tours | 
|         |    228 //    if we are only interested in the shortest one, we could | 
|         |    229 //    short-circuit the calculation and only return List(x) in | 
|         |    230 //    case where xs.length < x, because no tour can be shorter | 
|         |    231 //    than 1 | 
|         |    232 //  | 
|         |    233  | 
|         |    234 def jumps(xs: List[Int]) : List[List[Int]] = xs match { | 
|         |    235   case Nil => Nil | 
|         |    236   case (x::xs) => { | 
|         |    237     val children = moves(xs, x) | 
|         |    238     val results = children.flatMap((cs) => jumps(cs).map(x :: _)) | 
|         |    239     if (xs.length < x) List(x) :: results else results | 
|         |    240   } | 
|         |    241 } | 
|         |    242  | 
|         |    243  | 
|         |    244  | 
|         |    245 jumps(List(5,3,2,5,1,1)) | 
|         |    246 jumps(List(3,5,1,2,1,2,1)) | 
|         |    247 jumps(List(3,5,1,2,3,4,1)) | 
|         |    248 jumps(List(3,5,1,0,0,0,1)) | 
|         |    249 jumps(List(3,5,1)) | 
|         |    250 jumps(List(5,1,1)) | 
|         |    251 jumps(Nil) | 
|         |    252 jumps(List(1)) | 
|         |    253 jumps(List(5,1,2)) | 
|         |    254 moves(List(1,2), 5) | 
|         |    255 jumps(List(1,5,1,2)) | 
|         |    256 jumps(List(3,5,1,0,0,0,0,0,0,0,0,1)) | 
|         |    257  | 
|         |    258 jumps(List(5,3,2,5,1,1)).minBy(_.length) | 
|         |    259 jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length) | 
|         |    260 jumps(List(1,3,6,1,0,9)).minBy(_.length) | 
|         |    261 jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length) | 
|         |    262  | 
|         |    263  | 
|         |    264  | 
|         |    265  | 
|         |    266  | 
|         |    267  | 
|         |    268  | 
|         |    269  | 
|         |    270  | 
|         |    271 // Sudoku  | 
|         |    272 //======== | 
|         |    273  | 
|         |    274 // THE POINT OF THIS CODE IS NOT TO BE SUPER | 
|         |    275 // EFFICIENT AND FAST, just explaining exhaustive | 
|         |    276 // depth-first search | 
|         |    277  | 
|    273  |    278  | 
|    274 val game0 = """.14.6.3.. |    279 val game0 = """.14.6.3.. | 
|    275               |62...4..9 |    280               |62...4..9 | 
|    276               |.8..5.6.. |    281               |.8..5.6.. | 
|    277               |.6.2....3 |    282               |.6.2....3 |