| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Mon, 09 Nov 2020 14:36:35 +0000 | |
| changeset 358 | 837bd2c4dd57 | 
| parent 343 | 51e25cc30483 | 
| child 364 | 18942af74fa1 | 
| permissions | -rw-r--r-- | 
| 67 | 1  | 
// Scala Lecture 3  | 
2  | 
//=================  | 
|
3  | 
||
| 320 | 4  | 
// - last week  | 
5  | 
//  | 
|
6  | 
// option type  | 
|
7  | 
// higher-order function  | 
|
8  | 
||
9  | 
||
| 323 | 10  | 
def add(x: Int, y: Int) : Int = x + y  | 
11  | 
||
12  | 
def plus5(x: Int) : Int = add(5, x)  | 
|
13  | 
||
14  | 
plus5(6)  | 
|
15  | 
||
16  | 
def add2(x: Int)(y: Int) : Int = x + y  | 
|
17  | 
||
18  | 
// plus3 fixes the first argument of the curried add2 to 3; applying both
// argument lists yields an Int (not a function of type Int => Int)
def plus3(y: Int) : Int = add2(3)(y)
|
19  | 
||
20  | 
plus3(9)  | 
|
21  | 
||
22  | 
List(1,2,3,4,5).map(add2(3))  | 
|
23  | 
List(1,2,3,4,5).map(add(3, _))  | 
|
24  | 
||
25  | 
type Pos = (Int, Int)  | 
|
26  | 
||
27  | 
// returns Some(p) if both coordinates of p are smaller than 5, and
// None otherwise; without the else-branch the if-expression would have
// type Any and the results of map(test) could not be flattened
def test(p: Pos) : Option[Pos] = {
  if (p._1 < 5 && p._2 < 5) Some(p)
  else None
}
|
32  | 
||
33  | 
val l = List((1,2), (5,3), (2,5), (1,3))  | 
|
34  | 
||
35  | 
l.map(test).flatten  | 
|
| 320 | 36  | 
|
| 343 | 37  | 
// naive quicksort with "On" function  | 
38  | 
||
39  | 
// sorts xs in ascending order of the keys that f assigns to the elements;
// lists with fewer than two elements are already sorted
def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = xs match {
  case Nil | _ :: Nil => xs
  case pivot :: _ => {
    val (smaller, others) = xs.partition(x => f(x) < f(pivot))
    // the pivot is the head of xs and its key is never strictly smaller
    // than itself, so it is always the first element of others
    sortOn(f, smaller) ::: pivot :: sortOn(f, others.tail)
  }
}
|
47  | 
||
48  | 
sortOn(identity, List(99,99,99,98,10,-3,2))  | 
|
49  | 
sortOn(n => - n, List(99,99,99,98,10,-3,2))  | 
|
50  | 
||
51  | 
||
52  | 
||
53  | 
||
| 320 | 54  | 
// Recursion Again ;o)  | 
55  | 
//====================  | 
|
56  | 
||
| 217 | 57  | 
|
58  | 
// A Web Crawler / Email Harvester  | 
|
59  | 
//=================================  | 
|
60  | 
//  | 
|
61  | 
// the idea is to look for links using the  | 
|
62  | 
// regular expression "https?://[^"]*" and for  | 
|
| 218 | 63  | 
// email addresses using yet another regex.  | 
| 217 | 64  | 
|
65  | 
import io.Source  | 
|
66  | 
import scala.util._  | 
|
| 155 | 67  | 
|
| 217 | 68  | 
// gets the first 10K of a web-page  | 
69  | 
// reads at most the first 10000 characters of the page at url (decoded
// as ISO-8859-1); if anything goes wrong a message is printed and the
// empty string is returned
def get_page(url: String) : String = {
  // Using closes the Source once the characters are read, also when
  // reading fails; any non-fatal exception ends up in the resulting Try
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s" Problem with: $url"); ""}
}
| 155 | 73  | 
|
| 217 | 74  | 
// regex for URLs and emails  | 
75  | 
val http_pattern = """"https?://[^"]*"""".r  | 
|
76  | 
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
 | 
|
77  | 
||
| 218 | 78  | 
// val s = "foo bla christian@kcl.ac.uk 1234567"  | 
79  | 
// email_pattern.findAllIn(s).toList  | 
|
| 155 | 80  | 
|
| 217 | 81  | 
// drops the first and last character from a string  | 
82  | 
def unquote(s: String) = s.drop(1).dropRight(1)  | 
|
| 155 | 83  | 
|
| 217 | 84  | 
def get_all_URLs(page: String): Set[String] =  | 
85  | 
http_pattern.findAllIn(page).map(unquote).toSet  | 
|
| 155 | 86  | 
|
| 320 | 87  | 
// a naive version of crawl - searches until a given depth,  | 
| 217 | 88  | 
// visits pages potentially more than once  | 
| 320 | 89  | 
// prints the visited url together with the remaining depth n, and then
// crawls every link found on that page with depth n - 1; stops at depth 0
def crawl(url: String, n: Int) : Unit = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val links = get_all_URLs(get_page(url))
    links.foreach(link => crawl(link, n - 1))
  }
}
97  | 
||
| 217 | 98  | 
// some starting URLs for the crawler  | 
99  | 
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""  | 
|
| 320 | 100  | 
|
| 217 | 101  | 
crawl(startURL, 2)  | 
102  | 
||
| 323 | 103  | 
for (x <- List(1,2,3,4,5,6)) println(x)  | 
| 318 | 104  | 
|
| 320 | 105  | 
// a primitive email harvester  | 
106  | 
// collects the email addresses on the page at url and, up to depth n,
// on all pages reachable from it; the linked pages are requested via a
// parallel collection (.par needs the parallel-collections library)
def emails(url: String, n: Int) : Set[String] = {
  if (n != 0) {
    println(s" Visiting: $n $url")
    val page = get_page(url)
    val found = email_pattern.findAllIn(page).toSet
    found ++ (for (link <- get_all_URLs(page).par) yield emails(link, n - 1)).flatten
  }
  else Set()
}
115  | 
||
| 323 | 116  | 
emails(startURL, 3)  | 
| 218 | 117  | 
|
118  | 
||
| 320 | 119  | 
// if we want to explore the internet "deeper", then we  | 
120  | 
// first have to parallelise the request of webpages:  | 
|
121  | 
//  | 
|
122  | 
// scala -cp scala-parallel-collections_2.13-0.2.0.jar  | 
|
123  | 
// import scala.collection.parallel.CollectionConverters._  | 
|
| 155 | 124  | 
|
125  | 
||
126  | 
||
| 320 | 127  | 
// another well-known example  | 
128  | 
//============================  | 
|
| 178 | 129  | 
|
| 320 | 130  | 
// prints the instruction for moving a single disc
def move(from: Char, to: Char) : Unit =
  println(s"Move disc from $from to $to!")

// prints the moves that transfer a tower of n discs from peg `from`
// to peg `to`, using peg `via` as intermediate store
def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
  if (n != 0) {
    hanoi(n - 1, from, to, via)   // park the n - 1 upper discs on via
    move(from, to)                // move the remaining disc
    hanoi(n - 1, via, from, to)   // put the parked discs on top of it
  }
}
|
| 67 | 141  | 
|
| 320 | 142  | 
hanoi(4, 'A', 'B', 'C')  | 
| 67 | 143  | 
|
| 155 | 144  | 
|
145  | 
||
| 217 | 146  | 
// Jumping Towers  | 
147  | 
//================  | 
|
148  | 
||
149  | 
||
150  | 
// the first n suffixes of xs (xs itself, then its successive tails)
|
151  | 
// for 1 => include xs  | 
|
152  | 
||
153  | 
// returns xs followed by its successive tails, at most n lists in total
// (fewer if xs runs out first); the result is empty if xs is empty or n is 0
def moves(xs: List[Int], n: Int) : List[List[Int]] =
  if (xs.isEmpty || n == 0) Nil
  else xs :: moves(xs.tail, n - 1)
158  | 
||
159  | 
||
160  | 
moves(List(5,1,0), 1)  | 
|
161  | 
moves(List(5,1,0), 2)  | 
|
162  | 
moves(List(5,1,0), 5)  | 
|
163  | 
||
164  | 
// checks whether a jump tour exists at all  | 
|
165  | 
||
166  | 
// the empty list counts as solved; otherwise the first number is the
// maximal length of the next jump
def search(xs: List[Int]) : Boolean = xs match {
  case Nil => true
  case jump :: rest =>
    // either the jump reaches beyond the remaining fields, or a tour
    // has to exist from one of the fields within reach
    rest.length < jump || moves(rest, jump).exists(search)
}
172  | 
||
173  | 
||
174  | 
search(List(5,3,2,5,1,1))  | 
|
175  | 
search(List(3,5,1,0,0,0,1))  | 
|
176  | 
search(List(3,5,1,0,0,0,0,1))  | 
|
177  | 
search(List(3,5,1,0,0,0,1,1))  | 
|
178  | 
search(List(3,5,1))  | 
|
179  | 
search(List(5,1,1))  | 
|
180  | 
search(Nil)  | 
|
181  | 
search(List(1))  | 
|
182  | 
search(List(5,1,1))  | 
|
183  | 
search(List(3,5,1,0,0,0,0,0,0,0,0,1))  | 
|
184  | 
||
185  | 
// generates *all* jump tours  | 
|
| 321 | 186  | 
// if we are only interested in the shortest one, we could  | 
| 217 | 187  | 
// short-circuit the calculation and only return List(x) in
188  | 
// case where xs.length < x, because no tour can be shorter  | 
|
189  | 
// than 1  | 
|
190  | 
//  | 
|
191  | 
||
192  | 
// every tour is given as the list of jump numbers it uses; a tour
// through a field within reach is extended by the current jump number
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
  case Nil => Nil
  case jump :: rest => {
    val tours = moves(rest, jump).flatMap(next => jumps(next).map(jump :: _))
    // a jump that reaches beyond the remaining fields is a tour on its own
    if (rest.length < jump) List(jump) :: tours else tours
  }
}
|
200  | 
||
| 320 | 201  | 
jumps(List(5,3,2,5,1,1)).minBy(_.length)  | 
| 217 | 202  | 
jumps(List(3,5,1,2,1,2,1))  | 
203  | 
jumps(List(3,5,1,2,3,4,1))  | 
|
204  | 
jumps(List(3,5,1,0,0,0,1))  | 
|
205  | 
jumps(List(3,5,1))  | 
|
206  | 
jumps(List(5,1,1))  | 
|
207  | 
jumps(Nil)  | 
|
208  | 
jumps(List(1))  | 
|
209  | 
jumps(List(5,1,2))  | 
|
210  | 
moves(List(1,2), 5)  | 
|
211  | 
jumps(List(1,5,1,2))  | 
|
212  | 
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))  | 
|
213  | 
||
214  | 
jumps(List(5,3,2,5,1,1)).minBy(_.length)  | 
|
215  | 
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)  | 
|
216  | 
jumps(List(1,3,6,1,0,9)).minBy(_.length)  | 
|
217  | 
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)  | 
|
218  | 
||
219  | 
||
220  | 
||
| 318 | 221  | 
|
222  | 
||
223  | 
||
| 320 | 224  | 
// User-defined Datatypes  | 
225  | 
//========================  | 
|
226  | 
||
| 323 | 227  | 
// binary trees with Int leaves and String-labelled inner nodes; sealed
// (like Colour and RomanDigit) so that the compiler can check pattern
// matches over Tree for exhaustiveness
sealed abstract class Tree
case class Leaf(x: Int) extends Tree
case class Node(s: String, left: Tree, right: Tree) extends Tree
|
230  | 
||
231  | 
List(Leaf(20), Node("foo", Leaf(1), Leaf(2)))
 | 
|
| 320 | 232  | 
|
| 321 | 233  | 
sealed abstract class Colour  | 
| 320 | 234  | 
case object Red extends Colour  | 
235  | 
case object Green extends Colour  | 
|
236  | 
case object Blue extends Colour  | 
|
| 323 | 237  | 
case object Yellow extends Colour  | 
| 320 | 238  | 
|
239  | 
||
240  | 
// Green is the only favourite colour; the other colours are listed
// explicitly instead of using a wildcard, so that the compiler warns
// about a non-exhaustive match when a colour is added to Colour
def fav_colour(c: Colour) : Boolean = c match {
  case Green => true
  case Red | Blue | Yellow => false
}
244  | 
||
245  | 
fav_colour(Green)  | 
|
246  | 
||
247  | 
// ... a tiny bit more useful: Roman Numerals  | 
|
248  | 
||
| 321 | 249  | 
sealed abstract class RomanDigit  | 
| 320 | 250  | 
case object I extends RomanDigit  | 
251  | 
case object V extends RomanDigit  | 
|
252  | 
case object X extends RomanDigit  | 
|
253  | 
case object L extends RomanDigit  | 
|
254  | 
case object C extends RomanDigit  | 
|
255  | 
case object D extends RomanDigit  | 
|
256  | 
case object M extends RomanDigit  | 
|
257  | 
||
258  | 
type RomanNumeral = List[RomanDigit]  | 
|
259  | 
||
| 323 | 260  | 
List(X,I,M,D)  | 
| 320 | 261  | 
|
262  | 
/*  | 
|
263  | 
I -> 1  | 
|
264  | 
II -> 2  | 
|
265  | 
III -> 3  | 
|
266  | 
IV -> 4  | 
|
267  | 
V -> 5  | 
|
268  | 
VI -> 6  | 
|
269  | 
VII -> 7  | 
|
270  | 
VIII -> 8  | 
|
271  | 
IX -> 9  | 
|
272  | 
X -> 10  | 
|
273  | 
*/  | 
|
274  | 
||
275  | 
// reads the digits from the left and adds up their values; the numeral
// is not checked for validity
def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
  // subtractive pairs: a smaller digit in front of a larger one
  case C::M::rest => 900 + RomanNumeral2Int(rest)
  case C::D::rest => 400 + RomanNumeral2Int(rest)
  case X::C::rest => 90 + RomanNumeral2Int(rest)
  case X::L::rest => 40 + RomanNumeral2Int(rest)
  case I::X::rest => 9 + RomanNumeral2Int(rest)
  case I::V::rest => 4 + RomanNumeral2Int(rest)
  // single digits
  case M::rest => 1000 + RomanNumeral2Int(rest)
  case D::rest => 500 + RomanNumeral2Int(rest)
  case C::rest => 100 + RomanNumeral2Int(rest)
  case L::rest => 50 + RomanNumeral2Int(rest)
  case X::rest => 10 + RomanNumeral2Int(rest)
  case V::rest => 5 + RomanNumeral2Int(rest)
  case I::rest => 1 + RomanNumeral2Int(rest)
  case Nil => 0
}
|
291  | 
||
292  | 
RomanNumeral2Int(List(I,V)) // 4  | 
|
293  | 
RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)  | 
|
294  | 
RomanNumeral2Int(List(V,I)) // 6  | 
|
295  | 
RomanNumeral2Int(List(I,X)) // 9  | 
|
296  | 
RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979  | 
|
297  | 
RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017  | 
|
298  | 
||
299  | 
||
300  | 
// String interpolations as patterns  | 
|
301  | 
||
302  | 
val date = "2019-11-26"  | 
|
303  | 
val s"$year-$month-$day" = date  | 
|
304  | 
||
305  | 
// parses dates of the form year-month-day, day/month/year and
// day.month.year into a triple (day, month, year); the result is None
// if the string has none of these shapes or if one of the components
// is not a number (instead of throwing a NumberFormatException)
def parse_date(date: String) : Option[(Int, Int, Int)] = {
  // converts the three components, failing as soon as one is not an Int
  def triple(day: String, month: String, year: String) : Option[(Int, Int, Int)] =
    for {
      d <- day.toIntOption
      m <- month.toIntOption
      y <- year.toIntOption
    } yield (d, m, y)

  date match {
    case s"$year-$month-$day" => triple(day, month, year)
    case s"$day/$month/$year" => triple(day, month, year)
    case s"$day.$month.$year" => triple(day, month, year)
    case _ => None
  }
}
|
| 318 | 311  | 
|
| 320 | 312  | 
parse_date("2019-11-26")
 | 
313  | 
parse_date("26/11/2019")
 | 
|
314  | 
parse_date("26.11.2019")
 | 
|
315  | 
||
316  | 
||
317  | 
// User-defined Datatypes and Pattern Matching  | 
|
318  | 
//=============================================  | 
|
319  | 
||
320  | 
||
321  | 
||
322  | 
||
323  | 
// Tail recursion  | 
|
324  | 
//================  | 
|
325  | 
||
326  | 
||
327  | 
// factorial on Long by plain recursion: the multiplication happens
// after the recursive call returns, so this is not tail recursive
def fact(n: Long): Long = n match {
  case 0L => 1
  case _ => n * fact(n - 1)
}
|
329  | 
||
330  | 
// the same plain recursion as fact, but on BigInt
def factB(n: BigInt): BigInt =
  if (n != 0) n * factB(n - 1) else BigInt(1)
|
332  | 
||
333  | 
factB(100000)  | 
|
334  | 
||
335  | 
fact(10) //ok  | 
|
336  | 
fact(10000) // produces a stackoverflow  | 
|
337  | 
||
338  | 
// factorial with an accumulator: the product so far is passed along in
// acc, so the recursive call is the last thing the function does
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc
|
340  | 
||
341  | 
factT(10, 1)  | 
|
342  | 
println(factT(100000, 1))  | 
|
343  | 
||
344  | 
// there is a flag for ensuring a function is tail recursive  | 
|
345  | 
import scala.annotation.tailrec  | 
|
346  | 
||
347  | 
// the annotation lets the compiler reject this definition if the
// recursive call is not a tail call
@tailrec
def factT(n: BigInt, acc: BigInt): BigInt =
  if (n != 0) factT(n - 1, n * acc) else acc
|
350  | 
||
351  | 
||
352  | 
||
353  | 
// for tail-recursive functions the Scala compiler  | 
|
354  | 
// generates loop-like code, which does not need  | 
|
355  | 
// to allocate stack-space in each recursive  | 
|
356  | 
// call; Scala can do this only for tail-recursive  | 
|
357  | 
// functions  | 
|
358  | 
||
| 155 | 359  | 
// tail recursive version that searches  | 
| 158 | 360  | 
// for all solutions  | 
361  | 
||
| 155 | 362  | 
// games is the list of games still to be explored, sols the solutions
// found so far: a game for which isDone holds is added to sols; any
// other game is replaced by the games that update produces for each of
// its candidates. @tailrec makes the compiler check that both recursive
// calls are tail calls.
@tailrec
def searchT(games: List[String], sols: List[String]): List[String] = games match {
  case Nil => sols
  case game::rest => {
    if (isDone(game)) searchT(rest, game::sols)
    else {
      val cs = candidates(game, emptyPosition(game))
      searchT(cs.map(c => update(game, empty(game), c)) ::: rest, sols)
    }
  }
}
372  | 
||
| 158 | 373  | 
searchT(List(game3), List()).map(pretty)  | 
374  | 
||
375  | 
||
| 155 | 376  | 
// tail recursive version that searches  | 
377  | 
// for a single solution  | 
|
| 158 | 378  | 
|
| 155 | 379  | 
// explores the games in the list from the front and returns the first
// game for which isDone holds (None if the list runs empty); a game
// that is not done is replaced by the games that update produces for
// each of its candidates. @tailrec makes the compiler check that the
// recursive call is a tail call.
@tailrec
def search1T(games: List[String]): Option[String] = games match {
  case Nil => None
  case game::rest => {
    if (isDone(game)) Some(game)
    else {
      val cs = candidates(game, emptyPosition(game))
      search1T(cs.map(c => update(game, empty(game), c)) ::: rest)
    }
  }
}
389  | 
||
| 158 | 390  | 
search1T(List(game3)).map(pretty)  | 
| 217 | 391  | 
time_needed(10, search1T(List(game3)))  | 
392  | 
||
| 158 | 393  | 
|
| 155 | 394  | 
// game with multiple solutions  | 
395  | 
val game3 = """.8...9743  | 
|
396  | 
|.5...8.1.  | 
|
397  | 
|.1.......  | 
|
398  | 
|8....5...  | 
|
399  | 
|...8.4...  | 
|
400  | 
|...3....6  | 
|
401  | 
|.......7.  | 
|
402  | 
|.3.5...8.  | 
|
403  | 
              |9724...5.""".stripMargin.replaceAll("\\n", "")
 | 
|
404  | 
||
| 158 | 405  | 
searchT(List(game3), Nil).map(pretty)  | 
| 155 | 406  | 
search1T(List(game3)).map(pretty)  | 
| 67 | 407  | 
|
| 
77
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
408  | 
// Moral: Whenever a recursive function is resource-critical  | 
| 158 | 409  | 
// (i.e. works with large recursion depth), then you need to  | 
| 
77
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
410  | 
// write it in tail-recursive fashion.  | 
| 
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
411  | 
//  | 
| 155 | 412  | 
// Unfortunately, Scala, because of current limitations in
413  | 
// the JVM is not as clever as other functional languages. It can  | 
|
| 
77
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
414  | 
// only optimise "self-tail calls". This excludes the cases of  | 
| 
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
415  | 
// multiple functions making tail calls to each other. Well,  | 
| 
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
416  | 
// nothing is perfect.  | 
| 
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
417  | 
|
| 
 
3cbe3d90b77f
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
73 
diff
changeset
 | 
418  | 
|
| 67 | 419  | 
|
420  | 
||
| 71 | 421  | 
|
| 67 | 422  | 
|
| 335 | 423  | 
|
424  | 
||
425  | 
||
426  | 
||
427  | 
//************  | 
|
428  | 
// Either  | 
|
429  | 
val either1 : Either[Exception,Int] = Right(1)  | 
|
430  | 
val either2: Either[Exception, Int] = Right(2)  | 
|
431  | 
||
432  | 
for{
 | 
|
433  | 
one <- either1  | 
|
434  | 
two <- either2  | 
|
435  | 
} yield one + two  |