progs/lecture2.scala
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 23 Feb 2024 11:31:36 +0000
changeset 480 a1151868a997
parent 478 0e6ca70496c1
child 491 2a30c7dfe3ed
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
51
0e60e6c24b99 updated
Christian Urban <urbanc@in.tum.de>
parents: 39
diff changeset
     1
// Scala Lecture 2
0e60e6c24b99 updated
Christian Urban <urbanc@in.tum.de>
parents: 39
diff changeset
     2
//=================
363
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
     3
 
468
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
     4
// - Options
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
     5
// - Higher-Order Functions (short-hand notation)
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
     6
// - maps (behind for-comprehensions)
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
     7
// - Pattern-Matching
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
     8
// - Recursion
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
     9
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    10
// The Option Type
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    11
//=================
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    12
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    13
// in Java, if something unusual happens, you return null 
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    14
// or raise an exception
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    15
//
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    16
// in Scala you use Options instead
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    17
//   - if the value is present, you use Some(value)
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    18
//   - if no value is present, you use None
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    19
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    20
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    21
List(7,2,3,4,5,6).find(_ < 4)
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    22
List(5,6,7,8,9).find(_ < 4)
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
    23
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    24
// Int:      ..., 0, 1, 2,...
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    25
// Boolean:  true false
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    26
//
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    27
// List[Int]: Nil, List(_) 
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    28
//
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    29
// Option[Int]: None, Some(0), Some(1), ...
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    30
// Option[Boolean]: None, Some(true), Some(false)
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    31
// Option[...]: None, Some(_)
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    32
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    33
// Integer division that records the failure case in its type:
// a zero divisor yields None, otherwise Some(x / y) is returned
// instead of letting x / y throw an ArithmeticException.
def safe_div(x: Int, y: Int) : Option[Int] = 
  if (y == 0) None else Some(x / y)
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    35
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
    36
safe_div(10 + 5, 0)  
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    37
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    38
List(1,2,3,4,5,6).indexOf(7)
468
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
    39
List[Int]().min
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
    40
List[Int](3,4,5).minOption
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    41
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    42
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
    43
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    44
// better error handling with Options (no exceptions)
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    45
//
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    46
//  Try(something).getOrElse(what_to_do_in_case_of_an_exception)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    47
//
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    48
468
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
    49
import scala.util._      // Try,...
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
    50
import io.Source         // fromURL
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    51
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
    52
val my_url = "https://nms.kcl.ac.uk/christian.urban2/"
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    53
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    54
Source.fromURL(my_url)("ISO-8859-1").mkString
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    55
Source.fromURL(my_url)("ISO-8859-1").getLines().toList
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    56
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    57
Try(Source.fromURL(my_url)("ISO-8859-1").mkString).getOrElse("")
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    58
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    59
Try(Some(Source.fromURL(my_url)("ISO-8859-1").mkString)).getOrElse(None)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    60
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    61
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    62
// the same for files
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    63
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
    64
Try(Some(Source.fromFile("test.txt")("ISO-8859-1").mkString)).getOrElse(None)
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    65
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    66
Try(Source.fromFile("test.txt")("ISO-8859-1").mkString).toOption
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    67
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    68
Using(Source.fromFile("test.txt")("ISO-8859-1"))(_.mkString).toOption
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    69
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
    70
// how to implement a function for reading 
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
    71
// (lines) from files...
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
    72
//
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    73
// Reads the file `name` (decoded as ISO-8859-1) and returns its lines.
// This naive version lets any exception (e.g. a missing file) escape
// to the caller. Using.resource closes the underlying Source once the
// lines have been copied into the list, so no file handle is leaked.
def get_contents(name: String) : List[String] = 
  Using.resource(Source.fromFile(name)("ISO-8859-1"))(_.getLines().toList)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    75
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
    76
get_contents("text.txt")
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    77
get_contents("test.txt")
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    78
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    79
// slightly better - return Nil
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    80
// Reads the file `name` (decoded as ISO-8859-1) and returns its lines;
// if anything goes wrong (e.g. the file does not exist) the empty list
// is returned instead of an exception. Using closes the Source after
// the lines have been read and wraps the whole computation in a Try.
def get_contents(name: String) : List[String] = 
  Using(Source.fromFile(name)("ISO-8859-1"))(_.getLines().toList).getOrElse(List())
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    82
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    83
get_contents("text.txt")
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    84
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    85
// much better - you record in the type that things can go wrong 
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    86
// Reads the file `name` (decoded as ISO-8859-1) and returns
// Some(lines) on success and None if reading fails, so the
// possibility of failure is visible in the result type.
// Using closes the Source after the lines have been read and yields
// a Try, which toOption turns into the Option result.
def get_contents(name: String) : Option[List[String]] = 
  Using(Source.fromFile(name)("ISO-8859-1"))(_.getLines().toList).toOption
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    88
316
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    89
get_contents("text.txt")
03d55eb6a0b7 updated
Christian Urban <urbanc@in.tum.de>
parents: 310
diff changeset
    90
get_contents("test.txt")
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    91
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    92
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
    93
// operations on options
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    94
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
    95
val lst = List(None, Some(1), Some(2), None, Some(3))
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    96
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
    97
lst.flatten
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
    98
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
    99
Some(1).get
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   100
None.get
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   101
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   102
Some(1).isDefined
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   103
None.isDefined
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   104
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   105
for (x <- lst) yield x.getOrElse(0)
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   106
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   107
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   108
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   109
val ps = List((3, 0), (4, 2), (6, 2), 
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   110
              (2, 0), (1, 0), (1, 1))
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   111
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   112
// division where possible
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   113
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   114
for ((x, y) <- ps) yield {
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   115
  if (y == 0) None else Some(x / y)
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   116
}
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   117
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   118
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   119
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   120
// getOrElse is for setting a default value
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   121
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   122
val lst = List(None, Some(1), Some(2), None, Some(3))
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   123
361
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   124
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   125
// a function that turns strings into numbers 
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   126
// (similar to .toInt)
736a60afedbc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 334
diff changeset
   127
Integer.parseInt("1234")
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   128
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   129
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   130
// Parses `s` as a decimal Int via Integer.parseInt.
// Returns Some(n) if parsing succeeds and None if parseInt throws
// (for example on "12u34" or the empty string).
def get_me_an_int(s: String) : Option[Int] = 
  Try(Integer.parseInt(s)).toOption
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   132
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   133
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   134
// This may not look any better than working with null in Java, but to
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   135
// see the value, you have to put yourself in the shoes of the
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   136
// consumer of the get_me_an_int function, and imagine you didn't
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   137
// write that function.
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   138
//
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   139
// In Java, if you didn't write this function, you'd have to depend on
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   140
// the Javadoc of the get_me_an_int. If you didn't look at the Javadoc, 
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   141
// you might not know that get_me_an_int could return null, and your 
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   142
// code could potentially throw a NullPointerException.
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   143
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   144
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   145
// even Scala is not immune to problems like this:
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   146
317
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   147
List(5,6,7,8,9).indexOf(7)
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   148
List(5,6,7,8,9).indexOf(10)
07583fbe4f95 updated
Christian Urban <urbanc@in.tum.de>
parents: 316
diff changeset
   149
List(5,6,7,8,9)(-1)
310
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   150
996279af8952 updated
Christian Urban <urbanc@in.tum.de>
parents: 309
diff changeset
   151
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   152
Try({
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   153
  val x = 3
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   154
  val y = 0
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   155
  Some(x / y)
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   156
}).getOrElse(None)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   157
323
93b6c16dded8 updated
Christian Urban <urbanc@in.tum.de>
parents: 320
diff changeset
   158
93b6c16dded8 updated
Christian Urban <urbanc@in.tum.de>
parents: 320
diff changeset
   159
// minOption 
93b6c16dded8 updated
Christian Urban <urbanc@in.tum.de>
parents: 320
diff changeset
   160
// maxOption 
93b6c16dded8 updated
Christian Urban <urbanc@in.tum.de>
parents: 320
diff changeset
   161
// minByOption 
93b6c16dded8 updated
Christian Urban <urbanc@in.tum.de>
parents: 320
diff changeset
   162
// maxByOption
93b6c16dded8 updated
Christian Urban <urbanc@in.tum.de>
parents: 320
diff changeset
   163
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   164
// Higher-Order Functions
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   165
//========================
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   166
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   167
// functions can take functions as arguments
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   168
// and produce functions as result
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   169
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   170
// true exactly when the lowest bit of x is clear, i.e. x is divisible by 2
def even(x: Int) : Boolean = (x & 1) == 0
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   171
// true when x is not divisible by 2; compares the remainder against 0
// because % keeps the sign of x (-3 % 2 is -1, not 1), so testing
// for == 1 would wrongly reject negative odd numbers
def odd(x: Int) : Boolean = x % 2 != 0
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   172
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   173
// successor function on Int
def inc(x: Int) : Int = 1 + x
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   174
val lst = (1 to 10).toList
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   175
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   176
lst.filter(_ % 2 == 0)
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   177
lst.count(odd)
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   178
lst.find(even)
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   179
lst.exists(even)
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   180
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   181
lst.find(_ < 4)
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   182
lst.filter(_ < 4) 
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   183
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   184
val x = 3 < 4
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   185
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   186
// predicate: is x strictly below 4?
def less4(x: Int) : Boolean = 4 > x
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   187
lst.find(less4)
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   188
lst.find(x => !(x < 4))
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   189
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   190
lst.filter(x => x % 2 == 0)
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   191
lst.filter(_ % 2 == 0)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   192
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   193
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   194
lst.sortWith((x, y) => x < y)
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   195
lst.sortWith(_ > _)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   196
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   197
// but this only works when the arguments are clear, but 
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   198
// not with multiple occurrences
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   199
lst.find(n => odd(n) && n > 2)
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   200
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   201
444
e6df3c7ff132 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 384
diff changeset
   202
// lexicographic ordering
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   203
val ps = List((3, 0), (3, 2), (4, 2), (2, 2), 
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   204
              (2, 0), (1, 1), (1, 0))
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   205
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   206
// Strict lexicographic "less than" on pairs of Ints: the first
// components decide unless they are equal, in which case the
// second components are compared. Suitable as argument to sortWith.
def lex(x: (Int, Int), y: (Int, Int)) : Boolean = {
  val (x1, x2) = x
  val (y1, y2) = y
  if (x1 != y1) x1 < y1 else x2 < y2
}
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   208
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   209
ps.sortWith(lex)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   210
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   211
ps.sortBy(x => x._1)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   212
ps.sortBy(_._2)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   213
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   214
ps.maxBy(_._1)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   215
ps.maxBy(_._2)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   216
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   217
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   218
// maps (lower-case)
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   219
//===================
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   220
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   221
// twice the argument
def double(x: Int): Int = 2 * x
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   222
// the argument multiplied by itself
def square(x: Int): Int = {
  val squared = x * x
  squared
}
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   223
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   224
val lst = (1 to 10).toList
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   225
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   226
lst.map(square)
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   227
lst.map(x => (double(x), square(x)))
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   228
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   229
// works also for strings
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   230
// upper-cases a single character; characters without an upper-case
// form (digits, newlines, ...) are returned unchanged
def tweet(c: Char): Char = c.toUpper
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   231
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   232
"Hello\nWorld".map(tweet)
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   233
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   234
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   235
// this can be iterated
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   236
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   237
lst.map(square).filter(_ > 4)
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   238
363
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   239
lst.map(square).find(_ > 4)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   240
lst.map(square).find(_ > 4).map(double)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   241
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   242
lst.map(square)
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   243
   .find(_ > 4)
363
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   244
   .map(double)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   245
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   246
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   247
// Option Type and maps
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   248
//======================
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   249
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   250
// a function that turns strings into numbers (similar to .toInt)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   251
Integer.parseInt("12u34")
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   252
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   253
// maps on Options
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   254
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   255
import scala.util._
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   256
363
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   257
// Parses `s` as a decimal Int via Integer.parseInt.
// Returns Some(n) if parsing succeeds and None if parseInt throws
// (for example on "12u34" or "foo").
def get_me_an_int(s: String) : Option[Int] = 
  Try(Integer.parseInt(s)).toOption
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   259
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   260
get_me_an_int("12345").map(_ % 2 == 0)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   261
get_me_an_int("12u34").map(_ % 2 == 0)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   262
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   263
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   264
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   265
val lst = List("12345", "foo", "5432", "bar", "x21", "456")
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   266
for (x <- lst) yield get_me_an_int(x)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   267
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   268
// summing up all the numbers
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   269
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   270
lst.map(get_me_an_int).flatten.sum
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   271
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   272
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   273
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   274
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   275
// this is actually how for-comprehensions are
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   276
// defined in Scala
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   277
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   278
lst.map(n => square(n))
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   279
for (n <- lst) yield square(n)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   280
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   281
// let's define our own higher-order functions
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   282
// type of functions is for example Int => Int
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   283
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   284
363
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   285
// Applies f to every element of lst and returns the results in the
// same order. The empty list is the base case; otherwise f is applied
// to the head and the function recurses on the tail. The recursive
// call is not in tail position (its result is still consed onto),
// so every element of the list costs one stack frame.
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = 
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   286
{
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   287
  if (lst == Nil) Nil
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   288
  else f(lst.head) :: my_map_int(lst.tail, f)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   289
}
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   290
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   291
my_map_int(lst, square)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   292
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   293
// same function using pattern matching: a kind
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   294
// of switch statement on steroids (see more later on)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   295
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   296
// Pattern-matching version of my_map_int: Nil is mapped to Nil, and
// for a non-empty list x::xs the result is f(x) in front of the
// recursively mapped tail xs.
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = 
362
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   297
  lst match {
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   298
    case Nil => Nil
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   299
    case x::xs => f(x)::my_map_int(xs, f)
fc9394f4f0ea updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 361
diff changeset
   300
  }
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   301
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   302
363
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   303
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   304
val biglst = (1 to 10000).toList
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   305
my_map_int(biglst, double)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   306
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   307
(1 to 10000000).toList.map(double)
9f481fd7c613 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 362
diff changeset
   308
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   309
// other function types
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   310
//
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   311
// f1: (Int, Int) => Int
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   312
// f2: List[String] => Option[Int]
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   313
// ... 
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   314
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   315
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   316
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   317
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   318
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   319
// Map type (upper-case)
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   320
//=======================
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   321
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   322
// Note the difference between map and Map
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   323
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   324
val m = Map(1 -> "one", 2 -> "two", 10 -> "many")
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   325
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   326
List((1, "one"), (2, "two"), (10, "many")).toMap
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   327
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   328
m.get(1)
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   329
m.get(4)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   330
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   331
m.getOrElse(1, "")
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   332
m.getOrElse(4, "")
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   333
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   334
val new_m = m + (10 -> "ten")
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   335
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   336
new_m.get(10)
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   337
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   338
val m2 = for ((k, v) <- m) yield (k, v.toUpperCase)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   339
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   340
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   341
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   342
// groupBy function (turns a List into a Map)
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   343
val lst = List("one", "two", "three", "four", "five")
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   344
lst.groupBy(_.head)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   345
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   346
lst.groupBy(_.length)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   347
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   348
lst.groupBy(_.length).get(3)
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   349
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   350
val grps = lst.groupBy(_.length)
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   351
grps.keySet
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   352
478
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   353
// naive quicksort with "On" function
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   354
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   355
// Sorts xs in ascending order of the key f(x), quicksort-style: the
// head is the pivot, the remaining elements are split into those with
// a strictly smaller key and all the others, and both parts are sorted
// recursively. Lists with fewer than two elements are returned as is.
def sortOn(f: Int => Int, xs: List[Int]) : List[Int] = xs match {
  case Nil | _ :: Nil => xs
  case pivot :: rest =>
    // the pivot's key is needed for every comparison, so compute it once
    val pivotKey = f(pivot)
    val (smaller, others) = rest.partition(f(_) < pivotKey)
    sortOn(f, smaller) ::: pivot :: sortOn(f, others)
}
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   363
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   364
sortOn(identity, List(99,99,99,98,10,-3,2)) 
0e6ca70496c1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 468
diff changeset
   365
sortOn(n => - n, List(99,99,99,98,10,-3,2))
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   366
51
0e60e6c24b99 updated
Christian Urban <urbanc@in.tum.de>
parents: 39
diff changeset
   367
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   368
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   369
// Pattern Matching
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   370
//==================
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   371
468
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
   372
// A powerful tool which has even landed in Java during 
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
   373
// the last few years (https://inside.java/2021/06/13/podcast-017/).
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
   374
// ...Scala has already had it for many years and the concept is
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
   375
// older than your friendly lecturer, that is stone old  ;o)
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   376
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   377
// The general schema:
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   378
//
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   379
//    expression match {
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   380
//       case pattern1 => expression1
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   381
//       case pattern2 => expression2
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   382
//       ...
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   383
//       case patternN => expressionN
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   384
//    }
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   385
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   386
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   387
// recall
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   388
// Maps f over a list of Ints by matching on the shape of the list:
// the empty list yields the empty list, and x::xs yields f(x)
// followed by the result of the recursive call on xs.
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = 
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   389
  lst match {
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   390
    case Nil => Nil
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   391
    case x::xs => f(x)::my_map_int(xs, f)
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   392
  }
58
93a2b6e4b84c updated
Christian Urban <urbanc@in.tum.de>
parents: 57
diff changeset
   393
468
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
   394
// Applies f to the value inside opt, if there is one:
// None stays None and Some(x) becomes Some(f(x)).
def my_map_option(opt: Option[Int], f: Int => Int) : Option[Int] = 
c71ae4477e55 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 444
diff changeset
   395
  opt match {
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   396
    case None => None
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   397
    case Some(x) => Some(f(x))
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   398
  }
58
93a2b6e4b84c updated
Christian Urban <urbanc@in.tum.de>
parents: 57
diff changeset
   399
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   400
my_map_option(None, x => x * x)
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   401
my_map_option(Some(8), x => x * x)
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   402
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   403
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   404
// you can also have cases combined
266
31e5218f43de updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 212
diff changeset
   405
// Maps a capitalised English month name to a sentence naming its
// season. Alternatives separated by | share one case; any string
// that is not one of the twelve listed names falls through to the
// wildcard case and gives "Wrong month".
def season(month: String) : String = month match {
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   406
  case "March" | "April" | "May" => "It's spring"
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   407
  case "June" | "July" | "August" => "It's summer"
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   408
  case "September" | "October" | "November" => "It's autumn"
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   409
  case "December" => "It's winter"
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   410
  case "January" | "February" => "It's unfortunately winter"
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   411
  case _ => "Wrong month"
266
31e5218f43de updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 212
diff changeset
   412
}
31e5218f43de updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 212
diff changeset
   413
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   414
// pattern-match on integers
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   415
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   416
// Fibonacci numbers by pattern matching on the argument, with
// fib(0) = fib(1) = 1. Negative arguments are rejected with an
// IllegalArgumentException, because the recursion would otherwise
// move further and further away from the two base cases.
def fib(n: Int) : Int = n match {
  case 0 | 1 => 1
  case _ if n < 0 =>
    throw new IllegalArgumentException(s"fib is undefined for negative n: $n")
  case _ => fib(n - 1) + fib(n - 2)
}
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   420
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   421
fib(10)
266
31e5218f43de updated to 2.13
Christian Urban <urbanc@in.tum.de>
parents: 212
diff changeset
   422
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   423
// Silly: fizz buzz
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   424
// Matches on the pair of remainders (n % 3, n % 5): both zero gives
// "fizz buzz", only the first zero gives "fizz", only the second
// zero gives "buzz", and every other n is returned as its string
// representation. The order of the cases matters, because (0, 0)
// would also match the two cases that follow it.
def fizz_buzz(n: Int) : String = (n % 3, n % 5) match {
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   425
  case (0, 0) => "fizz buzz"
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   426
  case (0, _) => "fizz"
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   427
  case (_, 0) => "buzz"
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   428
  case _ => n.toString  
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   429
}
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   430
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   431
for (n <- 1 to 20) 
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   432
 println(fizz_buzz(n))
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   433
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   434
365
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   435
val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   436
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   437
// Removes the None entries from xs and unwraps the Some values,
// keeping their order: a None at the head is skipped, a Some(v) at
// the head contributes v, and the rest of the list is processed
// recursively.
def my_flatten(xs: List[Option[Int]]): List[Int] = 
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   438
 xs match {
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   439
   case Nil => Nil 
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   440
   case None::rest => my_flatten(rest)
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   441
   case Some(v)::rest => v :: my_flatten(rest)
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   442
 }
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   443
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   444
my_flatten(List(None, Some(1), Some(2), None, Some(3)))
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   445
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   446
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   447
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   448
 
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   449
08241d957be4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 364
diff changeset
   450
278
57b5bba67467 updated
Christian Urban <urbanc@in.tum.de>
parents: 268
diff changeset
   451
57b5bba67467 updated
Christian Urban <urbanc@in.tum.de>
parents: 268
diff changeset
   452
309
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   453
// Recursion
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   454
//===========
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   455
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   456
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   457
/* Say you have characters a, b, c.
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   458
   What are all the combinations of a certain length?
309
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   459
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   460
   All combinations of length 2:
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   461
  
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   462
     aa, ab, ac, ba, bb, bc, ca, cb, cc
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   463
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   464
   Combinations of length 3:
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   465
   
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   466
     aaa, baa, caa, and so on......
309
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   467
*/
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   468
320
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   469
// Returns all strings of length n that can be built from the
// characters in cs (a character may be used more than once).
// For n == 0 the only such string is the empty one; otherwise each
// character c in cs is put in front of every string of length n - 1.
// Assumes n >= 0: the only base case is n == 0.
def combs(cs: List[Char], n: Int) : List[String] = {
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   470
  if (n == 0) List("")
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   471
  else for (c <- cs; s <- combs(cs, n - 1)) yield s"$c$s"
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   472
}
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   473
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   474
combs(List('a', 'b', 'c'), 3)
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   475
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   476
90aed247c8cf updated
Christian Urban <urbanc@in.tum.de>
parents: 319
diff changeset
   477
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   478
// All strings of length l over the characters in cs, built
// recursively: length 0 gives just the empty string, and for a
// larger l every character c of cs is prepended to every string
// of length l - 1. Assumes l >= 0: the only base case is l == 0.
def combs(cs: List[Char], l: Int) : List[String] = {
309
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   479
  if (l == 0) List("")
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   480
  else for (c <- cs; s <- combs(cs, l - 1)) yield s"$c$s"
309
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   481
}
c5f16a86ae92 updated
Christian Urban <urbanc@in.tum.de>
parents: 278
diff changeset
   482
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   483
combs("abc".toList, 2)
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   484
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   485
329
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   486
// When writing recursive functions you have to
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   487
// think about three points
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   488
// 
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   489
// - How to start with a recursive function
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   490
// - How to communicate between recursive calls
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   491
// - Exit conditions
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   492
828326d1b3b2 updated
Christian Urban <urbanc@in.tum.de>
parents: 323
diff changeset
   493
147
3e5d8657302f updated
Christian Urban <urbanc@in.tum.de>
parents: 95
diff changeset
   494
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   495
// A Recursive Web Crawler / Email Harvester
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   496
//===========================================
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   497
//
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   498
// the idea is to look for links using the
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   499
// regular expression "https?://[^"]*" and for
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   500
// email addresses using another regex.
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   501
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   502
import io.Source
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   503
import scala.util._
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   504
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   505
// gets the first 10K of a web-page
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   506
// Reads at most the first 10000 characters of the page at url,
// decoded as ISO-8859-1. If the page cannot be opened or read, a
// message is printed and the empty string is returned instead.
def get_page(url: String) : String = {
  // Using closes the Source once the characters have been read (or
  // reading has failed); a failure to open the URL ends up in the Try too
  Using(Source.fromURL(url)("ISO-8859-1"))(_.take(10000).mkString).
    getOrElse { println(s"  Problem with: $url"); ""}
}
3e5d8657302f updated
Christian Urban <urbanc@in.tum.de>
parents: 95
diff changeset
   510
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   511
// regex for URLs and emails
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   512
val http_pattern = """"https?://[^"]*"""".r
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   513
val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   514
268
d20583497c5b updated
Christian Urban <urbanc@in.tum.de>
parents: 266
diff changeset
   515
//test case:
212
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   516
//email_pattern.findAllIn
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   517
//  ("foo bla christian@kcl.ac.uk 1234567").toList
c86e40fb3b21 updated
Christian Urban <urbanc@in.tum.de>
parents: 204
diff changeset
   518
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   519
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   520
// drops the first and last character from a string
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   521
// keeps only the characters strictly between the first and the last
// one; strings with fewer than two characters give ""
def unquote(s: String) = s.slice(1, s.length - 1)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   522
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   523
// Finds every match of http_pattern in page, removes the first and
// last character of each match with unquote (the pattern includes
// the surrounding double quotes), and returns the results as a set,
// so a URL that occurs several times is reported once.
def get_all_URLs(page: String): Set[String] = 
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   524
  http_pattern.findAllIn(page).map(unquote).toSet
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   525
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   526
// naive version of crawl - searches until a given depth,
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   527
// visits pages potentially more than once
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   528
// Visits url and, recursively, the URLs found on its page. With
// n == 0 nothing happens; otherwise the visit is printed, the page
// is fetched with get_page, and crawl is called with n - 1 on each
// URL returned by get_all_URLs. Visited pages are not recorded, so
// the same URL may be fetched more than once.
def crawl(url: String, n: Int) : Unit = {
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   529
  if (n == 0) ()
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   530
  else {
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   531
    println(s"  Visiting: $n $url")
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   532
    for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   533
  }
147
3e5d8657302f updated
Christian Urban <urbanc@in.tum.de>
parents: 95
diff changeset
   534
}
3e5d8657302f updated
Christian Urban <urbanc@in.tum.de>
parents: 95
diff changeset
   535
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   536
// some starting URLs for the crawler
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   537
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
147
3e5d8657302f updated
Christian Urban <urbanc@in.tum.de>
parents: 95
diff changeset
   538
204
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   539
crawl(startURL, 2)
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   540
1b04ea68dca6 updated
Christian Urban <urbanc@in.tum.de>
parents: 192
diff changeset
   541
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   542
// a primitive email harvester
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   543
def emails(url: String, n: Int) : Set[String] = {
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   544
  if (n == 0) Set()
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   545
  else {
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   546
    println(s"  Visiting: $n $url")
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   547
    val page = get_page(url)
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   548
    val new_emails = email_pattern.findAllIn(page).toSet
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   549
    new_emails ++ (for (u <- get_all_URLs(page)) yield emails(u, n - 1)).flatten
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   550
  }
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   551
}
55
6610c1dfa8a9 updated
Christian Urban <urbanc@in.tum.de>
parents: 53
diff changeset
   552
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   553
emails(startURL, 3)
55
6610c1dfa8a9 updated
Christian Urban <urbanc@in.tum.de>
parents: 53
diff changeset
   554
6610c1dfa8a9 updated
Christian Urban <urbanc@in.tum.de>
parents: 53
diff changeset
   555
318
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   556
// if we want to explore the internet "deeper", then we
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   557
// first have to parallelise the requests for webpages:
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   558
//
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   559
// scala -cp scala-parallel-collections_2.13-0.2.0.jar 
f1215a72cd88 updated
Christian Urban <urbanc@in.tum.de>
parents: 317
diff changeset
   560
// import scala.collection.parallel.CollectionConverters._
55
6610c1dfa8a9 updated
Christian Urban <urbanc@in.tum.de>
parents: 53
diff changeset
   561
53
9f8751912560 updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
   562
9f8751912560 updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
   563
9f8751912560 updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
   564
192
cd2a9c969ef2 updated
Christian Urban <urbanc@in.tum.de>
parents: 174
diff changeset
   565
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   566
// Jumping Towers
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   567
//================
278
57b5bba67467 updated
Christian Urban <urbanc@in.tum.de>
parents: 268
diff changeset
   568
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   569
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   570
def moves(xs: List[Int], n: Int) : List[List[Int]] = 
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   571
 (xs, n) match {
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   572
   case (Nil, _) => Nil
366
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   573
   case (_, 0) => Nil
364
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   574
   case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
18942af74fa1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 363
diff changeset
   575
 }
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   576
366
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   577
// List(5,5,1,0) -> moves(List(5,1,0), 5)
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   578
moves(List(5,1,0), 1)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   579
moves(List(5,1,0), 2)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   580
moves(List(5,1,0), 5)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   581
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   582
// checks whether a jump tour exists at all
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   583
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   584
def search(xs: List[Int]) : Boolean = xs match {
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   585
  case Nil => true
366
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   586
  case x::xs =>
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   587
    if (xs.length < x) true 
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   588
    else moves(xs, x).exists(search(_))
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   589
}
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   590
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   591
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   592
search(List(5,3,2,5,1,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   593
search(List(3,5,1,0,0,0,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   594
search(List(3,5,1,0,0,0,0,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   595
search(List(3,5,1,0,0,0,1,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   596
search(List(3,5,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   597
search(List(5,1,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   598
search(Nil)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   599
search(List(1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   600
search(List(5,1,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   601
search(List(3,5,1,0,0,0,0,0,0,0,0,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   602
366
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   603
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   604
import scala.util._
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   605
List.fill(100)(Random.nextInt(2))
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   606
search(List.fill(100)(Random.nextInt(10)))
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   607
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   608
// generate *all* jump tours
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   609
//    if we are only interested in the shortest one, we could
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   610
//    short-circuit the calculation and only return List(x) in the
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   611
//    case where xs.length < x, because no tour can be shorter
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   612
//    than 1
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   613
// 
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   614
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   615
def jumps(xs: List[Int]) : List[List[Int]] = xs match {
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   616
  case Nil => Nil
366
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   617
  case x::xs => {
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   618
    val children = moves(xs, x)
366
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   619
    val results = 
d2f895c1dba6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 365
diff changeset
   620
      children.map(cs => jumps(cs).map(x :: _)).flatten
319
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   621
    if (xs.length < x) List(x) :: results else results
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   622
  }
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   623
}
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   624
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   625
jumps(List(3,5,1,2,1,2,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   626
jumps(List(3,5,1,2,3,4,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   627
jumps(List(3,5,1,0,0,0,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   628
jumps(List(3,5,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   629
jumps(List(5,1,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   630
jumps(Nil)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   631
jumps(List(1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   632
jumps(List(5,1,2))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   633
moves(List(1,2), 5)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   634
jumps(List(1,5,1,2))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   635
jumps(List(3,5,1,0,0,0,0,0,0,0,0,1))
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   636
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   637
jumps(List(5,3,2,5,1,1)).minBy(_.length)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   638
jumps(List(1,3,5,8,9,2,6,7,6,8,9)).minBy(_.length)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   639
jumps(List(1,3,6,1,0,9)).minBy(_.length)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   640
jumps(List(2,3,1,1,2,4,2,0,1,1)).minBy(_.length)
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   641
ed7543c5d317 updated
Christian Urban <urbanc@in.tum.de>
parents: 318
diff changeset
   642