--- a/progs/lecture2.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/progs/lecture2.scala Tue Nov 19 00:40:27 2019 +0000
@@ -128,7 +128,7 @@
// a function that turns strings into numbers (similar to .toInt)
-Integer.parseInt("1234")
+Integer.parseInt("12u34")
def get_me_an_int(s: String) : Option[Int] =
@@ -153,7 +153,11 @@
List(5,6,7,8,9)(-1)
-
+Try({ // wrap the computation so that an exception becomes a Failure
+ val x = 3
+ val y = 0
+ Some(x / y) // Int division by zero throws an ArithmeticException
+}).getOrElse(None) // a Failure falls back to None
// Higher-Order Functions
//========================
@@ -165,15 +169,20 @@
def odd(x: Int) : Boolean = x % 2 == 1
val lst = (1 to 10).toList
+lst.reverse.sorted
+
lst.filter(even)
-lst.count(even)
+lst.count(odd)
lst.find(even)
+lst.exists(even)
-lst.filter(x => x % 2 == 0)
+lst.filter(_ < 4)
+lst.filter(x => x % 2 == 1)
lst.filter(_ % 2 == 0)
-lst.sortWith(_ > _)
+
+lst.sortWith((x, y) => x > y)
lst.sortWith(_ < _)
// but this only works when the arguments are clear, but
@@ -188,7 +197,7 @@
ps.sortWith(lex)
-ps.sortBy(_._1)
+ps.sortBy(x => x._1)
ps.sortBy(_._2)
ps.maxBy(_._1)
@@ -218,13 +227,18 @@
lst.map(square).filter(_ > 4)
-lst.map(square).filter(_ > 4).map(square)
+(lst.map(square)
+ .filter(_ > 4)
+ .map(square))
// lets define our own higher-order functions
// type of functions is for example Int => Int
+0 :: List(3,4,5,6)
+
+
def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = {
if (lst == Nil) Nil
else f(lst.head) :: my_map_int(lst.tail, f)
@@ -250,9 +264,17 @@
// ...
val lst = (1 to 10).toList
-def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
+lst.sum
+
+val lst = List(1,2,3,4)
+
+lst.head
+lst.tail
+
+def sumOf(f: Int => Int, lst: List[Int]): Int =
+lst match {
case Nil => 0
- case x::xs => f(x) + sumOf(f, xs)
+ case x::foo => f(x) + sumOf(f, foo)
}
def sum_squares(lst: List[Int]) = sumOf(square, lst)
@@ -305,9 +327,18 @@
// maps on Options
-get_me_an_int("1234").map(even)
+get_me_an_int("12345").map(even)
get_me_an_int("12u34").map(even)
+// applies f to the value inside an option; None stays None
+def my_map_option(o: Option[Int], f : Int => Int) : Option[Int] = o match {
+  case Some(n) => Some(f(n))
+  case None => None
+}
+
+my_map_option(Some(4), square)
+my_map_option(None, square)
+
// Map type (upper-case)
@@ -315,6 +346,11 @@
// Note the difference between map and Map
+val ascii = ('a' to 'z').map(c => (c, c.toInt)).toList // list of (letter, character code) pairs
+
+val ascii_Map = ascii.toMap // the same pairs as a Map from letter to code
+
+
def factors(n: Int) : List[Int] =
(2 until n).toList.filter(n % _ == 0)
@@ -327,7 +363,7 @@
facs.toMap
-facs.toMap.get(4)
+facs.toMap.get(40)
facs.toMap.getOrElse(42, Nil)
val facsMap = facs.toMap
@@ -344,7 +380,7 @@
val ls = List("one", "two", "three", "four", "five")
ls.groupBy(_.length)
-ls.groupBy(_.length).get(3)
+ls.groupBy(_.length).get(5)
@@ -369,7 +405,8 @@
// recall
val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
-def my_flatten(xs: List[Option[Int]]): List[Int] = xs match {
+def my_flatten(xs: List[Option[Int]]): List[Int] =
+xs match {
case Nil => Nil
case None::rest => my_flatten(rest)
case Some(v)::rest => v :: my_flatten(rest)
@@ -381,10 +418,9 @@
// another example with a default case
def get_me_a_string(n: Int): String = n match {
case 0 | 1 | 2 => "small"
- case _ => "big"
}
-get_me_a_string(0)
+get_me_a_string(3)
// you can also have cases combined
@@ -446,6 +482,15 @@
aaa, baa, caa, and so on......
*/
+def combs(cs: List[Char], n: Int) : List[String] = n match { // all strings of length n over cs
+  case 0 => List("") // exactly one string of length 0
+  case _ => for (c <- cs; s <- combs(cs, n - 1)) yield s"$c$s"
+}
+
+combs(List('a', 'b', 'c'), 3)
+
+
+
def combs(cs: List[Char], l: Int) : List[String] = {
if (l == 0) List("")
else for (c <- cs; s <- combs(cs, l - 1)) yield s"$c$s"
@@ -454,22 +499,6 @@
combs("abc".toList, 2)
-// another well-known example
-
-def move(from: Char, to: Char) =
- println(s"Move disc from $from to $to!")
-
-def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = {
- if (n == 0) ()
- else {
- hanoi(n - 1, from, to, via)
- move(from, to)
- hanoi(n - 1, via, from, to)
- }
-}
-
-hanoi(4, 'A', 'B', 'C')
-
// A Recursive Web Crawler / Email Harvester
//===========================================
--- a/progs/lecture3.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/progs/lecture3.scala Tue Nov 19 00:40:27 2019 +0000
@@ -1,6 +1,16 @@
// Scala Lecture 3
//=================
+// - last week
+//
+// option type
+// higher-order function
+
+
+
+// Recursion Again ;o)
+//====================
+
// A Web Crawler / Email Harvester
//=================================
@@ -15,7 +25,7 @@
// gets the first 10K of a web-page
def get_page(url: String) : String = {
Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString).
- getOrElse { println(s" Problem with: $url"); ""}
+ getOrElse { println(s" Problem with: $url"); ""}
}
// regex for URLs and emails
@@ -31,345 +41,61 @@
def get_all_URLs(page: String): Set[String] =
http_pattern.findAllIn(page).map(unquote).toSet
-// naive version of crawl - searches until a given depth,
+// a naive version of crawl - searches until a given depth,
// visits pages potentially more than once
-
-def crawl(url: String, n: Int) : Set[String] = {
- if (n == 0) Set()
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
else {
println(s" Visiting: $n $url")
- val page = get_page(url)
- val new_emails = email_pattern.findAllIn(page).toSet
- new_emails ++
- (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
}
}
// some starting URLs for the crawler
val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
+
crawl(startURL, 2)
-// User-defined Datatypes
-//========================
-
-
-abstract class Colour
-case object Red extends Colour
-case object Green extends Colour
-case object Blue extends Colour
-
-def fav_colour(c: Colour) : Boolean = c match {
- case Red => false
- case Green => true
- case Blue => false
-}
-
-fav_colour(Green)
-
-
-// ... a tiny bit more useful: Roman Numerals
-
-abstract class RomanDigit
-case object I extends RomanDigit
-case object V extends RomanDigit
-case object X extends RomanDigit
-case object L extends RomanDigit
-case object C extends RomanDigit
-case object D extends RomanDigit
-case object M extends RomanDigit
-
-type RomanNumeral = List[RomanDigit]
-
-List(X,I)
-
-/*
-I -> 1
-II -> 2
-III -> 3
-IV -> 4
-V -> 5
-VI -> 6
-VII -> 7
-VIII -> 8
-IX -> 9
-X -> X
-*/
-
-def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
- case Nil => 0
- case M::r => 1000 + RomanNumeral2Int(r)
- case C::M::r => 900 + RomanNumeral2Int(r)
- case D::r => 500 + RomanNumeral2Int(r)
- case C::D::r => 400 + RomanNumeral2Int(r)
- case C::r => 100 + RomanNumeral2Int(r)
- case X::C::r => 90 + RomanNumeral2Int(r)
- case L::r => 50 + RomanNumeral2Int(r)
- case X::L::r => 40 + RomanNumeral2Int(r)
- case X::r => 10 + RomanNumeral2Int(r)
- case I::X::r => 9 + RomanNumeral2Int(r)
- case V::r => 5 + RomanNumeral2Int(r)
- case I::V::r => 4 + RomanNumeral2Int(r)
- case I::r => 1 + RomanNumeral2Int(r)
-}
-
-RomanNumeral2Int(List(I,V)) // 4
-RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number)
-RomanNumeral2Int(List(V,I)) // 6
-RomanNumeral2Int(List(I,X)) // 9
-RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
-RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
-
-
-// another example
-//=================
-
-// Once upon a time, in a complete fictional
-// country there were Persons...
-
-
-abstract class Person
-case object King extends Person
-case class Peer(deg: String, terr: String, succ: Int) extends Person
-case class Knight(name: String) extends Person
-case class Peasant(name: String) extends Person
-
-
-def title(p: Person): String = p match {
- case King => "His Majesty the King"
- case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
- case Knight(name) => s"Sir ${name}"
- case Peasant(name) => name
-}
-
-def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
- case (King, _) => true
- case (Peer(_,_,_), Knight(_)) => true
- case (Peer(_,_,_), Peasant(_)) => true
- case (Peer(_,_,_), Clown) => true
- case (Knight(_), Peasant(_)) => true
- case (Knight(_), Clown) => true
- case (Clown, Peasant(_)) => true
- case _ => false
-}
-
-val people = List(Knight("David"),
- Peer("Duke", "Norfolk", 84),
- Peasant("Christian"),
- King,
- Clown)
-
-println(people.sortWith(superior).mkString("\n"))
-
-
-// String interpolations as patterns
-
-val date = "2000-01-01"
-val s"$year-$month-$day" = date
-
-def parse_date(date: String) = date match {
- case s"$year-$month-$day" => Some((year.toInt, month.toInt, day.toInt))
- case s"$day/$month/$year" => Some((year.toInt, month.toInt, day.toInt))
- case _ => None
-}
-
-
-
-
-// User-defined Datatypes and Pattern Matching
-//=============================================
-
-
-abstract class Exp
-case class N(n: Int) extends Exp // for numbers
-case class Plus(e1: Exp, e2: Exp) extends Exp
-case class Times(e1: Exp, e2: Exp) extends Exp
-
-def string(e: Exp) : String = e match {
- case N(n) => n.toString
- case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")"
- case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")"
-}
-
-val e = Plus(N(9), Times(N(3), N(4)))
-println(string(e))
-
-def eval(e: Exp) : Int = e match {
- case N(n) => n
- case Plus(e1, e2) => eval(e1) + eval(e2)
- case Times(e1, e2) => eval(e1) * eval(e2)
-}
-
-def simp(e: Exp) : Exp = e match {
- case N(n) => N(n)
- case Plus(e1, e2) => (simp(e1), simp(e2)) match {
- case (N(0), e2s) => e2s
- case (e1s, N(0)) => e1s
- case (e1s, e2s) => Plus(e1s, e2s)
- }
- case Times(e1, e2) => (simp(e1), simp(e2)) match {
- case (N(0), _) => N(0)
- case (_, N(0)) => N(0)
- case (N(1), e2s) => e2s
- case (e1s, N(1)) => e1s
- case (e1s, e2s) => Times(e1s, e2s)
- }
+// a primitive email harvester: the emails on url plus those found by following links until n reaches 0
+def emails(url: String, n: Int) : Set[String] = {
+  if (n == 0) Set()
+  else {
+    println(s" Visiting: $n $url")
+    val page = get_page(url)
+    val found = email_pattern.findAllIn(page).toSet
+    found ++ get_all_URLs(page).flatMap(link => emails(link, n - 1))
+  }
}
-println(eval(e))
-
-val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
-println(string(e2))
-println(string(simp(e2)))
+emails(startURL, 2)
-// Tokens and Reverse Polish Notation
-abstract class Token
-case class T(n: Int) extends Token
-case object PL extends Token
-case object TI extends Token
-def rp(e: Exp) : List[Token] = e match {
- case N(n) => List(T(n))
- case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
- case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
-}
-println(string(e2))
-println(rp(e2))
-
-def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
- case (Nil, st) => st.head
- case (T(n)::rest, st) => comp(rest, n::st)
- case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
- case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
-}
-
-comp(rp(e), Nil)
-
-def proc(s: String) : Token = s match {
- case "+" => PL
- case "*" => TI
- case _ => T(s.toInt)
-}
-
-comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)
-
+// if we want to explore the internet "deeper", then we
+// first have to parallelise the request of webpages:
+//
+// scala -cp scala-parallel-collections_2.13-0.2.0.jar
+// import scala.collection.parallel.CollectionConverters._
-def string(e: Exp) : String = e match {
- case N(n) => n.toString
- case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")"
- case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")"
-}
-
-val e = Plus(N(9), Times(N(3), N(4)))
-
-println(string(e))
-
-def eval(e: Exp) : Int = e match {
- case N(n) => n
- case Plus(e1, e2) => eval(e1) + eval(e2)
- case Times(e1, e2) => eval(e1) * eval(e2)
-}
-
-eval(e)
+// another well-known example
+//============================
-def simp(e: Exp) : Exp = e match {
- case N(n) => N(n)
- case Plus(e1, e2) => (simp(e1), simp(e2)) match {
- case (N(0), e2s) => e2s
- case (e1s, N(0)) => e1s
- case (e1s, e2s) => Plus(e1s, e2s)
- }
- case Times(e1, e2) => (simp(e1), simp(e2)) match {
- case (N(0), e2s) => N(0)
- case (e1s, N(0)) => N(0)
- case (N(1), e2s) => e2s
- case (e1s, N(1)) => e1s
- case (e1s, e2s) => Times(e1s, e2s)
- }
-}
-
-
-val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
-println(string(e2))
-println(string(simp(e2)))
-
-// Token and Reverse Polish Notation
-abstract class Token
-case class T(n: Int) extends Token
-case object PL extends Token
-case object TI extends Token
-
-def rp(e: Exp) : List[Token] = e match {
- case N(n) => List(T(n))
- case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL)
- case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI)
-}
+def move(from: Char, to: Char) = // prints a single disc move
+ println(s"Move disc from $from to $to!")
-def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match {
- case (Nil, st) => st.head
- case (T(n)::rest, st) => comp(rest, n::st)
- case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
- case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
-}
-
-def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match {
- case (Nil, st) => st.head
- case (T(n)::rest, st) => exp(rest, N(n)::st)
- case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st)
- case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st)
-}
-
-exp(toks(e2), Nil)
-
-def proc(s: String) = s match {
- case "+" => PL
- case "*" => TI
- case n => T(n.toInt)
-}
-
-
-string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil))
-
-
-
-// Tail recursion
-//================
-
+// emits (via move) the moves for shifting n discs from peg `from` to peg `to`
+def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit =
+  if (n != 0) {
+    // first get the n - 1 upper discs out of the way onto `via`
+    hanoi(n - 1, from, to, via)
+    move(from, to)
+    hanoi(n - 1, via, from, to)
+  }
-def fact(n: Long): Long =
- if (n == 0) 1 else n * fact(n - 1)
-
-def factB(n: BigInt): BigInt =
- if (n == 0) 1 else n * factB(n - 1)
-
-factB(100000)
-
-fact(10) //ok
-fact(10000) // produces a stackoverflow
-
-def factT(n: BigInt, acc: BigInt): BigInt =
- if (n == 0) acc else factT(n - 1, n * acc)
-
-factT(10, 1)
-println(factT(100000, 1))
-
-// there is a flag for ensuring a function is tail recursive
-import scala.annotation.tailrec
-
-@tailrec
-def factT(n: BigInt, acc: BigInt): BigInt =
- if (n == 0) acc else factT(n - 1, n * acc)
-
-
-
-// for tail-recursive functions the Scala compiler
-// generates loop-like code, which does not need
-// to allocate stack-space in each recursive
-// call; Scala can do this only for tail-recursive
-// functions
+hanoi(4, 'A', 'B', 'C')
@@ -380,8 +106,6 @@
// the first n prefixes of xs
// for 1 => include xs
-
-
def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
case (Nil, _) => Nil
case (xs, 0) => Nil
@@ -424,12 +148,12 @@
case Nil => Nil
case (x::xs) => {
val children = moves(xs, x)
- val results = children.map((cs) => jumps(cs).map(x :: _)).flatten
- if (xs.length < x) List(x) :: results else results
+ val results = children.map(cs => jumps(cs).map(x :: _)).flatten
+ if (xs.length < x) List(x)::results else results
}
}
-println(jumps(List(5,3,2,5,1,1)).minBy(_.length))
+jumps(List(5,3,2,5,1,1)).minBy(_.length)
jumps(List(3,5,1,2,1,2,1))
jumps(List(3,5,1,2,3,4,1))
jumps(List(3,5,1,0,0,0,1))
@@ -449,37 +173,176 @@
-// Tail Recursion
-//================
-
-
-def fact(n: Long): Long =
- if (n == 0) 1 else n * fact(n - 1)
-
-fact(10) //ok
-fact(10000) // produces a stackoverflow
-
-def factT(n: BigInt, acc: BigInt): BigInt =
- if (n == 0) acc else factT(n - 1, n * acc)
-
-factT(10, 1)
-factT(100000, 1)
-
-// there is a flag for ensuring a function is tail recursive
-import scala.annotation.tailrec
-
-@tailrec
-def factT(n: BigInt, acc: BigInt): BigInt =
- if (n == 0) acc else factT(n - 1, n * acc)
-// for tail-recursive functions the Scala compiler
-// generates loop-like code, which does not need
-// to allocate stack-space in each recursive
-// call; Scala can do this only for tail-recursive
-// functions
+// User-defined Datatypes
+//========================
+
+
+abstract class Colour
+case object Red extends Colour
+case object Green extends Colour
+case object Blue extends Colour
+
+// only Green counts as a favourite colour
+def fav_colour(c: Colour) : Boolean = c match {
+  case Green => true
+  case Red | Blue => false
+  // any other Colour subclass is still a MatchError, as there is no default case
+}
+
+fav_colour(Green)
+
+// ... a tiny bit more useful: Roman Numerals
+
+abstract class RomanDigit // a single Roman digit
+case object I extends RomanDigit // 1
+case object V extends RomanDigit // 5
+case object X extends RomanDigit // 10
+case object L extends RomanDigit // 50
+case object C extends RomanDigit // 100
+case object D extends RomanDigit // 500
+case object M extends RomanDigit // 1000
+
+type RomanNumeral = List[RomanDigit] // a numeral is a list of digits, e.g. List(X,I)
+
+List(X,I)
+
+/*
+I -> 1
+II -> 2
+III -> 3
+IV -> 4
+V -> 5
+VI -> 6
+VII -> 7
+VIII -> 8
+IX -> 9
+X -> 10
+*/
+// sums the digit values left to right; the pairs CM, CD, XC, XL, IX and IV are read as one subtractive value
+def RomanNumeral2Int(rs: RomanNumeral): Int = rs match {
+  case Nil => 0
+  case M::rest => 1000 + RomanNumeral2Int(rest)
+  case C::M::rest => 900 + RomanNumeral2Int(rest)
+  case D::rest => 500 + RomanNumeral2Int(rest)
+  case C::D::rest => 400 + RomanNumeral2Int(rest)
+  case C::rest => 100 + RomanNumeral2Int(rest)
+  case X::C::rest => 90 + RomanNumeral2Int(rest)
+  case L::rest => 50 + RomanNumeral2Int(rest)
+  case X::L::rest => 40 + RomanNumeral2Int(rest)
+  case X::rest => 10 + RomanNumeral2Int(rest)
+  case I::X::rest => 9 + RomanNumeral2Int(rest)
+  case V::rest => 5 + RomanNumeral2Int(rest)
+  case I::V::rest => 4 + RomanNumeral2Int(rest)
+  case I::rest => 1 + RomanNumeral2Int(rest)
+}
+
+RomanNumeral2Int(List(I,V)) // 4
+RomanNumeral2Int(List(I,I,I,I)) // 4 (not a valid Roman numeral, but still accepted)
+RomanNumeral2Int(List(V,I)) // 6
+RomanNumeral2Int(List(I,X)) // 9
+RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979
+RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017
+
+
+// String interpolations as patterns
+
+val date = "2019-11-26"
+val s"$year-$month-$day" = date
+// parses y-m-d, d/m/y and d.m.y dates into (day, month, year); None for any other or non-numeric input
+def parse_date(date: String) : Option[(Int, Int, Int)] = scala.util.Try(date match {
+  case s"$year-$month-$day" => Some((day.toInt, month.toInt, year.toInt))
+  case s"$day/$month/$year" => Some((day.toInt, month.toInt, year.toInt))
+  case s"$day.$month.$year" => Some((day.toInt, month.toInt, year.toInt))
+  case _ => None
+}).getOrElse(None) // .toInt throws on non-numeric parts; Try turns that into None
+parse_date("2019-11-26")
+parse_date("26/11/2019")
+parse_date("26.11.2019")
+
+
+// User-defined Datatypes and Pattern Matching
+//=============================================
+
+// trees
+
+abstract class Exp // arithmetic expression trees
+case class N(n: Int) extends Exp // for numbers
+case class Plus(e1: Exp, e2: Exp) extends Exp // addition of two subexpressions
+case class Times(e1: Exp, e2: Exp) extends Exp // multiplication of two subexpressions
+// renders an expression as a fully parenthesised infix string
+def string(e: Exp) : String = e match {
+  case N(num) => num.toString
+  case Plus(l, r) => "(" + string(l) + " + " + string(r) + ")"
+  case Times(l, r) => "(" + string(l) + " * " + string(r) + ")"
+}
+
+val e = Plus(N(9), Times(N(3), N(4)))
+println(string(e))
+// computes the integer value of an expression tree
+def eval(e: Exp) : Int = e match {
+  case Times(l, r) => eval(l) * eval(r)
+  case Plus(l, r) => eval(l) + eval(r)
+  case N(num) => num
+}
+
+println(eval(e))
+// simplifies an expression bottom-up by removing additions of 0 and multiplications by 0 or 1
+def simp(e: Exp) : Exp = e match {
+ case N(n) => N(n)
+ case Plus(e1, e2) => (simp(e1), simp(e2)) match { // simplify both operands first
+ case (N(0), e2s) => e2s // 0 + e = e
+ case (e1s, N(0)) => e1s // e + 0 = e
+ case (e1s, e2s) => Plus(e1s, e2s)
+ }
+ case Times(e1, e2) => (simp(e1), simp(e2)) match { // simplify both operands first
+ case (N(0), _) => N(0) // 0 * e = 0
+ case (_, N(0)) => N(0) // e * 0 = 0
+ case (N(1), e2s) => e2s // 1 * e = e
+ case (e1s, N(1)) => e1s // e * 1 = e
+ case (e1s, e2s) => Times(e1s, e2s)
+ }
+}
+
+
+val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
+println(string(e2))
+println(string(simp(e2)))
+
+
+// Tokens and Reverse Polish Notation
+abstract class Token
+case class T(n: Int) extends Token // a number token
+case object PL extends Token // the plus operator
+case object TI extends Token // the times operator
+// flattens an expression into reverse Polish notation: both operands first, then the operator
+def rp(e: Exp) : List[Token] = e match {
+  case N(num) => List(T(num))
+  case Plus(l, r) => (rp(l) ++ rp(r)) :+ PL
+  case Times(l, r) => (rp(l) ++ rp(r)) :+ TI
+}
+println(string(e2))
+println(rp(e2))
+// evaluates a reverse-Polish token list, using st as the stack of intermediate results
+def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
+ case (Nil, st) => st.head // no tokens left: the result is on top of the stack (throws if the stack is empty)
+ case (T(n)::rest, st) => comp(rest, n::st) // push a number
+ case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) // pop two numbers, push their sum
+ case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) // pop two numbers, push their product
+} // NOTE(review): an operator with fewer than two numbers on the stack matches no case (MatchError)
+
+comp(rp(e), Nil)
+// turns one piece of the input into a token; anything other than + and * has to be an integer
+def proc(s: String) : Token = s match {
+  case "*" => TI
+  case "+" => PL
+  case num => T(num.toInt)
+}
+
+comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)
@@ -555,7 +418,7 @@
if (isDone(game)) List(game)
else {
val cs = candidates(game, emptyPosition(game))
- cs.par.map(c => search(update(game, empty(game), c))).toList.flatten
+ cs.map(c => search(update(game, empty(game), c))).toList.flatten
}
}
@@ -609,6 +472,45 @@
time_needed(1, search(game2))
+
+
+
+// Tail recursion
+//================
+
+// factorial by plain (non-tail) recursion on Long
+def fact(n: Long): Long =
+  if (n != 0) n * fact(n - 1) else 1
+// the same recursion on BigInt, so large results do not wrap around
+def factB(n: BigInt): BigInt =
+  if (n != 0) n * factB(n - 1) else 1
+
+factB(100000)
+
+fact(10) //ok
+fact(10000) // produces a stackoverflow
+// factorial with an accumulator: the recursive call is the last thing the function does
+def factT(n: BigInt, acc: BigInt): BigInt =
+  if (n != 0) factT(n - 1, n * acc) else acc
+
+factT(10, 1)
+println(factT(100000, 1))
+
+// there is a flag for ensuring a function is tail recursive
+import scala.annotation.tailrec
+// with @tailrec the compiler rejects the definition if it is not tail recursive
+@tailrec
+def factT(n: BigInt, acc: BigInt): BigInt =
+  if (n != 0) factT(n - 1, n * acc) else acc
+
+
+
+// for tail-recursive functions the Scala compiler
+// generates loop-like code, which does not need
+// to allocate stack-space in each recursive
+// call; Scala can do this only for tail-recursive
+// functions
+
// tail recursive version that searches
// for all solutions
--- a/progs/lecture4.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/progs/lecture4.scala Tue Nov 19 00:40:27 2019 +0000
@@ -22,7 +22,6 @@
length_string_list(List("1", "2", "3", "4"))
length_int_list(List(1, 2, 3, 4))
-//-----
def length[A](lst: List[A]): Int = lst match {
case Nil => 0
case x::xs => 1 + length(xs)
--- a/progs/sudoku.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/progs/sudoku.scala Tue Nov 19 00:40:27 2019 +0000
@@ -78,11 +78,9 @@
".6.5.4.3.1...9...8.........9...5...6.4.6.2.7.7...4...5.........4...8...1.5.2.3.4.",
"7.....4...2..7..8...3..8.799..5..3...6..2..9...1.97..6...3..9...3..4..6...9..1.35",
"....7..2.8.......6.1.2.5...9.54....8.........3....85.1...3.2.8.4.......9.7..6....",
-// "4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......",
"52...6.........7.13...........4..8..6......5...........418.........3..2...87.....",
"6.....8.3.4.7.................5.4.7.3..2.....1.6.......2.....5.....8.6......1....",
"48.3............71.2.......7.5....6....2..8.............1.76...3.....4......5....",
-// "....14....3....2...7..........9...3.6.1.............8.2.....1.4....5.6.....7.8...",
"......52..8.4......3...9...5.1...6..2..7........3.....6...1..........7.4.......3.",
"6.2.5.........3.4..........43...8....1....2........7..5..27...........81...6.....",
".524.........7.1..............8.2...3.....6...9.5.....1.6.3...........897........",
@@ -100,7 +98,6 @@
"..84...3....3.....9....157479...8........7..514.....2...9.6...2.5....4......9..56",
".98.1....2......6.............3.2.5..84.........6.........4.8.93..5...........1..",
"..247..58..............1.4.....2...9528.9.4....9...1.........3.3....75..685..2...",
-// "4.....8.5.3..........7......2.....6.....5.4......1.......6.3.7.5..2.....1.9......",
".2.3......63.....58.......15....9.3....7........1....8.879..26......6.7...6..7..4",
"1.....7.9.4...72..8.........7..1..6.3.......5.6..4..2.........8..53...7.7.2....46",
"4.....3.....8.2......7........1...8734.......6........5...6........1.4...82......",
@@ -113,12 +110,10 @@
".2.......3.5.62..9.68...3...5..........64.8.2..47..9....3.....1.....6...17.43....",
".8..4....3......1........2...5...4.69..1..8..2...........3.9....6....5.....2.....",
"..8.9.1...6.5...2......6....3.1.7.5.........9..4...3...5....2...7...3.8.2..7....4",
-// "4.....5.8.3..........7......2.....6.....5.8......1.......6.3.7.5..2.....1.8......",
"1.....3.8.6.4..............2.3.1...........958.........5.6...7.....8.2...4.......",
"1....6.8..64..........4...7....9.6...7.4..5..5...7.1...5....32.3....8...4........",
"249.6...3.3....2..8.......5.....6......2......1..4.82..9.5..7....4.....1.7...3...",
"...8....9.873...4.6..7.......85..97...........43..75.......3....3...145.4....2..1",
-// "...5.1....9....8...6.......4.1..........7..9........3.8.....1.5...2..4.....36....",
"......8.16..2........7.5......6...2..1....3...8.......2......7..3..8....5...4....",
".476...5.8.3.....2.....9......8.5..6...1.....6.24......78...51...6....4..9...4..7",
".....7.95.....1...86..2.....2..73..85......6...3..49..3.5...41724................",
@@ -198,7 +193,7 @@
// 1 single thread version 800 secs
-// 4 cores parallel version on moderate laptop 400 secs
-// 8 cores (4 physical + 4 hyperthread): 290 secs
-// 36 cores (18 physical + 18 hyperthread): 142 secs
+// 4 cores parallel version on a moderate laptop 400 secs
+// 8 cores: 290 secs
+// 18 cores: 142 secs
Binary file slides/slides03.pdf has changed
--- a/slides/slides03.tex Tue Nov 12 10:47:27 2019 +0000
+++ b/slides/slides03.tex Tue Nov 19 00:40:27 2019 +0000
@@ -1,8 +1,10 @@
+% !TEX program = xelatex
\documentclass[dvipsnames,14pt,t,xelatex]{beamer}
\usepackage{../slides}
\usepackage{../graphics}
\usepackage{../langs}
%%\usepackage{../data}
+\usetikzlibrary{shapes}
\usepackage[export]{adjustbox}
\hfuzz=220pt
@@ -21,6 +23,22 @@
% beamer stuff
\renewcommand{\slidecaption}{PEP (Scala) 03, King's College London}
+\newcommand{\UParrow}[3]{% #1: overlay specification, #2/#3: textblock position
+\begin{textblock}{0}(#2,#3)%
+\onslide<#1>{% only shown on the slides selected by #1
+\begin{tikzpicture}%
+\node at (0,0) [single arrow, shape border rotate=90, fill=red,text=red]{a};% red arrow rotated by 90; the label is red on red
+\end{tikzpicture}}%
+\end{textblock}}
+
+\newcommand{\DOWNarrow}[3]{% #1: overlay specification, #2/#3: textblock position
+\begin{textblock}{0}(#2,#3)%
+\onslide<#1>{% only shown on the slides selected by #1
+\begin{tikzpicture}%
+\node at (0,0) [single arrow, shape border rotate=270, fill=red,text=red]{a};% red arrow rotated by 270; the label is red on red
+\end{tikzpicture}}%
+\end{textblock}}
+
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
@@ -34,39 +52,33 @@
\begin{center}
\begin{tabular}{ll}
Email: & christian.urban at kcl.ac.uk\\
- Office: & N7.07 (North Wing, Bush House)\\
- Slides \& Code: & KEATS\medskip\\
- Office Hours: & \alert{next Monday} 11 -- 12 \& 13 -- 14\\
+ Office: & N\liningnums{7.07} (North Wing, Bush House)\bigskip\\
+ Slides \& Code: & KEATS\bigskip\\
+ Office Hours: & Thursdays 12:00 -- 14:00\\
+ Additionally: & (for Scala) Tuesdays 10:45 -- 11:45\\
\end{tabular}
\end{center}
-
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Preliminary 6}
-\begin{frame}[c]
-\frametitle{Marks for CW6 (Part 1 + 2)}
-
-Raw marks (234 submissions):
+Raw marks (261 submissions):\bigskip
\begin{itemize}
-\item 6\%: \hspace{4mm}163 students
-\item 5\%: \hspace{4mm}29
-\item 4\%: \hspace{4mm}3
-\item 3\%: \hspace{4mm}13
-\item 2\%: \hspace{4mm}3
+\item 3\%: \hspace{4mm}219
+\item 2\%: \hspace{4mm}19
\item 1\%: \hspace{4mm}0
-\item 0\%: \hspace{4mm}23
+\item 0\%: \hspace{4mm}23 \;(4 no submission)
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]
@@ -86,65 +98,139 @@
\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm]
def collatz(n: Long) : Long =
collatzHelper(n, 0)
-\end{lstlisting}\pause
-
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c,fragile]
-
-\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm]
-def collatz_max(bnd: Long) : (Long,Long) = {val lst = for(a<-(1 to bnd.toInt)) yield (collatz(a),a.toLong);val lst2 = lst.sortBy(_._1);lst2(lst2.length-1)}
-\end{lstlisting}\bigskip
-
-\tiny
-\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm]
-def collatz_max(bnd: Long) : (Long,Long) = {val lst = for(a<-(1 to bnd.toInt)) yield (collatz(a),a.toLong);val lst2 = lst.sortBy(_._1);lst2(lst2.length-1)}
-\end{lstlisting}\pause
-
+\end{lstlisting}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]
+\frametitle{Default Arguments}
\small
-\begin{lstlisting}[language=Scala, xleftmargin=-4mm,numbers=left]
- def process_ratings(lines: List[String]) = {
- val values = List[(String,String)]()
+\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-3mm]
+def collatzHelper(n: Int, a: Int = 0) : Int = ...
- for(line <- lines) {
- val splitList = ...
- if(splitList(2).toInt >= 4){
- val userID = splitList(0)
- val movieID = splitList(1)
- val tuple = (userID, movieID)
- tuple :: values
- }
- }
-
- values
- }
+collatzHelper(n, 3)
+collatzHelper(n, 0)
+
+collatzHelper(n) // a = 0
\end{lstlisting}
-\normalsize
-What does this function (always) return?
-
+\DOWNarrow{1}{10.7}{3.4}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Last Week: Options \& HO Funs.}
+\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm]
+List(7,2,3,4,5,6).find(_ < 4)
+res: Option[Int] = Some(2)
+
+
+List(5,6,7,8,9).find(_ < 4)
+res: Option[Int] = None
+
+
+List(1,2,3,4,5).map(x => x * x)
+res: List[Int] = List(1, 4, 9, 16, 25)
+\end{lstlisting}
+
+ \end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Web-Crawler (1)}
+
+\small
+\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-7mm]
+def get_page(url: String) : String = {
+Try(fromURL(url)("ISO-8859-1").take(10000).mkString)
+ .getOrElse { println(s" Problem with: $url"); ""}
+}
+\end{lstlisting}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Web-Crawler (2)}
+
+\small
+\begin{lstlisting}[language=Scala, numbers=none,
+ xleftmargin=-7mm, escapeinside={(*@}{@*)}]
+val http_pattern = """(*@\textcolor{codegreen}{"}@*)https?://[\^(*@\textcolor{codegreen}{"}@*)]*(*@\textcolor{codegreen}{"}@*)""".r
+val email_pattern =
+ """([a-z\d\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+
+def get_all_URLs(page: String): Set[String] =
+ http_pattern.findAllIn(page).map(unquote).toSet
+
+ // returns all URLs in a page
+\end{lstlisting}
+
+ \end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Web-Crawler (3)}
+
+\small
+\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm]
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ println(s" Visiting: $n $url")
+ val page = get_page(url)
+ for (u <- get_all_URLs(page))
+ crawl(u, n - 1)
+ }
+}
+\end{lstlisting}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Email Harvester}
+
+\small
+\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-3mm]
+def emails(url: String, n: Int) : Set[String] = {
+ if (n == 0) Set()
+ else {
+ println(s" Visiting: $n $url")
+ val page = get_page(url)
+ val new_emails =
+ email_pattern.findAllIn(page).toSet
+ new_emails ++
+ (for (u <- get_all_URLs(page))
+ yield emails(u, n - 1)).flatten
+ }
+}
+
+\end{lstlisting}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Jumping Towers}
\begin{center}
-\begin{tikzpicture}[scale=1.2]
+\begin{tikzpicture}[scale=1.3]
\draw[line width=1mm,cap=round] (0,0) -- (5,0);
\draw[line width=1mm,cap=round] (0,1) -- (5,1);
@@ -186,9 +272,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
\begin{frame}[c]
\frametitle{``Children'' / moves}
@@ -205,14 +289,38 @@
\end{tikzpicture}
\end{center}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Reverse Polish Notation}
+
+{\Large\bl{$(3 + 1) * (2 + 9)$}}\bigskip
+
+{\Large$\Rightarrow$}\bigskip
+
+{\;\;\Large\bl{$3\;\;1\;+\;2\;\;9\;+\;*$}}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+\frametitle{Sudoku}
+
+A very simple-minded version on 110 problems:\bigskip
+
+\begin{itemize}
+\item 1 core: 800 secs
+\item 2 cores: 400 secs
+\item 8 cores: 290 secs
+\item 18 cores: 142 secs
+\end{itemize}
\end{frame}
-
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
+
\end{document}
--- a/solutions1/collatz.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/solutions1/collatz.scala Tue Nov 19 00:40:27 2019 +0000
@@ -17,6 +17,9 @@
all.maxBy(_._1)
}
+//collatz_max(1000000)
+//collatz_max(10000000)
+//collatz_max(100000000)
/* some test cases
val bnds = List(10, 100, 1000, 10000, 100000, 1000000)
--- a/testing1/collatz.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/testing1/collatz.scala Tue Nov 19 00:40:27 2019 +0000
@@ -1,51 +1,70 @@
-// Part 1 about the 3n+1 conjecture
-//==================================
+object CW6a {
-// generate jar with
-// > scala -d collatz.jar collatz.scala
-
-object CW6a {
+//(1) Complete the collatz function below. It should
+// recursively calculate the number of steps needed
+// until the collatz series reaches the number 1.
+// If needed, you can use an auxiliary function that
+// performs the recursion. The function should expect
+// arguments in the range of 1 to 1 Million.
-/*
- * def collatz(n: Long): Long =
- if (n == 1) 0 else
- if (n % 2 == 0) 1 + collatz(n / 2) else
- 1 + collatz(3 * n + 1)
-*/
+// def collatz(n: Long) : Long = {
+// if (n == 1) 1 //else
+// // if (n % 2 == 0) {
+// // collatz(n/2)
+// // steps + 1
+// // } //else
+// // if (n % 2 != 0) {
+// // collatz((3 * n) + 1)
+// // steps + 1
+// // }
+// }
+
+// val steps: Long = 1
+// val lst = List()
+// def collatz(n: Long) : Long = {
+// if (n == 1) { steps + 1 }
+// else if (n % 2 == 0) {
+// collatz(n/2);
+// }
+// else {
+// collatz((3 * n) + 1);
+// }
+// steps + 1
+// }
+// collatz(6)
-def collatz_max(bnd: Long): (Long, Long) = {
- val all = for (i <- (1L to bnd)) yield (collatz(i), i)
- all.maxBy(_._1)
+def collatz(n: Long, list: List[Long] = List()): Long = {
+  // `list` collects the values visited before n, so its size is the step count.
+  if (n == 1) {
+    list.size.toLong
+  }
+  else if (n % 2 == 0) {
+    collatz(n / 2, n :: list)        // even: halve
+  }
+  else {
+    collatz((3 * n) + 1, n :: list)  // odd: 3n + 1
+  }
+}
+
+val test = collatz(6)
+
+//(2) Complete the collatz_max function below. It should
+// calculate how many steps are needed for each number
+// from 1 up to a bound and then calculate the maximum number of
+// steps and the corresponding number that needs that many
+// steps. Again, you should expect bounds in the range of 1
+// up to 1 Million. The first component of the pair is
+// the maximum number of steps and the second is the
+// corresponding number.
+
+//def collatz_max(bnd: Long) : (Long, Long) = ...
+def collatz_max(bnd: Long) : (Long, Long) = {
+  // pair each n in 1..bnd with its step count, staying in Long throughout
+  val stepsTable = for (n <- 1L to bnd) yield (collatz(n), n)
+  stepsTable.max   // tuples compare by steps first, then by n
}
-/* some test cases
-val bnds = List(10, 100, 1000, 10000, 100000, 1000000)
-
-for (bnd <- bnds) {
- val (steps, max) = collatz_max(bnd)
- println(s"In the range of 1 - ${bnd} the number ${max} needs the maximum steps of ${steps}")
}
-*/
-
-
-
-
-def collatz(n: Long) : Long = {
- if (n == 1) {
- 1L
- } else {
- if (n % 2 == 0) {
- collatz(n/2) + 1
- } else {
- collatz((n*3)+1) + 1
- }
- }
-}
-
-}
-
-
-
--- a/testing1/drumb_test.sh Tue Nov 12 10:47:27 2019 +0000
+++ b/testing1/drumb_test.sh Tue Nov 19 00:40:27 2019 +0000
@@ -149,8 +149,8 @@
if [ $tsts -eq 0 ]
then
echo -e " val ds = get_deltas(get_prices(List(\"GOOG\", \"AAPL\"), 2010 to 2012))" >> $out
- echo -e " yearly_yield(get_deltas(ds, 100, 0)) == 125" >> $out
- echo -e " yearly_yield(get_deltas(ds, 100, 1)) == 117" >> $out
+ echo -e " yearly_yield(ds, 100, 0) == 125" >> $out
+ echo -e " yearly_yield(ds, 100, 1) == 117" >> $out
if (scala_assert "drumb.scala" "drumb_test6.scala")
then
--- a/testing2/docdiff.scala Tue Nov 12 10:47:27 2019 +0000
+++ b/testing2/docdiff.scala Tue Nov 19 00:40:27 2019 +0000
@@ -2,7 +2,8 @@
//========================================
-object CW7a { // for purposes of generating a jar
+object CW7a {
+
//(1) Complete the clean function below. It should find
// all words in a string using the regular expression
@@ -12,16 +13,39 @@
//
// The words should be Returned as a list of strings.
-def clean(s: String) : List[String] =
- ("""\w+""".r).findAllIn(s).toList
+
+def clean(s: String) : List[String] = {
+  // Let the regular expression do the tokenising: every maximal run of word
+  // characters is one word. Splitting on spaces and re-aligning characters by
+  // hand failed on "", on repeated spaces and on words like "hello,".
+  """\w+""".r.findAllIn(s).toList
+}
+/*
+ A secondary function that takes as parameters @param original, the original word, @param returned, the word left after removing
+ the characters not allowed by a regular expression, and @param i, the index at which to start comparing the characters of the two words.
+ It @return a List of the non-empty substrings of returned that were separated in original by characters the regular expression does not allow.
+*/
+def divide_string_where_different(original: String, returned: String, i : Int): List[String] ={
+  // every character of returned is matched: anything left in original was removed
+  if (i >= returned.length) List(returned).filter(_.nonEmpty)
+  else if (original(i) != returned(i))
+    (returned.substring(0, i) :: divide_string_where_different(original.substring(i+1), returned.substring(i), 0)).filter(_.nonEmpty)
+  else divide_string_where_different(original, returned, i + 1)
+}
//(2) The function occurrences calculates the number of times
// strings occur in a list of strings. These occurrences should
// be calculated as a Map from strings to integers.
-def occurrences(xs: List[String]): Map[String, Int] =
- (for (x <- xs.distinct) yield (x, xs.count(_ == x))).toMap
+
+def occurrences(xs: List[String]): Map[String, Int] = {
+  // group equal strings in a single pass; the size of each group is that
+  // string's number of occurrences (no rescan of xs for every distinct word)
+  xs.groupBy(identity).map { case (word, same) => word -> same.size }
+}
+
+
//(3) This functions calculates the dot-product of two documents
// (list of strings). For this it calculates the occurrence
@@ -29,29 +53,33 @@
// If a string does not occur in a document, the product is zero.
// The function finally sums up all products.
+
def prod(lst1: List[String], lst2: List[String]) : Int = {
- val words = (lst1 ::: lst2).distinct
- val occs1 = occurrences(lst1)
- val occs2 = occurrences(lst2)
- words.map{ w => occs1.getOrElse(w, 0) * occs2.getOrElse(w, 0) }.sum
+  val map1 = occurrences(lst1)
+  val map2 = occurrences(lst2)
+  // Go through a List so that equal count pairs are not merged: mapping a Map
+  // to tuples builds another Map, which silently dropped duplicate pairs.
+  // A word missing from lst2 counts as 0 and so contributes nothing.
+  val products = map1.toList.map { case (word, n1) => n1 * map2.getOrElse(word, 0) }
+  products.sum
+
}
+
//(4) Complete the functions overlap and similarity. The overlap of
// two documents is calculated by the formula given in the assignment
// description. The similarity of two strings is given by the overlap
-// of the cleaned (see (1)) strings.
-
-def overlap(lst1: List[String], lst2: List[String]) : Double = {
- val m1 = prod(lst1, lst1)
- val m2 = prod(lst2, lst2)
- prod(lst1, lst2).toDouble / (List(m1, m2).max)
-}
-
-def similarity(s1: String, s2: String) : Double =
- overlap(clean(s1), clean(s2))
+// of the cleaned strings (see (1)).
-/*
+//def overlap(lst1: List[String], lst2: List[String]) : Double = ...
+
+//def similarity(s1: String, s2: String) : Double = ...
+
+
+
+
+/* Test cases
val list1 = List("a", "b", "b", "c", "d")
@@ -61,6 +89,8 @@
occurrences(List("d", "b", "d", "b", "d")) // Map(d -> 3, b -> 2)
prod(list1,list2) // 7
+prod(list1,list1)
+prod(list2,list2)
overlap(list1, list2) // 0.5384615384615384
overlap(list2, list1) // 0.5384615384615384
@@ -81,7 +111,7 @@
heritage which ensures Australia's capacity to attract international
ecotourists."""
-similarity(orig1, plag1)
+similarity(orig1, plag1) // 0.8679245283018868
// Plagiarism examples from
@@ -105,13 +135,15 @@
recovery: a controversial tactic that is often implemented immediately
following an oil spill."""
-overlap(clean(orig2), clean(plag2))
-similarity(orig2, plag2)
+overlap(clean(orig2), clean(plag2)) // 0.728
+similarity(orig2, plag2) // 0.728
+
+
// The punchline: everything above 0.6 looks suspicious and
-// should be looked at by staff.
+// should be investigated by staff.
*/
+}
-}