# HG changeset patch # User Christian Urban # Date 1574124027 0 # Node ID cdfb2ce30a3d3f851f0306bcea21112aef9654f4 # Parent b84ea52bfd8ffb90c434014bea385c5efa5fa4f3 updated diff -r b84ea52bfd8f -r cdfb2ce30a3d progs/lecture2.scala --- a/progs/lecture2.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/progs/lecture2.scala Tue Nov 19 00:40:27 2019 +0000 @@ -128,7 +128,7 @@ // a function that turns strings into numbers (similar to .toInt) -Integer.parseInt("1234") +Integer.parseInt("12u34") def get_me_an_int(s: String) : Option[Int] = @@ -153,7 +153,11 @@ List(5,6,7,8,9)(-1) - +Try({ + val x = 3 + val y = 0 + Some(x / y) +}).getOrElse(None) // Higher-Order Functions //======================== @@ -165,15 +169,20 @@ def odd(x: Int) : Boolean = x % 2 == 1 val lst = (1 to 10).toList +lst.reverse.sorted + lst.filter(even) -lst.count(even) +lst.count(odd) lst.find(even) +lst.exists(even) -lst.filter(x => x % 2 == 0) +lst.filter(_ < 4) +lst.filter(x => x % 2 == 1) lst.filter(_ % 2 == 0) -lst.sortWith(_ > _) + +lst.sortWith((x, y) => x > y) lst.sortWith(_ < _) // but this only works when the arguments are clear, but @@ -188,7 +197,7 @@ ps.sortWith(lex) -ps.sortBy(_._1) +ps.sortBy(x => x._1) ps.sortBy(_._2) ps.maxBy(_._1) @@ -218,13 +227,18 @@ lst.map(square).filter(_ > 4) -lst.map(square).filter(_ > 4).map(square) +(lst.map(square) + .filter(_ > 4) + .map(square)) // lets define our own higher-order functions // type of functions is for example Int => Int +0 :: List(3,4,5,6) + + def my_map_int(lst: List[Int], f: Int => Int) : List[Int] = { if (lst == Nil) Nil else f(lst.head) :: my_map_int(lst.tail, f) @@ -250,9 +264,17 @@ // ... val lst = (1 to 10).toList -def sumOf(f: Int => Int, lst: List[Int]): Int = lst match { +lst.sum + +val lst = List(1,2,3,4) + +lst.head +lst.tail + +def sumOf(f: Int => Int, lst: List[Int]): Int = +lst match { case Nil => 0 - case x::xs => f(x) + sumOf(f, xs) + case x::foo => f(x) + sumOf(f, foo) } def sum_squares(lst: List[Int]) = sumOf(square, lst) @@ -305,9 +327,18 @@ // maps on Options -get_me_an_int("1234").map(even) +get_me_an_int("12345").map(even) get_me_an_int("12u34").map(even) +def my_map_option(o: Option[Int], f : Int => Int) : Option[Int] = { +o match { + case None => None + case Some(foo) => Some(f(foo)) +}} + +my_map_option(Some(4), square) +my_map_option(None, square) + // Map type (upper-case) @@ -315,6 +346,11 @@ // Note the difference between map and Map +val ascii = ('a' to 'z').map(c => (c, c.toInt)).toList + +val ascii_Map = ascii.toMap + + def factors(n: Int) : List[Int] = (2 until n).toList.filter(n % _ == 0) @@ -327,7 +363,7 @@ facs.toMap -facs.toMap.get(4) +facs.toMap.get(40) facs.toMap.getOrElse(42, Nil) val facsMap = facs.toMap @@ -344,7 +380,7 @@ val ls = List("one", "two", "three", "four", "five") ls.groupBy(_.length) -ls.groupBy(_.length).get(3) +ls.groupBy(_.length).get(5) @@ -369,7 +405,8 @@ // recall val lst = List(None, Some(1), Some(2), None, Some(3)).flatten -def my_flatten(xs: List[Option[Int]]): List[Int] = xs match { +def my_flatten(xs: List[Option[Int]]): List[Int] = +xs match { case Nil => Nil case None::rest => my_flatten(rest) case Some(v)::rest => v :: my_flatten(rest) @@ -381,10 +418,9 @@ // another example with a default case def get_me_a_string(n: Int): String = n match { case 0 | 1 | 2 => "small" - case _ => "big" } -get_me_a_string(0) +get_me_a_string(3) // you can also have cases combined @@ -446,6 +482,15 @@ aaa, baa, caa, and so on...... */ +def combs(cs: List[Char], n: Int) : List[String] = { + if (n == 0) List("") + else for (c <- cs; s <- combs(cs, n - 1)) yield s"$c$s" +} + +combs(List('a', 'b', 'c'), 3) + + + def combs(cs: List[Char], l: Int) : List[String] = { if (l == 0) List("") else for (c <- cs; s <- combs(cs, l - 1)) yield s"$c$s" @@ -454,22 +499,6 @@ combs("abc".toList, 2) -// another well-known example - -def move(from: Char, to: Char) = - println(s"Move disc from $from to $to!") - -def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = { - if (n == 0) () - else { - hanoi(n - 1, from, to, via) - move(from, to) - hanoi(n - 1, via, from, to) - } -} - -hanoi(4, 'A', 'B', 'C') - // A Recursive Web Crawler / Email Harvester //=========================================== diff -r b84ea52bfd8f -r cdfb2ce30a3d progs/lecture3.scala --- a/progs/lecture3.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/progs/lecture3.scala Tue Nov 19 00:40:27 2019 +0000 @@ -1,6 +1,16 @@ // Scala Lecture 3 //================= +// - last week +// +// option type +// higher-order function + + + +// Recursion Again ;o) +//==================== + // A Web Crawler / Email Harvester //================================= @@ -15,7 +25,7 @@ // gets the first 10K of a web-page def get_page(url: String) : String = { Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). - getOrElse { println(s" Problem with: $url"); ""} + getOrElse { println(s" Problem with: $url"); ""} } // regex for URLs and emails @@ -31,345 +41,61 @@ def get_all_URLs(page: String): Set[String] = http_pattern.findAllIn(page).map(unquote).toSet -// naive version of crawl - searches until a given depth, +// a naive version of crawl - searches until a given depth, // visits pages potentially more than once - -def crawl(url: String, n: Int) : Set[String] = { - if (n == 0) Set() +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () else { println(s" Visiting: $n $url") - val page = get_page(url) - val new_emails = email_pattern.findAllIn(page).toSet - new_emails ++ - (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) } } // some starting URLs for the crawler val startURL = """https://nms.kcl.ac.uk/christian.urban/""" + crawl(startURL, 2) -// User-defined Datatypes -//======================== - - -abstract class Colour -case object Red extends Colour -case object Green extends Colour -case object Blue extends Colour - -def fav_colour(c: Colour) : Boolean = c match { - case Red => false - case Green => true - case Blue => false -} - -fav_colour(Green) - - -// ... a tiny bit more useful: Roman Numerals - -abstract class RomanDigit -case object I extends RomanDigit -case object V extends RomanDigit -case object X extends RomanDigit -case object L extends RomanDigit -case object C extends RomanDigit -case object D extends RomanDigit -case object M extends RomanDigit - -type RomanNumeral = List[RomanDigit] - -List(X,I) - -/* -I -> 1 -II -> 2 -III -> 3 -IV -> 4 -V -> 5 -VI -> 6 -VII -> 7 -VIII -> 8 -IX -> 9 -X -> X -*/ - -def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { - case Nil => 0 - case M::r => 1000 + RomanNumeral2Int(r) - case C::M::r => 900 + RomanNumeral2Int(r) - case D::r => 500 + RomanNumeral2Int(r) - case C::D::r => 400 + RomanNumeral2Int(r) - case C::r => 100 + RomanNumeral2Int(r) - case X::C::r => 90 + RomanNumeral2Int(r) - case L::r => 50 + RomanNumeral2Int(r) - case X::L::r => 40 + RomanNumeral2Int(r) - case X::r => 10 + RomanNumeral2Int(r) - case I::X::r => 9 + RomanNumeral2Int(r) - case V::r => 5 + RomanNumeral2Int(r) - case I::V::r => 4 + RomanNumeral2Int(r) - case I::r => 1 + RomanNumeral2Int(r) -} - -RomanNumeral2Int(List(I,V)) // 4 -RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) -RomanNumeral2Int(List(V,I)) // 6 -RomanNumeral2Int(List(I,X)) // 9 -RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 -RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 - - -// another example -//================= - -// Once upon a time, in a complete fictional -// country there were Persons... - - -abstract class Person -case object King extends Person -case class Peer(deg: String, terr: String, succ: Int) extends Person -case class Knight(name: String) extends Person -case class Peasant(name: String) extends Person - - -def title(p: Person): String = p match { - case King => "His Majesty the King" - case Peer(deg, terr, _) => s"The ${deg} of ${terr}" - case Knight(name) => s"Sir ${name}" - case Peasant(name) => name -} - -def superior(p1: Person, p2: Person): Boolean = (p1, p2) match { - case (King, _) => true - case (Peer(_,_,_), Knight(_)) => true - case (Peer(_,_,_), Peasant(_)) => true - case (Peer(_,_,_), Clown) => true - case (Knight(_), Peasant(_)) => true - case (Knight(_), Clown) => true - case (Clown, Peasant(_)) => true - case _ => false -} - -val people = List(Knight("David"), - Peer("Duke", "Norfolk", 84), - Peasant("Christian"), - King, - Clown) - -println(people.sortWith(superior).mkString("\n")) - - -// String interpolations as patterns - -val date = "2000-01-01" -val s"$year-$month-$day" = date - -def parse_date(date: String) = date match { - case s"$year-$month-$day" => Some((year.toInt, month.toInt, day.toInt)) - case s"$day/$month/$year" => Some((year.toInt, month.toInt, day.toInt)) - case _ => None -} - - - - -// User-defined Datatypes and Pattern Matching -//============================================= - - -abstract class Exp -case class N(n: Int) extends Exp // for numbers -case class Plus(e1: Exp, e2: Exp) extends Exp -case class Times(e1: Exp, e2: Exp) extends Exp - -def string(e: Exp) : String = e match { - case N(n) => n.toString - case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" - case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" -} - -val e = Plus(N(9), Times(N(3), N(4))) -println(string(e)) - -def eval(e: Exp) : Int = e match { - case N(n) => n - case Plus(e1, e2) => eval(e1) + eval(e2) - case Times(e1, e2) => eval(e1) * eval(e2) -} - -def simp(e: Exp) : Exp = e match { - case N(n) => N(n) - case Plus(e1, e2) => (simp(e1), simp(e2)) match { - case (N(0), e2s) => e2s - case (e1s, N(0)) => e1s - case (e1s, e2s) => Plus(e1s, e2s) - } - case Times(e1, e2) => (simp(e1), simp(e2)) match { - case (N(0), _) => N(0) - case (_, N(0)) => N(0) - case (N(1), e2s) => e2s - case (e1s, N(1)) => e1s - case (e1s, e2s) => Times(e1s, e2s) - } +// a primitive email harvester +def emails(url: String, n: Int) : Set[String] = { + if (n == 0) Set() + else { + println(s" Visiting: $n $url") + val page = get_page(url) + val new_emails = email_pattern.findAllIn(page).toSet + new_emails ++ (for (u <- get_all_URLs(page)) yield emails(u, n - 1)).flatten + } } -println(eval(e)) - -val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) -println(string(e2)) -println(string(simp(e2))) +emails(startURL, 2) -// Tokens and Reverse Polish Notation -abstract class Token -case class T(n: Int) extends Token -case object PL extends Token -case object TI extends Token -def rp(e: Exp) : List[Token] = e match { - case N(n) => List(T(n)) - case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) - case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) -} -println(string(e2)) -println(rp(e2)) - -def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match { - case (Nil, st) => st.head - case (T(n)::rest, st) => comp(rest, n::st) - case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) - case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) -} - -comp(rp(e), Nil) - -def proc(s: String) : Token = s match { - case "+" => PL - case "*" => TI - case _ => T(s.toInt) -} - -comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil) - +// if we want to explore the internet "deeper", then we +// first have to parallelise the request of webpages: +// +// scala -cp scala-parallel-collections_2.13-0.2.0.jar +// import scala.collection.parallel.CollectionConverters._ -def string(e: Exp) : String = e match { - case N(n) => n.toString - case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" - case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" -} - -val e = Plus(N(9), Times(N(3), N(4))) - -println(string(e)) - -def eval(e: Exp) : Int = e match { - case N(n) => n - case Plus(e1, e2) => eval(e1) + eval(e2) - case Times(e1, e2) => eval(e1) * eval(e2) -} - -eval(e) +// another well-known example +//============================ -def simp(e: Exp) : Exp = e match { - case N(n) => N(n) - case Plus(e1, e2) => (simp(e1), simp(e2)) match { - case (N(0), e2s) => e2s - case (e1s, N(0)) => e1s - case (e1s, e2s) => Plus(e1s, e2s) - } - case Times(e1, e2) => (simp(e1), simp(e2)) match { - case (N(0), e2s) => N(0) - case (e1s, N(0)) => N(0) - case (N(1), e2s) => e2s - case (e1s, N(1)) => e1s - case (e1s, e2s) => Times(e1s, e2s) - } -} - - -val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) -println(string(e2)) -println(string(simp(e2))) - -// Token and Reverse Polish Notation -abstract class Token -case class T(n: Int) extends Token -case object PL extends Token -case object TI extends Token - -def rp(e: Exp) : List[Token] = e match { - case N(n) => List(T(n)) - case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) - case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) -} +def move(from: Char, to: Char) = + println(s"Move disc from $from to $to!") -def comp(ts: List[Token], stk: List[Int]) : Int = (ts, stk) match { - case (Nil, st) => st.head - case (T(n)::rest, st) => comp(rest, n::st) - case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) - case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) -} - -def exp(ts: List[Token], st: List[Exp]) : Exp = (ts, st) match { - case (Nil, st) => st.head - case (T(n)::rest, st) => exp(rest, N(n)::st) - case (PL::rest, n1::n2::st) => exp(rest, Plus(n2, n1)::st) - case (TI::rest, n1::n2::st) => exp(rest, Times(n2, n1)::st) -} - -exp(toks(e2), Nil) - -def proc(s: String) = s match { - case "+" => PL - case "*" => TI - case n => T(n.toInt) -} - - -string(exp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)) - - - -// Tail recursion -//================ - +def hanoi(n: Int, from: Char, via: Char, to: Char) : Unit = { + if (n == 0) () + else { + hanoi(n - 1, from, to, via) + move(from, to) + hanoi(n - 1, via, from, to) + } +} -def fact(n: Long): Long = - if (n == 0) 1 else n * fact(n - 1) - -def factB(n: BigInt): BigInt = - if (n == 0) 1 else n * factB(n - 1) - -factB(100000) - -fact(10) //ok -fact(10000) // produces a stackoverflow - -def factT(n: BigInt, acc: BigInt): BigInt = - if (n == 0) acc else factT(n - 1, n * acc) - -factT(10, 1) -println(factT(100000, 1)) - -// there is a flag for ensuring a function is tail recursive -import scala.annotation.tailrec - -@tailrec -def factT(n: BigInt, acc: BigInt): BigInt = - if (n == 0) acc else factT(n - 1, n * acc) - - - -// for tail-recursive functions the Scala compiler -// generates loop-like code, which does not need -// to allocate stack-space in each recursive -// call; Scala can do this only for tail-recursive -// functions +hanoi(4, 'A', 'B', 'C') @@ -380,8 +106,6 @@ // the first n prefixes of xs // for 1 => include xs - - def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match { case (Nil, _) => Nil case (xs, 0) => Nil @@ -424,12 +148,12 @@ case Nil => Nil case (x::xs) => { val children = moves(xs, x) - val results = children.map((cs) => jumps(cs).map(x :: _)).flatten - if (xs.length < x) List(x) :: results else results + val results = children.map(cs => jumps(cs).map(x :: _)).flatten + if (xs.length < x) List(x)::results else results } } -println(jumps(List(5,3,2,5,1,1)).minBy(_.length)) +jumps(List(5,3,2,5,1,1)).minBy(_.length) jumps(List(3,5,1,2,1,2,1)) jumps(List(3,5,1,2,3,4,1)) jumps(List(3,5,1,0,0,0,1)) @@ -449,37 +173,176 @@ -// Tail Recursion -//================ - - -def fact(n: Long): Long = - if (n == 0) 1 else n * fact(n - 1) - -fact(10) //ok -fact(10000) // produces a stackoverflow - -def factT(n: BigInt, acc: BigInt): BigInt = - if (n == 0) acc else factT(n - 1, n * acc) - -factT(10, 1) -factT(100000, 1) - -// there is a flag for ensuring a function is tail recursive -import scala.annotation.tailrec - -@tailrec -def factT(n: BigInt, acc: BigInt): BigInt = - if (n == 0) acc else factT(n - 1, n * acc) -// for tail-recursive functions the Scala compiler -// generates loop-like code, which does not need -// to allocate stack-space in each recursive -// call; Scala can do this only for tail-recursive -// functions +// User-defined Datatypes +//======================== + + +abstract class Colour +case object Red extends Colour +case object Green extends Colour +case object Blue extends Colour + + +def fav_colour(c: Colour) : Boolean = c match { + case Red => false + case Green => true + case Blue => false +} + +fav_colour(Green) + +// ... a tiny bit more useful: Roman Numerals + +abstract class RomanDigit +case object I extends RomanDigit +case object V extends RomanDigit +case object X extends RomanDigit +case object L extends RomanDigit +case object C extends RomanDigit +case object D extends RomanDigit +case object M extends RomanDigit + +type RomanNumeral = List[RomanDigit] + +List(X,I) + +/* +I -> 1 +II -> 2 +III -> 3 +IV -> 4 +V -> 5 +VI -> 6 +VII -> 7 +VIII -> 8 +IX -> 9 +X -> 10 +*/ + +def RomanNumeral2Int(rs: RomanNumeral): Int = rs match { + case Nil => 0 + case M::r => 1000 + RomanNumeral2Int(r) + case C::M::r => 900 + RomanNumeral2Int(r) + case D::r => 500 + RomanNumeral2Int(r) + case C::D::r => 400 + RomanNumeral2Int(r) + case C::r => 100 + RomanNumeral2Int(r) + case X::C::r => 90 + RomanNumeral2Int(r) + case L::r => 50 + RomanNumeral2Int(r) + case X::L::r => 40 + RomanNumeral2Int(r) + case X::r => 10 + RomanNumeral2Int(r) + case I::X::r => 9 + RomanNumeral2Int(r) + case V::r => 5 + RomanNumeral2Int(r) + case I::V::r => 4 + RomanNumeral2Int(r) + case I::r => 1 + RomanNumeral2Int(r) +} + +RomanNumeral2Int(List(I,V)) // 4 +RomanNumeral2Int(List(I,I,I,I)) // 4 (invalid Roman number) +RomanNumeral2Int(List(V,I)) // 6 +RomanNumeral2Int(List(I,X)) // 9 +RomanNumeral2Int(List(M,C,M,L,X,X,I,X)) // 1979 +RomanNumeral2Int(List(M,M,X,V,I,I)) // 2017 + + +// String interpolations as patterns + +val date = "2019-11-26" +val s"$year-$month-$day" = date + +def parse_date(date: String) : Option[(Int, Int, Int)]= date match { + case s"$year-$month-$day" => Some((day.toInt, month.toInt, year.toInt)) + case s"$day/$month/$year" => Some((day.toInt, month.toInt, year.toInt)) + case s"$day.$month.$year" => Some((day.toInt, month.toInt, year.toInt)) + case _ => None +} +parse_date("2019-11-26") +parse_date("26/11/2019") +parse_date("26.11.2019") + + +// User-defined Datatypes and Pattern Matching +//============================================= + +// trees + +abstract class Exp +case class N(n: Int) extends Exp // for numbers +case class Plus(e1: Exp, e2: Exp) extends Exp +case class Times(e1: Exp, e2: Exp) extends Exp + +def string(e: Exp) : String = e match { + case N(n) => s"$n" + case Plus(e1, e2) => s"(${string(e1)} + ${string(e2)})" + case Times(e1, e2) => s"(${string(e1)} * ${string(e2)})" +} + +val e = Plus(N(9), Times(N(3), N(4))) +println(string(e)) + +def eval(e: Exp) : Int = e match { + case N(n) => n + case Plus(e1, e2) => eval(e1) + eval(e2) + case Times(e1, e2) => eval(e1) * eval(e2) +} + +println(eval(e)) + +def simp(e: Exp) : Exp = e match { + case N(n) => N(n) + case Plus(e1, e2) => (simp(e1), simp(e2)) match { + case (N(0), e2s) => e2s + case (e1s, N(0)) => e1s + case (e1s, e2s) => Plus(e1s, e2s) + } + case Times(e1, e2) => (simp(e1), simp(e2)) match { + case (N(0), _) => N(0) + case (_, N(0)) => N(0) + case (N(1), e2s) => e2s + case (e1s, N(1)) => e1s + case (e1s, e2s) => Times(e1s, e2s) + } +} + + +val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9))) +println(string(e2)) +println(string(simp(e2))) + + +// Tokens and Reverse Polish Notation +abstract class Token +case class T(n: Int) extends Token +case object PL extends Token +case object TI extends Token + +def rp(e: Exp) : List[Token] = e match { + case N(n) => List(T(n)) + case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) + case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) +} +println(string(e2)) +println(rp(e2)) + +def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match { + case (Nil, st) => st.head + case (T(n)::rest, st) => comp(rest, n::st) + case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st) + case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st) +} + +comp(rp(e), Nil) + +def proc(s: String) : Token = s match { + case "+" => PL + case "*" => TI + case _ => T(s.toInt) +} + +comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil) @@ -555,7 +418,7 @@ if (isDone(game)) List(game) else { val cs = candidates(game, emptyPosition(game)) - cs.par.map(c => search(update(game, empty(game), c))).toList.flatten + cs.map(c => search(update(game, empty(game), c))).toList.flatten } } @@ -609,6 +472,45 @@ time_needed(1, search(game2)) + + + +// Tail recursion +//================ + + +def fact(n: Long): Long = + if (n == 0) 1 else n * fact(n - 1) + +def factB(n: BigInt): BigInt = + if (n == 0) 1 else n * factB(n - 1) + +factB(100000) + +fact(10) //ok +fact(10000) // produces a stackoverflow + +def factT(n: BigInt, acc: BigInt): BigInt = + if (n == 0) acc else factT(n - 1, n * acc) + +factT(10, 1) +println(factT(100000, 1)) + +// there is a flag for ensuring a function is tail recursive +import scala.annotation.tailrec + +@tailrec +def factT(n: BigInt, acc: BigInt): BigInt = + if (n == 0) acc else factT(n - 1, n * acc) + + + +// for tail-recursive functions the Scala compiler +// generates loop-like code, which does not need +// to allocate stack-space in each recursive +// call; Scala can do this only for tail-recursive +// functions + // tail recursive version that searches // for all solutions diff -r b84ea52bfd8f -r cdfb2ce30a3d progs/lecture4.scala --- a/progs/lecture4.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/progs/lecture4.scala Tue Nov 19 00:40:27 2019 +0000 @@ -22,7 +22,6 @@ length_string_list(List("1", "2", "3", "4")) length_int_list(List(1, 2, 3, 4)) -//----- def length[A](lst: List[A]): Int = lst match { case Nil => 0 case x::xs => 1 + length(xs) diff -r b84ea52bfd8f -r cdfb2ce30a3d progs/sudoku.scala --- a/progs/sudoku.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/progs/sudoku.scala Tue Nov 19 00:40:27 2019 +0000 @@ -78,11 +78,9 @@ ".6.5.4.3.1...9...8.........9...5...6.4.6.2.7.7...4...5.........4...8...1.5.2.3.4.", "7.....4...2..7..8...3..8.799..5..3...6..2..9...1.97..6...3..9...3..4..6...9..1.35", "....7..2.8.......6.1.2.5...9.54....8.........3....85.1...3.2.8.4.......9.7..6....", -// "4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......", "52...6.........7.13...........4..8..6......5...........418.........3..2...87.....", "6.....8.3.4.7.................5.4.7.3..2.....1.6.......2.....5.....8.6......1....", "48.3............71.2.......7.5....6....2..8.............1.76...3.....4......5....", -// "....14....3....2...7..........9...3.6.1.............8.2.....1.4....5.6.....7.8...", "......52..8.4......3...9...5.1...6..2..7........3.....6...1..........7.4.......3.", "6.2.5.........3.4..........43...8....1....2........7..5..27...........81...6.....", ".524.........7.1..............8.2...3.....6...9.5.....1.6.3...........897........", @@ -100,7 +98,6 @@ "..84...3....3.....9....157479...8........7..514.....2...9.6...2.5....4......9..56", ".98.1....2......6.............3.2.5..84.........6.........4.8.93..5...........1..", "..247..58..............1.4.....2...9528.9.4....9...1.........3.3....75..685..2...", -// "4.....8.5.3..........7......2.....6.....5.4......1.......6.3.7.5..2.....1.9......", ".2.3......63.....58.......15....9.3....7........1....8.879..26......6.7...6..7..4", "1.....7.9.4...72..8.........7..1..6.3.......5.6..4..2.........8..53...7.7.2....46", "4.....3.....8.2......7........1...8734.......6........5...6........1.4...82......", @@ -113,12 +110,10 @@ ".2.......3.5.62..9.68...3...5..........64.8.2..47..9....3.....1.....6...17.43....", ".8..4....3......1........2...5...4.69..1..8..2...........3.9....6....5.....2.....", "..8.9.1...6.5...2......6....3.1.7.5.........9..4...3...5....2...7...3.8.2..7....4", -// "4.....5.8.3..........7......2.....6.....5.8......1.......6.3.7.5..2.....1.8......", "1.....3.8.6.4..............2.3.1...........958.........5.6...7.....8.2...4.......", "1....6.8..64..........4...7....9.6...7.4..5..5...7.1...5....32.3....8...4........", "249.6...3.3....2..8.......5.....6......2......1..4.82..9.5..7....4.....1.7...3...", "...8....9.873...4.6..7.......85..97...........43..75.......3....3...145.4....2..1", -// "...5.1....9....8...6.......4.1..........7..9........3.8.....1.5...2..4.....36....", "......8.16..2........7.5......6...2..1....3...8.......2......7..3..8....5...4....", ".476...5.8.3.....2.....9......8.5..6...1.....6.24......78...51...6....4..9...4..7", ".....7.95.....1...86..2.....2..73..85......6...3..49..3.5...41724................", @@ -198,7 +193,7 @@ // 1 single thread version 800 secs -// 4 cores parallel version on moderate laptop 400 secs -// 8 cores (4 physical + 4 hyperthread): 290 secs -// 36 cores (18 physical + 18 hyperthread): 142 secs +// 4 cores parallel version on a moderate laptop 400 secs +// 8 cores: 290 secs +// 18 cores: 142 secs diff -r b84ea52bfd8f -r cdfb2ce30a3d slides/slides03.pdf Binary file slides/slides03.pdf has changed diff -r b84ea52bfd8f -r cdfb2ce30a3d slides/slides03.tex --- a/slides/slides03.tex Tue Nov 12 10:47:27 2019 +0000 +++ b/slides/slides03.tex Tue Nov 19 00:40:27 2019 +0000 @@ -1,8 +1,10 @@ +% !TEX program = xelatex \documentclass[dvipsnames,14pt,t,xelatex]{beamer} \usepackage{../slides} \usepackage{../graphics} \usepackage{../langs} %%\usepackage{../data} +\usetikzlibrary{shapes} \usepackage[export]{adjustbox} \hfuzz=220pt @@ -21,6 +23,22 @@ % beamer stuff \renewcommand{\slidecaption}{PEP (Scala) 03, King's College London} +\newcommand{\UParrow}[3]{% +\begin{textblock}{0}(#2,#3)% +\onslide<#1>{% +\begin{tikzpicture}% +\node at (0,0) [single arrow, shape border rotate=90, fill=red,text=red]{a};% +\end{tikzpicture}}% +\end{textblock}} + +\newcommand{\DOWNarrow}[3]{% +\begin{textblock}{0}(#2,#3)% +\onslide<#1>{% +\begin{tikzpicture}% +\node at (0,0) [single arrow, shape border rotate=270, fill=red,text=red]{a};% +\end{tikzpicture}}% +\end{textblock}} + \begin{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] @@ -34,39 +52,33 @@ \begin{center} \begin{tabular}{ll} Email: & christian.urban at kcl.ac.uk\\ - Office: & N7.07 (North Wing, Bush House)\\ - Slides \& Code: & KEATS\medskip\\ - Office Hours: & \alert{next Monday} 11 -- 12 \& 13 -- 14\\ + Office: & N\liningnums{7.07} (North Wing, Bush House)\bigskip\\ + Slides \& Code: & KEATS\bigskip\\ + Office Hours: & Thursdays 12:00 -- 14:00\\ + Additionally: & (for Scala) Tuesdays 10:45 -- 11:45\\ \end{tabular} \end{center} - \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Preliminary 6} -\begin{frame}[c] -\frametitle{Marks for CW6 (Part 1 + 2)} - -Raw marks (234 submissions): +Raw marks (261 submissions):\bigskip \begin{itemize} -\item 6\%: \hspace{4mm}163 students -\item 5\%: \hspace{4mm}29 -\item 4\%: \hspace{4mm}3 -\item 3\%: \hspace{4mm}13 -\item 2\%: \hspace{4mm}3 +\item 3\%: \hspace{4mm}219 +\item 2\%: \hspace{4mm}19 \item 1\%: \hspace{4mm}0 -\item 0\%: \hspace{4mm}23 +\item 0\%: \hspace{4mm}23 \;(4 no submission) \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c,fragile] @@ -86,65 +98,139 @@ \begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm] def collatz(n: Long) : Long = collatzHelper(n, 0) -\end{lstlisting}\pause - - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c,fragile] - -\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm] -def collatz_max(bnd: Long) : (Long,Long) = {val lst = for(a<-(1 to bnd.toInt)) yield (collatz(a),a.toLong);val lst2 = lst.sortBy(_._1);lst2(lst2.length-1)} -\end{lstlisting}\bigskip - -\tiny -\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm] -def collatz_max(bnd: Long) : (Long,Long) = {val lst = for(a<-(1 to bnd.toInt)) yield (collatz(a),a.toLong);val lst2 = lst.sortBy(_._1);lst2(lst2.length-1)} -\end{lstlisting}\pause - +\end{lstlisting} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c,fragile] +\frametitle{Default Arguments} \small -\begin{lstlisting}[language=Scala, xleftmargin=-4mm,numbers=left] - def process_ratings(lines: List[String]) = { - val values = List[(String,String)]() +\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-3mm] +def collatzHelper(n: Int, a: Int = 0) : Int = ... - for(line <- lines) { - val splitList = ... - if(splitList(2).toInt >= 4){ - val userID = splitList(0) - val movieID = splitList(1) - val tuple = (userID, movieID) - tuple :: values - } - } - - values - } +collatzHelper(n, 3) +collatzHelper(n, 0) + +collatzHelper(n) // a = 0 \end{lstlisting} -\normalsize -What does this function (always) return? - +\DOWNarrow{1}{10.7}{3.4} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Last Week: Options \& HO Funs.} +\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm] +List(7,2,3,4,5,6).find(_ < 4) +res: Option[Int] = Some(2) + + +List(5,6,7,8,9).find(_ < 4) +res: Option[Int] = None + + +List(1,2,3,4,5).map(x => x * x) +res: List[Int] = List(1, 4, 9, 16, 25) +\end{lstlisting} + + \end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Web-Crawler (1)} + +\small +\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-7mm] +def get_page(url: String) : String = { +Try(fromURL(url)("ISO-8859-1").take(10000).mkString) + .getOrElse { println(s" Problem with: $url"); ""} +} +\end{lstlisting} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Web-Crawler (2)} + +\small +\begin{lstlisting}[language=Scala, numbers=none, + xleftmargin=-7mm, escapeinside={(*@}{@*)}] +val http_pattern = """(*@\textcolor{codegreen}{"}@*)https?://[\^(*@\textcolor{codegreen}{"}@*)]*(*@\textcolor{codegreen}{"}@*)""".r +val email_pattern = + """([a-z\d\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + + +def unquote(s: String) = s.drop(1).dropRight(1) + + +def get_all_URLs(page: String): Set[String] = + http_pattern.findAllIn(page).map(unquote).toSet + + // returns all URLs in a page +\end{lstlisting} + + \end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Web-Crawler (3)} + +\small +\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-1mm] +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println(s" Visiting: $n $url") + val page = get_page(url) + for (u <- get_all_URLs(page)) + crawl(u, n - 1) + } +} +\end{lstlisting} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Email Harvester} + +\small +\begin{lstlisting}[language=Scala, numbers=none, xleftmargin=-3mm] +def emails(url: String, n: Int) : Set[String] = { + if (n == 0) Set() + else { + println(s" Visiting: $n $url") + val page = get_page(url) + val new_emails = + email_pattern.findAllIn(page).toSet + new_emails ++ + (for (u <- get_all_URLs(page)) + yield emails(u, n - 1)).flatten + } +} + +\end{lstlisting} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \frametitle{Jumping Towers} \begin{center} -\begin{tikzpicture}[scale=1.2] +\begin{tikzpicture}[scale=1.3] \draw[line width=1mm,cap=round] (0,0) -- (5,0); \draw[line width=1mm,cap=round] (0,1) -- (5,1); @@ -186,9 +272,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \begin{frame}[c] \frametitle{``Children'' / moves} @@ -205,14 +289,38 @@ \end{tikzpicture} \end{center} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Reverse Polish Notation} + +{\Large\bl{$(3 + 1) * (2 + 9)$}}\bigskip + +{\Large$\Rightarrow$}\bigskip + +{\;\;\Large\bl{$3\;\;1\;+\;2\;\;9\;+\;*$}} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c,fragile] +\frametitle{Sudoku} + +A very simple-minded version on 110 problems:\bigskip + +\begin{itemize} +\item 1 core: 800 secs +\item 2 cores: 400 secs +\item 8 cores: 290 secs +\item 18 cores: 142 secs +\end{itemize} \end{frame} - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - + \end{document} diff -r b84ea52bfd8f -r cdfb2ce30a3d solutions1/collatz.scala --- a/solutions1/collatz.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/solutions1/collatz.scala Tue Nov 19 00:40:27 2019 +0000 @@ -17,6 +17,9 @@ all.maxBy(_._1) } +//collatz_max(1000000) +//collatz_max(10000000) +//collatz_max(100000000) /* some test cases val bnds = List(10, 100, 1000, 10000, 100000, 1000000) diff -r b84ea52bfd8f -r cdfb2ce30a3d testing1/collatz.scala --- a/testing1/collatz.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/testing1/collatz.scala Tue Nov 19 00:40:27 2019 +0000 @@ -1,51 +1,70 @@ -// Part 1 about the 3n+1 conjecture -//================================== +object CW6a { -// generate jar with -// > scala -d collatz.jar collatz.scala - -object CW6a { +//(1) Complete the collatz function below. It should +// recursively calculate the number of steps needed +// until the collatz series reaches the number 1. +// If needed, you can use an auxiliary function that +// performs the recursion. The function should expect +// arguments in the range of 1 to 1 Million. -/* - * def collatz(n: Long): Long = - if (n == 1) 0 else - if (n % 2 == 0) 1 + collatz(n / 2) else - 1 + collatz(3 * n + 1) -*/ +// def collatz(n: Long) : Long = { +// if (n == 1) 1 //else +// // if (n % 2 == 0) { +// // collatz(n/2) +// // steps + 1 +// // } //else +// // if (n % 2 != 0) { +// // collatz((3 * n) + 1) +// // steps + 1 +// // } +// } + +// val steps: Long = 1 +// val lst = List() +// def collatz(n: Long) : Long = { +// if (n == 1) { steps + 1 } +// else if (n % 2 == 0) { +// collatz(n/2); +// } +// else { +// collatz((3 * n) + 1); +// } +// steps + 1 +// } +// collatz(6) -def collatz_max(bnd: Long): (Long, Long) = { - val all = for (i <- (1L to bnd)) yield (collatz(i), i) - all.maxBy(_._1) +def collatz(n: Long, list: List[Long] = List()): Long = { + if (n == 1) { + n :: list + list.size.toLong + } + else if (n % 2 == 0) { + collatz(n / 2, n :: list) + } + else { + collatz((3 * n) + 1, n :: list) + } +} + +val test = collatz(6) + +//(2) Complete the collatz_max function below. It should +// calculate how many steps are needed for each number +// from 1 up to a bound and then calculate the maximum number of +// steps and the corresponding number that needs that many +// steps. Again, you should expect bounds in the range of 1 +// up to 1 Million. The first component of the pair is +// the maximum number of steps and the second is the +// corresponding number. + +//def collatz_max(bnd: Long) : (Long, Long) = ... +def collatz_max(bnd: Long) : (Long, Long) = { + val stepsTable = for (n <- (1 to bnd.toInt).toList) yield (collatz(n), n.toLong) + //println(stepsTable) + stepsTable.max } -/* some test cases -val bnds = List(10, 100, 1000, 10000, 100000, 1000000) - -for (bnd <- bnds) { - val (steps, max) = collatz_max(bnd) - println(s"In the range of 1 - ${bnd} the number ${max} needs the maximum steps of ${steps}") } -*/ - - - - -def collatz(n: Long) : Long = { - if (n == 1) { - 1L - } else { - if (n % 2 == 0) { - collatz(n/2) + 1 - } else { - collatz((n*3)+1) + 1 - } - } -} - -} - - - diff -r b84ea52bfd8f -r cdfb2ce30a3d testing1/drumb_test.sh --- a/testing1/drumb_test.sh Tue Nov 12 10:47:27 2019 +0000 +++ b/testing1/drumb_test.sh Tue Nov 19 00:40:27 2019 +0000 @@ -149,8 +149,8 @@ if [ $tsts -eq 0 ] then echo -e " val ds = get_deltas(get_prices(List(\"GOOG\", \"AAPL\"), 2010 to 2012))" >> $out - echo -e " yearly_yield(get_deltas(ds, 100, 0)) == 125" >> $out - echo -e " yearly_yield(get_deltas(ds, 100, 1)) == 117" >> $out + echo -e " yearly_yield(ds, 100, 0) == 125" >> $out + echo -e " yearly_yield(ds, 100, 1) == 117" >> $out if (scala_assert "drumb.scala" "drumb_test6.scala") then diff -r b84ea52bfd8f -r cdfb2ce30a3d testing2/docdiff.scala --- a/testing2/docdiff.scala Tue Nov 12 10:47:27 2019 +0000 +++ b/testing2/docdiff.scala Tue Nov 19 00:40:27 2019 +0000 @@ -2,7 +2,8 @@ //======================================== -object CW7a { // for purposes of generating a jar +object CW7a { + //(1) Complete the clean function below. It should find // all words in a string using the regular expression @@ -12,16 +13,39 @@ // // The words should be Returned as a list of strings. -def clean(s: String) : List[String] = - ("""\w+""".r).findAllIn(s).toList + +def clean(s: String) : List[String] = { + val regex = """\w+""".r; + val list_of_words = s.split(" ").toList + for(word <- list_of_words; + actual_word <- divide_string_where_different(word, regex.findAllIn(word).mkString, 0)) yield actual_word +} +/* + A secondary function that takes as parameters @param original which is the original word, @param returned which is thea word after the process of removing + some characters not allowed by a regular expression, and @param i which is the index where to start compare the characters of the two words. + It @return a List of strings which represents all the substrings of returned which were previously divided by characters not allowed by the regular expression applied on it. +*/ +def divide_string_where_different(original: String, returned: String, i : Int): List[String] ={ + val max_i = original.length -1 + if(original(i) != returned(i)) returned.substring(0, i)::divide_string_where_different(original.substring(i+1), returned.substring(i), 0).filter(_.nonEmpty) + else if (i == max_i) List(returned) + else divide_string_where_different(original,returned, i +1) + +} //(2) The function occurrences calculates the number of times // strings occur in a list of strings. These occurrences should // be calculated as a Map from strings to integers. -def occurrences(xs: List[String]): Map[String, Int] = - (for (x <- xs.distinct) yield (x, xs.count(_ == x))).toMap + +def occurrences(xs: List[String]): Map[String, Int] = { + val lst = xs.distinct + val word_pairs = (for (word <- lst) yield (word, xs.count(_==word))).toList + word_pairs.toMap +} + + //(3) This functions calculates the dot-product of two documents // (list of strings). For this it calculates the occurrence @@ -29,29 +53,33 @@ // If a string does not occur in a document, the product is zero. // The function finally sums up all products. + def prod(lst1: List[String], lst2: List[String]) : Int = { - val words = (lst1 ::: lst2).distinct - val occs1 = occurrences(lst1) - val occs2 = occurrences(lst2) - words.map{ w => occs1.getOrElse(w, 0) * occs2.getOrElse(w, 0) }.sum + val map1 = occurrences(lst1) + val map2 = occurrences(lst2) + print(s"map1 is $map1 \n and map2 is $map2") + val pairs = (for(pair1 <- map1 if(map2.get(pair1._1) != None)) yield (pair1._2, map2.get(pair1._1).get)).toList + print(s"\n pairs are $pairs") + val products = (for(pair <- pairs) yield pair._1 * pair._2).toList + products.sum + } + //(4) Complete the functions overlap and similarity. The overlap of // two documents is calculated by the formula given in the assignment // description. The similarity of two strings is given by the overlap -// of the cleaned (see (1)) strings. - -def overlap(lst1: List[String], lst2: List[String]) : Double = { - val m1 = prod(lst1, lst1) - val m2 = prod(lst2, lst2) - prod(lst1, lst2).toDouble / (List(m1, m2).max) -} - -def similarity(s1: String, s2: String) : Double = - overlap(clean(s1), clean(s2)) +// of the cleaned strings (see (1)). -/* +//def overlap(lst1: List[String], lst2: List[String]) : Double = ... + +//def similarity(s1: String, s2: String) : Double = ... + + + + +/* Test cases val list1 = List("a", "b", "b", "c", "d") @@ -61,6 +89,8 @@ occurrences(List("d", "b", "d", "b", "d")) // Map(d -> 3, b -> 2) prod(list1,list2) // 7 +prod(list1,list1) +prod(list2,list2) overlap(list1, list2) // 0.5384615384615384 overlap(list2, list1) // 0.5384615384615384 @@ -81,7 +111,7 @@ heritage which ensures Australia's capacity to attract international ecotourists.""" -similarity(orig1, plag1) +similarity(orig1, plag1) // 0.8679245283018868 // Plagiarism examples from @@ -105,13 +135,15 @@ recovery: a controversial tactic that is often implemented immediately following an oil spill.""" -overlap(clean(orig2), clean(plag2)) -similarity(orig2, plag2) +overlap(clean(orig2), clean(plag2)) // 0.728 +similarity(orig2, plag2) // 0.728 + + // The punchline: everything above 0.6 looks suspicious and -// should be looked at by staff. +// should be investigated by staff. */ +} -}