# HG changeset patch # User Christian Urban # Date 1636330670 0 # Node ID e48ea8300b2d41c1c550b57969e2017fd882e75d # Parent b17a98b0c52fac0b18f1733adc8ccf3dda420f40 updated diff -r b17a98b0c52f -r e48ea8300b2d core_solution2/docdiff.jar Binary file core_solution2/docdiff.jar has changed diff -r b17a98b0c52f -r e48ea8300b2d core_solution2/docdiff.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_solution2/docdiff.scala Mon Nov 08 00:17:50 2021 +0000 @@ -0,0 +1,117 @@ +// Preliminary Part about Code Similarity +//======================================== + + +object CW7a { + +//(1) Complete the clean function below. It should find +// all words in a string using the regular expression +// \w+ and the library function +// +// some_regex.findAllIn(some_string) +// +// The words should be Returned as a list of strings. + +def clean(s: String) : List[String] = + ("""\w+""".r).findAllIn(s).toList + + +//(2) The function occurrences calculates the number of times +// strings occur in a list of strings. These occurrences should +// be calculated as a Map from strings to integers. + +def occurrences(xs: List[String]): Map[String, Int] = + (for (x <- xs.distinct) yield (x, xs.count(_ == x))).toMap + +//(3) This functions calculates the dot-product of two documents +// (list of strings). For this it calculates the occurrence +// maps from (2) and then multiplies the corresponding occurrences. +// If a string does not occur in a document, the product is zero. +// The function finally sums up all products. + +def prod(lst1: List[String], lst2: List[String]) : Int = { + val words = (lst1 ::: lst2).distinct + val occs1 = occurrences(lst1) + val occs2 = occurrences(lst2) + words.map{ w => occs1.getOrElse(w, 0) * occs2.getOrElse(w, 0) }.sum +} + +//(4) Complete the functions overlap and similarity. The overlap of +// two documents is calculated by the formula given in the assignment +// description. The similarity of two strings is given by the overlap +// of the cleaned (see (1)) strings. + +def overlap(lst1: List[String], lst2: List[String]) : Double = { + val m1 = prod(lst1, lst1) + val m2 = prod(lst2, lst2) + prod(lst1, lst2).toDouble / (List(m1, m2).max) +} + +def similarity(s1: String, s2: String) : Double = + overlap(clean(s1), clean(s2)) + + +/* + + +val list1 = List("a", "b", "b", "c", "d") +val list2 = List("d", "b", "d", "b", "d") + +occurrences(List("a", "b", "b", "c", "d")) // Map(a -> 1, b -> 2, c -> 1, d -> 1) +occurrences(List("d", "b", "d", "b", "d")) // Map(d -> 3, b -> 2) + +prod(list1,list2) // 7 + +overlap(list1, list2) // 0.5384615384615384 +overlap(list2, list1) // 0.5384615384615384 +overlap(list1, list1) // 1.0 +overlap(list2, list2) // 1.0 + +// Plagiarism examples from +// https://desales.libguides.com/avoidingplagiarism/examples + +val orig1 = """There is a strong market demand for eco-tourism in +Australia. Its rich and diverse natural heritage ensures Australia's +capacity to attract international ecotourists and gives Australia a +comparative advantage in the highly competitive tourism industry.""" + +val plag1 = """There is a high market demand for eco-tourism in +Australia. Australia has a comparative advantage in the highly +competitive tourism industry due to its rich and varied natural +heritage which ensures Australia's capacity to attract international +ecotourists.""" + +similarity(orig1, plag1) + + +// Plagiarism examples from +// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php + +val orig2 = """No oil spill is entirely benign. Depending on timing and +location, even a relatively minor spill can cause significant harm to +individual organisms and entire populations. Oil spills can cause +impacts over a range of time scales, from days to years, or even +decades for certain spills. Impacts are typically divided into acute +(short-term) and chronic (long-term) effects. Both types are part of a +complicated and often controversial equation that is addressed after +an oil spill: ecosystem recovery.""" + +val plag2 = """There is no such thing as a "good" oil spill. If the +time and place are just right, even a small oil spill can cause damage +to sensitive ecosystems. Further, spills can cause harm days, months, +years, or even decades after they occur. Because of this, spills are +usually broken into short-term (acute) and long-term (chronic) +effects. Both of these types of harm must be addressed in ecosystem +recovery: a controversial tactic that is often implemented immediately +following an oil spill.""" + +overlap(clean(orig2), clean(plag2)) +similarity(orig2, plag2) + +// The punchline: everything above 0.6 looks suspicious and +// should be looked at by staff. + +*/ + + +} diff -r b17a98b0c52f -r e48ea8300b2d cws/main_cw03.pdf Binary file cws/main_cw03.pdf has changed diff -r b17a98b0c52f -r e48ea8300b2d cws/main_cw04.pdf Binary file cws/main_cw04.pdf has changed diff -r b17a98b0c52f -r e48ea8300b2d cws/main_cw04.tex --- a/cws/main_cw04.tex Sat Nov 06 00:06:39 2021 +0000 +++ b/cws/main_cw04.tex Mon Nov 08 00:17:50 2021 +0000 @@ -34,7 +34,7 @@ % \textcolor{red}{16 December} at 5pm; the core part (6\%) % is due on \cwNINEa{} at 5pm. Any 1\% you achieve in the % preliminary part counts as your ``weekly engagement''. -\bigskip +\medskip % Note the core, more advanced, part might include material you have not %yet seen in the first three lectures. \bigskip @@ -218,7 +218,7 @@ made available by importing \texttt{scala.annotation.tailrec}.\medskip - +\newpage \subsection*{Tasks} diff -r b17a98b0c52f -r e48ea8300b2d cws/main_cw05.pdf Binary file cws/main_cw05.pdf has changed diff -r b17a98b0c52f -r e48ea8300b2d cws/main_cw05.tex --- a/cws/main_cw05.tex Sat Nov 06 00:06:39 2021 +0000 +++ b/cws/main_cw05.tex Mon Nov 08 00:17:50 2021 +0000 @@ -98,8 +98,7 @@ more esoteric languages out there. One is called \emph{brainf***}. \here{https://esolangs.org/wiki/Brainfuck} You -are asked in this part to implement an interpreter for -a slight extension of this language. +are asked in this part to implement an interpreter for this language. Urban M\"uller developed the original version of brainf*** in 1993. A close relative of this language was already introduced in 1964 by Corado diff -r b17a98b0c52f -r e48ea8300b2d cws/upload --- a/cws/upload Sat Nov 06 00:06:39 2021 +0000 +++ b/cws/upload Mon Nov 08 00:17:50 2021 +0000 @@ -1,7 +1,7 @@ #!/bin/bash set -euo pipefail -fls=${1:-"pre_cw01.pdf pre_cw02.pdf pre_cw03.pdf main_cw01.pdf main_cw02.pdf main_cw03.pdf main_cw04.pdf main_cw05.pdf"} +fls=${1:-"core_cw01.pdf core_cw02.pdf core_cw03.pdf main_cw01.pdf main_cw02.pdf main_cw03.pdf main_cw04.pdf main_cw05.pdf"} for f in $fls; do echo -e "uploading $f" diff -r b17a98b0c52f -r e48ea8300b2d handouts/pep-ho.pdf Binary file handouts/pep-ho.pdf has changed diff -r b17a98b0c52f -r e48ea8300b2d handouts/pep-ho.tex --- a/handouts/pep-ho.tex Sat Nov 06 00:06:39 2021 +0000 +++ b/handouts/pep-ho.tex Mon Nov 08 00:17:50 2021 +0000 @@ -194,13 +194,13 @@ \includegraphics[scale=0.15]{../pics/vscode.png}\\[-10mm]\mbox{} \end{center} \caption{My installation of VS Code includes the following - packages from Marketplace: \textbf{Scala Syntax (official)} 0.3.4, - \textbf{Code Runner} 0.9.13, \textbf{Code Spell Checker} 1.7.17, - \textbf{Rewrap} 1.9.1 and \textbf{Subtle Match + packages from Marketplace: \textbf{Scala Syntax (official)} 0.5.4, + \textbf{Code Runner} 0.11.6, \textbf{Code Spell Checker} 2.0.12, + \textbf{Rewrap} 1.14.0 and \textbf{Subtle Match Brackets} 3.0.0. I have also bound the keys \keys{Ctrl} \keys{Ret} to the action ``Run-Selected-Text-In-Active-Terminal'' in order to quickly evaluate small code snippets in the Scala REPL. I use the internal - terminal to run Scala 2.13.1.\label{vscode}} + terminal to run Scala 2.13.6.\label{vscode}} \end{boxedminipage} \end{figure} @@ -380,8 +380,8 @@ \end{lstlisting}} & {\footnotesize\begin{lstlisting}[xleftmargin=0mm] -for (y <- (0 until H)/*@\keys{\texttt{.par}}@*/) { - for (x <- (0 until W)/*@\keys{\texttt{.par}}@*/) { +for (y <- (0 until H).par) { + for (x <- (0 until W).par) { val c = start + (x * d_x + y * d_y * i) @@ -461,7 +461,7 @@ \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] $ scala -Welcome to Scala 2.13.1 (Java HotSpot(TM) 64-Bit Server VM, Java 9). +Welcome to Scala 2.13.6 (OpenJDK 64-Bit Server VM, Java 17). Type in expressions for evaluation. Or try :help. scala> @@ -1717,8 +1717,8 @@ about Scala and of course lots of help online. For example \begin{itemize} -\item \url{http://www.scala-lang.org/docu/files/ScalaByExample.pdf} -\item \url{http://www.scala-lang.org/docu/files/ScalaTutorial.pdf} +%%\item \url{http://www.scala-lang.org/docu/files/ScalaByExample.pdf} +%%\item \url{http://www.scala-lang.org/docu/files/ScalaTutorial.pdf} \item \url{https://www.youtube.com/user/ShadowofCatron} \item \url{http://docs.scala-lang.org/tutorials} \item \url{https://www.scala-exercises.org} diff -r b17a98b0c52f -r e48ea8300b2d main_solution1/drumb.scala --- a/main_solution1/drumb.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution1/drumb.scala Mon Nov 08 00:17:50 2021 +0000 @@ -1,4 +1,4 @@ -// Core Part 6 about a really dumb investment strategy +// Main Part 1 about a really dumb investment strategy //===================================================== @@ -6,7 +6,7 @@ // > scala -d drumb.jar drumb.scala -object CW6b { +object M1 { //two test portfolios diff -r b17a98b0c52f -r e48ea8300b2d main_solution2/danube.jar Binary file main_solution2/danube.jar has changed diff -r b17a98b0c52f -r e48ea8300b2d main_solution2/danube.scala --- a/main_solution2/danube.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution2/danube.scala Mon Nov 08 00:17:50 2021 +0000 @@ -3,7 +3,7 @@ //======================================== -object CW7b { // for purposes of generating a jar +object M2 { // for purposes of generating a jar import io.Source import scala.util._ @@ -57,11 +57,6 @@ //good_ratings.length //48580 //movie_names.length // 9742 -//============================================== -// Do not change anything below, unless you want -// to submit the file for the advanced part 3! -//============================================== - // (3) Implement a grouping function that calulates a map // containing the userIds and all the corresponding recommendations @@ -162,35 +157,5 @@ // recommendations(ratings_map, movies_map, "4") // => Nil (there are three ratings for this movie in ratings.csv but they are not positive) -// (7) Calculate the recommendations for all movies according to -// what the recommendations function in (6) produces (this -// can take a few seconds). Put all recommendations into a list -// (of strings) and count how often the strings occur in -// this list. This produces a list of string-int pairs, -// where the first component is the movie name and the second -// is the number of how many times they were recommended. -// Sort all the pairs according to the number -// of times they were recommended (most recommended movie name -// first). - -def occurrences(xs: List[String]): List[(String, Int)] = - for (x <- xs.distinct) yield (x, xs.count(_ == x)) - -def most_recommended(recs: Map[String, List[String]], - movs: Map[String, String]) : List[(String, Int)] = { - val all = (for (name <- movs.toList.map(_._1)) yield { - recommendations(recs, movs, name) - }).flatten - val occs = occurrences(all) - occs.sortBy(_._2).reverse -} - - -//most_recommended(ratings_map, movies_map).take(3) -// => -// List((Matrix,698), -// (Star Wars: Episode IV - A New Hope (1977),402), -// (Jerry Maguire (1996),382)) - } diff -r b17a98b0c52f -r e48ea8300b2d main_solution3/re.jar Binary file main_solution3/re.jar has changed diff -r b17a98b0c52f -r e48ea8300b2d main_solution3/re.scala --- a/main_solution3/re.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution3/re.scala Mon Nov 08 00:17:50 2021 +0000 @@ -155,8 +155,8 @@ //println(size(der('a', der('a', der('a', EVIL))))) // => 58 // size with simplification -//println(simp(der('a', der('a', EVIL)))) // => 8 -//println(simp(der('a', der('a', der('a', EVIL)))))// => 8 +//println(simp(der('a', der('a', EVIL)))) +//println(simp(der('a', der('a', der('a', EVIL))))) //println(size(simp(der('a', der('a', EVIL))))) // => 8 //println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 @@ -173,11 +173,11 @@ val start = System.nanoTime() for (j <- 1 to i) code val end = System.nanoTime() - (end - start)/(i * 1.0e9) + "%.5f".format((end - start)/(i * 1.0e9)) } //for (i <- 0 to 5000000 by 500000) { -// println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") +// println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") //} // another "power" test case @@ -187,7 +187,7 @@ // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 100 times. +// where SEQ is nested 50 times. diff -r b17a98b0c52f -r e48ea8300b2d main_solution4/knight1.scala --- a/main_solution4/knight1.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution4/knight1.scala Mon Nov 08 00:17:50 2021 +0000 @@ -1,7 +1,7 @@ // Part 1 about finding and counting Knight's tours //================================================== -object CW9a { // for preparing the jar +object M4a { // for preparing the jar type Pos = (Int, Int) // a position on a chessboard type Path = List[Pos] // a path...a list of positions @@ -43,8 +43,6 @@ List(( 1, 2),( 2, 1),( 2, -1),( 1, -2), (-1, -2),(-2, -1),(-2, 1),(-1, 2)).map(add_pair(x, _)) -// 1 mark - def legal_moves(dim: Int, path: Path, x: Pos): List[Pos] = moves(x).filter(is_legal(dim, path, _)) @@ -62,7 +60,6 @@ //assert(legal_moves(2, Nil, (0,0)) == List()) //assert(legal_moves(3, Nil, (0,0)) == List((1,2), (2,1))) -// 2 marks def tcount_tours(dim: Int, path: Path): Int = { if (path.length == dim * dim) 1 @@ -119,7 +116,6 @@ } */ -// 1 mark def first(xs: List[Pos], f: Pos => Option[Path]): Option[Path] = xs match { case Nil => None @@ -136,8 +132,6 @@ //first(List((1, 0),(2, 0),(3, 0)), foo) -// 1 mark - def tfirst_tour(dim: Int, path: Path): Option[Path] = { if (path.length == dim * dim) Some(path) else @@ -157,7 +151,7 @@ // 15 secs for 8 x 8 //val ts1 = time_needed(0,first_tour(8, List((0, 0))).get) -//val ts1 = time_needed(0,first_tour(8, List((1, 1))).get) +//??val ts1 = time_needed(0,first_tour(8, List((1, 1))).get) // no result for 4 x 4 //val ts2 = time_needed(0, first_tour(4, List((0, 0)))) diff -r b17a98b0c52f -r e48ea8300b2d main_solution4/knight2.scala --- a/main_solution4/knight2.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution4/knight2.scala Mon Nov 08 00:17:50 2021 +0000 @@ -1,7 +1,7 @@ -// Part 4 about finding a single tour using the Warnsdorf Rule +// Part 2 about finding a single tour using the Warnsdorf Rule //============================================================= -object CW9b { // for preparing the jar +object M4b { // for preparing the jar type Pos = (Int, Int) type Path = List[Pos] diff -r b17a98b0c52f -r e48ea8300b2d main_solution4/knight3.scala --- a/main_solution4/knight3.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution4/knight3.scala Mon Nov 08 00:17:50 2021 +0000 @@ -1,7 +1,7 @@ // Part 3 about finding a single tour using the Warnsdorf Rule //============================================================= -object CW9c { // for preparing the jar +object M4c { // for preparing the jar type Pos = (Int, Int) type Path = List[Pos] @@ -61,6 +61,7 @@ time_needed(ttour_on_mega_board(dim: Int, path: Path)) - +// testcases +//print_board(70, tour_on_mega_board(70, List((0, 0))).get) } diff -r b17a98b0c52f -r e48ea8300b2d main_solution5/bf.jar Binary file main_solution5/bf.jar has changed diff -r b17a98b0c52f -r e48ea8300b2d main_solution5/bf.scala --- a/main_solution5/bf.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_solution5/bf.scala Mon Nov 08 00:17:50 2021 +0000 @@ -2,7 +2,7 @@ // the Brainf***++ language //============================================== -object CW10a { +object M5a { // representation of Bf memory @@ -96,18 +96,11 @@ case '+' => (pc + 1, mp, write(mem, mp, sread(mem, mp) + 1)) case '-' => (pc + 1, mp, write(mem, mp, sread(mem, mp) - 1)) case '.' => { print(sread(mem, mp).toChar); (pc + 1, mp, mem) } - //case ',' => (pc + 1, mp, write(mem, mp, Console.in.read().toByte)) - //case ',' => (pc + 1, mp, write(mem, mp, scala.io.StdIn.readByte())) case '[' => if (sread(mem, mp) == 0) (jumpRight(prog, pc + 1, 0), mp, mem) else (pc + 1, mp, mem) case ']' => if (sread(mem, mp) != 0) (jumpLeft(prog, pc - 1, 0), mp, mem) else (pc + 1, mp, mem) - // new commands - case '@' => (pc + 1, mp, write(mem, sread(mem, mp), sread(mem, mp - 1))) - case '*' => (pc + 1, mp, write(mem, mp, sread(mem, mp) * sread(mem, mp -1))) - case '#' => { println(s"${sread(mem, mp)}"); (pc + 1, mp, mem) } - case _ => (pc + 1, mp, mem) } compute(prog, new_pc, new_mp, new_mem) @@ -142,15 +135,6 @@ // prints out numbers 0 to 9 //run("""+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]""") -// bf++ program calculating the cube-function, 10 * 10 * 10 = 1000 -//run("""++++++++++#>+***#""") // Map(0 -> 10, 1 -> 1000) - - -// bf++ program copies 3 from 0-cell to to cells 1, 4, 5, 6 and 7 -// (note that because of how the program wprks cell 1 will contain 7) -//run("""+++>+@+@+@+@+@""") // Map(0 -> 3, 1 -> 7, 4 -> 3, 5 -> 3, 6 -> 3, 7 -> 3) - - // some more "useful" programs //----------------------------- diff -r b17a98b0c52f -r e48ea8300b2d main_solution5/bfc.jar Binary file main_solution5/bfc.jar has changed diff -r b17a98b0c52f -r e48ea8300b2d main_templates3/re.scala --- a/main_templates3/re.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_templates3/re.scala Mon Nov 08 00:17:50 2021 +0000 @@ -130,15 +130,15 @@ val start = System.nanoTime() for (j <- 1 to i) code val end = System.nanoTime() - (end - start)/(i * 1.0e9) + "%.5f".format((end - start)/(i * 1.0e9)) } for (i <- 0 to 5000000 by 500000) { - println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i)))) + println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") } // another "power" test case -simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE +simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE // the Iterator produces the rexp // diff -r b17a98b0c52f -r e48ea8300b2d main_templates4/knight1.scala --- a/main_templates4/knight1.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_templates4/knight1.scala Mon Nov 08 00:17:50 2021 +0000 @@ -4,10 +4,10 @@ object M4a { -// If you need any auxiliary function, feel free to -// implement it, but do not make any changes to the +// If you need any auxiliary functions, feel free to +// implement them, but do not make any changes to the // templates below. Also have a look whether the functions -// at the end are of any help. +// at the end of the file are of any help. @@ -85,7 +85,9 @@ } // can be called for example with +// // time_needed(count_tours(dim, List((0, 0)))) +// // in order to print out the time that is needed for // running count_tours diff -r b17a98b0c52f -r e48ea8300b2d main_templates4/knight2.scala --- a/main_templates4/knight2.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_templates4/knight2.scala Mon Nov 08 00:17:50 2021 +0000 @@ -7,8 +7,8 @@ // !!! Copy any function you need from file knight1.scala !!! // -// If you need any auxiliary function, feel free to -// implement it, but do not make any changes to the +// If you need any auxiliary functions, feel free to +// implement them, but do not make any changes to the // templates below. type Pos = (Int, Int) // a position on a chessboard diff -r b17a98b0c52f -r e48ea8300b2d main_templates5/bf.scala --- a/main_templates5/bf.scala Sat Nov 06 00:06:39 2021 +0000 +++ b/main_templates5/bf.scala Mon Nov 08 00:17:50 2021 +0000 @@ -12,9 +12,9 @@ // (1) Write a function that takes a file name as argument and -// and requests the corresponding file from disk. It Returns the +// and requests the corresponding file from disk. It returns the // content of the file as a String. If the file does not exists, -// the function should Return the empty string. +// the function should return the empty string. import io.Source import scala.util._ diff -r b17a98b0c52f -r e48ea8300b2d pre_solution2/docdiff.jar Binary file pre_solution2/docdiff.jar has changed diff -r b17a98b0c52f -r e48ea8300b2d pre_solution2/docdiff.scala --- a/pre_solution2/docdiff.scala Sat Nov 06 00:06:39 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -// Preliminary Part about Code Similarity -//======================================== - - -object CW7a { - -//(1) Complete the clean function below. It should find -// all words in a string using the regular expression -// \w+ and the library function -// -// some_regex.findAllIn(some_string) -// -// The words should be Returned as a list of strings. - -def clean(s: String) : List[String] = - ("""\w+""".r).findAllIn(s).toList - - -//(2) The function occurrences calculates the number of times -// strings occur in a list of strings. These occurrences should -// be calculated as a Map from strings to integers. - -def occurrences(xs: List[String]): Map[String, Int] = - (for (x <- xs.distinct) yield (x, xs.count(_ == x))).toMap - -//(3) This functions calculates the dot-product of two documents -// (list of strings). For this it calculates the occurrence -// maps from (2) and then multiplies the corresponding occurrences. -// If a string does not occur in a document, the product is zero. -// The function finally sums up all products. - -def prod(lst1: List[String], lst2: List[String]) : Int = { - val words = (lst1 ::: lst2).distinct - val occs1 = occurrences(lst1) - val occs2 = occurrences(lst2) - words.map{ w => occs1.getOrElse(w, 0) * occs2.getOrElse(w, 0) }.sum -} - -//(4) Complete the functions overlap and similarity. The overlap of -// two documents is calculated by the formula given in the assignment -// description. The similarity of two strings is given by the overlap -// of the cleaned (see (1)) strings. - -def overlap(lst1: List[String], lst2: List[String]) : Double = { - val m1 = prod(lst1, lst1) - val m2 = prod(lst2, lst2) - prod(lst1, lst2).toDouble / (List(m1, m2).max) -} - -def similarity(s1: String, s2: String) : Double = - overlap(clean(s1), clean(s2)) - - -/* - - -val list1 = List("a", "b", "b", "c", "d") -val list2 = List("d", "b", "d", "b", "d") - -occurrences(List("a", "b", "b", "c", "d")) // Map(a -> 1, b -> 2, c -> 1, d -> 1) -occurrences(List("d", "b", "d", "b", "d")) // Map(d -> 3, b -> 2) - -prod(list1,list2) // 7 - -overlap(list1, list2) // 0.5384615384615384 -overlap(list2, list1) // 0.5384615384615384 -overlap(list1, list1) // 1.0 -overlap(list2, list2) // 1.0 - -// Plagiarism examples from -// https://desales.libguides.com/avoidingplagiarism/examples - -val orig1 = """There is a strong market demand for eco-tourism in -Australia. Its rich and diverse natural heritage ensures Australia's -capacity to attract international ecotourists and gives Australia a -comparative advantage in the highly competitive tourism industry.""" - -val plag1 = """There is a high market demand for eco-tourism in -Australia. Australia has a comparative advantage in the highly -competitive tourism industry due to its rich and varied natural -heritage which ensures Australia's capacity to attract international -ecotourists.""" - -similarity(orig1, plag1) - - -// Plagiarism examples from -// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php - -val orig2 = """No oil spill is entirely benign. Depending on timing and -location, even a relatively minor spill can cause significant harm to -individual organisms and entire populations. Oil spills can cause -impacts over a range of time scales, from days to years, or even -decades for certain spills. Impacts are typically divided into acute -(short-term) and chronic (long-term) effects. Both types are part of a -complicated and often controversial equation that is addressed after -an oil spill: ecosystem recovery.""" - -val plag2 = """There is no such thing as a "good" oil spill. If the -time and place are just right, even a small oil spill can cause damage -to sensitive ecosystems. Further, spills can cause harm days, months, -years, or even decades after they occur. Because of this, spills are -usually broken into short-term (acute) and long-term (chronic) -effects. Both of these types of harm must be addressed in ecosystem -recovery: a controversial tactic that is often implemented immediately -following an oil spill.""" - -overlap(clean(orig2), clean(plag2)) -similarity(orig2, plag2) - -// The punchline: everything above 0.6 looks suspicious and -// should be looked at by staff. - -*/ - - -}