// Core Part 2 about Code Similarity
//===================================

/** Word-based similarity measure between two texts.
  *
  * A text is split into words, the words are counted, and two texts are
  * compared through the dot product of their word-count maps.
  */
object C2 {

  // ADD YOUR CODE BELOW
  //======================

  // Compiled once instead of on every call to `clean`.
  private val WordPattern = """(\w+)""".r

  //(1)
  /** Extracts every maximal run of word characters (`\w+`) from `s`,
    * in order of appearance. Anything else acts as a separator,
    * so "eco-tourism" yields "eco" and "tourism".
    *
    * @param s the raw text
    * @return the words found in `s`; empty if there are none
    */
  def clean(s: String): List[String] = WordPattern.findAllIn(s).toList


  //(2)
  /** Counts how often each string occurs in `xs`.
    *
    * @param xs the words to count (duplicates allowed)
    * @return a map from each distinct word to its number of occurrences;
    *         words that do not occur are absent from the map
    */
  def occurrences(xs: List[String]): Map[String, Int] =
    // Single pass over the list rather than one `count` per distinct word.
    xs.foldLeft(Map.empty[String, Int]) { (counts, word) =>
      counts.updated(word, counts.getOrElse(word, 0) + 1)
    }


  //(3)
  /** Dot product of the word-count maps of the two lists: for every word
    * of `lst1`, its count in `lst1` times its count in `lst2` (0 if the
    * word is missing from `lst2`), summed up.
    *
    * @param lst1 first list of words
    * @param lst2 second list of words
    * @return the sum of the per-word count products
    */
  def prod(lst1: List[String], lst2: List[String]): Int = {
    val counts1 = occurrences(lst1)
    val counts2 = occurrences(lst2)
    counts1.iterator.map { case (word, n) => n * counts2.getOrElse(word, 0) }.sum
  }


  //(4)
  /** Overlap of two word lists:
    * `prod(lst1, lst2) / max(prod(lst1, lst1), prod(lst2, lst2))`.
    *
    * @param lst1 first list of words
    * @param lst2 second list of words
    * @return the overlap as a fraction; 0.0 when both lists are empty
    *         (the denominator would be zero)
    */
  def overlap(lst1: List[String], lst2: List[String]): Double = {
    val denominator = prod(lst1, lst1).max(prod(lst2, lst2))
    if (denominator == 0) 0.0
    // Convert before dividing: Int / Int would truncate the fraction to 0 or 1.
    else prod(lst1, lst2).toDouble / denominator
  }

  /** Similarity of two raw texts: the [[overlap]] of their [[clean]]ed word lists.
    *
    * @param s1 first text
    * @param s2 second text
    * @return the overlap of the words of `s1` and `s2`
    */
  def similarity(s1: String, s2: String): Double = overlap(clean(s1), clean(s2))



/* Test cases


val list1 = List("a", "b", "b", "c", "d")
val list2 = List("d", "b", "d", "b", "d")

occurrences(List("a", "b", "b", "c", "d"))   // Map(a -> 1, b -> 2, c -> 1, d -> 1)
occurrences(List("d", "b", "d", "b", "d"))   // Map(d -> 3, b -> 2)

prod(list1,list2) // 7

overlap(list1, list2)   // 0.5384615384615384
overlap(list2, list1)   // 0.5384615384615384
overlap(list1, list1)   // 1.0
overlap(list2, list2)   // 1.0

// Plagiarism examples from
// https://desales.libguides.com/avoidingplagiarism/examples

val orig1 = """There is a strong market demand for eco-tourism in
Australia. Its rich and diverse natural heritage ensures Australia's
capacity to attract international ecotourists and gives Australia a
comparative advantage in the highly competitive tourism industry."""

val plag1 = """There is a high market demand for eco-tourism in
Australia. Australia has a comparative advantage in the highly
competitive tourism industry due to its rich and varied natural
heritage which ensures Australia's capacity to attract international
ecotourists."""

similarity(orig1, plag1) // 0.8679245283018868


// Plagiarism examples from
// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php

val orig2 = """No oil spill is entirely benign. Depending on timing and
location, even a relatively minor spill can cause significant harm to
individual organisms and entire populations. Oil spills can cause
impacts over a range of time scales, from days to years, or even
decades for certain spills. Impacts are typically divided into acute
(short-term) and chronic (long-term) effects. Both types are part of a
complicated and often controversial equation that is addressed after
an oil spill: ecosystem recovery."""

val plag2 = """There is no such thing as a "good" oil spill. If the
time and place are just right, even a small oil spill can cause damage
to sensitive ecosystems. Further, spills can cause harm days, months,
years, or even decades after they occur. Because of this, spills are
usually broken into short-term (acute) and long-term (chronic)
effects. Both of these types of harm must be addressed in ecosystem
recovery: a controversial tactic that is often implemented immediately
following an oil spill."""

overlap(clean(orig2), clean(plag2))  // 0.728
similarity(orig2, plag2)             // 0.728



// The punchline: everything above 0.6 looks suspicious and
// should be investigated by staff.

*/

}
 |