author | Christian Urban <christian.urban@kcl.ac.uk> |
Tue, 01 Nov 2022 15:03:48 +0000 | |
changeset 428 | cdfa6a293453 |
parent 396 | 3ffe978a5664 |
child 482 | 769bda18a43d |
permissions | -rw-r--r-- |
396 | 1 |
// Core Part 2 about Code Similarity |
2 |
//=================================== |
|
283 | 3 |
|
4 |
||
396 | 5 |
object C2 {

// ADD YOUR CODE BELOW
//======================

// Matches one maximal run of word characters (letters, digits, underscore).
// Compiled once here so `clean` does not rebuild the regex on every call.
private val Word = """\w+""".r

//(1)
/** Splits a text into its words.
  *
  * A word is a maximal run of `\w` characters, so punctuation and whitespace
  * act as separators: "eco-tourism" yields "eco" and "tourism", and
  * "Australia's" yields "Australia" and "s". Case is preserved.
  *
  * @param s the raw text
  * @return the words of `s` in order of appearance; empty if `s` has none
  */
def clean(s: String) : List[String] =
  Word.findAllIn(s).toList


//(2)
/** Counts how often each string occurs in a list.
  *
  * @param xs the strings to count
  * @return a map from every distinct element of `xs` to its number of
  *         occurrences; strings not in `xs` are absent from the map
  */
def occurrences(xs: List[String]): Map[String, Int] =
  xs.groupBy(identity).map { case (word, group) => word -> group.size }

//(3)
/** Dot product of the occurrence counts of two word lists.
  *
  * For every word, its count in `lst1` is multiplied by its count in `lst2`
  * and the products are summed. Words missing from either list contribute 0,
  * so it is enough to walk the words of `lst1`.
  *
  * @param lst1 the first word list
  * @param lst2 the second word list
  * @return the sum over all words of count-in-lst1 times count-in-lst2
  */
def prod(lst1: List[String], lst2: List[String]) : Int = {
  val counts1 = occurrences(lst1)
  val counts2 = occurrences(lst2)
  // Iterate rather than map over the Map, so equal products are not merged.
  counts1.iterator.map { case (word, n) => n * counts2.getOrElse(word, 0) }.sum
}

//(4)
/** Overlap of two word lists: `prod(lst1, lst2)` divided by the larger of
  * `prod(lst1, lst1)` and `prod(lst2, lst2)`.
  *
  * The result is symmetric in its arguments and is 1.0 when both lists have
  * the same occurrence counts.
  *
  * @param lst1 the first word list
  * @param lst2 the second word list
  * @return the overlap as a Double; 0.0 when both lists are empty, where the
  *         quotient would otherwise be 0/0
  */
def overlap(lst1: List[String], lst2: List[String]) : Double = {
  val norm = math.max(prod(lst1, lst1), prod(lst2, lst2))
  if (norm == 0) 0.0 else prod(lst1, lst2).toDouble / norm
}

/** Similarity of two texts: the `overlap` of their `clean`ed word lists.
  *
  * @param s1 the first text
  * @param s2 the second text
  * @return the overlap of the words of `s1` and `s2`
  */
def similarity(s1: String, s2: String) : Double =
  overlap(clean(s1), clean(s2))


/* Test cases


val list1 = List("a", "b", "b", "c", "d")
val list2 = List("d", "b", "d", "b", "d")

occurrences(List("a", "b", "b", "c", "d"))   // Map(a -> 1, b -> 2, c -> 1, d -> 1)
occurrences(List("d", "b", "d", "b", "d"))   // Map(d -> 3, b -> 2)

prod(list1,list2) // 7

overlap(list1, list2)   // 0.5384615384615384
overlap(list2, list1)   // 0.5384615384615384
overlap(list1, list1)   // 1.0
overlap(list2, list2)   // 1.0

// Plagiarism examples from
// https://desales.libguides.com/avoidingplagiarism/examples

val orig1 = """There is a strong market demand for eco-tourism in
Australia. Its rich and diverse natural heritage ensures Australia's
capacity to attract international ecotourists and gives Australia a
comparative advantage in the highly competitive tourism industry."""

val plag1 = """There is a high market demand for eco-tourism in
Australia. Australia has a comparative advantage in the highly
competitive tourism industry due to its rich and varied natural
heritage which ensures Australia's capacity to attract international
ecotourists."""

similarity(orig1, plag1)     // 0.8679245283018868


// Plagiarism examples from
// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php

val orig2 = """No oil spill is entirely benign. Depending on timing and
location, even a relatively minor spill can cause significant harm to
individual organisms and entire populations. Oil spills can cause
impacts over a range of time scales, from days to years, or even
decades for certain spills. Impacts are typically divided into acute
(short-term) and chronic (long-term) effects. Both types are part of a
complicated and often controversial equation that is addressed after
an oil spill: ecosystem recovery."""

val plag2 = """There is no such thing as a "good" oil spill. If the
time and place are just right, even a small oil spill can cause damage
to sensitive ecosystems. Further, spills can cause harm days, months,
years, or even decades after they occur. Because of this, spills are
usually broken into short-term (acute) and long-term (chronic)
effects. Both of these types of harm must be addressed in ecosystem
recovery: a controversial tactic that is often implemented immediately
following an oil spill."""

overlap(clean(orig2), clean(plag2))  // 0.728
similarity(orig2, plag2)             // 0.728



// The punchline: everything above 0.6 looks suspicious and
// should be investigated by staff.

*/

}