2 //=================================== |
2 //=================================== |
3 |
3 |
4 |
4 |
5 object C2 { |
5 object C2 { |
6 |
6 |
|
7 // ADD YOUR CODE BELOW |
|
8 //====================== |
7 |
9 |
8 //(1) Complete the clean function below. It should find |
10 //(1) |
9 // all words in a string using the regular expression |
|
10 // \w+ and the library function |
|
11 // |
|
12 // some_regex.findAllIn(some_string) |
|
13 // |
|
14 // The words should be returned as a list of strings. |
|
15 |
|
16 |
|
17 def clean(s: String) : List[String] = ??? |
11 def clean(s: String) : List[String] = ??? |
18 |
12 |
19 |
13 |
20 |
14 |
21 //(2) The function occurrences calculates the number of times |
15 //(2) |
22 // strings occur in a list of strings. These occurrences should |
|
23 // be calculated as a Map from strings to integers. |
|
24 |
|
25 |
|
26 def occurrences(xs: List[String]): Map[String, Int] = ??? |
16 def occurrences(xs: List[String]): Map[String, Int] = ??? |
27 |
17 |
28 |
18 |
29 //(3) This function calculates the dot-product of two documents |
19 //(3) |
30 // (list of strings). For this it calculates the occurrence |
|
31 // maps from (2) and then multiplies the corresponding occurrences. |
|
32 // If a string does not occur in a document, the product is zero. |
|
33 // The function finally sums up all products. |
|
34 |
|
35 |
|
36 def prod(lst1: List[String], lst2: List[String]) : Int = ??? |
20 def prod(lst1: List[String], lst2: List[String]) : Int = ??? |
37 |
21 |
38 |
22 |
39 //(4) Complete the functions overlap and similarity. The overlap of |
23 //(4) |
40 // two documents is calculated by the formula given in the assignment |
|
41 // description. The similarity of two strings is given by the overlap |
|
42 // of the cleaned strings (see (1)). |
|
43 |
|
44 |
|
45 def overlap(lst1: List[String], lst2: List[String]) : Double = ??? |
24 def overlap(lst1: List[String], lst2: List[String]) : Double = ??? |
46 |
25 |
47 def similarity(s1: String, s2: String) : Double = ??? |
26 def similarity(s1: String, s2: String) : Double = ??? |
48 |
27 |
49 |
28 |