core_templates2/docdiff.scala
changeset 428 cdfa6a293453
parent 396 3ffe978a5664
child 482 769bda18a43d
equal deleted inserted replaced
427:6e93040e3378 428:cdfa6a293453
     2 //===================================
     2 //===================================
     3 
     3 
     4 
     4 
     5 object C2 { 
     5 object C2 { 
     6 
     6 
       
     7 // ADD YOUR CODE BELOW
       
     8 //======================
     7 
     9 
     8 //(1) Complete the clean function below. It should find
    10 //(1)
     9 //    all words in a string using the regular expression
       
    10 //    \w+  and the library function 
       
    11 //
       
    12 //         some_regex.findAllIn(some_string)
       
    13 //
       
    14 //    The words should be returned as a list of strings.
       
    15 
       
    16 
       
    17 def clean(s: String) : List[String] = ???
    11 def clean(s: String) : List[String] = ???
    18   
    12   
    19 
    13 
    20 
    14 
    21 //(2) The function occurrences calculates the number of times  
    15 //(2)
    22 //    strings occur in a list of strings. These occurrences should 
       
    23 //    be calculated as a Map from strings to integers.
       
    24 
       
    25 
       
    26 def occurrences(xs: List[String]): Map[String, Int] = ???
    16 def occurrences(xs: List[String]): Map[String, Int] = ???
    27 
    17 
    28 
    18 
    29 //(3) This function calculates the dot-product of two documents
    19 //(3)
    30 //    (list of strings). For this it calculates the occurrence
       
    31 //    maps from (2) and then multiplies the corresponding occurrences. 
       
    32 //    If a string does not occur in a document, the product is zero.
       
    33 //    The function finally sums up all products. 
       
    34 
       
    35 
       
    36 def prod(lst1: List[String], lst2: List[String]) : Int = ???
    20 def prod(lst1: List[String], lst2: List[String]) : Int = ???
    37 
    21 
    38 
    22 
    39 //(4) Complete the functions overlap and similarity. The overlap of
    23 //(4)
    40 //    two documents is calculated by the formula given in the assignment
       
    41 //    description. The similarity of two strings is given by the overlap
       
    42 //    of the cleaned strings (see (1)).  
       
    43 
       
    44 
       
    45 def overlap(lst1: List[String], lst2: List[String]) : Double = ???
    24 def overlap(lst1: List[String], lst2: List[String]) : Double = ???
    46 
    25 
    47 def similarity(s1: String, s2: String) : Double = ???
    26 def similarity(s1: String, s2: String) : Double = ???
    48 
    27 
    49 
    28