| 336 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      1 | // Resit Exam about data-mining a chat log
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      2 | //=========================================
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      3 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      4 | // This coursework is about mining a log of an online chat between 85
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      5 | // participants. The log is given as a csv-list in the file
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      6 | // log.csv. The log is an unordered list containing information which
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      7 | // message has been sent, by whom, when and in response to which other
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      8 | // message. Each message has also a number and a unique hash code.
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |      9 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     10 | // !! For further information abiout the tasks, see:     !!
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     11 | // !!                                                    !!
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     12 | // !! https://nms.kcl.ac.uk/christian.urban/cw-resit.pdf !!
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     13 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     14 | object Resit {
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     15 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     16 | import io.Source
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     17 | import scala.util._
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     18 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     19 | //=============
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     20 | // (1) The function get_csv takes file name as argument. It should read
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     21 | // the corresponding file and return its content. The content should
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     22 | // be returned as a list of strings, a string for each line in the
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     23 | // file. Since the file is a csv-file, the first line (the header)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     24 | // should be dropped. Lines are separated by "\n".
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     25 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     26 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     27 | def get_csv(name: String) : List[String] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     28 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     29 | // test cases:
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     30 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     31 | // get_csv("log.csv")
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     32 | // get_csv("log.csv").length  // should be 680
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     33 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     34 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     35 | //=============
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     36 | // (2) The function below takes a single line from the csv-file (as
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     37 | // generated by get_csv) and creates a Rec(ord) data structure. The
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     38 | // data from the csv-file should be copied as follows:
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     39 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     40 | //     csv-file         Rec data structure
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     41 | //     -----------------------------------
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     42 | //      counter      => num
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     43 | //      id           => msg_id 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     44 | //      time_date    => date
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     45 | //      name         => author
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     46 | //      country      => country (should be None if no country is given)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     47 | //      parent_id    => reply_id (should be None if there is no parent)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     48 | //      msg          => msg
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     49 | //                   => parent is set to None  (will be calculated later)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     50 | //                   => children is set to Nil (will be calculated later)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     51 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     52 | //     You should use the function line.split(",").toList to separate
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     53 | //     the items in the csv-line. BE CAREFUL that the message text in 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     54 | //     the last field can contain commas and therefore the split will not
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     55 | //     always result in a list of 7 elements. You need to concatenate
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     56 | //     anything beyond the 7th field into a single string for the field msg.
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     57 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     58 | case class Rec(num: Int, 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     59 |                msg_id: String,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     60 |                date: String,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     61 |                msg: String,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     62 |                author: String,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     63 |                country: Option[String],
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     64 |                reply_id : Option[String],
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     65 |                parent: Option[Int] = None,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     66 |                children: List[Int] = Nil)  
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     67 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     68 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     69 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     70 | def process_line(line: String) : Rec = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     71 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     72 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     73 | // test cases:
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     74 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     75 | // process_line("""0,5ebeb459ac278d01301f1497,2020-05-15T15:25:13.413000,participant34,United Kingdom,5ebea6424923321d63155796,this question please?""")
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     76 | // 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     77 | //      ==>  Rec(0,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     78 | //               "5ebeb459ac278d01301f1497",
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     79 | //               "2020-05-15T15:25:13.413000",
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     80 | //               "this question please?",
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     81 | //               "participant34",
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     82 | //               Some("United Kingdom"),
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     83 | //               Some("5ebea6424923321d63155796"),
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     84 | //               None,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     85 | //               List())
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     86 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     87 | // process_line("""1,hash,date,p43,,,foo, bar""")
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     88 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     89 | //      ==>  Rec(1, "hash", "date", "foo, bar",
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     90 | //               "p43", None, None, None, List())
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     91 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     92 | // (Note that in the second test case the message needs to be "foo, bar")
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     93 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     94 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     95 | //=============
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     96 | // (3) Each record in the log contains a unique hash code
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     97 | //     identifying each message. Some messages also contain a hash
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     98 | //     code identifying the parent message (to which question they reply).
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |     99 | //     The function post_process fills in the information about
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    100 | //     potential children and a potential parent message. 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    101 | //  
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    102 | //     The auxiliary function get_children takes a record e and a
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    103 | //     record list rs as arguments, and returns the list of all direct
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    104 | //     children (which have the hash code of e as reply_id). The list
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    105 | //     of children are returned as a list of nums.
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    106 | //      
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    107 | //     The auxiliary function get_parent returns the number of the
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    108 | //     record corresponding to the reply_id (if there exists one,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    109 | //     otherwise returns None).
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    110 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    111 | def get_children(e: Rec, rs: List[Rec]) : List[Int] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    112 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    113 | def get_parent(e: Rec, rs: List[Rec]) : Option[Int] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    114 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    115 | def post_process(rs: List[Rec]) : List[Rec] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    116 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    117 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    118 | // test cases:
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    119 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    120 | //val recs = get_csv("log.csv").map(process_line)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    121 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    122 | //post_process(recs)(4).children   // List(12)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    123 | //post_process(recs)(23).children  // List(16,26)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    124 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    125 | //post_process(recs)(8).parent     // None
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    126 | //post_process(recs)(9).parent     // Some(7)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    127 | //post_process(recs)(16).parent    // Some(23)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    128 | //post_process(recs)(26).parent    // Some(23)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    129 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    130 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    131 | //=============
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    132 | // (4) The next two functions calculate the countries where
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    133 | //     message authors are coming from and how many authors
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    134 | //     come from each country (returned as a Map from countries
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    135 | //     to numbers). In case an author did not specify a country,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    136 | //     the empty string is returned.
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    137 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    138 | def get_countries(rs: List[Rec]) : Set[String] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    139 |   
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    140 | def get_countries_numbers(rs: List[Rec]) :  Map[String, Int] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    141 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    142 | // test cases:
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    143 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    144 | //val recs = get_csv("log.csv").map(process_line)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    145 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    146 | // get_countries(recs) => 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    147 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    148 | //    Set("", Poland, Lebanon, Trinidad and
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    149 | //        Tobago, Japan, Spain, Nigeria, Peru, India, Lithuania, Hong
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    150 | //        Kong, Italy, Ireland, Uganda, England, Bangladesh, China,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    151 | //        Romania, Slovakia, United Kingdom, Norway, Pakistan, Indonesia,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    152 | //        Malaysia, Turkey, Portugal, Hungary)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    153 | //
 | 
| 343 |    154 | // get_countries_numbers(recs) => 
 | 
| 336 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    155 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    156 | //    Map("" -> 4, Poland -> 2, Lebanon -> 1, Trinidad and Tobago -> 2,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    157 | //        Japan -> 1, Spain -> 1, Nigeria -> 1, Peru -> 1, India -> 6,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    158 | //        Lithuania -> 1, Hong Kong -> 3, Italy -> 1, Ireland -> 2, Uganda -> 1,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    159 | //        England -> 4, Bangladesh -> 2, China -> 4, Romania -> 3,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    160 | //        Slovakia -> 1, United Kingdom -> 34, Norway -> 1, Pakistan -> 2,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    161 | //        Indonesia -> 2, Malaysia -> 1, Turkey -> 1, Portugal -> 1,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    162 | //        Hungary -> 2)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    163 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    164 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    165 | //============= 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    166 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    167 | // (5) The function ordered_thread_sizes orders the message threads
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    168 | // according to how many answers were given for one message (that is
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    169 | // how many children, grand-children and so on one message has).
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    170 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    171 | // The auxiliary function search enumerates all children,
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    172 | // grand-children and so on for a given record r (including the record
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    173 | // itself). Search returns the children and so on as a list of Recs.
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    174 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    175 | // The function thread_size generates for a record, say r, a pair
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    176 | // consisting of the number of r and the number of all children as
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    177 | // produced by search. 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    178 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    179 | // The function ordered_thread_sizes orders than the list of pairs
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    180 | // according to which thread in the chat is the longest.
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    181 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    182 | def search(r: Rec, rs: List[Rec]) : List[Rec] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    183 |   
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    184 | def thread_size(r: Rec, rs: List[Rec]) : (Int, Int) = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    185 |  
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    186 | def ordered_thread_sizes(rs: List[Rec]) : List[(Int, Int)] = ...
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    187 |  
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    188 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    189 | // test cases: 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    190 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    191 | //val recs_p = post_process(get_csv("log.csv").map(process_line))
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    192 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    193 | //search(recs_p(459), recs_p).map(_.num)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    194 | //    => List(459, 401, 404, 426, 428, 399, 377, 357, 325, 304)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    195 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    196 | //thread_size(recs_p(459), recs_p) 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    197 | //    => (459,10)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    198 | //
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    199 | //ordered_thread_sizes(recs_p).take(4)
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    200 | //    => List((402,18), (95,12), (488,11), (459,10))
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    201 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    202 | 
 | 
| 
Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset |    203 | }
 |