|      1 // CW1 |         | 
|      2  |         | 
|      3 abstract class Rexp |         | 
|      4 case object ZERO extends Rexp |         | 
|      5 case object ONE extends Rexp |         | 
|      6 case class CHAR(c: Char) extends Rexp |         | 
|      7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  |         | 
|      8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  |         | 
|      9 case class STAR(r: Rexp) extends Rexp  |         | 
|     10  |         | 
|     11 // extended Rexps |         | 
|     12 case class RANGE(s: Set[Char]) extends Rexp |         | 
|     13 case class PLUS(r: Rexp) extends Rexp |         | 
|     14 case class OPTIONAL(r: Rexp) extends Rexp |         | 
|     15 case class NTIMES(r: Rexp, n: Int) extends Rexp  |         | 
|     16 case class UPTO(r: Rexp, n: Int) extends Rexp |         | 
|     17 case class FROM(r: Rexp, n: Int) extends Rexp |         | 
|     18 case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp |         | 
|     19 case class NOT(r: Rexp) extends Rexp |         | 
|     20  |         | 
|     21 // functions |         | 
|     22 case class CFUN(f: Char => Boolean) extends Rexp |         | 
|     23  |         | 
|     24 // abbreviations |         | 
|     25 def FCHAR(c: Char) = CFUN((a: Char) => a == c) |         | 
|     26 def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a)) |         | 
|     27 val FALL = CFUN(_ => true) |         | 
|     28  |         | 
|     29 def nullable (r: Rexp) : Boolean = r match { |         | 
|     30   case ZERO => false |         | 
|     31   case ONE => true |         | 
|     32   case CHAR(_) => false |         | 
|     33   case ALT(r1, r2) => nullable(r1) || nullable(r2) |         | 
|     34   case SEQ(r1, r2) => nullable(r1) && nullable(r2) |         | 
|     35   case STAR(_) => true |         | 
|     36  |         | 
|     37   case RANGE(_) => false  |         | 
|     38   case PLUS(r1) => nullable(r1) |         | 
|     39   case OPTIONAL(_) => true |         | 
|     40   case NTIMES(r1, i) => if (i == 0) true else nullable(r1) |         | 
|     41   case UPTO(_, _) => true |         | 
|     42   case FROM(r1, i) => if (i == 0) true else nullable(r1) |         | 
|     43   case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1) |         | 
|     44   case NOT(r1) => !nullable(r1) |         | 
|     45  |         | 
|     46   case CFUN(f) => false |         | 
|     47 } |         | 
|     48  |         | 
|     49  |         | 
|     50 def der (c: Char, r: Rexp) : Rexp = r match { |         | 
|     51   case ZERO => ZERO |         | 
|     52   case ONE => ZERO |         | 
|     53   case CHAR(d) => if (c == d) ONE else ZERO |         | 
|     54   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |         | 
|     55   case SEQ(r1, r2) =>  |         | 
|     56     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |         | 
|     57     else SEQ(der(c, r1), r2) |         | 
|     58   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |         | 
|     59  |         | 
|     60   case RANGE(s) => |         | 
|     61     if (s.contains(c)) ONE else ZERO  |         | 
|     62   case PLUS(r1) => SEQ(der(c, r1), STAR(r1)) |         | 
|     63   case OPTIONAL(r1) => der(c, r1) |         | 
|     64   case NTIMES(r, i) =>  |         | 
|     65     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |         | 
|     66   case UPTO(r1, i) =>  |         | 
|     67     if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1))  |         | 
|     68   case FROM(r1, i) => |         | 
|     69     if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else |         | 
|     70     SEQ(der(c, r1), FROM(r1, i - 1)) |         | 
|     71   case BETWEEN(r1, i, j) =>  |         | 
|     72     if (i == 0) { |         | 
|     73       if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1)) |         | 
|     74     } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1)) |         | 
|     75   case NOT(r1) => NOT(der(c, r1)) |         | 
|     76  |         | 
|     77   case CFUN(f) => if (f(c)) ONE else ZERO |         | 
|     78 } |         | 
|     79  |         | 
|     80  |         | 
|     81 def simp(r: Rexp) : Rexp = r match { |         | 
|     82   case ALT(r1, r2) => (simp(r1), simp(r2)) match { |         | 
|     83     case (ZERO, r2s) => r2s |         | 
|     84     case (r1s, ZERO) => r1s |         | 
|     85     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) |         | 
|     86   } |         | 
|     87   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match { |         | 
|     88     case (ZERO, _) => ZERO |         | 
|     89     case (_, ZERO) => ZERO |         | 
|     90     case (ONE, r2s) => r2s |         | 
|     91     case (r1s, ONE) => r1s |         | 
|     92     case (r1s, r2s) => SEQ(r1s, r2s) |         | 
|     93   } |         | 
|     94   case r => r |         | 
|     95 } |         | 
|     96  |         | 
|     97 def ders(s: List[Char], r: Rexp) : Rexp = s match { |         | 
|     98   case Nil => r |         | 
|     99   case c::s => ders(s, simp(der(c, r))) |         | 
|    100 } |         | 
|    101  |         | 
|    102 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |         | 
|    103  |         | 
|    104  |         | 
|    105  |         | 
|    106 val Regex31 = NTIMES(CHAR('a'),3) |         | 
|    107 val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3) |         | 
|    108 val Regex33 = UPTO(CHAR('a'),3) |         | 
|    109 val Regex34 = UPTO(OPTIONAL(CHAR('a')),3) |         | 
|    110 val Regex35 = BETWEEN(CHAR('a'),3,5) |         | 
|    111 val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5) |         | 
|    112 val string31 = "" |         | 
|    113 val string32 = "a" |         | 
|    114 val string33 = "aa" |         | 
|    115 val string34 = "aaa" |         | 
|    116 val string35 = "aaaa" |         | 
|    117 val string36 = "aaaaa" |         | 
|    118 val string37 = "aaaaaa" |         | 
|    119  |         | 
|    120  |         | 
|    121 println("Question3") |         | 
|    122 println(matcher(Regex31, string31)) |         | 
|    123 println(matcher(Regex31, string32)) |         | 
|    124 println(matcher(Regex31, string33)) |         | 
|    125 println(matcher(Regex31, string34)) |         | 
|    126 println(matcher(Regex31, string35)) |         | 
|    127 println(matcher(Regex31, string36)) |         | 
|    128 println(matcher(Regex31, string37)); println("") |         | 
|    129 println(matcher(Regex32, string31)) |         | 
|    130 println(matcher(Regex32, string32)) |         | 
|    131 println(matcher(Regex32, string33)) |         | 
|    132 println(matcher(Regex32, string34)) |         | 
|    133 println(matcher(Regex32, string35)) |         | 
|    134 println(matcher(Regex32, string36)) |         | 
|    135 println(matcher(Regex32, string37)); println("") |         | 
|    136 println(matcher(Regex33, string31)) |         | 
|    137 println(matcher(Regex33, string32)) |         | 
|    138 println(matcher(Regex33, string33)) |         | 
|    139 println(matcher(Regex33, string34)) |         | 
|    140 println(matcher(Regex33, string35)) |         | 
|    141 println(matcher(Regex33, string36)) |         | 
|    142 println(matcher(Regex33, string37)); println("") |         | 
|    143 println(matcher(Regex34, string31)) |         | 
|    144 println(matcher(Regex34, string32)) |         | 
|    145 println(matcher(Regex34, string33)) |         | 
|    146 println(matcher(Regex34, string34)) |         | 
|    147 println(matcher(Regex34, string35)) |         | 
|    148 println(matcher(Regex34, string36)) |         | 
|    149 println(matcher(Regex34, string37)); println("") |         | 
|    150 println(matcher(Regex35, string31)) |         | 
|    151 println(matcher(Regex35, string32)) |         | 
|    152 println(matcher(Regex35, string33)) |         | 
|    153 println(matcher(Regex35, string34)) |         | 
|    154 println(matcher(Regex35, string35)) |         | 
|    155 println(matcher(Regex35, string36)) |         | 
|    156 println(matcher(Regex35, string37)); println("") |         | 
|    157 println(matcher(Regex36, string31)) |         | 
|    158 println(matcher(Regex36, string32)) |         | 
|    159 println(matcher(Regex36, string33)) |         | 
|    160 println(matcher(Regex36, string34)) |         | 
|    161 println(matcher(Regex36, string35)) |         | 
|    162 println(matcher(Regex36, string36)) |         | 
|    163 println(matcher(Regex36, string37)); println("") |         | 
|    164  |         | 
|    165  |         | 
|    166 val RegexX = BETWEEN(CHAR('a'), 0, 5) |         | 
|    167 val stringX = "" |         | 
|    168 println(matcher(RegexX, stringX)) |         | 
|    169  |         | 
|    170  |         | 
|    171  |         | 
|    172 val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |         | 
|    173 val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |         | 
|    174 val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |         | 
|    175  |         | 
|    176 val matchA = (c:Char) => c == 'a' |         | 
|    177  |         | 
|    178 val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA)) |         | 
|    179 val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA))) |         | 
|    180  |         | 
|    181 val reg_1_mod = PLUS(PLUS(reg_1)) |         | 
|    182 val reg_2_mod = PLUS(PLUS(reg_2)) |         | 
|    183  |         | 
|    184  |         | 
|    185 matcher(reg_1_mod, str0) |         | 
|    186 matcher(reg_1_mod, str1) |         | 
|    187 matcher(reg_1_mod, str2) |         | 
|    188 matcher(reg_2_mod, str0) |         | 
|    189 matcher(reg_2_mod, str1) |         | 
|    190 matcher(reg_2_mod, str2) |         | 
|    191  |         | 
|    192  |         | 
|    193  |         | 
|    194  |         | 
|    195  |         | 
|    196 //Q3: matcher test (table) |         | 
|    197  |         | 
|    198 // a^{3} |         | 
|    199 val re1 = NTIMES(CHAR('a'), 3) |         | 
|    200  |         | 
|    201 matcher(re1, "")        //false |         | 
|    202 matcher(re1, "a")       //false |         | 
|    203 matcher(re1, "aa")      //false |         | 
|    204 matcher(re1, "aaa")     //true |         | 
|    205 matcher(re1, "aaaaa")   //false |         | 
|    206 matcher(re1, "aaaaaa")  //false |         | 
|    207  |         | 
|    208 // (a?)^{3} |         | 
|    209 val re2 = NTIMES(OPTIONAL(CHAR('a')), 3) |         | 
|    210  |         | 
|    211 matcher(re2, "")        //true |         | 
|    212 matcher(re2, "a")       //true |         | 
|    213 matcher(re2, "aa")      //true |         | 
|    214 matcher(re2, "aaa")     //true |         | 
|    215 matcher(re2, "aaaaa")   //false |         | 
|    216 matcher(re2, "aaaaaa")  //false |         | 
|    217  |         | 
|    218 // (a)^{..3} |         | 
|    219 val re3 = UPTO((CHAR('a')), 3) |         | 
|    220  |         | 
|    221 matcher(re3, "")        //true |         | 
|    222 matcher(re3, "a")       //true |         | 
|    223 matcher(re3, "aa")      //true |         | 
|    224 matcher(re3, "aaa")     //true |         | 
|    225 matcher(re3, "aaaaa")   //false |         | 
|    226 matcher(re3, "aaaaaa")  //false |         | 
|    227  |         | 
|    228 // (a?)^{..3} |         | 
|    229 val re4 = UPTO(OPTIONAL(CHAR('a')), 3) |         | 
|    230  |         | 
|    231 matcher(re4, "")        //true |         | 
|    232 matcher(re4, "a")       //true |         | 
|    233 matcher(re4, "aa")      //true |         | 
|    234 matcher(re4, "aaa")     //true |         | 
|    235 matcher(re4, "aaaaa")   //false |         | 
|    236 matcher(re4, "aaaaaa")  //false |         | 
|    237  |         | 
|    238 // (a)^{3..5} |         | 
|    239 val re5 = BETWEEN(CHAR('a'), 3, 5) |         | 
|    240  |         | 
|    241 matcher(re5, "")        //false |         | 
|    242 matcher(re5, "a")       //false |         | 
|    243 matcher(re5, "aa")      //false |         | 
|    244 matcher(re5, "aaa")     //true |         | 
|    245 matcher(re5, "aaaaa")   //true |         | 
|    246 matcher(re5, "aaaaaa")  //false |         | 
|    247  |         | 
|    248 // (a?)^{3..5} |         | 
|    249 val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5) |         | 
|    250  |         | 
|    251 matcher(re6, "")        //true |         | 
|    252 matcher(re6, "a")       //true |         | 
|    253 matcher(re6, "aa")      //true |         | 
|    254 matcher(re6, "aaa")     //true |         | 
|    255 matcher(re6, "aaaaa")   //true |         | 
|    256 matcher(re6, "aaaaaa")  //false |         | 
|    257  |         | 
|    258 //Q5: regular expression for email addresses |         | 
|    259  |         | 
|    260 val alphaNum = ('a' to 'z').toSet ++ ('0' to '9') |         | 
|    261 val Q5email = SEQ( |         | 
|    262                 PLUS(RANGE(alphaNum + '_' + '-' + '.')),  |         | 
|    263                 SEQ( |         | 
|    264                     CHAR('@'),  |         | 
|    265                     SEQ( |         | 
|    266                         PLUS(RANGE(alphaNum + '-' + '.')),  |         | 
|    267                         SEQ( |         | 
|    268                             CHAR('.'),  |         | 
|    269                             BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6) |         | 
|    270                             ) |         | 
|    271                       ) |         | 
|    272                 ) |         | 
|    273               ) |         | 
|    274  |         | 
|    275 ders("nicolas.volken@kcl.ac.uk".toList, Q5email) |         | 
|    276  |         | 
|    277 // Q6 |         | 
|    278 val Q6 = SEQ(CHAR('/'), |         | 
|    279             SEQ(CHAR('*'), |         | 
|    280                 SEQ( |         | 
|    281                    |         | 
|    282                     NOT( |         | 
|    283                       SEQ( |         | 
|    284                         STAR(FALL), |         | 
|    285                         SEQ( |         | 
|    286                           CHAR('*'), |         | 
|    287                           SEQ( |         | 
|    288                             CHAR('/'), |         | 
|    289                             STAR(FALL) |         | 
|    290                           ) |         | 
|    291                         ) |         | 
|    292                       ) |         | 
|    293                     ) |         | 
|    294                    |         | 
|    295                     , |         | 
|    296                     SEQ(CHAR('*'), |         | 
|    297                     CHAR('/') |         | 
|    298                     ) |         | 
|    299                 ) |         | 
|    300             ) |         | 
|    301         ) |         | 
|    302  |         | 
|    303 matcher(Q6, "/**/") |         | 
|    304 matcher(Q6, "/*foobar*/") |         | 
|    305 matcher(Q6, "/*test*/test*/") |         | 
|    306 matcher(Q6, "/*test/*test*/") |         | 
|    307  |         | 
|    308 // Q7 |         | 
|    309  |         | 
|    310 val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))) |         | 
|    311 val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a'))) |         | 
|    312  |         | 
|    313 val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |         | 
|    314 val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |         | 
|    315 val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |         | 
|    316  |         | 
|    317 matcher(PLUS(PLUS(Q7r1)), Q7str5) |         | 
|    318 matcher(PLUS(PLUS(Q7r2)), Q7str5) |         | 
|    319  |         | 
|    320 matcher(PLUS(PLUS(Q7r1)), Q7str6) |         | 
|    321 matcher(PLUS(PLUS(Q7r2)), Q7str6) |         | 
|    322  |         | 
|    323 matcher(PLUS(PLUS(Q7r1)), Q7str7) |         | 
|    324 matcher(PLUS(PLUS(Q7r2)), Q7str7) |         | 
|    325  |         |