solution/cw1/matcher.sc
changeset 864 b5b1bc0a603b
equal deleted inserted replaced
863:d59bcff69998 864:b5b1bc0a603b
       
     1 // CW1
       
     2 
       
     3 abstract class Rexp
       
     4 case object ZERO extends Rexp
       
     5 case object ONE extends Rexp
       
     6 case class CHAR(c: Char) extends Rexp
       
     7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
       
     8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
     9 case class STAR(r: Rexp) extends Rexp 
       
    10 
       
    11 // extended Rexps
       
    12 case class RANGE(s: Set[Char]) extends Rexp
       
    13 case class PLUS(r: Rexp) extends Rexp
       
    14 case class OPTIONAL(r: Rexp) extends Rexp
       
    15 case class NTIMES(r: Rexp, n: Int) extends Rexp 
       
    16 case class UPTO(r: Rexp, n: Int) extends Rexp
       
    17 case class FROM(r: Rexp, n: Int) extends Rexp
       
    18 case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp
       
    19 case class NOT(r: Rexp) extends Rexp
       
    20 
       
    21 // functions
       
    22 case class CFUN(f: Char => Boolean) extends Rexp
       
    23 
       
    24 // abbreviations
       
    25 def FCHAR(c: Char) = CFUN((a: Char) => a == c)
       
    26 def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a))
       
    27 val FALL = CFUN(_ => true)
       
    28 
       
    29 def nullable (r: Rexp) : Boolean = r match {
       
    30   case ZERO => false
       
    31   case ONE => true
       
    32   case CHAR(_) => false
       
    33   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    34   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    35   case STAR(_) => true
       
    36 
       
    37   case RANGE(_) => false 
       
    38   case PLUS(r1) => nullable(r1)
       
    39   case OPTIONAL(_) => true
       
    40   case NTIMES(r1, i) => if (i == 0) true else nullable(r1)
       
    41   case UPTO(_, _) => true
       
    42   case FROM(r1, i) => if (i == 0) true else nullable(r1)
       
    43   case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1)
       
    44   case NOT(r1) => !nullable(r1)
       
    45 
       
    46   case CFUN(f) => false
       
    47 }
       
    48 
       
    49 
       
    50 def der (c: Char, r: Rexp) : Rexp = r match {
       
    51   case ZERO => ZERO
       
    52   case ONE => ZERO
       
    53   case CHAR(d) => if (c == d) ONE else ZERO
       
    54   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    55   case SEQ(r1, r2) => 
       
    56     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    57     else SEQ(der(c, r1), r2)
       
    58   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
       
    59 
       
    60   case RANGE(s) =>
       
    61     if (s.contains(c)) ONE else ZERO 
       
    62   case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
       
    63   case OPTIONAL(r1) => der(c, r1)
       
    64   case NTIMES(r, i) => 
       
    65     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
       
    66   case UPTO(r1, i) => 
       
    67     if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1)) 
       
    68   case FROM(r1, i) =>
       
    69     if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else
       
    70     SEQ(der(c, r1), FROM(r1, i - 1))
       
    71   case BETWEEN(r1, i, j) => 
       
    72     if (i == 0) {
       
    73       if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1))
       
    74     } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1))
       
    75   case NOT(r1) => NOT(der(c, r1))
       
    76 
       
    77   case CFUN(f) => if (f(c)) ONE else ZERO
       
    78 }
       
    79 
       
    80 
       
    81 def simp(r: Rexp) : Rexp = r match {
       
    82   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
       
    83     case (ZERO, r2s) => r2s
       
    84     case (r1s, ZERO) => r1s
       
    85     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
       
    86   }
       
    87   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
       
    88     case (ZERO, _) => ZERO
       
    89     case (_, ZERO) => ZERO
       
    90     case (ONE, r2s) => r2s
       
    91     case (r1s, ONE) => r1s
       
    92     case (r1s, r2s) => SEQ(r1s, r2s)
       
    93   }
       
    94   case r => r
       
    95 }
       
    96 
       
    97 def ders(s: List[Char], r: Rexp) : Rexp = s match {
       
    98   case Nil => r
       
    99   case c::s => ders(s, simp(der(c, r)))
       
   100 }
       
   101 
       
   102 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
       
   103 
       
   104 
       
   105 
       
   106 val Regex31 = NTIMES(CHAR('a'),3)
       
   107 val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3)
       
   108 val Regex33 = UPTO(CHAR('a'),3)
       
   109 val Regex34 = UPTO(OPTIONAL(CHAR('a')),3)
       
   110 val Regex35 = BETWEEN(CHAR('a'),3,5)
       
   111 val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5)
       
   112 val string31 = ""
       
   113 val string32 = "a"
       
   114 val string33 = "aa"
       
   115 val string34 = "aaa"
       
   116 val string35 = "aaaa"
       
   117 val string36 = "aaaaa"
       
   118 val string37 = "aaaaaa"
       
   119 
       
   120 
       
   121 println("Question3")
       
   122 println(matcher(Regex31, string31))
       
   123 println(matcher(Regex31, string32))
       
   124 println(matcher(Regex31, string33))
       
   125 println(matcher(Regex31, string34))
       
   126 println(matcher(Regex31, string35))
       
   127 println(matcher(Regex31, string36))
       
   128 println(matcher(Regex31, string37)); println("")
       
   129 println(matcher(Regex32, string31))
       
   130 println(matcher(Regex32, string32))
       
   131 println(matcher(Regex32, string33))
       
   132 println(matcher(Regex32, string34))
       
   133 println(matcher(Regex32, string35))
       
   134 println(matcher(Regex32, string36))
       
   135 println(matcher(Regex32, string37)); println("")
       
   136 println(matcher(Regex33, string31))
       
   137 println(matcher(Regex33, string32))
       
   138 println(matcher(Regex33, string33))
       
   139 println(matcher(Regex33, string34))
       
   140 println(matcher(Regex33, string35))
       
   141 println(matcher(Regex33, string36))
       
   142 println(matcher(Regex33, string37)); println("")
       
   143 println(matcher(Regex34, string31))
       
   144 println(matcher(Regex34, string32))
       
   145 println(matcher(Regex34, string33))
       
   146 println(matcher(Regex34, string34))
       
   147 println(matcher(Regex34, string35))
       
   148 println(matcher(Regex34, string36))
       
   149 println(matcher(Regex34, string37)); println("")
       
   150 println(matcher(Regex35, string31))
       
   151 println(matcher(Regex35, string32))
       
   152 println(matcher(Regex35, string33))
       
   153 println(matcher(Regex35, string34))
       
   154 println(matcher(Regex35, string35))
       
   155 println(matcher(Regex35, string36))
       
   156 println(matcher(Regex35, string37)); println("")
       
   157 println(matcher(Regex36, string31))
       
   158 println(matcher(Regex36, string32))
       
   159 println(matcher(Regex36, string33))
       
   160 println(matcher(Regex36, string34))
       
   161 println(matcher(Regex36, string35))
       
   162 println(matcher(Regex36, string36))
       
   163 println(matcher(Regex36, string37)); println("")
       
   164 
       
   165 
       
   166 val RegexX = BETWEEN(CHAR('a'), 0, 5)
       
   167 val stringX = ""
       
   168 println(matcher(RegexX, stringX))
       
   169 
       
   170 
       
   171 
       
   172 val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       
   173 val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       
   174 val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       
   175 
       
   176 val matchA = (c:Char) => c == 'a'
       
   177 
       
   178 val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA))
       
   179 val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA)))
       
   180 
       
   181 val reg_1_mod = PLUS(PLUS(reg_1))
       
   182 val reg_2_mod = PLUS(PLUS(reg_2))
       
   183 
       
   184 
       
   185 matcher(reg_1_mod, str0)
       
   186 matcher(reg_1_mod, str1)
       
   187 matcher(reg_1_mod, str2)
       
   188 matcher(reg_2_mod, str0)
       
   189 matcher(reg_2_mod, str1)
       
   190 matcher(reg_2_mod, str2)
       
   191 
       
   192 
       
   193 
       
   194 
       
   195 
       
   196 //Q3: matcher test (table)
       
   197 
       
   198 // a^{3}
       
   199 val re1 = NTIMES(CHAR('a'), 3)
       
   200 
       
   201 matcher(re1, "")        //false
       
   202 matcher(re1, "a")       //false
       
   203 matcher(re1, "aa")      //false
       
   204 matcher(re1, "aaa")     //true
       
   205 matcher(re1, "aaaaa")   //false
       
   206 matcher(re1, "aaaaaa")  //false
       
   207 
       
   208 // (a?)^{3}
       
   209 val re2 = NTIMES(OPTIONAL(CHAR('a')), 3)
       
   210 
       
   211 matcher(re2, "")        //true
       
   212 matcher(re2, "a")       //true
       
   213 matcher(re2, "aa")      //true
       
   214 matcher(re2, "aaa")     //true
       
   215 matcher(re2, "aaaaa")   //false
       
   216 matcher(re2, "aaaaaa")  //false
       
   217 
       
   218 // (a)^{..3}
       
   219 val re3 = UPTO((CHAR('a')), 3)
       
   220 
       
   221 matcher(re3, "")        //true
       
   222 matcher(re3, "a")       //true
       
   223 matcher(re3, "aa")      //true
       
   224 matcher(re3, "aaa")     //true
       
   225 matcher(re3, "aaaaa")   //false
       
   226 matcher(re3, "aaaaaa")  //false
       
   227 
       
   228 // (a?)^{..3}
       
   229 val re4 = UPTO(OPTIONAL(CHAR('a')), 3)
       
   230 
       
   231 matcher(re4, "")        //true
       
   232 matcher(re4, "a")       //true
       
   233 matcher(re4, "aa")      //true
       
   234 matcher(re4, "aaa")     //true
       
   235 matcher(re4, "aaaaa")   //false
       
   236 matcher(re4, "aaaaaa")  //false
       
   237 
       
   238 // (a)^{3..5}
       
   239 val re5 = BETWEEN(CHAR('a'), 3, 5)
       
   240 
       
   241 matcher(re5, "")        //false
       
   242 matcher(re5, "a")       //false
       
   243 matcher(re5, "aa")      //false
       
   244 matcher(re5, "aaa")     //true
       
   245 matcher(re5, "aaaaa")   //true
       
   246 matcher(re5, "aaaaaa")  //false
       
   247 
       
   248 // (a?)^{3..5}
       
   249 val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5)
       
   250 
       
   251 matcher(re6, "")        //true
       
   252 matcher(re6, "a")       //true
       
   253 matcher(re6, "aa")      //true
       
   254 matcher(re6, "aaa")     //true
       
   255 matcher(re6, "aaaaa")   //true
       
   256 matcher(re6, "aaaaaa")  //false
       
   257 
       
   258 //Q5: regular expression for email addresses
       
   259 
       
   260 val alphaNum = ('a' to 'z').toSet ++ ('0' to '9')
       
   261 val Q5email = SEQ(
       
   262                 PLUS(RANGE(alphaNum + '_' + '-' + '.')), 
       
   263                 SEQ(
       
   264                     CHAR('@'), 
       
   265                     SEQ(
       
   266                         PLUS(RANGE(alphaNum + '-' + '.')), 
       
   267                         SEQ(
       
   268                             CHAR('.'), 
       
   269                             BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6)
       
   270                             )
       
   271                       )
       
   272                 )
       
   273               )
       
   274 
       
   275 ders("nicolas.volken@kcl.ac.uk".toList, Q5email)
       
   276 
       
   277 // Q6
       
   278 val Q6 = SEQ(CHAR('/'),
       
   279             SEQ(CHAR('*'),
       
   280                 SEQ(
       
   281                   
       
   282                     NOT(
       
   283                       SEQ(
       
   284                         STAR(FALL),
       
   285                         SEQ(
       
   286                           CHAR('*'),
       
   287                           SEQ(
       
   288                             CHAR('/'),
       
   289                             STAR(FALL)
       
   290                           )
       
   291                         )
       
   292                       )
       
   293                     )
       
   294                   
       
   295                     ,
       
   296                     SEQ(CHAR('*'),
       
   297                     CHAR('/')
       
   298                     )
       
   299                 )
       
   300             )
       
   301         )
       
   302 
       
   303 matcher(Q6, "/**/")
       
   304 matcher(Q6, "/*foobar*/")
       
   305 matcher(Q6, "/*test*/test*/")
       
   306 matcher(Q6, "/*test/*test*/")
       
   307 
       
   308 // Q7
       
   309 
       
   310 val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
       
   311 val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a')))
       
   312 
       
   313 val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       
   314 val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       
   315 val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       
   316 
       
   317 matcher(PLUS(PLUS(Q7r1)), Q7str5)
       
   318 matcher(PLUS(PLUS(Q7r2)), Q7str5)
       
   319 
       
   320 matcher(PLUS(PLUS(Q7r1)), Q7str6)
       
   321 matcher(PLUS(PLUS(Q7r2)), Q7str6)
       
   322 
       
   323 matcher(PLUS(PLUS(Q7r1)), Q7str7)
       
   324 matcher(PLUS(PLUS(Q7r2)), Q7str7)
       
   325