progs/cw1.scala
changeset 189 04346d82fe01
child 190 0b63c0edfb09
equal deleted inserted replaced
188:12ef150273ce 189:04346d82fe01
       
     1 import scala.language.implicitConversions    
       
     2 import scala.language.reflectiveCalls 
       
     3 
       
     4 abstract class Rexp {
       
     5   def simp : Rexp = this
       
     6 }
       
     7 
       
     8 case object NULL extends Rexp
       
     9 case object EMPTY extends Rexp
       
    10 case class CHAR(c: Char) extends Rexp
       
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
       
    12   override def toString = r1.toString + " | " + r2.toString
       
    13   override def simp = (r1.simp, r2.simp) match {
       
    14     case (NULL, r) => r
       
    15     case (r, NULL) => r
       
    16     case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
       
    17     case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
       
    18     case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
       
    19   }
       
    20 }
       
    21 case class RANGE(cs: List[Char]) extends Rexp {
       
    22   //override def toString = cs.mkString("[",",","]")
       
    23   override def toString = "[" + cs.head + " .. " + cs.last + "]"
       
    24 }
       
    25 object RANGE {
       
    26   def apply(s: String) : RANGE = RANGE(s.toList)
       
    27 }
       
    28 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
       
    29   override def toString = r1.toString + " ~ " + r2.toString
       
    30   override def simp = (r1.simp, r2.simp) match {
       
    31     case (NULL, _) => NULL
       
    32     case (_, NULL) => NULL
       
    33     case (EMPTY, r) => r
       
    34     case (r, EMPTY) => r
       
    35     case (r1, r2) => SEQ(r1, r2)
       
    36   }
       
    37 }
       
    38 case class PLUS(r: Rexp) extends Rexp {
       
    39   override def simp = r.simp match {
       
    40     case NULL => EMPTY
       
    41     case EMPTY => EMPTY
       
    42     case r => PLUS(r)
       
    43   }
       
    44 }
       
    45 case class STAR(r: Rexp) extends Rexp {
       
    46   override def simp = r.simp match {
       
    47     case NULL => EMPTY
       
    48     case EMPTY => EMPTY
       
    49     case r => STAR(r)
       
    50   }
       
    51 }
       
    52 case class NTIMES(r: Rexp, n: Int) extends Rexp {
       
    53   override def simp = if (n == 0) EMPTY else 
       
    54     r.simp match {
       
    55       case NULL => NULL
       
    56       case EMPTY => EMPTY
       
    57       case r => NTIMES(r, n)
       
    58     }
       
    59 }
       
    60 case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp {
       
    61   if(m < n) throw new IllegalArgumentException("the number m cannot be smaller than n.")
       
    62 
       
    63   override def simp = if (m == 0) EMPTY else 
       
    64     r.simp match {
       
    65       case NULL => NULL
       
    66       case EMPTY => EMPTY
       
    67       case r => NMTIMES(r, n, m)
       
    68     }
       
    69 }
       
    70 case class NOT(r: Rexp) extends Rexp {
       
    71   override def simp = NOT(r.simp)
       
    72 }
       
    73 case class OPT(r: Rexp) extends Rexp {
       
    74   override def simp = OPT(r.simp)
       
    75 }
       
    76 
       
    77 // nullable function: tests whether the regular 
       
    78 // expression can recognise the empty string
       
    79 def nullable (r: Rexp) : Boolean = r match {
       
    80   case NULL => false
       
    81   case EMPTY => true
       
    82   case CHAR(_) => false
       
    83   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    84   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    85   case STAR(_) => true
       
    86   case PLUS(r) => nullable(r)
       
    87   case NTIMES(r, i) => if (i == 0) true else nullable(r)
       
    88   case NMTIMES(r, i, j) => if (i == 0) true else nullable(r)
       
    89   case RANGE(_) => false
       
    90   case NOT(r) => !(nullable(r))
       
    91   case OPT(_) => true
       
    92 }
       
    93 
       
    94 // derivative of a regular expression w.r.t. a character
       
    95 def der (c: Char, r: Rexp) : Rexp = r match {
       
    96   case NULL => NULL
       
    97   case EMPTY => NULL
       
    98   case CHAR(d) => if (c == d) EMPTY else NULL
       
    99   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
   100   case SEQ(r1, r2) => 
       
   101     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
   102     else SEQ(der(c, r1), r2)
       
   103   case STAR(r) => SEQ(der(c, r), STAR(r))
       
   104   case PLUS(r) => der(c, SEQ(r, STAR(r)))
       
   105   case NTIMES(r, i) => 
       
   106     if (i == 0) NULL else der(c, SEQ(r, NTIMES(r, i - 1)))
       
   107   case NMTIMES(r, i, j) => 
       
   108     if (j == 0) NULL 
       
   109     if (i == 0) der(c, NTIMES(r, j))
       
   110     else der(c, SEQ(r, NMTIMES(r, i - 1, j - 1)))
       
   111   case RANGE(cs) => if (cs contains c) EMPTY else NULL
       
   112   case NOT(r) => NOT(der (c, r))
       
   113   case OPT(r) => der(c, r)
       
   114 }
       
   115 
       
   116 // derivative w.r.t. a string (iterates der)
       
   117 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
   118   case Nil => r
       
   119   case c::s => ders(s, der(c, r))
       
   120 }
       
   121 
       
   122 def ders_simp (s: List[Char], r: Rexp) : Rexp = s match {
       
   123   case Nil => r
       
   124   case c::s => ders_simp(s, der(c, r).simp)
       
   125 }
       
   126 
       
   127 // main matcher function
       
   128 def matcher(r: Rexp, s: String) : Boolean = nullable(ders_simp(s.toList, r))
       
   129 
       
   130 // some convenience for typing in regular expressions
       
   131 def charlist2rexp(s : List[Char]) : Rexp = s match {
       
   132   case Nil => EMPTY
       
   133   case c::Nil => CHAR(c)
       
   134   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
   135 }
       
   136 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
   137 
       
   138 implicit def RexpOps (r: Rexp) = new {
       
   139   def | (s: Rexp) = ALT(r, s)
       
   140   def % = STAR(r)
       
   141   def ~ (s: Rexp) = SEQ(r, s)
       
   142 }
       
   143 
       
   144 implicit def stringOps (s: String) = new {
       
   145   def | (r: Rexp) = ALT(s, r)
       
   146   def | (r: String) = ALT(s, r)
       
   147   def % = STAR(s)
       
   148   def ~ (r: Rexp) = SEQ(s, r)
       
   149   def ~ (r: String) = SEQ(s, r)
       
   150 }
       
   151 
       
   152 println("EMAIL:")
       
   153 val LOWERCASE = "abcdefghijklmnopqrstuvwxyz"
       
   154 val DIGITS = "0123456789"
       
   155 val EMAIL = PLUS(RANGE(LOWERCASE + DIGITS + "_" + "." + "-")) ~ "@" ~ 
       
   156             PLUS(RANGE(LOWERCASE + DIGITS + "." + "-")) ~ "." ~
       
   157             NMTIMES(RANGE(LOWERCASE + "."), 2, 6)
       
   158 
       
   159 val my_email = "christian.urban@kcl.ac.uk"
       
   160 
       
   161 println(EMAIL);
       
   162 println(matcher(EMAIL, my_email))
       
   163 println(ders_simp(my_email.toList, EMAIL))
       
   164 
       
   165 println("COMMENTS:")
       
   166 val ALL = RANGE(LOWERCASE + " ")
       
   167 val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/"
       
   168 
       
   169 println(matcher(COMMENT, "/**/"))
       
   170 println(matcher(COMMENT, "/*foobar_comment*/"))
       
   171 println(matcher(COMMENT, "/*test*/test*/"))
       
   172 println(matcher(COMMENT, "/*test/*test*/"))
       
   173 
       
   174 println("EVILS:")
       
   175 val EVIL1 = PLUS(PLUS("a" ~ "a" ~ "a"))
       
   176 val EVIL2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a")))  // 19 as ~ a?
       
   177 
       
   178 
       
   179 //40 * aaa matches
       
   180 //43 * aaa + aa does not
       
   181 //45 * aaa + a
       
   182 
       
   183 println("EVIL1:")
       
   184 println(matcher(EVIL1, "aaa" * 40))
       
   185 println(matcher(EVIL1, "aaa" * 43 + "aa"))
       
   186 println(matcher(EVIL1, "aaa" * 45 + "a"))
       
   187 println("EVIL2:")
       
   188 println(matcher(EVIL2, "aaa" * 40))
       
   189 println(matcher(EVIL2, "aaa" * 43 + "aa"))
       
   190 println(matcher(EVIL2, "aaa" * 45 + "a"))