progs/cw1.scala
changeset 492 39b7ff2cf1bc
parent 208 bd5a8a6b3871
child 493 4e97783862ff
equal deleted inserted replaced
491:d5776c6018f0 492:39b7ff2cf1bc
       
     1 // CW 1
     1 import scala.language.implicitConversions    
     2 import scala.language.implicitConversions    
     2 import scala.language.reflectiveCalls 
     3 import scala.language.reflectiveCalls 
     3 
     4 
     4 abstract class Rexp {
     5 abstract class Rexp
     5   def simp : Rexp = this
     6 case object ZERO extends Rexp                            // matches nothing
     6 }
     7 case object ONE extends Rexp                             // matches the empty string
     7 
     8 case class CHAR(c: Char) extends Rexp                    // matches a character c
     8 case object NULL extends Rexp
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp          // alternative
     9 case object EMPTY extends Rexp
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp          // sequence
    10 case class CHAR(c: Char) extends Rexp
    11 case class STAR(r: Rexp) extends Rexp                    // star
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
    12 case class RANGE(cs: Set[Char]) extends Rexp             // set of characters
    12   override def toString = r1.toString + " | " + r2.toString
    13 case class PLUS(r: Rexp) extends Rexp                    // plus
    13   override def simp = (r1.simp, r2.simp) match {
    14 case class OPT(r: Rexp) extends Rexp                     // optional
    14     case (NULL, r) => r
    15 case class NTIMES(r: Rexp, n: Int) extends Rexp          // n-times
    15     case (r, NULL) => r
    16 case class UPNTIMES(r: Rexp, n: Int) extends Rexp        // up n-times
    16     case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
    17 case class FROMNTIMES(r: Rexp, n: Int) extends Rexp      // from n-times
    17     case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
    18 case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp // between nm-times
    18     case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
    19 case class NOT(r: Rexp) extends Rexp                     // not
    19   }
    20 case class CFUN(f: Char => Boolean) extends Rexp         // subsuming CHAR and RANGE
    20 }
    21 
    21 case class RANGE(cs: List[Char]) extends Rexp {
       
    22   //override def toString = cs.mkString("[",",","]")
       
    23   override def toString = "[" + cs.head + " .. " + cs.last + "]"
       
    24 }
       
    25 object RANGE {
       
    26   def apply(s: String) : RANGE = RANGE(s.toList)
       
    27 }
       
    28 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
       
    29   override def toString = r1.toString + " ~ " + r2.toString
       
    30   override def simp = (r1.simp, r2.simp) match {
       
    31     case (NULL, _) => NULL
       
    32     case (_, NULL) => NULL
       
    33     case (EMPTY, r) => r
       
    34     case (r, EMPTY) => r
       
    35     case (r1, r2) => SEQ(r1, r2)
       
    36   }
       
    37 }
       
    38 case class PLUS(r: Rexp) extends Rexp {
       
    39   override def simp = r.simp match {
       
    40     case NULL => EMPTY
       
    41     case EMPTY => EMPTY
       
    42     case r => PLUS(r)
       
    43   }
       
    44 }
       
    45 case class STAR(r: Rexp) extends Rexp {
       
    46   override def simp = r.simp match {
       
    47     case NULL => EMPTY
       
    48     case EMPTY => EMPTY
       
    49     case r => STAR(r)
       
    50   }
       
    51 }
       
    52 case class NTIMES(r: Rexp, n: Int) extends Rexp {
       
    53   override def simp = if (n == 0) EMPTY else 
       
    54     r.simp match {
       
    55       case NULL => NULL
       
    56       case EMPTY => EMPTY
       
    57       case r => NTIMES(r, n)
       
    58     }
       
    59 }
       
    60 case class NUPTOM(r: Rexp, n: Int, m: Int) extends Rexp {
       
    61   override def simp = if (m == 0) EMPTY else 
       
    62     r.simp match {
       
    63       case NULL => NULL
       
    64       case EMPTY => EMPTY
       
    65       case r => NUPTOM(r, n, m)
       
    66     }
       
    67 }
       
    68 def NMTIMES(r: Rexp, n: Int, m: Int) = {
       
    69   if(m < n) throw new IllegalArgumentException("the number m cannot be smaller than n.")
       
    70   else NUPTOM(r, n, m - n)
       
    71 }
       
    72 
       
    73 
       
    74 case class NOT(r: Rexp) extends Rexp {
       
    75   override def simp = NOT(r.simp)
       
    76 }
       
    77 case class OPT(r: Rexp) extends Rexp {
       
    78   override def simp = OPT(r.simp)
       
    79 }
       
    80 
    22 
    81 // nullable function: tests whether the regular 
    23 // nullable function: tests whether the regular 
    82 // expression can recognise the empty string
    24 // expression can recognise the empty string
    83 def nullable (r: Rexp) : Boolean = r match {
    25 def nullable (r: Rexp) : Boolean = r match {
    84   case NULL => false
    26   case ZERO => false
    85   case EMPTY => true
    27   case ONE => true
    86   case CHAR(_) => false
    28   case CHAR(_) => false
    87   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    29   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    88   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    30   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    89   case STAR(_) => true
    31   case STAR(_) => true
       
    32   case RANGE(_) => false
    90   case PLUS(r) => nullable(r)
    33   case PLUS(r) => nullable(r)
    91   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    34   case OPT(_) => true
    92   case NUPTOM(r, i, j) => if (i == 0) true else nullable(r)
    35   case NTIMES(r, n) => if (n == 0) true else nullable(r)
    93   case RANGE(_) => false
    36   case UPNTIMES(_, _) => true
       
    37   case FROMNTIMES(r, n) => if (n == 0) true else nullable(r)
       
    38   case NMTIMES(r, n, m) => if (n == 0) true else nullable(r)
    94   case NOT(r) => !(nullable(r))
    39   case NOT(r) => !(nullable(r))
    95   case OPT(_) => true
    40   case CFUN(_) => false
    96 }
       
    97 
       
    98 def zeroable (r: Rexp) : Boolean = r match {
       
    99   case NULL => true
       
   100   case EMPTY => false
       
   101   case CHAR(_) => false
       
   102   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
       
   103   case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
       
   104   case STAR(_) => false
       
   105   case PLUS(r) => zeroable(r)
       
   106   case NTIMES(r, i) => if (i == 0) false else zeroable(r)
       
   107   case NUPTOM(r, i, j) => if (i == 0) false else zeroable(r)
       
   108   case RANGE(_) => false
       
   109   case NOT(r) => !(zeroable(r))
       
   110   case OPT(_) => false
       
   111 }
    41 }
   112 
    42 
   113 // derivative of a regular expression w.r.t. a character
    43 // derivative of a regular expression w.r.t. a character
   114 def der (c: Char, r: Rexp) : Rexp = r match {
    44 def der (c: Char, r: Rexp) : Rexp = r match {
   115   case NULL => NULL
    45   case ZERO => ZERO
   116   case EMPTY => NULL
    46   case ONE => ZERO
   117   case CHAR(d) => if (c == d) EMPTY else NULL
    47   case CHAR(d) => if (c == d) ONE else ZERO
   118   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    48   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
   119   case SEQ(r1, r2) => 
    49   case SEQ(r1, r2) => 
   120     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    50     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
   121     else SEQ(der(c, r1), r2)
    51     else SEQ(der(c, r1), r2)
   122   case STAR(r) => SEQ(der(c, r), STAR(r))
    52   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    53   case RANGE(cs) => if (cs contains c) ONE else ZERO
   123   case PLUS(r) => SEQ(der(c, r), STAR(r))
    54   case PLUS(r) => SEQ(der(c, r), STAR(r))
   124   case NTIMES(r, i) => 
    55   case OPT(r) => der(c, r)
   125     if (i == 0) NULL else der(c, SEQ(r, NTIMES(r, i - 1)))
    56   case NTIMES(r, n) => 
   126   case NUPTOM(r, i, j) =>
    57     if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1))
   127     if (i == 0 && j == 0) NULL else 
    58   case UPNTIMES(r, n) =>
   128     if (i == 0) ALT(der(c, NTIMES(r, j)), der(c, NUPTOM(r, 0, j - 1)))
    59     if (n == 0) ZERO else SEQ(der(c, r), UPNTIMES(r, n - 1))
   129     else der(c, SEQ(r, NUPTOM(r, i - 1, j)))
    60   case FROMNTIMES(r, n) =>
   130   case RANGE(cs) => if (cs contains c) EMPTY else NULL
    61     if (n == 0) ZERO else SEQ(der(c, r), FROMNTIMES(r, n - 1))
       
    62   case NMTIMES(r, n, m) =>
       
    63     if (m < n) ZERO else
       
    64     if (n == 0 && m == 0) ZERO else 
       
    65     if (n == 0) SEQ(der(c, r), UPNTIMES(r, m - 1)) 
       
    66     else SEQ(der(c, r), NMTIMES(r, n - 1, m - 1)) 
   131   case NOT(r) => NOT(der (c, r))
    67   case NOT(r) => NOT(der (c, r))
   132   case OPT(r) => der(c, r)
    68   case CFUN(f) => if (f(c)) ONE else ZERO
   133 }
    69 }
       
    70 
       
    71 def simp(r: Rexp) : Rexp = r match {
       
    72   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
       
    73     case (ZERO, r2s) => r2s
       
    74     case (r1s, ZERO) => r1s
       
    75     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
       
    76   }
       
    77   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
       
    78     case (ZERO, _) => ZERO
       
    79     case (_, ZERO) => ZERO
       
    80     case (ONE, r2s) => r2s
       
    81     case (r1s, ONE) => r1s
       
    82     case (r1s, r2s) => SEQ(r1s, r2s)
       
    83   }
       
    84   case r => r
       
    85 }
       
    86 
   134 
    87 
   135 // derivative w.r.t. a string (iterates der)
    88 // derivative w.r.t. a string (iterates der)
   136 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    89 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   137   case Nil => r
    90   case Nil => r
   138   case c::s => ders(s, der(c, r))
    91   case c::s => ders(s, der(c, r))
   139 }
    92 }
   140 
    93 
   141 def ders_simp (s: List[Char], r: Rexp) : Rexp = s match {
    94 def ders_simp (s: List[Char], r: Rexp) : Rexp = s match {
   142   case Nil => r
    95   case Nil => r
   143   case c::s => ders_simp(s, der(c, r).simp)
    96   case c::s => ders_simp(s, simp(der(c, r)))
   144 }
    97 }
   145 
    98 
   146 // main matcher function
    99 // main matcher function including simplification
   147 def matcher(r: Rexp, s: String) : Boolean = nullable(ders_simp(s.toList, r))
   100 def matcher(r: Rexp, s: String) : Boolean = 
       
   101   nullable(ders_simp(s.toList, r))
       
   102 
       
   103 
   148 
   104 
   149 // some convenience for typing in regular expressions
   105 // some convenience for typing in regular expressions
   150 def charlist2rexp(s : List[Char]) : Rexp = s match {
   106 def charlist2rexp(s : List[Char]) : Rexp = s match {
   151   case Nil => EMPTY
   107   case Nil => ONE
   152   case c::Nil => CHAR(c)
   108   case c::Nil => CHAR(c)
   153   case c::s => SEQ(CHAR(c), charlist2rexp(s))
   109   case c::s => SEQ(CHAR(c), charlist2rexp(s))
   154 }
   110 }
   155 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
   111 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
   156 
   112 
   166   def % = STAR(s)
   122   def % = STAR(s)
   167   def ~ (r: Rexp) = SEQ(s, r)
   123   def ~ (r: Rexp) = SEQ(s, r)
   168   def ~ (r: String) = SEQ(s, r)
   124   def ~ (r: String) = SEQ(s, r)
   169 }
   125 }
   170 
   126 
       
   127 val r1 = NTIMES("a", 3)
       
   128 val r2 = NTIMES(OPT("a"), 3)
       
   129 val r3 = UPNTIMES("a", 3)
       
   130 val r4 = UPNTIMES(OPT("a"), 3)
       
   131 val r5 = NMTIMES("a", 3, 5)
       
   132 val r6 = NMTIMES(OPT("a"), 3, 5)
       
   133 
       
   134 val rs = List(r1, r2, r3, r4, r5, r6)
       
   135 
       
   136 rs.map(matcher(_, ""))
       
   137 rs.map(matcher(_, "a"))
       
   138 rs.map(matcher(_, "aa"))
       
   139 rs.map(matcher(_, "aaa"))
       
   140 rs.map(matcher(_, "aaaa"))
       
   141 rs.map(matcher(_, "aaaaa"))
       
   142 rs.map(matcher(_, "aaaaaa"))
       
   143 
   171 println("EMAIL:")
   144 println("EMAIL:")
   172 val LOWERCASE = "abcdefghijklmnopqrstuvwxyz"
   145 val LOWERCASE = ('a' to 'z').toSet
   173 val DIGITS = "0123456789"
   146 val DIGITS = ('0' to '9').toSet
   174 val EMAIL = PLUS(RANGE(LOWERCASE + DIGITS + "_" + "." + "-")) ~ "@" ~ 
   147 val EMAIL = { PLUS(CFUN(LOWERCASE | DIGITS | ("_.-").toSet)) ~ "@" ~ 
   175             PLUS(RANGE(LOWERCASE + DIGITS + "." + "-")) ~ "." ~
   148               PLUS(CFUN(LOWERCASE | DIGITS | (".-").toSet)) ~ "." ~
   176             NMTIMES(RANGE(LOWERCASE + "."), 2, 6)
   149               NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) }
   177 
   150 
   178 val my_email = "christian.urban@kcl.ac.uk"
   151 val my_email = "christian.urban@kcl.ac.uk"
   179 
   152 
   180 println(EMAIL);
   153 println(EMAIL);
   181 println(matcher(EMAIL, my_email))
   154 println(matcher(EMAIL, my_email))
   182 println(ders_simp(my_email.toList, EMAIL))
   155 println(ders_simp(my_email.toList, EMAIL))
   183 
   156 
   184 println("COMMENTS:")
   157 println("COMMENTS:")
   185 val ALL = RANGE(LOWERCASE + " ")
   158 val ALL = CFUN((c:Char) => true)
   186 val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/"
   159 val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/"
   187 
   160 
   188 println(matcher(COMMENT, "/**/"))
   161 println(matcher(COMMENT, "/**/"))
   189 println(matcher(COMMENT, "/*foobar_comment*/"))
   162 println(matcher(COMMENT, "/*foobar_comment*/"))
   190 println(matcher(COMMENT, "/*test*/test*/"))
   163 println(matcher(COMMENT, "/*test*/test*/"))