progs/matcher/cw1template.sc
changeset 967 ce5de01b9632
parent 929 9541e073f2ed
equal deleted inserted replaced
966:4189cb63e5db 967:ce5de01b9632
       
     1 // A template of the simple matcher with simplification 
       
     2 // of derivatives to be used in CW1
       
     3 
       
     4 
       
     5 abstract class Rexp
       
     6 case object ZERO extends Rexp
       
     7 case object ONE extends Rexp
       
     8 case class CHAR(c: Char) extends Rexp
       
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
       
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    11 case class STAR(r: Rexp) extends Rexp 
       
    12 
       
    13 case class RANGE(cs: Set[Char]) extends Rexp             // set of characters
       
    14 case class PLUS(r: Rexp) extends Rexp                    // plus
       
    15 case class OPTIONAL(r: Rexp) extends Rexp                // optional
       
    16 case class INTER(r1: Rexp, r2: Rexp) extends Rexp        // intersection
       
    17 case class NTIMES(r: Rexp, n: Int) extends Rexp          // n-times
       
    18 case class UPTO(r: Rexp, n: Int) extends Rexp            // up n-times
       
    19 case class FROM(r: Rexp, n: Int) extends Rexp            // from n-times
       
    20 case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp // between nm-times
       
    21 case class NOT(r: Rexp) extends Rexp                     // not
       
    22 case class CFUN(f: Char => Boolean) extends Rexp  
       
    23  
       
    24 
       
    25 
       
    26 // the nullable function: tests whether the regular 
       
    27 // expression can recognise the empty string
       
    28 def nullable (r: Rexp) : Boolean = r match {
       
    29   case ZERO => false
       
    30   case ONE => true
       
    31   case CHAR(_) => false
       
    32   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    33   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    34   case STAR(_) => true
       
    35   case NTIMES(r, i) => if (i == 0) true else nullable(r)
       
    36   // ??? other cases
       
    37 }
       
    38 
       
    39 // the derivative of a regular expression w.r.t. a character
       
    40 def der(c: Char, r: Rexp) : Rexp = r match {
       
    41   case ZERO => ZERO
       
    42   case ONE => ZERO
       
    43   case CHAR(d) => if (c == d) ONE else ZERO
       
    44   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    45   case SEQ(r1, r2) => 
       
    46     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    47     else SEQ(der(c, r1), r2)
       
    48   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
       
    49   case NTIMES(r, i) => 
       
    50     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
       
    51   // ??? other cases  
       
    52 }
       
    53 
       
    54 // simplification
       
    55 def simp(r: Rexp) : Rexp = r match {
       
    56   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
       
    57     case (ZERO, r2s) => r2s
       
    58     case (r1s, ZERO) => r1s
       
    59     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
       
    60   }
       
    61   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
       
    62     case (ZERO, _) => ZERO
       
    63     case (_, ZERO) => ZERO
       
    64     case (ONE, r2s) => r2s
       
    65     case (r1s, ONE) => r1s
       
    66     case (r1s, r2s) => SEQ(r1s, r2s)
       
    67   }
       
    68   case r => r
       
    69 }
       
    70 
       
    71 // the derivative w.r.t. a string (iterates der and simp)
       
    72 def ders(s: List[Char], r: Rexp) : Rexp = s match {
       
    73   case Nil => r
       
    74   case c::s => ders(s, simp(der(c, r)))
       
    75 }
       
    76 
       
    77 // the main matcher function
       
    78 def matcher(r: Rexp, s: String) : Boolean = 
       
    79   nullable(ders(s.toList, r))
       
    80