regexp.scala
changeset 18 d48cfc286cb1
parent 7 73cf4406b773
child 29 774007c4b1b3
equal deleted inserted replaced
17:4d81b2dc8271 18:d48cfc286cb1
       
     1 // regular expressions
     1 abstract class Rexp
     2 abstract class Rexp
     2 
     3 
     3 case object NULL extends Rexp
     4 case object NULL extends Rexp
     4 case object EMPTY extends Rexp
     5 case object EMPTY extends Rexp
     5 case class CHAR(c: Char) extends Rexp
     6 case class CHAR(c: Char) extends Rexp
     6 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
     7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
     7 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
     8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
     8 case class STAR(r: Rexp) extends Rexp
     9 case class STAR(r: Rexp) extends Rexp
     9 
    10 
    10 // whether it can match the empty string
    11 // some convenience for typing in regular expressions
       
    12 implicit def string2rexp(s : String) : Rexp = {
       
    13   s.foldRight (EMPTY: Rexp) ( (c, r) => SEQ(CHAR(c), r) )
       
    14 }
       
    15 
       
    16 // for example
       
    17 println(STAR("abc"))
       
    18 
       
    19 // produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY))))
       
    20 
       
    21 
       
    22 
       
    23 // a simple-minded regular expression matcher:
       
    24 // it loops for examples like STAR(EMPTY) with
       
    25 // strings this regular expression does not match
       
    26 
       
    27 def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match {
       
    28   case (NULL::rs, s) => false
       
    29   case (EMPTY::rs, s) => smatchers(rs, s)
       
    30   case (CHAR(c)::rs, Nil) => false
       
    31   case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) 
       
    32   case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s)
       
    33   case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) 
       
    34   case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) 
       
    35   case (Nil, s) => s == Nil
       
    36 }
       
    37 
       
    38 def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList)
       
    39 
       
    40 // regular expression: a
       
    41 println(smatcher(CHAR('a'), "ab")) 
       
    42 
       
    43 // regular expression: a + (b o c)
       
    44 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) 
       
    45 
       
    46 // regular expression: a + (b o c)
       
    47 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) 
       
    48 
       
    49 // loops for regular expression epsilon* 
       
    50 //println(smatcher(STAR(EMPTY), "a"))
       
    51 
       
    52 
       
    53 
       
    54 // Regular expression matcher that works properly
       
    55 //================================================
       
    56 
       
    57 // nullable function: tests whether the regular 
       
    58 // expression can recognise the empty string
    11 def nullable (r: Rexp) : Boolean = r match {
    59 def nullable (r: Rexp) : Boolean = r match {
    12   case NULL => false
    60   case NULL => false
    13   case EMPTY => true
    61   case EMPTY => true
    14   case CHAR(_) => false
    62   case CHAR(_) => false
    15   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    63   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    16   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    64   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    17   case STAR(_) => true
    65   case STAR(_) => true
    18 }
    66 }
    19 
    67 
    20 // derivative of a regular expression
    68 // derivative of a regular expression w.r.t. a character
    21 def deriv (r: Rexp, c: Char) : Rexp = r match {
    69 def der (c: Char, r: Rexp) : Rexp = r match {
    22   case NULL => NULL
    70   case NULL => NULL
    23   case EMPTY => NULL
    71   case EMPTY => NULL
    24   case CHAR(d) => if (c == d) EMPTY else NULL
    72   case CHAR(d) => if (c == d) EMPTY else NULL
    25   case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c))
    73   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    26   case SEQ(r1, r2) => 
    74   case SEQ(r1, r2) => 
    27     if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c))
    75     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    28     else SEQ(deriv(r1, c), r2)
    76     else SEQ(der(c, r1), r2)
    29   case STAR(r) => SEQ(deriv(r, c), STAR(r))
    77   case STAR(r) => SEQ(der(c, r), STAR(r))
    30 }
    78 }
    31 
    79 
    32 def derivs (r: Rexp, s: List[Char]) : Rexp = s match {
    80 // derivative w.r.t. a string (iterates der)
       
    81 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    33   case Nil => r
    82   case Nil => r
    34   case c::cs => derivs(deriv(r, c), cs)
    83   case c::s => ders(s, der(c, r))
    35 }
    84 }
    36 
    85 
    37 // regular expression matching
    86 // main matcher function
    38 def matches(r: Rexp, s: String) : Boolean = nullable(derivs(r, s.toList))
    87 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    39 
    88 
    40 /* Examples */
       
    41 
    89 
    42 println(matches(SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')),"cab"))
    90 //examples
    43 println(matches(STAR(CHAR('a')),"aaa"))
       
    44 
    91 
    45 /* Convenience using implicits */
    92 println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
    46 implicit def string2rexp(s : String) : Rexp = {
    93 println(matcher(ALT(STAR("a"), STAR("b")), ""))
    47   s.foldRight (EMPTY: Rexp) ( (c, r) => SEQ(CHAR(c), r) )
    94 println(matcher("abc", ""))
    48 }
    95 println(matcher(STAR(ALT(EMPTY, "a")), ""))
       
    96 println(matcher(STAR(EMPTY), "a"))
       
    97 println(matcher("cab","cab"))
       
    98 println(matcher(STAR("a"),"aaa"))
       
    99 println(matcher("cab" ,"cab"))
       
   100 println(matcher(STAR("a"),"aaa"))
    49 
   101 
    50 println(matches("cab" ,"cab"))
       
    51 println(matches(STAR("a"),"aaa"))
       
    52 println(matches(STAR("a"),"aaab"))