progs/regexp.scala
changeset 93 4794759139ea
parent 92 e85600529ca5
equal deleted inserted replaced
92:e85600529ca5 93:4794759139ea
       
     1 // regular expressions
       
     2 abstract class Rexp
       
     3 
       
     4 case object NULL extends Rexp
       
     5 case object EMPTY extends Rexp
       
     6 case class CHAR(c: Char) extends Rexp
       
     7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
       
     8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
       
     9 case class STAR(r: Rexp) extends Rexp
       
    10 
       
    11 // some convenience for typing in regular expressions
       
    12 def charlist2rexp(s : List[Char]) : Rexp = s match {
       
    13   case Nil => EMPTY
       
    14   case c::Nil => CHAR(c)
       
    15   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    16 }
       
    17 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    18 
       
    19 
       
    20 // for example
       
    21 println(STAR("abc"))
       
    22 
       
    23 // produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY))))
       
    24 
       
    25 
       
    26 
       
    27 // a simple-minded regular expression matcher:
       
    28 // it loops for examples like STAR(EMPTY) with
       
    29 // strings this regular expression does not match
       
    30 
       
    31 def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match {
       
    32   case (NULL::rs, s) => false
       
    33   case (EMPTY::rs, s) => smatchers(rs, s)
       
    34   case (CHAR(c)::rs, Nil) => false
       
    35   case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) 
       
    36   case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s)
       
    37   case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) 
       
    38   case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) 
       
    39   case (Nil, s) => s == Nil
       
    40 }
       
    41 
       
    42 def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList)
       
    43 
       
    44 // regular expression: a
       
    45 println(smatcher(CHAR('a'), "ab")) 
       
    46 
       
    47 // regular expression: a + (b o c)
       
    48 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) 
       
    49 
       
    50 // regular expression: a + (b o c)
       
    51 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) 
       
    52 
       
    53 // loops for regular expression epsilon* 
       
    54 //println(smatcher(STAR(EMPTY), "a"))
       
    55 
       
    56 
       
    57 
       
    58 // Regular expression matcher that works properly
       
    59 //================================================
       
    60 
       
    61 // nullable function: tests whether the regular 
       
    62 // expression can recognise the empty string
       
    63 def nullable (r: Rexp) : Boolean = r match {
       
    64   case NULL => false
       
    65   case EMPTY => true
       
    66   case CHAR(_) => false
       
    67   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    68   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    69   case STAR(_) => true
       
    70 }
       
    71 
       
    72 // derivative of a regular expression w.r.t. a character
       
    73 def der (c: Char, r: Rexp) : Rexp = r match {
       
    74   case NULL => NULL
       
    75   case EMPTY => NULL
       
    76   case CHAR(d) => if (c == d) EMPTY else NULL
       
    77   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    78   case SEQ(r1, r2) => 
       
    79     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    80     else SEQ(der(c, r1), r2)
       
    81   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    82 }
       
    83 
       
    84 // derivative w.r.t. a string (iterates der)
       
    85 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    86   case Nil => r
       
    87   case c::s => ders(s, der(c, r))
       
    88 }
       
    89 
       
    90 // main matcher function
       
    91 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
       
    92 
       
    93 
       
    94 //examples
       
    95 
       
    96 println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
       
    97 println(matcher(ALT(STAR("a"), STAR("b")), ""))
       
    98 println(matcher("abc", ""))
       
    99 println(matcher(STAR(ALT(EMPTY, "a")), ""))
       
   100 println(matcher(STAR(EMPTY), "a"))
       
   101 println(matcher("cab","cab"))
       
   102 println(matcher(STAR("a"),"aaa"))
       
   103 println(matcher("cab" ,"cab"))
       
   104 println(matcher(STAR("a"),"aaa"))
       
   105 
       
   106