regexp.scala
author Christian Urban <urbanc@in.tum.de>
Thu, 27 Sep 2012 11:59:41 +0100
changeset 7 73cf4406b773
child 18 d48cfc286cb1
permissions -rw-r--r--
updated

// Abstract syntax of regular expressions.
//
// The hierarchy is sealed so that the compiler can warn when a pattern
// match over Rexp forgets one of the constructors below.
sealed abstract class Rexp

// NULL  - the regular expression that matches no string at all
// EMPTY - matches only the empty string
// CHAR  - matches exactly the one-character string consisting of c
// ALT   - alternative: matches whatever r1 or r2 matches
// SEQ   - sequence: a string matched by r1 followed by one matched by r2
// STAR  - zero or more repetitions of r
case object NULL extends Rexp
case object EMPTY extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp

// Tests whether the regular expression r accepts the empty string.
// EMPTY and any STAR always do, NULL and CHAR never do; an alternative
// needs one nullable branch, a sequence needs both parts to be nullable.
def nullable(r: Rexp): Boolean = r match {
  case EMPTY | STAR(_) => true
  case NULL | CHAR(_) => false
  case ALT(left, right) => nullable(left) || nullable(right)
  case SEQ(first, second) => nullable(first) && nullable(second)
}

// Derivative of the regular expression r with respect to the character c:
// the result matches exactly those strings w for which r matches c followed by w.
def deriv(r: Rexp, c: Char): Rexp = r match {
  case NULL | EMPTY => NULL
  case CHAR(d) if c == d => EMPTY
  case CHAR(_) => NULL
  case ALT(left, right) => ALT(deriv(left, c), deriv(right, c))
  case SEQ(first, second) =>
    // c is always allowed to be consumed by the first component ...
    val consumedByFirst = SEQ(deriv(first, c), second)
    // ... and, when the first component may match nothing, by the second one too
    if (nullable(first)) ALT(consumedByFirst, deriv(second, c))
    else consumedByFirst
  case STAR(body) => SEQ(deriv(body, c), STAR(body))
}

// Derivative of r with respect to a whole list of characters: deriv is
// applied once per character, from left to right. An empty list returns r.
def derivs(r: Rexp, s: List[Char]): Rexp =
  s.foldLeft(r) { (residual, ch) => deriv(residual, ch) }

// Regular expression matching: r matches s exactly when the derivative of r
// with respect to all characters of s can still match the empty string.
def matches(r: Rexp, s: String): Boolean = {
  val residual = derivs(r, s.toList)
  nullable(residual)
}

/* Examples */

// Each pair is a regular expression and an input string; one line with the
// result of the match is printed per pair (both are expected to print true).
List[(Rexp, String)](
  (SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')), "cab"),
  (STAR(CHAR('a')), "aaa")
).foreach { case (regex, input) => println(matches(regex, input)) }

/* Convenience using implicits */
// Lets a String be used wherever a Rexp is expected. The string is turned
// into a right-nested sequence of its characters ending in EMPTY, e.g.
// "ab" becomes SEQ(CHAR('a'), SEQ(CHAR('b'), EMPTY)); "" becomes EMPTY.
implicit def string2rexp(s : String) : Rexp =
  // walk the characters back to front so that each one is put in front
  // of the sequence built so far
  s.toList.reverse.foldLeft(EMPTY: Rexp) { (rest, ch) => SEQ(CHAR(ch), rest) }

// Same kind of examples, but the String written in regex position is
// converted to a Rexp by the implicit string2rexp.
// "cab" is exactly the word spelled out by the regex: expected to print true
println(matches("cab" ,"cab"))
// STAR("a") accepts any number of 'a's: expected to print true
println(matches(STAR("a"),"aaa"))
// the trailing 'b' cannot be matched by STAR("a"): expected to print false
println(matches(STAR("a"),"aaab"))