// regexp.scala
// Author:     Christian Urban <urbanc@in.tum.de>
// Date:       Thu, 27 Sep 2012 11:59:41 +0100
// Changeset:  7 (73cf4406b773, "updated"); child changeset 18 (d48cfc286cb1)
// File mode:  -rw-r--r--
/** Abstract syntax of regular expressions.
  *
  * The hierarchy is sealed so that the compiler can check pattern matches
  * over `Rexp` for exhaustiveness; all constructors live in this file.
  */
sealed abstract class Rexp

/** The regular expression that matches no string at all. */
case object NULL extends Rexp

/** The regular expression that matches only the empty string. */
case object EMPTY extends Rexp

/** Matches exactly the one-character string consisting of `c`. */
case class CHAR(c: Char) extends Rexp

/** Alternative: matches whatever `r1` or `r2` matches. */
case class ALT(r1: Rexp, r2: Rexp) extends Rexp

/** Sequence: matches a string matched by `r1` followed by one matched by `r2`. */
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp

/** Kleene star: matches zero or more repetitions of `r`. */
case class STAR(r: Rexp) extends Rexp
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     9
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    10
/** Tests whether a regular expression can match the empty string.
  *
  * @param r the regular expression to inspect
  * @return `true` if `r` accepts the empty string, `false` otherwise
  */
def nullable (r: Rexp) : Boolean = r match {
  case NULL => false
  case EMPTY => true
  case CHAR(_) => false
  // an alternative is nullable as soon as one branch is
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  // a sequence needs both parts to accept the empty string
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  // zero repetitions always yield the empty string
  case STAR(_) => true
}
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    19
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    20
/** Derivative of a regular expression with respect to a single character.
  *
  * @param r the regular expression to differentiate
  * @param c the character that is consumed
  * @return the regular expression that remains after `r` has consumed `c`;
  *         `NULL` where `c` cannot be consumed
  */
def deriv (r: Rexp, c: Char) : Rexp = r match {
  case NULL => NULL
  case EMPTY => NULL
  case CHAR(d) => if (c == d) EMPTY else NULL
  case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c))
  case SEQ(r1, r2) =>
    // if r1 can match the empty string, c may also be consumed by r2
    if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c))
    else SEQ(deriv(r1, c), r2)
  // consume c with one copy of r, then continue with the star
  case STAR(r) => SEQ(deriv(r, c), STAR(r))
}
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    31
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    32
/** Derivative of a regular expression with respect to a list of characters.
  *
  * Applies `deriv` once per character, from left to right. Written as a
  * fold so that long inputs do not depend on tail-call elimination.
  *
  * @param r the regular expression to start from
  * @param s the characters to consume, in order
  * @return `r` itself for the empty list, otherwise the iterated derivative
  */
def derivs (r: Rexp, s: List[Char]) : Rexp =
  s.foldLeft(r)((acc, c) => deriv(acc, c))
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    36
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
/** Regular expression matching.
  *
  * @param r the regular expression
  * @param s the string to test
  * @return `true` if the derivative of `r` with respect to all characters
  *         of `s` is nullable, i.e. `r` matches the whole of `s`
  */
def matches(r: Rexp, s: String) : Boolean = nullable(derivs(r, s.toList))
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    39
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    40
/* Examples */

// (c . a) . b against "cab": prints true
println(matches(SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')),"cab"))
// a* against "aaa": prints true
println(matches(STAR(CHAR('a')),"aaa"))
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    44
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    45
/* Convenience using implicits */

// Enables the implicit conversion below without a compiler feature warning
// (language feature import, available since Scala 2.10).
import scala.language.implicitConversions

/** Converts a string into the regular expression matching exactly that string.
  *
  * The characters are chained right-nested and terminated by `EMPTY`, e.g.
  * "ab" becomes `SEQ(CHAR('a'), SEQ(CHAR('b'), EMPTY))`; the empty string
  * becomes `EMPTY`.
  *
  * @param s the literal string to convert
  * @return a regular expression that matches `s`
  */
implicit def string2rexp(s : String) : Rexp = {
  s.foldRight(EMPTY: Rexp)((c, rest) => SEQ(CHAR(c), rest))
}
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    49
73cf4406b773 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    50
// The string literals in regex position are converted by string2rexp.
// "cab" against "cab": prints true
println(matches("cab" ,"cab"))
// ("a")* against "aaa": prints true
println(matches(STAR("a"),"aaa"))
// ("a")* against "aaab": prints false, the trailing 'b' cannot be consumed
println(matches(STAR("a"),"aaab"))