regexp.scala
changeset 7 73cf4406b773
child 18 d48cfc286cb1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regexp.scala	Thu Sep 27 11:59:41 2012 +0100
@@ -0,0 +1,52 @@
+// Regular expressions as a closed ADT; `sealed` lets the compiler check matches for exhaustiveness.
+sealed abstract class Rexp
+case object NULL extends Rexp                      // matches no string at all
+case object EMPTY extends Rexp                     // matches only the empty string
+case class CHAR(c: Char) extends Rexp              // matches exactly the character c
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp    // matches what r1 or r2 matches
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp    // matches r1 followed by r2
+case class STAR(r: Rexp) extends Rexp              // matches zero or more repetitions of r
+
+// Tests whether a regular expression can match the empty string "".
+// r: the expression to inspect; result: true iff r accepts "".
+// ALT needs one nullable branch, SEQ needs both parts to be nullable.
+def nullable (r: Rexp) : Boolean = r match {
+  case EMPTY | STAR(_) => true    // "" itself / zero repetitions
+  case NULL | CHAR(_) => false    // no string at all / exactly one character
+  case ALT(left, right) => nullable(left) || nullable(right)
+  case SEQ(first, second) => nullable(first) && nullable(second)
+}
+
+// derivative of r with respect to c: what is left to match once c has been consumed
+def deriv (r: Rexp, c: Char) : Rexp = r match {
+  case NULL | EMPTY => NULL  // neither can consume a character
+  case CHAR(d) if d == c => EMPTY
+  case CHAR(_) => NULL
+  case ALT(left, right) => ALT(deriv(left, c), deriv(right, c))
+  case SEQ(first, second) =>
+    val viaFirst = SEQ(deriv(first, c), second)  // c is consumed by the first part
+    if (nullable(first)) ALT(viaFirst, deriv(second, c)) else viaFirst  // first may match "", so c can start second
+  case STAR(body) => SEQ(deriv(body, c), STAR(body))  // one iteration consumes c, then the star repeats
+}
+
+// Derivative of r with respect to a whole string: applies deriv for each
+// character of s in turn, left to right. An empty s returns r unchanged.
+def derivs (r: Rexp, s: List[Char]) : Rexp =
+  s.foldLeft(r)((acc, c) => deriv(acc, c))
+
+// regular expression matching: r accepts s iff what remains of r after consuming all of s is nullable
+def matches(r: Rexp, s: String) : Boolean = { val rest = derivs(r, s.toList); nullable(rest) }
+
+/* Examples: both calls below print true */
+List((SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')), "cab"), (STAR(CHAR('a')), "aaa")).foreach {
+  case (regex, input) => println(matches(regex, input))
+}
+
+/* Convenience using implicits: a String stands for the sequence of its characters, ended by EMPTY */
+implicit def string2rexp(s : String) : Rexp = {
+  s.map(c => CHAR(c)).foldRight[Rexp](EMPTY)((ch, rest) => SEQ(ch, rest))
+}
+
+// implicit String => Rexp conversion in action: prints true, true, false
+println(matches("cab", "cab"))
+List("aaa", "aaab").foreach(input => println(matches(STAR("a"), input)))