progs/re-sulzmann.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Fri, 18 Apr 2014 21:05:07 +0100
changeset 226 e3c454e31224
parent 95 dbe49327b6c5
permissions -rw-r--r--
updated so that it works with more recent versions of Ruby

import scala.language.implicitConversions
import scala.language.reflectiveCalls

abstract class Rexp

case object NULL extends Rexp
case object EMPTY extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp 

abstract class Pat

case class VAR(x: String, w: String, r: Rexp) extends Pat
case class GRP(x: String, w: String, p: Pat) extends Pat
case class PSEQ(p1: Pat, p2: Pat) extends Pat
case class PALT(p1: Pat, p2: Pat) extends Pat
case class PSTAR(p: Pat) extends Pat

object VAR { def apply(x: String, r: Rexp) : VAR = VAR(x, "", r) }
object GRP { def apply(x: String, p: Pat) : GRP = GRP(x, "", p) }


def nullable (r: Rexp) : Boolean = r match {
  case NULL => false
  case EMPTY => true
  case CHAR(_) => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_) => true
}

// derivative of a regular expression w.r.t. a character
def der (c: Char, r: Rexp) : Rexp = r match {
  case NULL => NULL
  case EMPTY => NULL
  case CHAR(d) => if (c == d) EMPTY else NULL
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  case SEQ(r1, r2) => 
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    else SEQ(der(c, r1), r2)
  case STAR(r) => SEQ(der(c, r), STAR(r))
}

def down (p: Pat) : Rexp = p match {
  case VAR(x: String, w: String, r: Rexp) => r
  case GRP(x: String, w: String, p: Pat) => down(p)
  case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2))
  case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2))
  case PSTAR(p: Pat) => STAR(down(p))
}

def empty (p: Pat) : Pat = p match {
  case VAR(x: String, w: String, r: Rexp) => 
    if (nullable(r)) VAR(x, w, EMPTY) 
    else VAR(x, w, NULL)
  case GRP(x: String, w: String, p: Pat) => GRP(x, w, empty(p))
  case PSEQ(p1: Pat, p2: Pat) => PSEQ(empty(p1), empty(p2))
  case PALT(p1: Pat, p2: Pat) => PALT(empty(p1), empty(p2))
  case PSTAR(p: Pat) => PSTAR(empty(p))
}

def patder (c: Char, p: Pat) : Pat = p match {
  case VAR(x: String, w: String, r: Rexp) => VAR(x, w + c, der(c, r))
  case GRP(x: String, w: String, p: Pat) => GRP(x, w + c, patder(c, p))
  case PSEQ(p1: Pat, p2: Pat) => 
    if (nullable(down(p1))) PALT(PSEQ(patder(c, p1), p2), PSEQ(empty(p1), patder(c, p2)))
    else PSEQ(patder(c, p1), p2)
  case PALT(p1: Pat, p2: Pat) => PALT(patder(c, p1), patder(c, p2))
  case PSTAR(p: Pat) => PSEQ(patder(c, p), PSTAR(p))
}

def patders (s: List[Char], p: Pat) : Pat = s match {
  case Nil => p
  case c::s => patders(s, patder(c, p))
}

type Env = Set[List[(String, String)]]

def special (p: Pat, env: Env) : Env = 
  if (env == Set() && nullable(down(p))) Set(Nil) else env

def env (p: Pat) : Env = p match {
  case VAR(x: String, w: String, r: Rexp) => 
    if (nullable(r)) Set(List((x, w))) else Set()
  case GRP(x: String, w: String, p1: Pat) => 
    special(p, for (e <- env(p1)) yield List((x, w)) ++ e)
  case PSEQ(p1: Pat, p2: Pat) => 
    special(p, for (e1 <- env(p1); e2 <- env(p2)) yield (e1 ++ e2))
  case PALT(p1: Pat, p2: Pat) => special(p, env(p1) ++ env(p2))
  case PSTAR(p1: Pat) => special(p, env(p1))
}

def matcher (p: Pat, s: String) = env(empty(patders(s.toList, p)))


// some convenience for typing in regular expressions
def charlist2rexp (s : List[Char]) : Rexp = s match {
  case Nil => EMPTY
  case c::Nil => CHAR(c)
  case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList)

implicit def RexpOps (r: Rexp) = new {
  def | (s: Rexp) = ALT(r, s)
  def % = STAR(r)
  def ~ (s: Rexp) = SEQ(r, s)
}

implicit def stringOps (s: String) = new {
  def | (r: Rexp) = ALT(s, r)
  def | (r: String) = ALT(s, r)
  def % = STAR(s)
  def ~ (r: Rexp) = SEQ(s, r)
  def ~ (r: String) = SEQ(s, r)
}

implicit def PatOps (p: Pat) = new {
  def | (q: Pat) = PALT(p, q)
  def % = PSTAR(p)
  def ~ (q: Pat) = PSEQ(p, q)
}


val p0 = VAR("x", "A")

patders("A".toList, p0)
matcher(p0, "A")

val p1 = VAR("x", "A") | VAR("y", "A") | VAR("z", "B")

patders("A".toList, p1)
matcher(p1, "A")
matcher(p1, "B")

val p2 = VAR("x", "AB")
matcher(p2, "AB")
matcher(p2, "AA")

val p3 = VAR("x", "A" ~ "B")
matcher(p3, "AB")
matcher(p3, "AA")

val p4 = VAR("x", "A") ~ VAR("y", "B")
matcher(p4, "AB")
matcher(p4, "AA")

val p5 = VAR("x", "A".%)
matcher(p5, "A")
matcher(p5, "AA")
matcher(p5, "")
matcher(p5, "AAA")

val p6 = VAR("x", "A").%
matcher(p6, "A")
matcher(p6, "AA")
matcher(p6, "")
matcher(p6, "AAA")

val p7 = (VAR("x", "A") | VAR("y", "AB") | VAR("z", "B")).%
matcher(p7, "AB")