diff -r 9ea667baf097 -r dbe49327b6c5 progs/re-sulzmann-partial.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re-sulzmann-partial.scala Tue Sep 24 01:12:36 2013 +0100 @@ -0,0 +1,136 @@ +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +abstract class Pat + +case class VAR(x: String, r: Rexp) extends Pat +case class GRP(x: String, p: Pat) extends Pat +case class PSEQ(p1: Pat, p2: Pat) extends Pat +case class PALT(p1: Pat, p2: Pat) extends Pat +case class PSTAR(p: Pat) extends Pat + + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def down (p: Pat) : Rexp = p match { + case VAR(x: String, w: String, r: Rexp) => r + case GRP(x: String, w: String, p: Pat) => down(p) + case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2)) + case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2)) + case PSTAR(p: Pat) => STAR(down(p)) +} + +def patnullable (p: Pat) : Boolean = p match { + case PVar(_, r) => nullable(r) + case PSEQ(p1, p2) => patnullable(p1) && patnullable(p2) + case PCHOICE(p1, p2) => patnullable(p1) || patnullable(p2) + case PSTAR(p) => true + case PatVar(_, p) => patnullable(p) +} + +//type Env = Set[List[(String, String)]] +type Env = Map[Int, String] + +def update(n: Int, c: Char) (env: Env) = + env + (n -> (env.getOrElse(n, "") + c.toString)) + +def pderivPat (p: Pat, c: Char) : Set[(Pat, Env => Env)] = p match { + case PVar(n: Int, r: Rexp) => { + val pds = pderiv(r, c) + if (pds.isEmpty) Set() + else Set((PVar(n, toRexp(pds.toList)), update(n, c))) + } + case PSEQ(p1: Pat, p2: Pat) => { + val pats : Set[(Pat, Env => Env)] = + for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, p2), f) + if (nullable(strip(p1))) pats ++ pderivPat(p2, c) + else pats + } + case PCHOICE(p1: Pat, p2: Pat) => pderivPat(p1, c) ++ pderivPat(p2, c) + case PSTAR(p1: Pat) => + for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, PSTAR(p1)), f) + case PatVar(n: Int, p1: Pat) => + for ((p, f) <- pderivPat(p1, c)) yield (PatVar(n, p), f compose (update (n, c))) +} + + +val p2 = PSEQ(PVar(1, STAR("A")), PVar(2, STAR("A"))) +pderivPat(p2, 'A').mkString("\n") + + +def greedy_aux(env: Set[(Pat, Env)], w: List[Char]) : Set[Env] = w match { + case Nil => + for ((p, e) <- env if patnullable(p)) yield e + case c::cs => { + val env2 = for ((p, e) <- env; (p1, f) <- pderivPat(p, c)) yield (p1, f(e)) + greedy_aux(env2, cs) + } +} + +def greedy(p: Pat, w: String) = { + val res = greedy_aux(Set((p, Map())), w.toList) + if (res.isEmpty) None else Some(res) +} + +// some convenience for typing in regular expressions +def charlist2rexp (s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + +implicit def PatOps (p: Pat) = new { + def | (q: Pat) = PALT(p, q) + def % = PSTAR(p) + def ~ (q: Pat) = PSEQ(p, q) +} + +val p3 = PSTAR(PSEQ(PVar(1, "A"), PVar(2, "B"))) + +greedy2(Set((p3, Map())), "ABAB".toList) + + +val p4 = PVar(1, "A") +greedy2(Set((p4, Map())), "A".toList) + +val p5 = PSEQ(PVar(1, "A"), PVar(1, "B")) +greedy2(Set((p5, Map())), "AB".toList) + +val res = pderivPat(p5, 'A') +for ((_, f) <- res) yield f(Map()) + + +val p6 = PatVar(4,PSTAR(PCHOICE(PCHOICE(PVar(1, "A"), PVar(2, "AB")), PVar(3, "B")))) + +greedy(p6, "ABA")