--- a/progs/scala/re-ext.scala Thu Mar 02 12:39:27 2017 +0000
+++ b/progs/scala/re-ext.scala Sat Mar 04 21:35:12 2017 +0000
@@ -6,12 +6,11 @@
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
+case class CHARS(f: Char => Boolean) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp
case class RECD(x: String, r: Rexp) extends Rexp
-case class CRANGE(cs: String) extends Rexp
case class PLUS(r: Rexp) extends Rexp
case class OPT(r: Rexp) extends Rexp
case class NTIMES(r: Rexp, n: Int) extends Rexp
@@ -28,8 +27,8 @@
// some convenience for typing in regular expressions
def charlist2rexp(s : List[Char]): Rexp = s match {
case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ case c::Nil => CHARS(_ == c)
+ case c::s => SEQ(CHARS(_ == c), charlist2rexp(s))
}
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
@@ -68,12 +67,11 @@
def nullable (r: Rexp) : Boolean = r match {
case ZERO => false
case ONE => true
- case CHAR(_) => false
+ case CHARS(_) => false
case ALT(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
case STAR(_) => true
case RECD(_, r1) => nullable(r1)
- case CRANGE(_) => false
case PLUS(r) => nullable(r)
case OPT(_) => true
case NTIMES(r, n) => if (n == 0) true else nullable(r)
@@ -83,17 +81,16 @@
def der (c: Char, r: Rexp) : Rexp = r match {
case ZERO => ZERO
case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
+ case CHARS(f) => if (f(c)) ONE else ZERO
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
else SEQ(der(c, r1), r2)
case STAR(r) => SEQ(der(c, r), STAR(r))
case RECD(_, r1) => der(c, r1)
- case CRANGE(cs) => if (cs.contains(c)) ONE else ZERO
case PLUS(r) => SEQ(der(c, r), STAR(r))
case OPT(r) => ALT(der(c, r), ZERO)
- case NTIMES(r, n) => if (n == 0) ZERO else der(c, SEQ(r, NTIMES(r, n - 1)))
+ case NTIMES(r, n) => if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1))
}
// derivative w.r.t. a string (iterates der)
@@ -136,7 +133,7 @@
case RECD(x, r) => Rec(x, mkeps(r))
case PLUS(r) => Stars(List(mkeps(r)))
case OPT(_) => Right(Empty)
- case NTIMES(r, n) => if (n == 0) Stars(Nil) else Stars(Nil.padTo(n, mkeps(r)))
+ case NTIMES(r, n) => Stars(List.fill(n)(mkeps(r)))
}
@@ -147,14 +144,11 @@
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(d), Empty) => Chr(c)
+ case (CHARS(_), Empty) => Chr(c)
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
- case (CRANGE(_), Empty) => Chr(c)
case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
case (OPT(r), Left(v1)) => Left(inj(r, c, v1))
case (NTIMES(r, n), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (NTIMES(r, n), Left(Sequ(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs)
- case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs)
}
// main unsimplified lexing function (produces a value)
@@ -368,7 +362,7 @@
// first benchmark regex
//=======================
-val reWord = CRANGE("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
+val reWord = CHARS("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" contains _)
val reWordStar = STAR(reWord)
val reWordPlus = reWord ~ reWordStar