diff -r e74c696821a2 -r 5deefcc8cffa progs/re3.scala --- a/progs/re3.scala Tue Aug 23 23:12:55 2016 +0200 +++ b/progs/re3.scala Tue Sep 20 12:24:29 2016 +0100 @@ -1,36 +1,18 @@ -abstract class Rexp -case object NULL extends Rexp -case object EMPTY extends Rexp + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp case class SEQ(r1: Rexp, r2: Rexp) extends Rexp case class STAR(r: Rexp) extends Rexp case class NTIMES(r: Rexp, n: Int) extends Rexp -def simp(r: Rexp): Rexp = r match { - case ALT(r1, r2) => { - (simp(r1), simp(r2)) match { - case (NULL, r2s) => r2s - case (r1s, NULL) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) - } - } - case SEQ(r1, r2) => { - (simp(r1), simp(r2)) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r2s) => r2s - case (r1s, EMPTY) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - } - case NTIMES(r, n) => NTIMES(simp(r), n) - case r => r -} - +// nullable function: tests whether the regular +// expression can recognise the empty string def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true + case ZERO => false + case ONE => true case CHAR(_) => false case ALT(r1, r2) => nullable(r1) || nullable(r2) case SEQ(r1, r2) => nullable(r1) && nullable(r2) @@ -38,31 +20,56 @@ case NTIMES(r, i) => if (i == 0) true else nullable(r) } +// derivative of a regular expression w.r.t. a character def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r) => SEQ(der(c, r), STAR(r)) case NTIMES(r, i) => - if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) + if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) } +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case NTIMES(r1, n) => NTIMES(simp(r), n) + case r => r +} + + +// derivative w.r.t. a string (iterates der) def ders (s: List[Char], r: Rexp) : Rexp = s match { case Nil => r case c::s => ders(s, simp(der(c, r))) } -def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) //one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) +def OPT(r: Rexp) = ALT(r, ONE) -def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) +//evil regular expressions +def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) +val EVIL2 = SEQ(STAR(CHAR('a')), CHAR('b')) + def time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() @@ -71,70 +78,16 @@ (end - start)/(i * 1.0e9) } +val i = 5000 +println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL1(i), "a" * i)))) -for (i <- 1 to 12001 by 500) { - println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i)))) +for (i <- 1 to 9001 by 1000) { + println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) } - -// some convenience for typing in regular expressions -import scala.language.implicitConversions -import scala.language.reflectiveCalls -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps (r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) +for (i <- 1 to 7500001 by 500000) { + println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) } -def PLUS(r: Rexp) = SEQ(r, STAR(r)) -def RANGE(s: List[Char]) : Rexp = s match { - case Nil => NULL - case c::s => ALT(CHAR(c), RANGE(s)) -} - -println("EVILS:") -val EVIL1 = PLUS(PLUS("a" ~ "a" ~ "a")) -val EVIL2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a? - - -//40 * aaa matches -//43 * aaa + aa does not -//45 * aaa + a - -println("EVIL1:") -println(matches(EVIL1, "aaa" * 40)) -println(matches(EVIL1, "aaa" * 43 + "aa")) -println(matches(EVIL1, "aaa" * 45 + "a")) -println("EVIL2:") -println(matches(EVIL2, "aaa" * 40)) -println(matches(EVIL2, "aaa" * 43 + "aa")) -println(matches(EVIL2, "aaa" * 45 + "a")) - - - - -println("EMAIL:") -val LOWERCASE = "abcdefghijklmnopqrstuvwxyz" -val DIGITS = "0123456789" -val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~ - PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~ - NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6) - -val my_email = "christian.urban@kcl.ac.uk"