diff -r d922cc83b70c -r b78664a24f5d progs/re3.scala --- a/progs/re3.scala Mon Feb 13 23:22:45 2017 +0000 +++ b/progs/re3.scala Wed Mar 15 01:24:39 2017 +0000 @@ -1,3 +1,6 @@ +// Version with simplification during derivatives; +// this keeps the regular expressions small, which +// is good for run-time abstract class Rexp case object ZERO extends Rexp @@ -32,7 +35,7 @@ else SEQ(der(c, r1), r2) case STAR(r1) => SEQ(der(c, r1), STAR(r1)) case NTIMES(r, i) => - if (i == 0) ZERO else der(c, SEQ(r, NTIMES(r, i - 1))) + if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) } def simp(r: Rexp) : Rexp = r match { @@ -63,12 +66,12 @@ def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) -var regex = NTIMES(CHAR('a'),5) -println(matcher(regex,"aaaaa")) - //one or zero def OPT(r: Rexp) = ALT(r, ONE) + +// Test Cases + //evil regular expressions def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) @@ -83,20 +86,43 @@ //test: (a?{n}) (a{n}) -for (i <- 1 to 9001 by 10) { +for (i <- 1 to 11001 by 1000) { println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) } -for (i <- 1 to 9001 by 1000) { +for (i <- 1 to 11001 by 1000) { println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) } //test: (a*)* b -for (i <- 1 to 7000001 by 500000) { +for (i <- 1 to 6000001 by 500000) { + println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) +} + +for (i <- 1 to 6000001 by 500000) { println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) } -for (i <- 1 to 7000001 by 500000) { - println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) + + +// size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) + case NTIMES(r, _) => 1 + size(r) } + +// now the size of the derivatives grows +// much, much slower + +size(ders("".toList, EVIL2)) // 5 +size(ders("a".toList, EVIL2)) // 8 +size(ders("aa".toList, EVIL2)) // 8 +size(ders("aaa".toList, EVIL2)) // 8 +size(ders("aaaa".toList, EVIL2)) // 8 +size(ders("aaaaa".toList, EVIL2)) // 8