diff -r 3b9496db3fb9 -r 3390e863d796 progs/re1.scala --- a/progs/re1.scala Tue Sep 26 14:07:29 2017 +0100 +++ b/progs/re1.scala Tue Sep 26 14:08:49 2017 +0100 @@ -1,3 +1,6 @@ +// Simple matcher for basic regular expressions + +object Test { abstract class Rexp case object ZERO extends Rexp // matches nothing @@ -19,6 +22,8 @@ } +} + // derivative of a regular expression w.r.t. a character def der (c: Char, r: Rexp) : Rexp = r match { case ZERO => ZERO @@ -40,22 +45,27 @@ // main matcher function def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + //examples from the homework + val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) der('a', r) der('b', r) der('c', r) -//optional (one or zero times) +//optional regular expression (one or zero times) def OPT(r: Rexp) = ALT(r, ONE) -//n-times (explicitly expanded) +//n-times regular expression (explicitly expanded) def NTIMES(r: Rexp, n: Int) : Rexp = n match { case 0 => ONE case 1 => r case n => SEQ(r, NTIMES(r, n - 1)) } + +// Test Cases + // the evil regular expression a?{n} a{n} def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) @@ -70,6 +80,7 @@ (end - start)/(i * 1.0e9) } + //test: (a?{n}) (a{n}) for (i <- 1 to 20) { println(i + ": " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i)))) @@ -88,3 +99,43 @@ println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i)))) } + + + +// size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) +} + +// the expicit expansion in EVIL1(n) increases +// drastically its size + +size(EVIL1(1)) // 5 +size(EVIL1(3)) // 17 +size(EVIL1(5)) // 29 +size(EVIL1(7)) // 41 + + +// given a regular expression and building successive +// derivatives might result into bigger and bigger +// regular expressions...here is an example for this: + +val BIG_aux = STAR(ALT(CHAR('a'), CHAR('b'))) +val BIG = SEQ(BIG_aux, SEQ(CHAR('a'),SEQ(CHAR('b'), BIG_aux))) + +size(ders("".toList, BIG)) // 13 +size(ders("ab".toList, BIG)) // 51 +size(ders("abab".toList, BIG)) // 112 +size(ders("ababab".toList, BIG)) // 191 +size(ders("abababab".toList, BIG)) // 288 +size(ders("ababababab".toList, BIG)) // 403 +size(ders("abababababab".toList, BIG)) // 536 + + +size(ders(("ab" * 200).toList, BIG)) // 366808 +