|      1 // version with explicit an n-times regular expression |      1 // Version with explicit an explicit n-times regular expression; | 
|      2 // this keeps the regular expression small |      2 // this keeps the overall regular expression in the EVIL1 regular  | 
|         |      3 // expression small | 
|      3  |      4  | 
|      4 abstract class Rexp  |      5 abstract class Rexp  | 
|      5 case object ZERO extends Rexp |      6 case object ZERO extends Rexp | 
|      6 case object ONE extends Rexp |      7 case object ONE extends Rexp | 
|      7 case class CHAR(c: Char) extends Rexp |      8 case class CHAR(c: Char) extends Rexp | 
|      8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  |      9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  | 
|      9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  |     10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  | 
|     10 case class STAR(r: Rexp) extends Rexp  |     11 case class STAR(r: Rexp) extends Rexp  | 
|     11 case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor |     12 case class NTIMES(r: Rexp, n: Int) extends Rexp   //explicit constructor for n-times | 
|         |     13  | 
|     12  |     14  | 
|     13 def nullable (r: Rexp) : Boolean = r match { |     15 def nullable (r: Rexp) : Boolean = r match { | 
|     14   case ZERO => false |     16   case ZERO => false | 
|     15   case ONE => true |     17   case ONE => true | 
|     16   case CHAR(_) => false |     18   case CHAR(_) => false | 
|     18   case SEQ(r1, r2) => nullable(r1) && nullable(r2) |     20   case SEQ(r1, r2) => nullable(r1) && nullable(r2) | 
|     19   case STAR(_) => true |     21   case STAR(_) => true | 
|     20   case NTIMES(r, i) => if (i == 0) true else nullable(r) |     22   case NTIMES(r, i) => if (i == 0) true else nullable(r) | 
|     21 } |     23 } | 
|     22  |     24  | 
|         |     25  | 
|     23 def der (c: Char, r: Rexp) : Rexp = r match { |     26 def der (c: Char, r: Rexp) : Rexp = r match { | 
|     24   case ZERO => ZERO |     27   case ZERO => ZERO | 
|     25   case ONE => ZERO |     28   case ONE => ZERO | 
|     26   case CHAR(d) => if (c == d) ONE else ZERO |     29   case CHAR(d) => if (c == d) ONE else ZERO | 
|     27   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |     30   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) | 
|     28   case SEQ(r1, r2) =>  |     31   case SEQ(r1, r2) =>  | 
|     29     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |     32     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | 
|     30     else SEQ(der(c, r1), r2) |     33     else SEQ(der(c, r1), r2) | 
|     31   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |     34   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) | 
|     32   case NTIMES(r1, i) =>  |     35   case NTIMES(r1, i) =>  | 
|     33     if (i == 0) ZERO else der(c, SEQ(r1, NTIMES(r1, i - 1))) |     36     if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1)) | 
|     34 } |     37 } | 
|     35  |     38  | 
|     36 def ders (s: List[Char], r: Rexp) : Rexp = s match { |     39 def ders (s: List[Char], r: Rexp) : Rexp = s match { | 
|     37   case Nil => r |     40   case Nil => r | 
|     38   case c::s => ders(s, der(c, r)) |     41   case c::s => ders(s, der(c, r)) | 
|     39 } |     42 } | 
|     40  |     43  | 
|     41 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |     44 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) | 
|     42  |     45  | 
|     43  |     46  | 
|     44 //optional: one or zero times |     47 //optional regular expression: one or zero times | 
|     45 def OPT(r: Rexp) = ALT(r, ONE) |     48 def OPT(r: Rexp) = ALT(r, ONE) | 
|         |     49  | 
|         |     50  | 
|         |     51 // Test Cases | 
|     46  |     52  | 
|     47 //evil regular expressions |     53 //evil regular expressions | 
|     48 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) |     54 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) | 
|     49 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |     55 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
|     50  |     56  | 
|     55   (end - start)/(i * 1.0e9) |     61   (end - start)/(i * 1.0e9) | 
|     56 } |     62 } | 
|     57  |     63  | 
|     58  |     64  | 
|     59 //test: (a?{n}) (a{n}) |     65 //test: (a?{n}) (a{n}) | 
|     60 for (i <- 1 to 1001 by 10) { |     66 for (i <- 1 to 1201 by 100) { | 
|     61   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i)))) |     67   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i)))) | 
|     62 } |     68 } | 
|     63  |     69  | 
|     64 for (i <- 1 to 1001 by 10) { |     70 for (i <- 1 to 1201 by 100) { | 
|     65   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i)))) |     71   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i)))) | 
|     66 } |     72 } | 
|     67  |     73  | 
|     68  |     74  | 
|     69 //test: (a*)* b |     75 //test: (a*)* b | 
|     74 for (i <- 1 to 20) { |     80 for (i <- 1 to 20) { | 
|     75   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i)))) |     81   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i)))) | 
|     76 } |     82 } | 
|     77  |     83  | 
|     78  |     84  | 
|         |     85  | 
|         |     86 // size of a regular expressions - for testing purposes  | 
|         |     87 def size(r: Rexp) : Int = r match { | 
|         |     88   case ZERO => 1 | 
|         |     89   case ONE => 1 | 
|         |     90   case CHAR(_) => 1 | 
|         |     91   case ALT(r1, r2) => 1 + size(r1) + size(r2) | 
|         |     92   case SEQ(r1, r2) => 1 + size(r1) + size(r2) | 
|         |     93   case STAR(r) => 1 + size(r) | 
|         |     94   case NTIMES(r, _) => 1 + size(r) | 
|         |     95 } | 
|         |     96  | 
|         |     97 // EVIL1(n) has now a constant size, no matter | 
|         |     98 // what n is  | 
|         |     99  | 
|         |    100 size(EVIL1(1))  // 7 | 
|         |    101 size(EVIL1(3))  // 7 | 
|         |    102 size(EVIL1(5))  // 7 | 
|         |    103 size(EVIL1(7))  // 7 | 
|         |    104  | 
|         |    105  | 
|         |    106 // but the size of the derivatives can still grow  | 
|         |    107 // quite dramatically | 
|         |    108  | 
|         |    109 size(ders("".toList, EVIL2))     // 5 | 
|         |    110 size(ders("a".toList, EVIL2))    // 12 | 
|         |    111 size(ders("aa".toList, EVIL2))   // 28 | 
|         |    112 size(ders("aaa".toList, EVIL2))  // 58 | 
|         |    113 size(ders("aaaa".toList, EVIL2)) // 116 |