|         |      1 // Version with simplification during derivatives; | 
|         |      2 // this keeps the regular expressions small, which | 
|         |      3 // is good for run-time | 
|      1  |      4  | 
|      2 abstract class Rexp |      5 abstract class Rexp | 
|      3 case object ZERO extends Rexp |      6 case object ZERO extends Rexp | 
|      4 case object ONE extends Rexp |      7 case object ONE extends Rexp | 
|      5 case class CHAR(c: Char) extends Rexp |      8 case class CHAR(c: Char) extends Rexp | 
|     30   case SEQ(r1, r2) =>  |     33   case SEQ(r1, r2) =>  | 
|     31     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |     34     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | 
|     32     else SEQ(der(c, r1), r2) |     35     else SEQ(der(c, r1), r2) | 
|     33   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |     36   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) | 
|     34   case NTIMES(r, i) =>  |     37   case NTIMES(r, i) =>  | 
|     35     if (i == 0) ZERO else der(c, SEQ(r, NTIMES(r, i - 1))) |     38     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) | 
|     36 } |     39 } | 
|     37  |     40  | 
|     38 def simp(r: Rexp) : Rexp = r match { |     41 def simp(r: Rexp) : Rexp = r match { | 
|     39   case ALT(r1, r2) => (simp(r1), simp(r2)) match { |     42   case ALT(r1, r2) => (simp(r1), simp(r2)) match { | 
|     40     case (ZERO, r2s) => r2s |     43     case (ZERO, r2s) => r2s | 
|     61  |     64  | 
|     62 // main matcher function |     65 // main matcher function | 
|     63 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |     66 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) | 
|     64  |     67  | 
|     65  |     68  | 
|     66 var regex = NTIMES(CHAR('a'),5) |         | 
|     67 println(matcher(regex,"aaaaa")) |         | 
|     68  |         | 
|     69 //one or zero |     69 //one or zero | 
|     70 def OPT(r: Rexp) = ALT(r, ONE) |     70 def OPT(r: Rexp) = ALT(r, ONE) | 
|         |     71  | 
|         |     72  | 
|         |     73 // Test Cases | 
|     71  |     74  | 
|     72 //evil regular expressions |     75 //evil regular expressions | 
|     73 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) |     76 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) | 
|     74 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |     77 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
|     75  |     78  | 
|     81   (end - start)/(i * 1.0e9) |     84   (end - start)/(i * 1.0e9) | 
|     82 } |     85 } | 
|     83  |     86  | 
|     84  |     87  | 
|     85 //test: (a?{n}) (a{n}) |     88 //test: (a?{n}) (a{n}) | 
|     86 for (i <- 1 to 9001 by 10) { |     89 for (i <- 1 to 11001 by 1000) { | 
|     87   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) |     90   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) | 
|     88 } |     91 } | 
|     89  |     92  | 
|     90 for (i <- 1 to 9001 by 1000) { |     93 for (i <- 1 to 11001 by 1000) { | 
|     91   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) |     94   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i)))) | 
|     92 } |     95 } | 
|     93  |     96  | 
|     94 //test: (a*)* b |     97 //test: (a*)* b | 
|     95 for (i <- 1 to 7000001 by 500000) { |     98 for (i <- 1 to 6000001 by 500000) { | 
|     96   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) |     99   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) | 
|     97 } |    100 } | 
|     98  |    101  | 
|     99 for (i <- 1 to 7000001 by 500000) { |    102 for (i <- 1 to 6000001 by 500000) { | 
|    100   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) |    103   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i)))) | 
|    101 } |    104 } | 
|    102  |    105  | 
|         |    106  | 
|         |    107  | 
|         |    108 // size of a regular expressions - for testing purposes  | 
|         |    109 def size(r: Rexp) : Int = r match { | 
|         |    110   case ZERO => 1 | 
|         |    111   case ONE => 1 | 
|         |    112   case CHAR(_) => 1 | 
|         |    113   case ALT(r1, r2) => 1 + size(r1) + size(r2) | 
|         |    114   case SEQ(r1, r2) => 1 + size(r1) + size(r2) | 
|         |    115   case STAR(r) => 1 + size(r) | 
|         |    116   case NTIMES(r, _) => 1 + size(r) | 
|         |    117 } | 
|         |    118  | 
|         |    119  | 
|         |    120 // now the size of the derivatives grows  | 
|         |    121 // much, much slower | 
|         |    122  | 
|         |    123 size(ders("".toList, EVIL2))      // 5 | 
|         |    124 size(ders("a".toList, EVIL2))     // 8 | 
|         |    125 size(ders("aa".toList, EVIL2))    // 8 | 
|         |    126 size(ders("aaa".toList, EVIL2))   // 8 | 
|         |    127 size(ders("aaaa".toList, EVIL2))  // 8 | 
|         |    128 size(ders("aaaaa".toList, EVIL2)) // 8 |