|      9 case class CHAR(c: Char) extends Rexp |      9 case class CHAR(c: Char) extends Rexp | 
|     10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  |     10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  | 
|     11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  |     11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  | 
|     12 case class STAR(r: Rexp) extends Rexp  |     12 case class STAR(r: Rexp) extends Rexp  | 
|     13 case class NTIMES(r: Rexp, n: Int) extends Rexp  |     13 case class NTIMES(r: Rexp, n: Int) extends Rexp  | 
|     14  |         | 
|     15  |         | 
|     16 // string of a regular expressions - for testing purposes  |         | 
|     17 def string(r: Rexp) : String = r match { |         | 
|     18   case ZERO => "0" |         | 
|     19   case ONE => "1" |         | 
|     20   case CHAR(c) => c.toString  |         | 
|     21   case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})" |         | 
|     22   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" |         | 
|     23   case SEQ(ONE, CHAR(c)) => s"1${c}" |         | 
|     24   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |         | 
|     25   case STAR(r) => s"(${string(r)})*" |         | 
|     26   case NTIMES(r, n) =>  s"(${string(r)}){${n}}" |         | 
|     27 } |         | 
|     28  |         | 
|     29 // size of a regular expressions - for testing purposes  |         | 
|     30 def size(r: Rexp) : Int = r match { |         | 
|     31   case ZERO => 1 |         | 
|     32   case ONE => 1 |         | 
|     33   case CHAR(_) => 1 |         | 
|     34   case ALT(r1, r2) => 1 + size(r1) + size(r2) |         | 
|     35   case SEQ(r1, r2) => 1 + size(r1) + size(r2) |         | 
|     36   case STAR(r) => 1 + size(r) |         | 
|     37   case NTIMES(r, _) => 1 + size(r) |         | 
|     38 } |         | 
|     39  |     14  | 
|     40  |     15  | 
|     41  |     16  | 
|     42 // nullable function: tests whether the regular  |     17 // nullable function: tests whether the regular  | 
|     43 // expression can recognise the empty string |     18 // expression can recognise the empty string | 
|     63   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |     38   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) | 
|     64   case NTIMES(r, i) =>  |     39   case NTIMES(r, i) =>  | 
|     65     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |     40     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) | 
|     66 } |     41 } | 
|     67  |     42  | 
|     68  |     43 def simp(r: Rexp) : Rexp = r match { | 
|     69  |     44   case ALT(r1, r2) => (simp(r1), simp(r2)) match { | 
|     70 def simp(r: Rexp, seen: Set[Rexp]) : (Rexp, Set[Rexp]) = r match { |     45     case (ZERO, r2s) => r2s | 
|     71   case ALT(r1, r2) => { |     46     case (r1s, ZERO) => r1s | 
|     72     val (r1s, seen1) = simp(r1, seen) |     47     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) | 
|     73     val (r2s, seen2) = simp(r2, seen1) |         | 
|     74     (r1s, r2s) match { |         | 
|     75       case (ZERO, r2s) => (r2s, seen2) |         | 
|     76       case (r1s, ZERO) => (r1s, seen2) |         | 
|     77       case (r1s, r2s) => (ALT(r1s, r2s), seen2) |         | 
|     78     } |         | 
|     79   } |     48   } | 
|     80   case SEQ(r1, r2) =>  { |     49   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match { | 
|     81     val (r1s, _) = simp(r1, Set()) |     50     case (ZERO, _) => ZERO | 
|     82     val (r2s, _) = simp(r2, Set()) |     51     case (_, ZERO) => ZERO | 
|     83     if (seen.contains(SEQ(r1s, r2s))) (ZERO, seen) |     52     case (ONE, r2s) => r2s | 
|     84     else (r1s, r2s) match { |     53     case (r1s, ONE) => r1s | 
|     85       case (ZERO, _) => (ZERO, seen) |     54     case (r1s, r2s) => SEQ(r1s, r2s) | 
|     86       case (_, ZERO) => (ZERO, seen) |         | 
|     87       case (ONE, r2s) => (r2s, seen + r2s) |         | 
|     88       case (r1s, ONE) => (r1s, seen + r1s) |         | 
|     89       case (r1s, r2s) => (SEQ(r1s, r2s), seen + SEQ(r1s, r2s)) |         | 
|     90     } |         | 
|     91   } |     55   } | 
|     92   case r => if (seen.contains(r)) (ZERO, seen) else (r, seen + r) |     56   case r => r | 
|     93 } |     57 } | 
|     94  |     58  | 
|     95  |     59  | 
|     96 // derivative w.r.t. a string (iterates der) |     60 // derivative w.r.t. a string (iterates der) | 
|     97 def ders (s: List[Char], r: Rexp) : Rexp = s match { |     61 def ders (s: List[Char], r: Rexp) : Rexp = s match { | 
|     98   case Nil => r |     62   case Nil => r | 
|     99   case c::s => ders(s, simp(der(c, r), Set())._1) |     63   case c::s => ders(s, simp(der(c, r))) | 
|    100 } |     64 } | 
|    101  |     65  | 
|    102  |     66  | 
|    103 // main matcher function |     67 // main matcher function | 
|    104 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |     68 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) | 
|    105  |     69  | 
|    106  |     70  | 
|    107 //one or zero |     71 //one or zero | 
|    108 def OPT(r: Rexp) = ALT(r, ONE) |     72 def OPT(r: Rexp) = ALT(r, ONE) | 
|    109  |         | 
|    110  |         | 
|    111  |         | 
|    112  |         | 
|    113  |         | 
|    114  |         | 
|    115  |         | 
|    116  |         | 
|    117  |         | 
|    118 def time_needed[T](i: Int, code: => T) = { |         | 
|    119   val start = System.nanoTime() |         | 
|    120   for (j <- 1 to i) code |         | 
|    121   val end = System.nanoTime() |         | 
|    122   (end - start)/(i * 1.0e9) |         | 
|    123 } |         | 
|    124  |         | 
|    125  |         | 
|    126 // star example |         | 
|    127 val Tstar = STAR(STAR(STAR(CHAR('a')))) |         | 
|    128  |         | 
|    129 string(ders("".toList, Tstar)) |         | 
|    130 size(ders("".toList, Tstar))      // 4 |         | 
|    131 string(ders("a".toList, Tstar)) |         | 
|    132 size(ders("a".toList, Tstar))     // 11 |         | 
|    133 string(ders("aa".toList, Tstar)) |         | 
|    134 size(ders("aa".toList, Tstar))    // 11 |         | 
|    135 string(ders("aaa".toList, Tstar)) |         | 
|    136 size(ders("aaa".toList, Tstar))   // 11 |         | 
|    137 string(ders("aaaa".toList, Tstar)) |         | 
|    138 size(ders("aaaa".toList, Tstar))  // 11 |         | 
|    139 string(ders("aaaa".toList, Tstar)) |         | 
|    140 size(ders("aaaaa".toList, Tstar)) // 11 |         | 
|    141 string(ders("aaaab".toList, Tstar)) |         | 
|    142 size(ders("aaaaab".toList, Tstar)) // 1 |         | 
|    143  |         | 
|    144 // test: ("a" | "aa")* |         | 
|    145 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) |         | 
|    146  |         | 
|    147 for (i <- 1 to 100 by 1) { |         | 
|    148   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) +  |         | 
|    149 	  " size: " + size(ders(("a" * i).toList, EVIL3))) |         | 
|    150 } |         | 
|    151  |         | 
|    152  |         | 
|    153 println("start " + string(EVIL3) + "    " + size(EVIL3)) |         | 
|    154 val t1  = der('a', EVIL3) |         | 
|    155 println(string(t1) + "    " + size(t1)) |         | 
|    156 val t1s = simp(t1, Set())._1  |         | 
|    157 println("simplified " + string(t1s) + "    " + size(t1s)) |         | 
|    158 val t2  = der('a', t1s) |         | 
|    159 println(string(t2) + "    " + size(t2)) |         | 
|    160 val t2s = simp(t2, Set())._1  |         | 
|    161 println("simplified " + string(t2s) + "    " + size(t2s)) |         | 
|    162 val t3  = der('a', t2s) |         | 
|    163 println(string(t3) + "    " + size(t3)) |         | 
|    164 val t3s = simp(t3, Set())._1  |         | 
|    165 println("simplified " + string(t3s) + "    " + size(t3s)) |         | 
|    166 val t4  = der('a', t3s) |         | 
|    167 val t4s = simp(t4, Set())._1  |         | 
|    168  |         | 
|    169  |         | 
|    170  |         | 
|    171  |         | 
|    172  |         | 
|    173  |         | 
|    174  |         | 
|    175  |         | 
|    176 println(string(t4) + "    " + size(t4)) |         | 
|    177 println("simplified " + string(t4s) + "    " + size(t4s)) |         | 
|    178  |     73  | 
|    179  |     74  | 
|    180 // Test Cases |     75 // Test Cases | 
|    181  |     76  | 
|    182 //evil regular expressions: (a?){n} a{n}  and (a*)* b |     77 //evil regular expressions: (a?){n} a{n}  and (a*)* b | 
|    233 size(ders("aaa".toList, EVIL2))   // 8 |    127 size(ders("aaa".toList, EVIL2))   // 8 | 
|    234 size(ders("aaaa".toList, EVIL2))  // 8 |    128 size(ders("aaaa".toList, EVIL2))  // 8 | 
|    235 size(ders("aaaaa".toList, EVIL2)) // 8 |    129 size(ders("aaaaa".toList, EVIL2)) // 8 | 
|    236  |    130  | 
|    237  |    131  | 
|         |    132 // test: ("a" | "aa")* | 
|         |    133 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) | 
|         |    134  | 
|         |    135 for (i <- 1 to 29 by 1) { | 
|         |    136   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) +  | 
|         |    137 	  " size: " + size(ders(("a" * i).toList, EVIL3))) | 
|         |    138 } | 
|    238  |    139  | 
|    239  |    140  | 
|    240  |    141  | 
|    241 // Examples from the Sulzmann paper |         | 
|    242 val sulzmann = STAR(ALT(ALT(CHAR('a'), CHAR('b')), SEQ(CHAR('a'), CHAR('b')))) |         | 
|    243  |         | 
|    244  |         | 
|    245 for (i <- 1 to 4501 by 500) { |         | 
|    246   println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "a" * i)))) |         | 
|    247 } |         | 
|    248  |         | 
|    249 for (i <- 1 to 4501 by 500) { |         | 
|    250   println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "ab" * i)))) |         | 
|    251 } |         | 
|    252  |         | 
|    253 size(ders("".toList, EVIL2))      // 5 |         | 
|    254 size(ders("a".toList, EVIL2))     // 8 |         | 
|    255 size(ders("aa".toList, EVIL2))    // 8 |         | 
|    256 size(ders("aaa".toList, EVIL2))   // 8 |         | 
|    257 size(ders("aaaa".toList, EVIL2))  // 8 |         | 
|    258 size(ders("aaaaa".toList, EVIL2)) // 8 |         | 
|    259  |         | 
|    260  |         | 
|    261  |         | 
|    262 (((1 + 1a) ~ ((a + aa))*) + (((0 + 1) ~ ((a + aa))*) + ((1 + 1a) ~ ((a + aa))*))) |         |