|      9 case class CHAR(c: Char) extends Rexp |      9 case class CHAR(c: Char) extends Rexp | 
|     10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  |     10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  | 
|     11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  |     11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  | 
|     12 case class STAR(r: Rexp) extends Rexp  |     12 case class STAR(r: Rexp) extends Rexp  | 
|     13 case class NTIMES(r: Rexp, n: Int) extends Rexp  |     13 case class NTIMES(r: Rexp, n: Int) extends Rexp  | 
|         |     14  | 
|         |     15  | 
|         |     16 // string of a regular expressions - for testing purposes  | 
|         |     17 def string(r: Rexp) : String = r match { | 
|         |     18   case ZERO => "0" | 
|         |     19   case ONE => "1" | 
|         |     20   case CHAR(c) => c.toString  | 
|         |     21   case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})" | 
|         |     22   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" | 
|         |     23   case SEQ(ONE, CHAR(c)) => s"1${c}" | 
|         |     24   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" | 
|         |     25   case STAR(r) => s"(${string(r)})*" | 
|         |     26   case NTIMES(r, n) =>  s"(${string(r)}){${n}}" | 
|         |     27 } | 
|         |     28  | 
|         |     29 // size of a regular expressions - for testing purposes  | 
|         |     30 def size(r: Rexp) : Int = r match { | 
|         |     31   case ZERO => 1 | 
|         |     32   case ONE => 1 | 
|         |     33   case CHAR(_) => 1 | 
|         |     34   case ALT(r1, r2) => 1 + size(r1) + size(r2) | 
|         |     35   case SEQ(r1, r2) => 1 + size(r1) + size(r2) | 
|         |     36   case STAR(r) => 1 + size(r) | 
|         |     37   case NTIMES(r, _) => 1 + size(r) | 
|         |     38 } | 
|         |     39  | 
|     14  |     40  | 
|     15  |     41  | 
|     16 // nullable function: tests whether the regular  |     42 // nullable function: tests whether the regular  | 
|     17 // expression can recognise the empty string |     43 // expression can recognise the empty string | 
|     18 def nullable (r: Rexp) : Boolean = r match { |     44 def nullable (r: Rexp) : Boolean = r match { | 
|     37   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |     63   case STAR(r1) => SEQ(der(c, r1), STAR(r1)) | 
|     38   case NTIMES(r, i) =>  |     64   case NTIMES(r, i) =>  | 
|     39     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |     65     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) | 
|     40 } |     66 } | 
|     41  |     67  | 
|     42 def simp(r: Rexp) : Rexp = r match { |     68  | 
|     43   case ALT(r1, r2) => (simp(r1), simp(r2)) match { |     69  | 
|     44     case (ZERO, r2s) => r2s |     70 def simp(r: Rexp, seen: Set[Rexp]) : (Rexp, Set[Rexp]) = r match { | 
|     45     case (r1s, ZERO) => r1s |     71   case ALT(r1, r2) => { | 
|     46     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) |     72     val (r1s, seen1) = simp(r1, seen) | 
|         |     73     val (r2s, seen2) = simp(r2, seen1) | 
|         |     74     (r1s, r2s) match { | 
|         |     75       case (ZERO, r2s) => (r2s, seen2) | 
|         |     76       case (r1s, ZERO) => (r1s, seen2) | 
|         |     77       case (r1s, r2s) => (ALT(r1s, r2s), seen2) | 
|         |     78     } | 
|     47   } |     79   } | 
|     48   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match { |     80   case SEQ(r1, r2) =>  { | 
|     49     case (ZERO, _) => ZERO |     81     val (r1s, _) = simp(r1, Set()) | 
|     50     case (_, ZERO) => ZERO |     82     val (r2s, _) = simp(r2, Set()) | 
|     51     case (ONE, r2s) => r2s |     83     if (seen.contains(SEQ(r1s, r2s))) (ZERO, seen) | 
|     52     case (r1s, ONE) => r1s |     84     else (r1s, r2s) match { | 
|     53     case (r1s, r2s) => SEQ(r1s, r2s) |     85       case (ZERO, _) => (ZERO, seen) | 
|         |     86       case (_, ZERO) => (ZERO, seen) | 
|         |     87       case (ONE, r2s) => (r2s, seen + r2s) | 
|         |     88       case (r1s, ONE) => (r1s, seen + r1s) | 
|         |     89       case (r1s, r2s) => (SEQ(r1s, r2s), seen + SEQ(r1s, r2s)) | 
|         |     90     } | 
|     54   } |     91   } | 
|     55   case r => r |     92   case r => if (seen.contains(r)) (ZERO, seen) else (r, seen + r) | 
|     56 } |     93 } | 
|     57  |     94  | 
|     58  |     95  | 
|     59 // derivative w.r.t. a string (iterates der) |     96 // derivative w.r.t. a string (iterates der) | 
|     60 def ders (s: List[Char], r: Rexp) : Rexp = s match { |     97 def ders (s: List[Char], r: Rexp) : Rexp = s match { | 
|     61   case Nil => r |     98   case Nil => r | 
|     62   case c::s => ders(s, simp(der(c, r))) |     99   case c::s => ders(s, simp(der(c, r), Set())._1) | 
|     63 } |    100 } | 
|     64  |    101  | 
|     65  |    102  | 
|     66 // main matcher function |    103 // main matcher function | 
|     67 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |    104 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) | 
|     68  |    105  | 
|     69  |    106  | 
|     70 //one or zero |    107 //one or zero | 
|     71 def OPT(r: Rexp) = ALT(r, ONE) |    108 def OPT(r: Rexp) = ALT(r, ONE) | 
|         |    109  | 
|         |    110  | 
|         |    111  | 
|         |    112  | 
|         |    113  | 
|         |    114  | 
|         |    115  | 
|         |    116  | 
|         |    117  | 
|         |    118 def time_needed[T](i: Int, code: => T) = { | 
|         |    119   val start = System.nanoTime() | 
|         |    120   for (j <- 1 to i) code | 
|         |    121   val end = System.nanoTime() | 
|         |    122   (end - start)/(i * 1.0e9) | 
|         |    123 } | 
|         |    124  | 
|         |    125  | 
|         |    126 // star example | 
|         |    127 val Tstar = STAR(STAR(STAR(CHAR('a')))) | 
|         |    128  | 
|         |    129 string(ders("".toList, Tstar)) | 
|         |    130 size(ders("".toList, Tstar))      // 4 | 
|         |    131 string(ders("a".toList, Tstar)) | 
|         |    132 size(ders("a".toList, Tstar))     // 11 | 
|         |    133 string(ders("aa".toList, Tstar)) | 
|         |    134 size(ders("aa".toList, Tstar))    // 11 | 
|         |    135 string(ders("aaa".toList, Tstar)) | 
|         |    136 size(ders("aaa".toList, Tstar))   // 11 | 
|         |    137 string(ders("aaaa".toList, Tstar)) | 
|         |    138 size(ders("aaaa".toList, Tstar))  // 11 | 
|         |    139 string(ders("aaaa".toList, Tstar)) | 
|         |    140 size(ders("aaaaa".toList, Tstar)) // 11 | 
|         |    141 string(ders("aaaab".toList, Tstar)) | 
|         |    142 size(ders("aaaaab".toList, Tstar)) // 1 | 
|         |    143  | 
|         |    144 // test: ("a" | "aa")* | 
|         |    145 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) | 
|         |    146  | 
|         |    147 for (i <- 1 to 100 by 1) { | 
|         |    148   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) +  | 
|         |    149 	  " size: " + size(ders(("a" * i).toList, EVIL3))) | 
|         |    150 } | 
|         |    151  | 
|         |    152  | 
|         |    153 println("start " + string(EVIL3) + "    " + size(EVIL3)) | 
|         |    154 val t1  = der('a', EVIL3) | 
|         |    155 println(string(t1) + "    " + size(t1)) | 
|         |    156 val t1s = simp(t1, Set())._1  | 
|         |    157 println("simplified " + string(t1s) + "    " + size(t1s)) | 
|         |    158 val t2  = der('a', t1s) | 
|         |    159 println(string(t2) + "    " + size(t2)) | 
|         |    160 val t2s = simp(t2, Set())._1  | 
|         |    161 println("simplified " + string(t2s) + "    " + size(t2s)) | 
|         |    162 val t3  = der('a', t2s) | 
|         |    163 println(string(t3) + "    " + size(t3)) | 
|         |    164 val t3s = simp(t3, Set())._1  | 
|         |    165 println("simplified " + string(t3s) + "    " + size(t3s)) | 
|         |    166 val t4  = der('a', t3s) | 
|         |    167 val t4s = simp(t4, Set())._1  | 
|         |    168  | 
|         |    169  | 
|         |    170  | 
|         |    171  | 
|         |    172  | 
|         |    173  | 
|         |    174  | 
|         |    175  | 
|         |    176 println(string(t4) + "    " + size(t4)) | 
|         |    177 println("simplified " + string(t4s) + "    " + size(t4s)) | 
|     72  |    178  | 
|     73  |    179  | 
|     74 // Test Cases |    180 // Test Cases | 
|     75  |    181  | 
|     76 //evil regular expressions: (a?){n} a{n}  and (a*)* b |    182 //evil regular expressions: (a?){n} a{n}  and (a*)* b |