44 // function checks whether a regular expression  | 
    42 // function checks whether a regular expression  | 
    45 // can match the empty string and Returns a boolean  | 
    43 // can match the empty string and Returns a boolean  | 
    46 // accordingly.  | 
    44 // accordingly.  | 
    47   | 
    45   | 
    48 def nullable (r: Rexp) : Boolean = r match { | 
    46 def nullable (r: Rexp) : Boolean = r match { | 
    49   case ZERO => false  | 
    47     case ZERO => false  | 
    50   case ONE => true  | 
    48     case ONE => true  | 
    51   case CHAR(_) => false  | 
    49     case CHAR(_) => false  | 
    52   case ALT(r1, r2) => nullable(r1) || nullable(r2)  | 
    50     case ALT(r1, r2) => nullable(r1) | nullable(r2)  | 
    53   case SEQ(r1, r2) => nullable(r1) && nullable(r2)  | 
    51     case SEQ(r1, r2) => nullable(r1) & nullable(r2)  | 
    54   case STAR(_) => true  | 
    52     case STAR(_) => true  | 
    55 }  | 
    53 }  | 
         | 
    54   | 
         | 
    55   | 
    56   | 
    56   | 
    57 // (2) Complete the function der according to  | 
    57 // (2) Complete the function der according to  | 
    58 // the definition given in the coursework; this  | 
    58 // the definition given in the coursework; this  | 
    59 // function calculates the derivative of a   | 
    59 // function calculates the derivative of a   | 
    60 // regular expression w.r.t. a character.  | 
    60 // regular expression w.r.t. a character.  | 
    61   | 
    61   | 
         | 
    62 //TODO: debug  | 
         | 
    63 //TODO: understand this more.   | 
         | 
    64 // first test runs  | 
         | 
    65 // test 2 fails  | 
         | 
    66 // test 3 runs  | 
         | 
    67 // test 4 runs  | 
    62 def der (c: Char, r: Rexp) : Rexp = r match { | 
    68 def der (c: Char, r: Rexp) : Rexp = r match { | 
    63   case ZERO => ZERO  | 
    69     //TODO: debug  | 
    64   case ONE => ZERO  | 
    70     case ZERO => ZERO  | 
    65   case CHAR(d) => if (c == d) ONE else ZERO  | 
    71     case ONE => ZERO  | 
    66   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))  | 
    72     case CHAR(r1) => if (c == r1) ONE else ZERO  | 
    67   case SEQ(r1, r2) =>   | 
    73     case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))  | 
    68     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))  | 
    74     case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2)  | 
    69     else SEQ(der(c, r1), r2)  | 
    75     case STAR(r1) => SEQ(der(c, r1), STAR(r1))  | 
    70   case STAR(r1) => SEQ(der(c, r1), STAR(r1))  | 
         | 
    71 }  | 
    76 }  | 
         | 
    77   | 
    72   | 
    78   | 
    73 // (3) Complete the simp function according to  | 
    79 // (3) Complete the simp function according to  | 
    74 // the specification given in the coursework; this  | 
    80 // the specification given in the coursework; this  | 
    75 // function simplifies a regular expression from  | 
    81 // function simplifies a regular expression from  | 
    76 // the inside out, like you would simplify arithmetic   | 
    82 // the inside out, like you would simplify arithmetic   | 
    77 // expressions; however it does not simplify inside   | 
    83 // expressions; however it does not simplify inside   | 
    78 // STAR-regular expressions.  | 
    84 // STAR-regular expressions.  | 
    79   | 
    85   | 
    80 def simp(r: Rexp) : Rexp = r match { | 
    86 def simp(r: Rexp) : Rexp = r match { | 
    81   case ALT(r1, r2) => (simp(r1), simp(r2)) match { | 
    87     case STAR(_) => r  | 
    82     case (ZERO, r2s) => r2s  | 
    88     case SEQ(r1, r2) => (simp(r1), simp(r2)) match { // potential failure | 
    83     case (r1s, ZERO) => r1s  | 
    89         case (_, ZERO) => ZERO  | 
    84     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)  | 
    90         case (ZERO, _) => ZERO  | 
    85   }  | 
    91         case (r1, ONE) => simp(r1)  | 
    86   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match { | 
    92         case (ONE, r2) => simp(r2)  | 
    87     case (ZERO, _) => ZERO  | 
    93         case (r1, r2) => SEQ(r1, r2)  | 
    88     case (_, ZERO) => ZERO  | 
    94     }  | 
    89     case (ONE, r2s) => r2s  | 
    95     case ALT(r1, r2) => (simp(r1), simp(r2)) match { | 
    90     case (r1s, ONE) => r1s  | 
    96         case (r1, ZERO) => simp(r1)  | 
    91     case (r1s, r2s) => SEQ(r1s, r2s)  | 
    97         case (ZERO, r1) => simp(r1)  | 
    92   }  | 
    98         case (r1, r2) if r1 == r2 => simp(r1)  | 
    93   case r => r  | 
    99         case (r1, r2) => ALT(r1, r2)  | 
         | 
   100     }  | 
         | 
   101     case r => r  | 
    94 }  | 
   102 }  | 
    95   | 
   103   | 
    96   | 
   104   | 
    97 // (4) Complete the two functions below; the first   | 
   105 // (4) Complete the two functions below; the first   | 
    98 // calculates the derivative w.r.t. a string; the second  | 
   106 // calculates the derivative w.r.t. a string; the second  | 
    99 // is the regular expression matcher taking a regular  | 
   107 // is the regular expression matcher taking a regular  | 
   100 // expression and a string and checks whether the  | 
   108 // expression and a string and checks whether the  | 
   101 // string matches the regular expression.  | 
   109 // string matches the regular expression  | 
   102   | 
   110   | 
   103 def ders (s: List[Char], r: Rexp) : Rexp = s match { | 
   111 def ders (s: List[Char], r: Rexp) : Rexp = s match { | 
   104   case Nil => r  | 
   112     case Nil => r  | 
   105   case c::s => ders(s, simp(der(c, r)))  | 
   113     case c::cs => ders(cs, simp(der(c, r)))  | 
   106 }  | 
   114 }  | 
   107   | 
   115   | 
   108 // main matcher function  | 
   116 def matcher(r: Rexp, s: String): Boolean = { | 
   109 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r))  | 
   117     nullable(ders(s.toList, r))  | 
         | 
   118 }  | 
         | 
   119   | 
   110   | 
   120   | 
   111 // (5) Complete the size function for regular  | 
   121 // (5) Complete the size function for regular  | 
   112 // expressions according to the specification   | 
   122 // expressions according to the specification   | 
   113 // given in the coursework.  | 
   123 // given in the coursework.  | 
   114   | 
   124   | 
   115   | 
         | 
   116 def size(r: Rexp): Int = r match { | 
   125 def size(r: Rexp): Int = r match { | 
   117   case ZERO => 1  | 
   126     case ZERO => 1  | 
   118   case ONE => 1  | 
   127     case ONE => 1  | 
   119   case CHAR(_) => 1  | 
   128     case CHAR(_) => 1  | 
   120   case ALT(r1, r2) => 1 + size(r1) + size (r2)  | 
   129     case SEQ(r1, r2) => 1 + size(r1) + size(r2)  | 
   121   case SEQ(r1, r2) => 1 + size(r1) + size (r2)  | 
   130     case ALT(r1, r2) => 1 + size(r1) + size(r2)  | 
   122   case STAR(r1) => 1 + size(r1)  | 
   131     case STAR(r1) => 1 + size(r1)  | 
   123 }  | 
   132 }  | 
   124   | 
   133   | 
   125   | 
   134   | 
         | 
   135 // some testing data  | 
   126   | 
   136   | 
   127 // some testing data  | 
   137 //matcher(("a" ~ "b") ~ "c", "abc")  // => true | 
   128 /*  | 
   138 //matcher(("a" ~ "b") ~ "c", "ab")   // => false | 
   129 matcher(("a" ~ "b") ~ "c", "abc")  // => true | 
         | 
   130 matcher(("a" ~ "b") ~ "c", "ab")   // => false | 
         | 
   131   | 
   139   | 
   132 // the supposedly 'evil' regular expression (a*)* b  | 
   140 // the supposedly 'evil' regular expression (a*)* b  | 
   133 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
   141 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
   134   | 
   142   | 
   135 matcher(EVIL, "a" * 1000 ++ "b")   // => true  | 
   143 //matcher(EVIL, "a" * 1000 ++ "b")   // => true  | 
   136 matcher(EVIL, "a" * 1000)          // => false  | 
   144 //matcher(EVIL, "a" * 1000)          // => false  | 
   137   | 
   145   | 
   138 // size without simplifications  | 
   146 // size without simplifications  | 
   139 size(der('a', der('a', EVIL)))             // => 28 | 
   147 //size(der('a', der('a', EVIL)))             // => 28 | 
   140 size(der('a', der('a', der('a', EVIL))))   // => 58 | 
   148 //size(der('a', der('a', der('a', EVIL))))   // => 58 | 
         | 
   149   | 
         | 
   150   | 
   141   | 
   151   | 
   142 // size with simplification  | 
   152 // size with simplification  | 
   143 size(simp(der('a', der('a', EVIL))))           // => 8 | 
   153 //size(simp(der('a', der('a', EVIL))))           // => 8 | 
   144 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
   154 //size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
   145   | 
   155   | 
   146 // Python needs around 30 seconds for matching 28 a's with EVIL.   | 
   156 // Python needs around 30 seconds for matching 28 a's with EVIL.   | 
   147 // Java 9 and later increase this to an "astonishing" 40000 a's in  | 
   157 // Java 9 and later increase this to an "astonishing" 40000 a's in  | 
   148 // around 30 seconds.  | 
   158 // 30 seconds.  | 
   149 //  | 
   159 //  | 
   150 // Lets see how long it takes to match strings with   | 
   160 // Lets see how long it really takes to match strings with   | 
   151 // 5 Million a's...it should be in the range of a   | 
   161 // 5 Million a's...it should be in the range of a couple  | 
   152 // couple of seconds.  | 
   162 // of seconds.  | 
   153   | 
   163   | 
   154 def time_needed[T](i: Int, code: => T) = { | 
   164 def time_needed[T](i: Int, code: => T) = { | 
   155   val start = System.nanoTime()  | 
   165   val start = System.nanoTime()  | 
   156   for (j <- 1 to i) code  | 
   166   for (j <- 1 to i) code  | 
   157   val end = System.nanoTime()  | 
   167   val end = System.nanoTime()  | 
   158   (end - start)/(i * 1.0e9)  | 
   168   (end - start)/(i * 1.0e9)  | 
   159 }  | 
   169 }  | 
   160   | 
   170   | 
   161 for (i <- 0 to 5000000 by 500000) { | 
   171 //for (i <- 0 to 5000000 by 500000) { | 
   162   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))  | 
   172 //  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))  | 
   163 }  | 
   173 //}  | 
   164   | 
   174   | 
   165 // another "power" test case   | 
   175 // another "power" test case   | 
   166 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE  | 
   176 println(simp(Iterator.iterate(ONE:Rexp)(r => ALT(r, r)).drop(40).next))  | 
   167   | 
   177   | 
   168 // the Iterator produces the rexp  | 
   178 // the Iterator produces the rexp  | 
   169 //  | 
   179 //  | 
   170 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)  | 
   180 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)  | 
   171 //  | 
   181 //  | 
   172 //    where SEQ is nested 100 times.  | 
   182 //    where SEQ is nested 50 times.  | 
   173    | 
         | 
   174 */  | 
         | 
   175   | 
   183   | 
   176 //}  | 
   184   |