|         |      1 // Core Part about Regular Expression Matching | 
|         |      2 //============================================= | 
|         |      3  | 
|         |      4 object CW8c { | 
|         |      5  | 
|         |      6 // Regular Expressions | 
|         |      7 abstract class Rexp | 
|         |      8 case object ZERO extends Rexp | 
|         |      9 case object ONE extends Rexp | 
|         |     10 case class CHAR(c: Char) extends Rexp | 
|         |     11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative  | 
|         |     12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence | 
|         |     13 case class STAR(r: Rexp) extends Rexp             // star | 
|         |     14  | 
|         |     15  | 
|         |     16 // some convenience for typing regular expressions | 
|         |     17  | 
|         |     18 import scala.language.implicitConversions     | 
|         |     19 import scala.language.reflectiveCalls  | 
|         |     20  | 
|         |     21 def charlist2rexp(s: List[Char]): Rexp = s match { | 
|         |     22   case Nil => ONE | 
|         |     23   case c::Nil => CHAR(c) | 
|         |     24   case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
|         |     25 } | 
|         |     26 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) | 
|         |     27  | 
|         |     28 implicit def RexpOps (r: Rexp) = new { | 
|         |     29   def | (s: Rexp) = ALT(r, s) | 
|         |     30   def % = STAR(r) | 
|         |     31   def ~ (s: Rexp) = SEQ(r, s) | 
|         |     32 } | 
|         |     33  | 
|         |     34 implicit def stringOps (s: String) = new { | 
|         |     35   def | (r: Rexp) = ALT(s, r) | 
|         |     36   def | (r: String) = ALT(s, r) | 
|         |     37   def % = STAR(s) | 
|         |     38   def ~ (r: Rexp) = SEQ(s, r) | 
|         |     39   def ~ (r: String) = SEQ(s, r) | 
|         |     40 } | 
|         |     41  | 
|         |     42 // (5) Complete the function nullable according to | 
|         |     43 // the definition given in the coursework; this  | 
|         |     44 // function checks whether a regular expression | 
|         |     45 // can match the empty string and Returns a boolean | 
|         |     46 // accordingly. | 
|         |     47  | 
|         |     48 def nullable (r: Rexp) : Boolean = ??? | 
|         |     49  | 
|         |     50  | 
|         |     51 // (6) Complete the function der according to | 
|         |     52 // the definition given in the coursework; this | 
|         |     53 // function calculates the derivative of a  | 
|         |     54 // regular expression w.r.t. a character. | 
|         |     55  | 
|         |     56 def der (c: Char, r: Rexp) : Rexp = ??? | 
|         |     57  | 
|         |     58  | 
|         |     59 // (7) Complete the simp function according to | 
|         |     60 // the specification given in the coursework; this | 
|         |     61 // function simplifies a regular expression from | 
|         |     62 // the inside out, like you would simplify arithmetic  | 
|         |     63 // expressions; however it does not simplify inside  | 
|         |     64 // STAR-regular expressions. | 
|         |     65  | 
|         |     66 def simp(r: Rexp) : Rexp = ??? | 
|         |     67  | 
|         |     68  | 
|         |     69 // (8) Complete the two functions below; the first  | 
|         |     70 // calculates the derivative w.r.t. a string; the second | 
|         |     71 // is the regular expression matcher taking a regular | 
|         |     72 // expression and a string and checks whether the | 
|         |     73 // string matches the regular expression | 
|         |     74  | 
|         |     75 def ders (s: List[Char], r: Rexp) : Rexp = ??? | 
|         |     76  | 
|         |     77 def matcher(r: Rexp, s: String): Boolean = ??? | 
|         |     78  | 
|         |     79  | 
|         |     80 // (9) Complete the size function for regular | 
|         |     81 // expressions according to the specification  | 
|         |     82 // given in the coursework. | 
|         |     83  | 
|         |     84 def size(r: Rexp): Int = ??? | 
|         |     85  | 
|         |     86  | 
|         |     87 // some testing data | 
|         |     88  | 
|         |     89 /* | 
|         |     90 matcher(("a" ~ "b") ~ "c", "abc")  // => true | 
|         |     91 matcher(("a" ~ "b") ~ "c", "ab")   // => false | 
|         |     92  | 
|         |     93 // the supposedly 'evil' regular expression (a*)* b | 
|         |     94 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
|         |     95  | 
|         |     96 matcher(EVIL, "a" * 1000 ++ "b")   // => true | 
|         |     97 matcher(EVIL, "a" * 1000)          // => false | 
|         |     98  | 
|         |     99 // size without simplifications | 
|         |    100 size(der('a', der('a', EVIL)))             // => 28 | 
|         |    101 size(der('a', der('a', der('a', EVIL))))   // => 58 | 
|         |    102  | 
|         |    103 // size with simplification | 
|         |    104 size(simp(der('a', der('a', EVIL))))           // => 8 | 
|         |    105 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
|         |    106  | 
|         |    107 // Python needs around 30 seconds for matching 28 a's with EVIL.  | 
|         |    108 // Java 9 and later increase this to an "astonishing" 40000 a's in | 
|         |    109 // 30 seconds. | 
|         |    110 // | 
|         |    111 // Lets see how long it really takes to match strings with  | 
|         |    112 // 5 Million a's...it should be in the range of a couple | 
|         |    113 // of seconds. | 
|         |    114  | 
|         |    115 def time_needed[T](i: Int, code: => T) = { | 
|         |    116   val start = System.nanoTime() | 
|         |    117   for (j <- 1 to i) code | 
|         |    118   val end = System.nanoTime() | 
|         |    119   (end - start)/(i * 1.0e9) | 
|         |    120 } | 
|         |    121  | 
|         |    122 for (i <- 0 to 5000000 by 500000) { | 
|         |    123   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i)))) | 
|         |    124 } | 
|         |    125  | 
|         |    126 // another "power" test case  | 
|         |    127 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE | 
|         |    128  | 
|         |    129 // the Iterator produces the rexp | 
|         |    130 // | 
|         |    131 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) | 
|         |    132 // | 
|         |    133 //    where SEQ is nested 50 times. | 
|         |    134  | 
|         |    135 */ | 
|         |    136  | 
|         |    137 } |