|         |      1 // Main Part 3 about Regular Expression Matching | 
|         |      2 //============================================= | 
|         |      3  | 
|         |      4 object M3 { | 
|         |      5  | 
|         |      6 // Regular Expressions | 
|         |      7 abstract class Rexp | 
|         |      8 case object ZERO extends Rexp | 
|         |      9 case object ONE extends Rexp | 
|         |     10 case class CHAR(c: Char) extends Rexp | 
|         |     11 case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives  | 
|         |     12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence | 
|         |     13 case class STAR(r: Rexp) extends Rexp             // star | 
|         |     14  | 
|         |     15  | 
|         |     16 // some convenience for typing regular expressions | 
|         |     17  | 
|         |     18 //the usual binary choice can be defined in terms of ALTs | 
|         |     19 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) | 
|         |     20  | 
|         |     21  | 
|         |     22 import scala.language.implicitConversions     | 
|         |     23 import scala.language.reflectiveCalls  | 
|         |     24  | 
|         |     25 def charlist2rexp(s: List[Char]): Rexp = s match { | 
|         |     26   case Nil => ONE | 
|         |     27   case c::Nil => CHAR(c) | 
|         |     28   case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
|         |     29 } | 
|         |     30 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) | 
|         |     31  | 
|         |     32 implicit def RexpOps (r: Rexp) = new { | 
|         |     33   def | (s: Rexp) = ALT(r, s) | 
|         |     34   def % = STAR(r) | 
|         |     35   def ~ (s: Rexp) = SEQ(r, s) | 
|         |     36 } | 
|         |     37  | 
|         |     38 implicit def stringOps (s: String) = new { | 
|         |     39   def | (r: Rexp) = ALT(s, r) | 
|         |     40   def | (r: String) = ALT(s, r) | 
|         |     41   def % = STAR(s) | 
|         |     42   def ~ (r: Rexp) = SEQ(s, r) | 
|         |     43   def ~ (r: String) = SEQ(s, r) | 
|         |     44 } | 
|         |     45  | 
|         |     46 // (1) Complete the function nullable according to | 
|         |     47 // the definition given in the coursework; this  | 
|         |     48 // function checks whether a regular expression | 
|         |     49 // can match the empty string and Returns a boolean | 
|         |     50 // accordingly. | 
|         |     51  | 
|         |     52 def nullable (r: Rexp) : Boolean = ??? | 
|         |     53  | 
|         |     54  | 
|         |     55 // (2) Complete the function der according to | 
|         |     56 // the definition given in the coursework; this | 
|         |     57 // function calculates the derivative of a  | 
|         |     58 // regular expression w.r.t. a character. | 
|         |     59  | 
|         |     60 def der (c: Char, r: Rexp) : Rexp = ??? | 
|         |     61  | 
|         |     62  | 
|         |     63 // (3) Implement the flatten function flts. It | 
|         |     64 // deletes 0s from a list of regular expressions | 
|         |     65 // and also 'spills out', or flattens, nested  | 
|         |     66 // ALTernativeS. | 
|         |     67  | 
|         |     68 def flts(rs: List[Rexp]) : List[Rexp] = ??? | 
|         |     69  | 
|         |     70  | 
|         |     71  | 
|         |     72 // (4) Complete the simp function according to | 
|         |     73 // the specification given in the coursework description;  | 
|         |     74 // this function simplifies a regular expression from | 
|         |     75 // the inside out, like you would simplify arithmetic  | 
|         |     76 // expressions; however it does not simplify inside  | 
|         |     77 // STAR-regular expressions. Use the _.distinct and  | 
|         |     78 // flts functions. | 
|         |     79  | 
|         |     80 def simp(r: Rexp) : Rexp = ??? | 
|         |     81  | 
|         |     82  | 
|         |     83 // (5) Complete the two functions below; the first  | 
|         |     84 // calculates the derivative w.r.t. a string; the second | 
|         |     85 // is the regular expression matcher taking a regular | 
|         |     86 // expression and a string and checks whether the | 
|         |     87 // string matches the regular expression | 
|         |     88  | 
|         |     89 def ders (s: List[Char], r: Rexp) : Rexp = ??? | 
|         |     90  | 
|         |     91 def matcher(r: Rexp, s: String): Boolean = ??? | 
|         |     92  | 
|         |     93  | 
|         |     94 // (6) Complete the size function for regular | 
|         |     95 // expressions according to the specification  | 
|         |     96 // given in the coursework. | 
|         |     97  | 
|         |     98 def size(r: Rexp): Int = ??? | 
|         |     99  | 
|         |    100  | 
|         |    101 // some testing data | 
|         |    102  | 
|         |    103 /* | 
|         |    104 matcher(("a" ~ "b") ~ "c", "abc")  // => true | 
|         |    105 matcher(("a" ~ "b") ~ "c", "ab")   // => false | 
|         |    106  | 
|         |    107 // the supposedly 'evil' regular expression (a*)* b | 
|         |    108 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
|         |    109  | 
|         |    110 matcher(EVIL, "a" * 1000 ++ "b")   // => true | 
|         |    111 matcher(EVIL, "a" * 1000)          // => false | 
|         |    112  | 
|         |    113 // size without simplifications | 
|         |    114 size(der('a', der('a', EVIL)))             // => 28 | 
|         |    115 size(der('a', der('a', der('a', EVIL))))   // => 58 | 
|         |    116  | 
|         |    117 // size with simplification | 
|         |    118 size(simp(der('a', der('a', EVIL))))           // => 8 | 
|         |    119 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
|         |    120  | 
|         |    121 // Python needs around 30 seconds for matching 28 a's with EVIL.  | 
|         |    122 // Java 9 and later increase this to an "astonishing" 40000 a's in | 
|         |    123 // 30 seconds. | 
|         |    124 // | 
|         |    125 // Lets see how long it really takes to match strings with  | 
|         |    126 // 5 Million a's...it should be in the range of a couple | 
|         |    127 // of seconds. | 
|         |    128  | 
|         |    129 def time_needed[T](i: Int, code: => T) = { | 
|         |    130   val start = System.nanoTime() | 
|         |    131   for (j <- 1 to i) code | 
|         |    132   val end = System.nanoTime() | 
|         |    133   "%.5f".format((end - start)/(i * 1.0e9)) | 
|         |    134 } | 
|         |    135  | 
|         |    136 for (i <- 0 to 5000000 by 500000) { | 
|         |    137   println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.")  | 
|         |    138 } | 
|         |    139  | 
|         |    140 // another "power" test case  | 
|         |    141 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE | 
|         |    142  | 
|         |    143 // the Iterator produces the rexp | 
|         |    144 // | 
|         |    145 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) | 
|         |    146 // | 
|         |    147 //    where SEQ is nested 50 times. | 
|         |    148  | 
|         |    149 */ | 
|         |    150  | 
|         |    151 } |