| 
     1 // Main Part 3 about Regular Expression Matching  | 
         | 
     2 //=============================================  | 
         | 
     3   | 
         | 
     4 object M3 { | 
         | 
     5   | 
         | 
     6 // Regular Expressions  | 
         | 
     7 abstract class Rexp  | 
         | 
     8 case object ZERO extends Rexp  | 
         | 
     9 case object ONE extends Rexp  | 
         | 
    10 case class CHAR(c: Char) extends Rexp  | 
         | 
    11 case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives   | 
         | 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence  | 
         | 
    13 case class STAR(r: Rexp) extends Rexp             // star  | 
         | 
    14   | 
         | 
    15   | 
         | 
    16 // some convenience for typing regular expressions  | 
         | 
    17   | 
         | 
    18 //the usual binary choice can be defined in terms of ALTs  | 
         | 
    19 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))  | 
         | 
    20   | 
         | 
    21   | 
         | 
    22 import scala.language.implicitConversions      | 
         | 
    23 import scala.language.reflectiveCalls   | 
         | 
    24   | 
         | 
    25 def charlist2rexp(s: List[Char]): Rexp = s match { | 
         | 
    26   case Nil => ONE  | 
         | 
    27   case c::Nil => CHAR(c)  | 
         | 
    28   case c::s => SEQ(CHAR(c), charlist2rexp(s))  | 
         | 
    29 }  | 
         | 
    30 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)  | 
         | 
    31   | 
         | 
    32 implicit def RexpOps (r: Rexp) = new { | 
         | 
    33   def | (s: Rexp) = ALT(r, s)  | 
         | 
    34   def % = STAR(r)  | 
         | 
    35   def ~ (s: Rexp) = SEQ(r, s)  | 
         | 
    36 }  | 
         | 
    37   | 
         | 
    38 implicit def stringOps (s: String) = new { | 
         | 
    39   def | (r: Rexp) = ALT(s, r)  | 
         | 
    40   def | (r: String) = ALT(s, r)  | 
         | 
    41   def % = STAR(s)  | 
         | 
    42   def ~ (r: Rexp) = SEQ(s, r)  | 
         | 
    43   def ~ (r: String) = SEQ(s, r)  | 
         | 
    44 }  | 
         | 
    45   | 
         | 
    46 // (1) Complete the function nullable according to  | 
         | 
    47 // the definition given in the coursework; this   | 
         | 
    48 // function checks whether a regular expression  | 
         | 
    49 // can match the empty string and Returns a boolean  | 
         | 
    50 // accordingly.  | 
         | 
    51   | 
         | 
    52 def nullable (r: Rexp) : Boolean = ???  | 
         | 
    53   | 
         | 
    54   | 
         | 
    55 // (2) Complete the function der according to  | 
         | 
    56 // the definition given in the coursework; this  | 
         | 
    57 // function calculates the derivative of a   | 
         | 
    58 // regular expression w.r.t. a character.  | 
         | 
    59   | 
         | 
    60 def der (c: Char, r: Rexp) : Rexp = ???  | 
         | 
    61   | 
         | 
    62   | 
         | 
    63 // (3) Implement the flatten function flts. It  | 
         | 
    64 // deletes 0s from a list of regular expressions  | 
         | 
    65 // and also 'spills out', or flattens, nested   | 
         | 
    66 // ALTernativeS.  | 
         | 
    67   | 
         | 
    68 def flts(rs: List[Rexp]) : List[Rexp] = ???  | 
         | 
    69   | 
         | 
    70   | 
         | 
    71   | 
         | 
    72 // (4) Complete the simp function according to  | 
         | 
    73 // the specification given in the coursework description;   | 
         | 
    74 // this function simplifies a regular expression from  | 
         | 
    75 // the inside out, like you would simplify arithmetic   | 
         | 
    76 // expressions; however it does not simplify inside   | 
         | 
    77 // STAR-regular expressions. Use the _.distinct and   | 
         | 
    78 // flts functions.  | 
         | 
    79   | 
         | 
    80 def simp(r: Rexp) : Rexp = ???  | 
         | 
    81   | 
         | 
    82   | 
         | 
    83 // (5) Complete the two functions below; the first   | 
         | 
    84 // calculates the derivative w.r.t. a string; the second  | 
         | 
    85 // is the regular expression matcher taking a regular  | 
         | 
    86 // expression and a string and checks whether the  | 
         | 
    87 // string matches the regular expression  | 
         | 
    88   | 
         | 
    89 def ders (s: List[Char], r: Rexp) : Rexp = ???  | 
         | 
    90   | 
         | 
    91 def matcher(r: Rexp, s: String): Boolean = ???  | 
         | 
    92   | 
         | 
    93   | 
         | 
    94 // (6) Complete the size function for regular  | 
         | 
    95 // expressions according to the specification   | 
         | 
    96 // given in the coursework.  | 
         | 
    97   | 
         | 
    98 def size(r: Rexp): Int = ???  | 
         | 
    99   | 
         | 
   100   | 
         | 
   101 // some testing data  | 
         | 
   102   | 
         | 
   103 /*  | 
         | 
   104 matcher(("a" ~ "b") ~ "c", "abc")  // => true | 
         | 
   105 matcher(("a" ~ "b") ~ "c", "ab")   // => false | 
         | 
   106   | 
         | 
   107 // the supposedly 'evil' regular expression (a*)* b  | 
         | 
   108 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
         | 
   109   | 
         | 
   110 matcher(EVIL, "a" * 1000 ++ "b")   // => true  | 
         | 
   111 matcher(EVIL, "a" * 1000)          // => false  | 
         | 
   112   | 
         | 
   113 // size without simplifications  | 
         | 
   114 size(der('a', der('a', EVIL)))             // => 28 | 
         | 
   115 size(der('a', der('a', der('a', EVIL))))   // => 58 | 
         | 
   116   | 
         | 
   117 // size with simplification  | 
         | 
   118 size(simp(der('a', der('a', EVIL))))           // => 8 | 
         | 
   119 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
         | 
   120   | 
         | 
   121 // Python needs around 30 seconds for matching 28 a's with EVIL.   | 
         | 
   122 // Java 9 and later increase this to an "astonishing" 40000 a's in  | 
         | 
   123 // 30 seconds.  | 
         | 
   124 //  | 
         | 
   125 // Lets see how long it really takes to match strings with   | 
         | 
   126 // 5 Million a's...it should be in the range of a couple  | 
         | 
   127 // of seconds.  | 
         | 
   128   | 
         | 
   129 def time_needed[T](i: Int, code: => T) = { | 
         | 
   130   val start = System.nanoTime()  | 
         | 
   131   for (j <- 1 to i) code  | 
         | 
   132   val end = System.nanoTime()  | 
         | 
   133   "%.5f".format((end - start)/(i * 1.0e9))  | 
         | 
   134 }  | 
         | 
   135   | 
         | 
   136 for (i <- 0 to 5000000 by 500000) { | 
         | 
   137   println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.")  | 
         | 
   138 }  | 
         | 
   139   | 
         | 
   140 // another "power" test case   | 
         | 
   141 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE  | 
         | 
   142   | 
         | 
   143 // the Iterator produces the rexp  | 
         | 
   144 //  | 
         | 
   145 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)  | 
         | 
   146 //  | 
         | 
   147 //    where SEQ is nested 50 times.  | 
         | 
   148   | 
         | 
   149 */  | 
         | 
   150   | 
         | 
   151 }  |