testing4/re.scala
changeset 221 9e7897f25e13
parent 215 438459a8e48b
child 228 33c2655be47d
equal deleted inserted replaced
220:3020f8c76baa 221:9e7897f25e13
     1 // Part 1 about Regular Expression Matching
     1 // Part 1 about Regular Expression Matching
     2 //==========================================
     2 //==========================================
     3 
     3 
     4 object CW8a {
     4 //object CW9a {
     5 
     5 
       
     6 // Regular Expressions
     6 abstract class Rexp
     7 abstract class Rexp
     7 case object ZERO extends Rexp
     8 case object ZERO extends Rexp
     8 case object ONE extends Rexp
     9 case object ONE extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    36   def % = STAR(s)
    37   def % = STAR(s)
    37   def ~ (r: Rexp) = SEQ(s, r)
    38   def ~ (r: Rexp) = SEQ(s, r)
    38   def ~ (r: String) = SEQ(s, r)
    39   def ~ (r: String) = SEQ(s, r)
    39 }
    40 }
    40 
    41 
    41 // (1a) Complete the function nullable according to
    42 // (1) Complete the function nullable according to
    42 // the definition given in the coursework; this 
    43 // the definition given in the coursework; this 
    43 // function checks whether a regular expression
    44 // function checks whether a regular expression
    44 // can match the empty string
    45 // can match the empty string and Returns a boolean
       
    46 // accordingly.
    45 
    47 
    46 def nullable (r: Rexp) : Boolean = r match {
    48 def nullable (r: Rexp) : Boolean = r match {
    47   case ZERO => false
    49   case ZERO => false
    48   case ONE => true
    50   case ONE => true
    49   case CHAR(_) => false
    51   case CHAR(_) => false
    50   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    52   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    51   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    53   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    52   case STAR(_) => true
    54   case STAR(_) => true
    53 }
    55 }
    54 
    56 
    55 // (1b) Complete the function der according to
    57 // (2) Complete the function der according to
    56 // the definition given in the coursework; this
    58 // the definition given in the coursework; this
    57 // function calculates the derivative of a 
    59 // function calculates the derivative of a 
    58 // regular expression w.r.t. a character
    60 // regular expression w.r.t. a character.
    59 
    61 
    60 def der (c: Char, r: Rexp) : Rexp = r match {
    62 def der (c: Char, r: Rexp) : Rexp = r match {
    61   case ZERO => ZERO
    63   case ZERO => ZERO
    62   case ONE => ZERO
    64   case ONE => ZERO
    63   case CHAR(d) => if (c == d) ONE else ZERO
    65   case CHAR(d) => if (c == d) ONE else ZERO
    66     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    68     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    67     else SEQ(der(c, r1), r2)
    69     else SEQ(der(c, r1), r2)
    68   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    70   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    69 }
    71 }
    70 
    72 
    71 // (1c) Complete the function der according to
    73 // (3) Complete the simp function according to
    72 // the specification given in the coursework; this
    74 // the specification given in the coursework; this
    73 // function simplifies a regular expression;
    75 // function simplifies a regular expression from
    74 // however it does not simplify inside STAR-regular
    76 // the inside out, like you would simplify arithmetic 
    75 // expressions
    77 // expressions; however it does not simplify inside 
       
    78 // STAR-regular expressions.
    76 
    79 
    77 def simp(r: Rexp) : Rexp = r match {
    80 def simp(r: Rexp) : Rexp = r match {
    78   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    81   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    79     case (ZERO, r2s) => r2s
    82     case (ZERO, r2s) => r2s
    80     case (r1s, ZERO) => r1s
    83     case (r1s, ZERO) => r1s
    88     case (r1s, r2s) => SEQ(r1s, r2s)
    91     case (r1s, r2s) => SEQ(r1s, r2s)
    89   }
    92   }
    90   case r => r
    93   case r => r
    91 }
    94 }
    92 
    95 
    93 // (1d) Complete the two functions below; the first 
    96 
       
    97 // (4) Complete the two functions below; the first 
    94 // calculates the derivative w.r.t. a string; the second
    98 // calculates the derivative w.r.t. a string; the second
    95 // is the regular expression matcher taking a regular
    99 // is the regular expression matcher taking a regular
    96 // expression and a string and checks whether the
   100 // expression and a string and checks whether the
    97 // string matches the regular expression
   101 // string matches the regular expression.
    98 
   102 
    99 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   103 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   100   case Nil => r
   104   case Nil => r
   101   case c::s => ders(s, simp(der(c, r)))
   105   case c::s => ders(s, simp(der(c, r)))
   102 }
   106 }
   103 
   107 
   104 // main matcher function
   108 // main matcher function
   105 def matcher(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r))
   109 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r))
   106 
   110 
   107 // (1e) Complete the size function for regular
   111 // (5) Complete the size function for regular
   108 // expressions  according to the specification 
   112 // expressions according to the specification 
   109 // given in the coursework.
   113 // given in the coursework.
       
   114 
   110 
   115 
   111 def size(r: Rexp): Int = r match {
   116 def size(r: Rexp): Int = r match {
   112   case ZERO => 1
   117   case ZERO => 1
   113   case ONE => 1
   118   case ONE => 1
   114   case CHAR(_) => 1
   119   case CHAR(_) => 1
   136 
   141 
   137 // size with simplification
   142 // size with simplification
   138 size(simp(der('a', der('a', EVIL))))           // => 8
   143 size(simp(der('a', der('a', EVIL))))           // => 8
   139 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   144 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   140 
   145 
   141 // Java needs around 30 seconds for matching 28 a's with EVIL. 
   146 // Python needs around 30 seconds for matching 28 a's with EVIL. 
       
   147 // Java 9 and later increase this to an "astonishing" 40000 a's in
       
   148 // around 30 seconds.
   142 //
   149 //
   143 // Lets see how long it takes to match strings with 
   150 // Lets see how long it takes to match strings with 
   144 // 0.5 Million a's...it should be in the range of some
   151 // 5 Million a's...it should be in the range of a 
   145 // seconds.
   152 // couple of seconds.
   146 
   153 
   147 def time_needed[T](i: Int, code: => T) = {
   154 def time_needed[T](i: Int, code: => T) = {
   148   val start = System.nanoTime()
   155   val start = System.nanoTime()
   149   for (j <- 1 to i) code
   156   for (j <- 1 to i) code
   150   val end = System.nanoTime()
   157   val end = System.nanoTime()
   152 }
   159 }
   153 
   160 
   154 for (i <- 0 to 5000000 by 500000) {
   161 for (i <- 0 to 5000000 by 500000) {
   155   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   162   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   156 }
   163 }
       
   164 
       
   165 // another "power" test case 
       
   166 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
       
   167 
       
   168 // the Iterator produces the rexp
       
   169 //
       
   170 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
       
   171 //
       
   172 //    where SEQ is nested 100 times.
       
   173  
   157 */
   174 */
   158 
   175 
   159 }
   176 //}