testing4/re.scala
changeset 300 72688efdf17c
parent 249 1997cfcd6334
child 329 8a34b2ebc8cc
equal deleted inserted replaced
299:940b6c68102a 300:72688efdf17c
     1 // Part 1 about Regular Expression Matching
     1 // Core Part about Regular Expression Matching
     2 //==========================================
     2 //=============================================
     3 
     3 
     4 //object CW9a {
     4 object CW9c {
     5 
     5 
     6 // Regular Expressions
     6 // Regular Expressions
     7 abstract class Rexp
     7 abstract class Rexp
     8 case object ZERO extends Rexp
     8 case object ZERO extends Rexp
     9 case object ONE extends Rexp
     9 case object ONE extends Rexp
   123 }
   123 }
   124 
   124 
   125 
   125 
   126 
   126 
   127 // some testing data
   127 // some testing data
   128 /*
   128 
   129 matcher(("a" ~ "b") ~ "c", "abc")  // => true
   129 //matcher(("a" ~ "b") ~ "c", "abc")  // => true
   130 matcher(("a" ~ "b") ~ "c", "ab")   // => false
   130 //matcher(("a" ~ "b") ~ "c", "ab")   // => false
   131 
   131 
   132 // the supposedly 'evil' regular expression (a*)* b
   132 // the supposedly 'evil' regular expression (a*)* b
   133 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   133 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   134 
   134 
   135 matcher(EVIL, "a" * 1000 ++ "b")   // => true
   135 //matcher(EVIL, "a" * 1000 ++ "b")   // => true
   136 matcher(EVIL, "a" * 1000)          // => false
   136 //matcher(EVIL, "a" * 1000)          // => false
   137 
   137 
   138 // size without simplifications
   138 // size without simplifications
   139 size(der('a', der('a', EVIL)))             // => 28
   139 //size(der('a', der('a', EVIL)))             // => 28
   140 size(der('a', der('a', der('a', EVIL))))   // => 58
   140 //size(der('a', der('a', der('a', EVIL))))   // => 58
   141 
   141 
   142 // size with simplification
   142 // size with simplification
   143 size(simp(der('a', der('a', EVIL))))           // => 8
   143 //size(simp(der('a', der('a', EVIL))))           // => 8
   144 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   144 //size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   145 
   145 
   146 // Python needs around 30 seconds for matching 28 a's with EVIL. 
   146 // Python needs around 30 seconds for matching 28 a's with EVIL. 
   147 // Java 9 and later increase this to an "astonishing" 40000 a's in
   147 // Java 9 and later increase this to an "astonishing" 40000 a's in
   148 // around 30 seconds.
   148 // around 30 seconds.
   149 //
   149 //
   156   for (j <- 1 to i) code
   156   for (j <- 1 to i) code
   157   val end = System.nanoTime()
   157   val end = System.nanoTime()
   158   (end - start)/(i * 1.0e9)
   158   (end - start)/(i * 1.0e9)
   159 }
   159 }
   160 
   160 
   161 for (i <- 0 to 5000000 by 500000) {
   161 //for (i <- 0 to 5000000 by 500000) {
   162   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   162 //  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") 
   163 }
   163 //}
   164 
   164 
   165 // another "power" test case 
   165 // another "power" test case 
   166 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
   166 //simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
   167 
   167 
   168 // the Iterator produces the rexp
   168 // the Iterator produces the rexp
   169 //
   169 //
   170 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
   170 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
   171 //
   171 //
   172 //    where SEQ is nested 100 times.
   172 //    where SEQ is nested 100 times.
   173  
   173  
   174 */
       
   175 
   174 
   176 //}
   175 
       
   176 }