templates4/re.scala
changeset 220 3020f8c76baa
parent 219 44161f2c3226
child 288 65731df141a5
equal deleted inserted replaced
219:44161f2c3226 220:3020f8c76baa
     1 // Part 1 about Regular Expression Matching
     1 // Part 1 about Regular Expression Matching
     2 //==========================================
     2 //==========================================
     3 
     3 
     4 
     4 // Regular Expressions
     5 abstract class Rexp
     5 abstract class Rexp
     6 case object ZERO extends Rexp
     6 case object ZERO extends Rexp
     7 case object ONE extends Rexp
     7 case object ONE extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative 
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative 
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
    11 case class STAR(r: Rexp) extends Rexp             // star
    11 case class STAR(r: Rexp) extends Rexp             // star
    12 
    12 
    13 
    13 
    14 // some convenience for typing in regular expressions
    14 // some convenience for typing regular expressions
    15 
    15 
    16 import scala.language.implicitConversions    
    16 import scala.language.implicitConversions    
    17 import scala.language.reflectiveCalls 
    17 import scala.language.reflectiveCalls 
    18 
    18 
    19 def charlist2rexp(s: List[Char]): Rexp = s match {
    19 def charlist2rexp(s: List[Char]): Rexp = s match {
   100 
   100 
   101 // size with simplification
   101 // size with simplification
   102 size(simp(der('a', der('a', EVIL))))           // => 8
   102 size(simp(der('a', der('a', EVIL))))           // => 8
   103 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   103 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   104 
   104 
   105 // Java needs around 30 seconds for matching 28 a's with EVIL. 
   105 // Python needs around 30 seconds for matching 28 a's with EVIL. 
       
   106 // Java 9 and later increase this to an "astonishing" 40000 a's in
       
   107 // 30 seconds.
   106 //
   108 //
   107 // Lets see how long it really takes to match strings with 
   109 // Lets see how long it really takes to match strings with 
   108 // 0.5 Million a's...it should be in the range of some
   110 // 5 Million a's...it should be in the range of a couple
   109 // seconds.
   111 // of seconds.
   110 
   112 
   111 def time_needed[T](i: Int, code: => T) = {
   113 def time_needed[T](i: Int, code: => T) = {
   112   val start = System.nanoTime()
   114   val start = System.nanoTime()
   113   for (j <- 1 to i) code
   115   for (j <- 1 to i) code
   114   val end = System.nanoTime()
   116   val end = System.nanoTime()
   117 
   119 
   118 for (i <- 0 to 5000000 by 500000) {
   120 for (i <- 0 to 5000000 by 500000) {
   119   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   121   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   120 }
   122 }
   121 
   123 
       
   124 // another "power" test case 
       
   125 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE
       
   126 
       
   127 // the Iterator produces the rexp
       
   128 //
       
   129 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
       
   130 //
       
   131 //    where SEQ is nested 50 times.
       
   132 
   122 */
   133 */
   123 
   134