1 // Part 1 about Regular Expression Matching  | 
     1 // Part 1 about Regular Expression Matching  | 
     2 //==========================================  | 
     2 //==========================================  | 
     3   | 
     3   | 
     4   | 
     4 // Regular Expressions  | 
     5 abstract class Rexp  | 
     5 abstract class Rexp  | 
     6 case object ZERO extends Rexp  | 
     6 case object ZERO extends Rexp  | 
     7 case object ONE extends Rexp  | 
     7 case object ONE extends Rexp  | 
     8 case class CHAR(c: Char) extends Rexp  | 
     8 case class CHAR(c: Char) extends Rexp  | 
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative   | 
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative   | 
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence  | 
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence  | 
    11 case class STAR(r: Rexp) extends Rexp             // star  | 
    11 case class STAR(r: Rexp) extends Rexp             // star  | 
    12   | 
    12   | 
    13   | 
    13   | 
    14 // some convenience for typing in regular expressions  | 
    14 // some convenience for typing regular expressions  | 
    15   | 
    15   | 
    16 import scala.language.implicitConversions      | 
    16 import scala.language.implicitConversions      | 
    17 import scala.language.reflectiveCalls   | 
    17 import scala.language.reflectiveCalls   | 
    18   | 
    18   | 
    19 def charlist2rexp(s: List[Char]): Rexp = s match { | 
    19 def charlist2rexp(s: List[Char]): Rexp = s match { | 
   100   | 
   100   | 
   101 // size with simplification  | 
   101 // size with simplification  | 
   102 size(simp(der('a', der('a', EVIL))))           // => 8 | 
   102 size(simp(der('a', der('a', EVIL))))           // => 8 | 
   103 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
   103 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 | 
   104   | 
   104   | 
   105 // Java needs around 30 seconds for matching 28 a's with EVIL.   | 
   105 // Python needs around 30 seconds for matching 28 a's with EVIL.   | 
         | 
   106 // Java 9 and later increase this to an "astonishing" 40000 a's in  | 
         | 
   107 // 30 seconds.  | 
   106 //  | 
   108 //  | 
   107 // Lets see how long it really takes to match strings with   | 
   109 // Lets see how long it really takes to match strings with   | 
   108 // 0.5 Million a's...it should be in the range of some  | 
   110 // 5 Million a's...it should be in the range of a couple  | 
   109 // seconds.  | 
   111 // of seconds.  | 
   110   | 
   112   | 
   111 def time_needed[T](i: Int, code: => T) = { | 
   113 def time_needed[T](i: Int, code: => T) = { | 
   112   val start = System.nanoTime()  | 
   114   val start = System.nanoTime()  | 
   113   for (j <- 1 to i) code  | 
   115   for (j <- 1 to i) code  | 
   114   val end = System.nanoTime()  | 
   116   val end = System.nanoTime()  | 
   117   | 
   119   | 
   118 for (i <- 0 to 5000000 by 500000) { | 
   120 for (i <- 0 to 5000000 by 500000) { | 
   119   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))  | 
   121   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))  | 
   120 }  | 
   122 }  | 
   121   | 
   123   | 
         | 
   124 // another "power" test case   | 
         | 
   125 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE  | 
         | 
   126   | 
         | 
   127 // the Iterator produces the rexp  | 
         | 
   128 //  | 
         | 
   129 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)  | 
         | 
   130 //  | 
         | 
   131 //    where SEQ is nested 50 times.  | 
         | 
   132   | 
   122 */  | 
   133 */  | 
   123   | 
   134   |