1 // Part 1 about Regular Expression Matching |
1 // Part 1 about Regular Expression Matching |
2 //========================================== |
2 //========================================== |
3 |
3 |
4 |
4 // Regular Expressions |
5 abstract class Rexp |
5 abstract class Rexp |
6 case object ZERO extends Rexp |
6 case object ZERO extends Rexp |
7 case object ONE extends Rexp |
7 case object ONE extends Rexp |
8 case class CHAR(c: Char) extends Rexp |
8 case class CHAR(c: Char) extends Rexp |
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative |
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative |
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence |
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence |
11 case class STAR(r: Rexp) extends Rexp // star |
11 case class STAR(r: Rexp) extends Rexp // star |
12 |
12 |
13 |
13 |
14 // some convenience for typing in regular expressions |
14 // some convenience for typing regular expressions |
15 |
15 |
16 import scala.language.implicitConversions |
16 import scala.language.implicitConversions |
17 import scala.language.reflectiveCalls |
17 import scala.language.reflectiveCalls |
18 |
18 |
19 def charlist2rexp(s: List[Char]): Rexp = s match { |
19 def charlist2rexp(s: List[Char]): Rexp = s match { |
100 |
100 |
101 // size with simplification |
101 // size with simplification |
102 size(simp(der('a', der('a', EVIL)))) // => 8 |
102 size(simp(der('a', der('a', EVIL)))) // => 8 |
103 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 |
103 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 |
104 |
104 |
105 // Java needs around 30 seconds for matching 28 a's with EVIL. |
105 // Python needs around 30 seconds for matching 28 a's with EVIL. |
|
106 // Java 9 and later increase this to an "astonishing" 40000 a's in |
|
107 // 30 seconds. |
106 // |
108 // |
107 // Lets see how long it really takes to match strings with |
109 // Lets see how long it really takes to match strings with |
108 // 0.5 Million a's...it should be in the range of some |
110 // 5 Million a's...it should be in the range of a couple |
109 // seconds. |
111 // of seconds. |
110 |
112 |
111 def time_needed[T](i: Int, code: => T) = { |
113 def time_needed[T](i: Int, code: => T) = { |
112 val start = System.nanoTime() |
114 val start = System.nanoTime() |
113 for (j <- 1 to i) code |
115 for (j <- 1 to i) code |
114 val end = System.nanoTime() |
116 val end = System.nanoTime() |
117 |
119 |
118 for (i <- 0 to 5000000 by 500000) { |
120 for (i <- 0 to 5000000 by 500000) { |
119 println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i)))) |
121 println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i)))) |
120 } |
122 } |
121 |
123 |
|
124 // another "power" test case |
|
125 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE |
|
126 |
|
127 // the Iterator produces the rexp |
|
128 // |
|
129 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
|
130 // |
|
131 // where SEQ is nested 50 times. |
|
132 |
122 */ |
133 */ |
123 |
134 |