1 /* lexer without simplification */ |
1 /* lexer without simplification */ |
2 |
2 |
3 import scala.language.implicitConversions |
3 import scala.language.implicitConversions |
4 import scala.language.reflectiveCalls |
4 import scala.language.reflectiveCalls |
5 import scala.annotation.tailrec |
5 import scala.annotation.tailrec |
|
6 import scala.io.Source |
6 |
7 |
7 abstract class Rexp |
8 abstract class Rexp |
8 case object ZERO extends Rexp |
9 case object ZERO extends Rexp |
9 case object ONE extends Rexp |
10 case object ONE extends Rexp |
10 case class CHAR(c: Char) extends Rexp |
11 case class CHAR(c: Char) extends Rexp |
43 def ~ (r: Rexp) = SEQ(s, r) |
44 def ~ (r: Rexp) = SEQ(s, r) |
44 def ~ (r: String) = SEQ(s, r) |
45 def ~ (r: String) = SEQ(s, r) |
45 def $ (r: Rexp) = RECD(s, r) |
46 def $ (r: Rexp) = RECD(s, r) |
46 } |
47 } |
47 |
48 |
|
49 def Alts(rs: List[Rexp]) : Rexp = rs match { |
|
50 case Nil => ZERO |
|
51 case r::Nil => r |
|
52 case r::rs => ALT(r, Alts(rs)) |
|
53 } |
|
54 def ALTS(rs: Rexp*) = Alts(rs.toList) |
|
55 |
|
56 def Seqs(rs: List[Rexp]) : Rexp = rs match { |
|
57 case Nil => ONE |
|
58 case r::Nil => r |
|
59 case r::rs => SEQ(r, Seqs(rs)) |
|
60 } |
|
61 def SEQS(rs: Rexp*) = Seqs(rs.toList) |
|
62 |
48 // nullable function: tests whether the regular |
63 // nullable function: tests whether the regular |
49 // expression can recognise the empty string |
64 // expression can recognise the empty string |
50 def nullable (r: Rexp) : Boolean = r match { |
65 def nullable (r: Rexp) : Boolean = r match { |
51 case ZERO => false |
66 case ZERO => false |
52 case ONE => true |
67 case ONE => true |
139 val I2: Rexp = ("id" $ ("ab" | "ba")) |
154 val I2: Rexp = ("id" $ ("ab" | "ba")) |
140 |
155 |
141 println(lexing((K2 | I2).%, "abaa")) |
156 println(lexing((K2 | I2).%, "abaa")) |
142 println(env(lexing((K2 | I2).%, "abaa"))) |
157 println(env(lexing((K2 | I2).%, "abaa"))) |
143 |
158 |
144 |
159 // time keeping |
|
160 def time_needed[T](i: Int, code: => T) = { |
|
161 val start = System.nanoTime() |
|
162 for (j <- 1 to i) code |
|
163 val end = System.nanoTime() |
|
164 (end - start)/(i * 1.0e9) |
|
165 } |
|
166 |
|
167 |
|
168 // first benchmark regex |
|
169 |
|
170 val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v", |
|
171 "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R", |
|
172 "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9") |
|
173 |
|
174 |
|
175 val reWordStar = STAR(reWord) |
|
176 val reWordPlus = reWord ~ reWordStar |
|
177 val optionSet1 = "-" | "+" | "." |
|
178 val optionSet2 = "-" | "." |
|
179 val atTheRate = "@" |
|
180 val period = "." |
|
181 val optionSet3 = "," | ";" |
|
182 val whitespace = " " |
|
183 |
|
184 val re01 = reWordPlus |
|
185 val re02 = STAR(optionSet1 ~ reWordPlus) |
|
186 val re03 = atTheRate |
|
187 val re04 = reWordPlus |
|
188 val re05 = STAR(optionSet2 ~ reWordPlus) |
|
189 val re06 = period |
|
190 val re07 = reWordPlus |
|
191 val re08 = re05 |
|
192 |
|
193 val re09 = optionSet3 |
|
194 val re10 = STAR(whitespace) |
|
195 val re11 = reWordPlus |
|
196 val re12 = re02 |
|
197 val re13 = atTheRate |
|
198 val re14 = reWordPlus |
|
199 val re15 = re05 |
|
200 val re16 = period |
|
201 val re17 = reWordPlus |
|
202 val re18 = re05 |
|
203 |
|
204 |
|
205 val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08) |
|
206 val re09_10 = re09 ~ re10 |
|
207 val re11_18 = re01_08 |
|
208 |
|
209 val re = re01_08 ~ STAR(re09_10 ~ re11_18) |
|
210 |
|
211 |
|
212 def process(s: String, i: Int) : Unit = { |
|
213 println(i + " " + "%.5f".format(time_needed(1, lexing(re, s)))) |
|
214 } |
|
215 |
|
216 val filename = "../tests/emails.txt" |
|
217 val filelines = Source.fromFile(filename).getLines.take(22).zipWithIndex |
|
218 |
|
219 |
|
220 filelines.foreach({ case (s: String, i: Int) => process(s, i) }) |
|
221 |