40 def % = STAR(s) |
41 def % = STAR(s) |
41 def ~ (r: Rexp) = SEQ(s, r) |
42 def ~ (r: Rexp) = SEQ(s, r) |
42 def ~ (r: String) = SEQ(s, r) |
43 def ~ (r: String) = SEQ(s, r) |
43 def $ (r: Rexp) = RECD(s, r) |
44 def $ (r: Rexp) = RECD(s, r) |
44 } |
45 } |
|
46 |
|
47 def Alts(rs: List[Rexp]) : Rexp = rs match { |
|
48 case Nil => ZERO |
|
49 case r::Nil => r |
|
50 case r::rs => ALT(r, Alts(rs)) |
|
51 } |
|
52 def ALTS(rs: Rexp*) = Alts(rs.toList) |
|
53 |
|
54 def Seqs(rs: List[Rexp]) : Rexp = rs match { |
|
55 case Nil => ONE |
|
56 case r::Nil => r |
|
57 case r::rs => SEQ(r, Seqs(rs)) |
|
58 } |
|
59 def SEQS(rs: Rexp*) = Seqs(rs.toList) |
|
60 |
45 |
61 |
46 // nullable function: tests whether the regular |
62 // nullable function: tests whether the regular |
47 // expression can recognise the empty string |
63 // expression can recognise the empty string |
48 def nullable (r: Rexp) : Boolean = r match { |
64 def nullable (r: Rexp) : Boolean = r match { |
49 case ZERO => false |
65 case ZERO => false |
324 } |
340 } |
325 |
341 |
326 for (i <- 1 to 2001 by 500) { |
342 for (i <- 1 to 2001 by 500) { |
327 println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i)))) |
343 println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i)))) |
328 } |
344 } |
|
345 |
|
346 |
|
347 // first benchmark regex |
|
348 //======================= |
|
349 |
|
350 val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v", |
|
351 "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R", |
|
352 "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9") |
|
353 |
|
354 |
|
355 val reWordStar = STAR(reWord) |
|
356 val reWordPlus = reWord ~ reWordStar |
|
357 val optionSet1 = "-" | "+" | "." |
|
358 val optionSet2 = "-" | "." |
|
359 val atTheRate = "@" |
|
360 val period = "." |
|
361 val optionSet3 = "," | ";" |
|
362 val whitespace = " " |
|
363 |
|
364 val re01 = reWordPlus |
|
365 val re02 = STAR(optionSet1 ~ reWordPlus) |
|
366 val re03 = atTheRate |
|
367 val re04 = reWordPlus |
|
368 val re05 = STAR(optionSet2 ~ reWordPlus) |
|
369 val re06 = period |
|
370 val re07 = reWordPlus |
|
371 val re08 = re05 |
|
372 |
|
373 val re09 = optionSet3 |
|
374 val re10 = STAR(whitespace) |
|
375 val re11 = reWordPlus |
|
376 val re12 = re02 |
|
377 val re13 = atTheRate |
|
378 val re14 = reWordPlus |
|
379 val re15 = re05 |
|
380 val re16 = period |
|
381 val re17 = reWordPlus |
|
382 val re18 = re05 |
|
383 |
|
384 |
|
385 val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08) |
|
386 val re09_10 = re09 ~ re10 |
|
387 val re11_18 = re01_08 |
|
388 |
|
389 val re = re01_08 ~ STAR(re09_10 ~ re11_18) |
|
390 |
|
391 |
|
392 def process(s: String, i: Int) : Unit = { |
|
393 println(i + " " + "%.5f".format(time_needed(1, lexing(re, s)))) |
|
394 } |
|
395 |
|
396 val filename = "../tests/emails.txt" |
|
397 val filelines = Source.fromFile(filename).getLines.take(26).zipWithIndex |
|
398 |
|
399 |
|
400 filelines.foreach({ case (s: String, i: Int) => process(s, i) }) |
|
401 |
|
402 |