--- a/progs/scala/re-simp.scala Wed May 04 11:29:37 2016 +0100
+++ b/progs/scala/re-simp.scala Wed May 04 17:19:39 2016 +0100
@@ -1,6 +1,7 @@
import scala.language.implicitConversions
import scala.language.reflectiveCalls
import scala.annotation.tailrec
+import scala.io.Source
abstract class Rexp
case object ZERO extends Rexp
@@ -43,6 +44,21 @@
def $ (r: Rexp) = RECD(s, r)
}
+def Alts(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => ZERO
+ case r::Nil => r
+ case r::rs => ALT(r, Alts(rs))
+}
+def ALTS(rs: Rexp*) = Alts(rs.toList)
+
+def Seqs(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => ONE
+ case r::Nil => r
+ case r::rs => SEQ(r, Seqs(rs))
+}
+def SEQS(rs: Rexp*) = Seqs(rs.toList)
+
+
// nullable function: tests whether the regular
// expression can recognise the empty string
def nullable (r: Rexp) : Boolean = r match {
@@ -326,3 +342,61 @@
for (i <- 1 to 2001 by 500) {
println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i))))
}
+
+
+// first benchmark regex
+//=======================
+
+val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v",
+ "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R",
+ "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9")
+
+
+val reWordStar = STAR(reWord)
+val reWordPlus = reWord ~ reWordStar
+val optionSet1 = "-" | "+" | "."
+val optionSet2 = "-" | "."
+val atTheRate = "@"
+val period = "."
+val optionSet3 = "," | ";"
+val whitespace = " "
+
+val re01 = reWordPlus
+val re02 = STAR(optionSet1 ~ reWordPlus)
+val re03 = atTheRate
+val re04 = reWordPlus
+val re05 = STAR(optionSet2 ~ reWordPlus)
+val re06 = period
+val re07 = reWordPlus
+val re08 = re05
+
+val re09 = optionSet3
+val re10 = STAR(whitespace)
+val re11 = reWordPlus
+val re12 = re02
+val re13 = atTheRate
+val re14 = reWordPlus
+val re15 = re05
+val re16 = period
+val re17 = reWordPlus
+val re18 = re05
+
+
+val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08)
+val re09_10 = re09 ~ re10
+val re11_18 = re01_08
+
+val re = re01_08 ~ STAR(re09_10 ~ re11_18)
+
+
+def process(s: String, i: Int) : Unit = {
+ println(i + " " + "%.5f".format(time_needed(1, lexing(re, s))))
+}
+
+val filename = "../tests/emails.txt"
+val filelines = Source.fromFile(filename).getLines.take(26).zipWithIndex
+
+
+filelines.foreach({ case (s: String, i: Int) => process(s, i) })
+
+