added benchmark from Fahad
authorChristian Urban <christian dot urban at kcl dot ac dot uk>
Wed, 04 May 2016 17:19:39 +0100
changeset 166 cab1ae6f339a
parent 165 ca4dcfd912cb
child 167 1b2b9000be88
added benchmark from Fahad
progs/scala/re-basic.scala
progs/scala/re-simp.scala
--- a/progs/scala/re-basic.scala	Wed May 04 11:29:37 2016 +0100
+++ b/progs/scala/re-basic.scala	Wed May 04 17:19:39 2016 +0100
@@ -3,6 +3,7 @@
 import scala.language.implicitConversions    
 import scala.language.reflectiveCalls
 import scala.annotation.tailrec   
+import scala.io.Source
 
 abstract class Rexp 
 case object ZERO extends Rexp
@@ -45,6 +46,20 @@
   def $ (r: Rexp) = RECD(s, r)
 }
 
+def Alts(rs: List[Rexp]) : Rexp = rs match {
+  case Nil => ZERO
+  case r::Nil => r
+  case r::rs => ALT(r, Alts(rs))
+}
+def ALTS(rs: Rexp*) = Alts(rs.toList)
+
+def Seqs(rs: List[Rexp]) : Rexp = rs match {
+  case Nil => ONE
+  case r::Nil => r
+  case r::rs => SEQ(r, Seqs(rs))
+}
+def SEQS(rs: Rexp*) = Seqs(rs.toList)
+
 // nullable function: tests whether the regular 
 // expression can recognise the empty string
 def nullable (r: Rexp) : Boolean = r match {
@@ -141,4 +156,66 @@
 println(lexing((K2 | I2).%, "abaa"))
 println(env(lexing((K2 | I2).%, "abaa")))
 
+// time keeping
+def time_needed[T](i: Int, code: => T) = {
+  val start = System.nanoTime()
+  for (j <- 1 to i) code
+  val end = System.nanoTime()
+  (end - start)/(i * 1.0e9)
+}
 
+
+// first benchmark regex 
+
+val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v",
+                  "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R",
+                  "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9")
+
+
+val reWordStar = STAR(reWord)
+val reWordPlus = reWord ~ reWordStar
+val optionSet1 = "-" | "+" | "."
+val optionSet2 = "-" | "."
+val atTheRate = "@"
+val period = "."
+val optionSet3 = "," | ";"
+val whitespace = " "
+
+val re01 = reWordPlus
+val re02 = STAR(optionSet1 ~ reWordPlus)
+val re03 = atTheRate
+val re04 = reWordPlus
+val re05 = STAR(optionSet2 ~ reWordPlus)
+val re06 = period
+val re07 = reWordPlus
+val re08 = re05
+
+val re09 = optionSet3
+val re10 = STAR(whitespace)
+val re11 = reWordPlus
+val re12 = re02
+val re13 = atTheRate
+val re14 = reWordPlus
+val re15 = re05
+val re16 = period
+val re17 = reWordPlus
+val re18 = re05
+
+
+val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08)
+val re09_10 = re09 ~ re10
+val re11_18 = re01_08
+
+val re = re01_08 ~ STAR(re09_10 ~ re11_18)
+
+
+def process(s: String, i: Int) : Unit = {
+  println(i + " " + "%.5f".format(time_needed(1, lexing(re, s))))
+}
+
+val filename = "../tests/emails.txt"
+val filelines = Source.fromFile(filename).getLines.take(22).zipWithIndex
+
+
+filelines.foreach({ case (s: String, i: Int) => process(s, i) })
+
--- a/progs/scala/re-simp.scala	Wed May 04 11:29:37 2016 +0100
+++ b/progs/scala/re-simp.scala	Wed May 04 17:19:39 2016 +0100
@@ -1,6 +1,7 @@
 import scala.language.implicitConversions    
 import scala.language.reflectiveCalls
 import scala.annotation.tailrec   
+import scala.io.Source
 
 abstract class Rexp 
 case object ZERO extends Rexp
@@ -43,6 +44,21 @@
   def $ (r: Rexp) = RECD(s, r)
 }
 
+def Alts(rs: List[Rexp]) : Rexp = rs match {
+  case Nil => ZERO
+  case r::Nil => r
+  case r::rs => ALT(r, Alts(rs))
+}
+def ALTS(rs: Rexp*) = Alts(rs.toList)
+
+def Seqs(rs: List[Rexp]) : Rexp = rs match {
+  case Nil => ONE
+  case r::Nil => r
+  case r::rs => SEQ(r, Seqs(rs))
+}
+def SEQS(rs: Rexp*) = Seqs(rs.toList)
+
+
 // nullable function: tests whether the regular 
 // expression can recognise the empty string
 def nullable (r: Rexp) : Boolean = r match {
@@ -326,3 +342,61 @@
 for (i <- 1 to 2001 by 500) {
   println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i))))
 }
+
+
+// first benchmark regex 
+//=======================
+
+val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v",
+                  "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R",
+                  "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9")
+
+
+val reWordStar = STAR(reWord)
+val reWordPlus = reWord ~ reWordStar
+val optionSet1 = "-" | "+" | "."
+val optionSet2 = "-" | "."
+val atTheRate = "@"
+val period = "."
+val optionSet3 = "," | ";"
+val whitespace = " "
+
+val re01 = reWordPlus
+val re02 = STAR(optionSet1 ~ reWordPlus)
+val re03 = atTheRate
+val re04 = reWordPlus
+val re05 = STAR(optionSet2 ~ reWordPlus)
+val re06 = period
+val re07 = reWordPlus
+val re08 = re05
+
+val re09 = optionSet3
+val re10 = STAR(whitespace)
+val re11 = reWordPlus
+val re12 = re02
+val re13 = atTheRate
+val re14 = reWordPlus
+val re15 = re05
+val re16 = period
+val re17 = reWordPlus
+val re18 = re05
+
+
+val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08)
+val re09_10 = re09 ~ re10
+val re11_18 = re01_08
+
+val re = re01_08 ~ STAR(re09_10 ~ re11_18)
+
+
+def process(s: String, i: Int) : Unit = {
+  println(i + " " + "%.5f".format(time_needed(1, lexing(re, s))))
+}
+
+val filename = "../tests/emails.txt"
+val filelines = Source.fromFile(filename).getLines.take(26).zipWithIndex
+
+
+filelines.foreach({ case (s: String, i: Int) => process(s, i) })
+
+