diff -r ca4dcfd912cb -r cab1ae6f339a progs/scala/re-basic.scala --- a/progs/scala/re-basic.scala Wed May 04 11:29:37 2016 +0100 +++ b/progs/scala/re-basic.scala Wed May 04 17:19:39 2016 +0100 @@ -3,6 +3,7 @@ import scala.language.implicitConversions import scala.language.reflectiveCalls import scala.annotation.tailrec +import scala.io.Source abstract class Rexp case object ZERO extends Rexp @@ -45,6 +46,20 @@ def $ (r: Rexp) = RECD(s, r) } +def Alts(rs: List[Rexp]) : Rexp = rs match { + case Nil => ZERO + case r::Nil => r + case r::rs => ALT(r, Alts(rs)) +} +def ALTS(rs: Rexp*) = Alts(rs.toList) + +def Seqs(rs: List[Rexp]) : Rexp = rs match { + case Nil => ONE + case r::Nil => r + case r::rs => SEQ(r, Seqs(rs)) +} +def SEQS(rs: Rexp*) = Seqs(rs.toList) + // nullable function: tests whether the regular // expression can recognise the empty string def nullable (r: Rexp) : Boolean = r match { @@ -141,4 +156,66 @@ println(lexing((K2 | I2).%, "abaa")) println(env(lexing((K2 | I2).%, "abaa"))) +// time keeping +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// first benchmark regex + +val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v", + "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R", + "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9") + + +val reWordStar = STAR(reWord) +val reWordPlus = reWord ~ reWordStar +val optionSet1 = "-" | "+" | "." +val optionSet2 = "-" | "." +val atTheRate = "@" +val period = "." +val optionSet3 = "," | ";" +val whitespace = " " + +val re01 = reWordPlus +val re02 = STAR(optionSet1 ~ reWordPlus) +val re03 = atTheRate +val re04 = reWordPlus +val re05 = STAR(optionSet2 ~ reWordPlus) +val re06 = period +val re07 = reWordPlus +val re08 = re05 + +val re09 = optionSet3 +val re10 = STAR(whitespace) +val re11 = reWordPlus +val re12 = re02 +val re13 = atTheRate +val re14 = reWordPlus +val re15 = re05 +val re16 = period +val re17 = reWordPlus +val re18 = re05 + + +val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08) +val re09_10 = re09 ~ re10 +val re11_18 = re01_08 + +val re = re01_08 ~ STAR(re09_10 ~ re11_18) + + +def process(s: String, i: Int) : Unit = { + println(i + " " + "%.5f".format(time_needed(1, lexing(re, s)))) +} + +val filename = "../tests/emails.txt" +val filelines = Source.fromFile(filename).getLines.take(22).zipWithIndex + + +filelines.foreach({ case (s: String, i: Int) => process(s, i) }) +