progs/scala/re-simp.scala
changeset 166 cab1ae6f339a
parent 158 4e00dd2398ac
equal deleted inserted replaced
165:ca4dcfd912cb 166:cab1ae6f339a
     1 import scala.language.implicitConversions    
     1 import scala.language.implicitConversions    
     2 import scala.language.reflectiveCalls
     2 import scala.language.reflectiveCalls
     3 import scala.annotation.tailrec   
     3 import scala.annotation.tailrec   
       
     4 import scala.io.Source
     4 
     5 
     5 abstract class Rexp 
     6 abstract class Rexp 
     6 case object ZERO extends Rexp
     7 case object ZERO extends Rexp
     7 case object ONE extends Rexp
     8 case object ONE extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    40   def % = STAR(s)
    41   def % = STAR(s)
    41   def ~ (r: Rexp) = SEQ(s, r)
    42   def ~ (r: Rexp) = SEQ(s, r)
    42   def ~ (r: String) = SEQ(s, r)
    43   def ~ (r: String) = SEQ(s, r)
    43   def $ (r: Rexp) = RECD(s, r)
    44   def $ (r: Rexp) = RECD(s, r)
    44 }
    45 }
       
    46 
       
    47 def Alts(rs: List[Rexp]) : Rexp = rs match {
       
    48   case Nil => ZERO
       
    49   case r::Nil => r
       
    50   case r::rs => ALT(r, Alts(rs))
       
    51 }
       
    52 def ALTS(rs: Rexp*) = Alts(rs.toList)
       
    53 
       
    54 def Seqs(rs: List[Rexp]) : Rexp = rs match {
       
    55   case Nil => ONE
       
    56   case r::Nil => r
       
    57   case r::rs => SEQ(r, Seqs(rs))
       
    58 }
       
    59 def SEQS(rs: Rexp*) = Seqs(rs.toList)
       
    60 
    45 
    61 
    46 // nullable function: tests whether the regular 
    62 // nullable function: tests whether the regular 
    47 // expression can recognise the empty string
    63 // expression can recognise the empty string
    48 def nullable (r: Rexp) : Boolean = r match {
    64 def nullable (r: Rexp) : Boolean = r match {
    49   case ZERO => false
    65   case ZERO => false
   324 }
   340 }
   325 
   341 
   326 for (i <- 1 to 2001 by 500) {
   342 for (i <- 1 to 2001 by 500) {
   327   println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i))))
   343   println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i))))
   328 }
   344 }
       
   345 
       
   346 
       
   347 // first benchmark regex 
       
   348 //=======================
       
   349 
       
   350 val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v",
       
   351                   "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R",
       
   352                   "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9")
       
   353 
       
   354 
       
   355 val reWordStar = STAR(reWord)
       
   356 val reWordPlus = reWord ~ reWordStar
       
   357 val optionSet1 = "-" | "+" | "."
       
   358 val optionSet2 = "-" | "."
       
   359 val atTheRate = "@"
       
   360 val period = "."
       
   361 val optionSet3 = "," | ";"
       
   362 val whitespace = " "
       
   363 
       
   364 val re01 = reWordPlus
       
   365 val re02 = STAR(optionSet1 ~ reWordPlus)
       
   366 val re03 = atTheRate
       
   367 val re04 = reWordPlus
       
   368 val re05 = STAR(optionSet2 ~ reWordPlus)
       
   369 val re06 = period
       
   370 val re07 = reWordPlus
       
   371 val re08 = re05
       
   372 
       
   373 val re09 = optionSet3
       
   374 val re10 = STAR(whitespace)
       
   375 val re11 = reWordPlus
       
   376 val re12 = re02
       
   377 val re13 = atTheRate
       
   378 val re14 = reWordPlus
       
   379 val re15 = re05
       
   380 val re16 = period
       
   381 val re17 = reWordPlus
       
   382 val re18 = re05
       
   383 
       
   384 
       
   385 val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08)
       
   386 val re09_10 = re09 ~ re10
       
   387 val re11_18 = re01_08
       
   388 
       
   389 val re = re01_08 ~ STAR(re09_10 ~ re11_18)
       
   390 
       
   391 
       
   392 def process(s: String, i: Int) : Unit = {
       
   393   println(i + " " + "%.5f".format(time_needed(1, lexing(re, s))))
       
   394 }
       
   395 
       
   396 val filename = "../tests/emails.txt"
       
   397 val filelines = Source.fromFile(filename).getLines.take(26).zipWithIndex
       
   398 
       
   399 
       
   400 filelines.foreach({ case (s: String, i: Int) => process(s, i) })
       
   401 
       
   402