progs/scala/re-basic.scala
changeset 166 cab1ae6f339a
parent 157 1fe44fb6d0a4
child 178 2835d13be702
equal deleted inserted replaced
165:ca4dcfd912cb 166:cab1ae6f339a
     1 /* lexer without simplification */
     1 /* lexer without simplification */
     2 
     2 
     3 import scala.language.implicitConversions    
     3 import scala.language.implicitConversions    
     4 import scala.language.reflectiveCalls
     4 import scala.language.reflectiveCalls
     5 import scala.annotation.tailrec   
     5 import scala.annotation.tailrec   
       
     6 import scala.io.Source
     6 
     7 
     7 abstract class Rexp 
     8 abstract class Rexp 
     8 case object ZERO extends Rexp
     9 case object ZERO extends Rexp
     9 case object ONE extends Rexp
    10 case object ONE extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    11 case class CHAR(c: Char) extends Rexp
    43   def ~ (r: Rexp) = SEQ(s, r)
    44   def ~ (r: Rexp) = SEQ(s, r)
    44   def ~ (r: String) = SEQ(s, r)
    45   def ~ (r: String) = SEQ(s, r)
    45   def $ (r: Rexp) = RECD(s, r)
    46   def $ (r: Rexp) = RECD(s, r)
    46 }
    47 }
    47 
    48 
       
    49 def Alts(rs: List[Rexp]) : Rexp = rs match {
       
    50   case Nil => ZERO
       
    51   case r::Nil => r
       
    52   case r::rs => ALT(r, Alts(rs))
       
    53 }
       
    54 def ALTS(rs: Rexp*) = Alts(rs.toList)
       
    55 
       
    56 def Seqs(rs: List[Rexp]) : Rexp = rs match {
       
    57   case Nil => ONE
       
    58   case r::Nil => r
       
    59   case r::rs => SEQ(r, Seqs(rs))
       
    60 }
       
    61 def SEQS(rs: Rexp*) = Seqs(rs.toList)
       
    62 
    48 // nullable function: tests whether the regular 
    63 // nullable function: tests whether the regular 
    49 // expression can recognise the empty string
    64 // expression can recognise the empty string
    50 def nullable (r: Rexp) : Boolean = r match {
    65 def nullable (r: Rexp) : Boolean = r match {
    51   case ZERO => false
    66   case ZERO => false
    52   case ONE => true
    67   case ONE => true
   139 val I2: Rexp = ("id" $ ("ab" | "ba"))  
   154 val I2: Rexp = ("id" $ ("ab" | "ba"))  
   140 
   155 
   141 println(lexing((K2 | I2).%, "abaa"))
   156 println(lexing((K2 | I2).%, "abaa"))
   142 println(env(lexing((K2 | I2).%, "abaa")))
   157 println(env(lexing((K2 | I2).%, "abaa")))
   143 
   158 
   144 
   159 // time keeping
       
   160 def time_needed[T](i: Int, code: => T) = {
       
   161   val start = System.nanoTime()
       
   162   for (j <- 1 to i) code
       
   163   val end = System.nanoTime()
       
   164   (end - start)/(i * 1.0e9)
       
   165 }
       
   166 
       
   167 
       
   168 // first benchmark regex 
       
   169 
       
   170 val reWord = ALTS("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v",
       
   171                   "w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R",
       
   172                   "S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8","9")
       
   173 
       
   174 
       
   175 val reWordStar = STAR(reWord)
       
   176 val reWordPlus = reWord ~ reWordStar
       
   177 val optionSet1 = "-" | "+" | "."
       
   178 val optionSet2 = "-" | "."
       
   179 val atTheRate = "@"
       
   180 val period = "."
       
   181 val optionSet3 = "," | ";"
       
   182 val whitespace = " "
       
   183 
       
   184 val re01 = reWordPlus
       
   185 val re02 = STAR(optionSet1 ~ reWordPlus)
       
   186 val re03 = atTheRate
       
   187 val re04 = reWordPlus
       
   188 val re05 = STAR(optionSet2 ~ reWordPlus)
       
   189 val re06 = period
       
   190 val re07 = reWordPlus
       
   191 val re08 = re05
       
   192 
       
   193 val re09 = optionSet3
       
   194 val re10 = STAR(whitespace)
       
   195 val re11 = reWordPlus
       
   196 val re12 = re02
       
   197 val re13 = atTheRate
       
   198 val re14 = reWordPlus
       
   199 val re15 = re05
       
   200 val re16 = period
       
   201 val re17 = reWordPlus
       
   202 val re18 = re05
       
   203 
       
   204 
       
   205 val re01_08 = SEQS(re01, re02, re03, re04, re05, re06, re07, re08)
       
   206 val re09_10 = re09 ~ re10
       
   207 val re11_18 = re01_08
       
   208 
       
   209 val re = re01_08 ~ STAR(re09_10 ~ re11_18)
       
   210 
       
   211 
       
   212 def process(s: String, i: Int) : Unit = {
       
   213   println(i + " " + "%.5f".format(time_needed(1, lexing(re, s))))
       
   214 }
       
   215 
       
   216 val filename = "../tests/emails.txt"
       
   217 val filelines = Source.fromFile(filename).getLines.take(22).zipWithIndex
       
   218 
       
   219 
       
   220 filelines.foreach({ case (s: String, i: Int) => process(s, i) })
       
   221