progs/re2.scala
changeset 120 3e71efb25ce9
parent 93 4794759139ea
child 121 43c116860e47
equal deleted inserted replaced
119:a6684e8961d0 120:3e71efb25ce9
     1     
     1     
     2 abstract class Rexp {
     2 abstract class Rexp 
     3   def simp : Rexp = this
       
     4 }
       
     5 
     3 
     6 case object NULL extends Rexp
     4 case object NULL extends Rexp
     7 case object EMPTY extends Rexp
     5 case object EMPTY extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     6 case class CHAR(c: Char) extends Rexp
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
     7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    10   override def simp = (r1.simp, r2.simp) match {
     8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    11     case (NULL, r) => r
     9 case class STAR(r: Rexp) extends Rexp 
    12     case (r, NULL) => r
    10 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    13     case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
       
    14     case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
       
    15     case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
       
    16   }
       
    17 }
       
    18 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
       
    19   override def simp = (r1.simp, r2.simp) match {
       
    20     case (NULL, _) => NULL
       
    21     case (_, NULL) => NULL
       
    22     case (EMPTY, r) => r
       
    23     case (r, EMPTY) => r
       
    24     case (r1, r2) => SEQ(r1, r2)
       
    25   }
       
    26 }
       
    27 case class STAR(r: Rexp) extends Rexp {
       
    28   override def simp = r.simp match {
       
    29     case NULL => EMPTY
       
    30     case EMPTY => EMPTY
       
    31     case r => STAR(r)
       
    32   }
       
    33 }
       
    34 
    11 
    35 // some convenience for typing in regular expressions
    12 // some convenience for typing in regular expressions
    36 def charlist2rexp(s : List[Char]) : Rexp = s match {
    13 def charlist2rexp(s : List[Char]) : Rexp = s match {
    37   case Nil => EMPTY
    14   case Nil => EMPTY
    38   case c::Nil => CHAR(c)
    15   case c::Nil => CHAR(c)
    48   case EMPTY => true
    25   case EMPTY => true
    49   case CHAR(_) => false
    26   case CHAR(_) => false
    50   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    27   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    51   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    28   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    52   case STAR(_) => true
    29   case STAR(_) => true
       
    30   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    53 }
    31 }
    54 
    32 
    55 // derivative of a regular expression w.r.t. a character
    33 // derivative of a regular expression w.r.t. a character
    56 def der (c: Char, r: Rexp) : Rexp = r match {
    34 def der (c: Char, r: Rexp) : Rexp = r match {
    57   case NULL => NULL
    35   case NULL => NULL
    60   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    38   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    61   case SEQ(r1, r2) => 
    39   case SEQ(r1, r2) => 
    62     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    40     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    63     else SEQ(der(c, r1), r2)
    41     else SEQ(der(c, r1), r2)
    64   case STAR(r) => SEQ(der(c, r), STAR(r))
    42   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    43   case NTIMES(r, i) => 
       
    44     if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1))
    65 }
    45 }
    66 
    46 
    67 // derivative w.r.t. a string (iterates der)
    47 // derivative w.r.t. a string (iterates der)
    68 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    48 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    69   case Nil => r
    49   case Nil => r
    70   case c::s => ders(s, der(c, r).simp)
    50   case c::s => ders(s, der(c, r))
    71 }
    51 }
    72 
    52 
    73 // main matcher function
    53 // main matcher function
    74 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    54 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    75 
    55 
    76 
    56 
    77 //one or zero
    57 //one or zero
    78 def OPT(r: Rexp) = ALT(r, EMPTY)
    58 def OPT(r: Rexp) = ALT(r, EMPTY)
    79 
    59 
    80 //n-times
    60 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
    81 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
       
    82   case 0 => EMPTY
       
    83   case 1 => r
       
    84   case n => SEQ(r, NTIMES(r, n - 1))
       
    85 }
       
    86 
       
    87 def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
       
    88 
    61 
    89 def time_needed[T](i: Int, code: => T) = {
    62 def time_needed[T](i: Int, code: => T) = {
    90   val start = System.nanoTime()
    63   val start = System.nanoTime()
    91   for (j <- 1 to i) code
    64   for (j <- 1 to i) code
    92   val end = System.nanoTime()
    65   val end = System.nanoTime()
    93   (end - start)/(i * 1.0e9)
    66   (end - start)/(i * 1.0e9)
    94 }
    67 }
    95 
    68 
    96 for (i <- 1 to 100) {
    69 for (i <- 1 to 100) {
    97   println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
    70   println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
    98 }
    71 }
    99 
    72 
   100 
    73 
       
    74 for (i <- 1 to 1000 by 50) {
       
    75   println(i + " " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
       
    76 }
       
    77 
       
    78