progs/re2.scala
changeset 477 b78664a24f5d
parent 471 9476086849ad
child 478 48b842c997c7
equal deleted inserted replaced
476:d922cc83b70c 477:b78664a24f5d
     1 // version with explicit an n-times regular expression
     1 // Version with explicit an explicit n-times regular expression;
     2 // this keeps the regular expression small
     2 // this keeps the overall regular expression in the EVIL1 regular 
       
     3 // expression small
     3 
     4 
     4 abstract class Rexp 
     5 abstract class Rexp 
     5 case object ZERO extends Rexp
     6 case object ZERO extends Rexp
     6 case object ONE extends Rexp
     7 case object ONE extends Rexp
     7 case class CHAR(c: Char) extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    11 case class STAR(r: Rexp) extends Rexp 
    11 case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor
    12 case class NTIMES(r: Rexp, n: Int) extends Rexp   //explicit constructor for n-times
       
    13 
    12 
    14 
    13 def nullable (r: Rexp) : Boolean = r match {
    15 def nullable (r: Rexp) : Boolean = r match {
    14   case ZERO => false
    16   case ZERO => false
    15   case ONE => true
    17   case ONE => true
    16   case CHAR(_) => false
    18   case CHAR(_) => false
    18   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    20   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    19   case STAR(_) => true
    21   case STAR(_) => true
    20   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    22   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    21 }
    23 }
    22 
    24 
       
    25 
    23 def der (c: Char, r: Rexp) : Rexp = r match {
    26 def der (c: Char, r: Rexp) : Rexp = r match {
    24   case ZERO => ZERO
    27   case ZERO => ZERO
    25   case ONE => ZERO
    28   case ONE => ZERO
    26   case CHAR(d) => if (c == d) ONE else ZERO
    29   case CHAR(d) => if (c == d) ONE else ZERO
    27   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    30   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    28   case SEQ(r1, r2) => 
    31   case SEQ(r1, r2) => 
    29     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    32     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    30     else SEQ(der(c, r1), r2)
    33     else SEQ(der(c, r1), r2)
    31   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    34   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    32   case NTIMES(r1, i) => 
    35   case NTIMES(r1, i) => 
    33     if (i == 0) ZERO else der(c, SEQ(r1, NTIMES(r1, i - 1)))
    36     if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
    34 }
    37 }
    35 
    38 
    36 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    39 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    37   case Nil => r
    40   case Nil => r
    38   case c::s => ders(s, der(c, r))
    41   case c::s => ders(s, der(c, r))
    39 }
    42 }
    40 
    43 
    41 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    44 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    42 
    45 
    43 
    46 
    44 //optional: one or zero times
    47 //optional regular expression: one or zero times
    45 def OPT(r: Rexp) = ALT(r, ONE)
    48 def OPT(r: Rexp) = ALT(r, ONE)
       
    49 
       
    50 
       
    51 // Test Cases
    46 
    52 
    47 //evil regular expressions
    53 //evil regular expressions
    48 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    54 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    49 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
    55 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
    50 
    56 
    55   (end - start)/(i * 1.0e9)
    61   (end - start)/(i * 1.0e9)
    56 }
    62 }
    57 
    63 
    58 
    64 
    59 //test: (a?{n}) (a{n})
    65 //test: (a?{n}) (a{n})
    60 for (i <- 1 to 1001 by 10) {
    66 for (i <- 1 to 1201 by 100) {
    61   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
    67   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
    62 }
    68 }
    63 
    69 
    64 for (i <- 1 to 1001 by 10) {
    70 for (i <- 1 to 1201 by 100) {
    65   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
    71   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
    66 }
    72 }
    67 
    73 
    68 
    74 
    69 //test: (a*)* b
    75 //test: (a*)* b
    74 for (i <- 1 to 20) {
    80 for (i <- 1 to 20) {
    75   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
    81   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
    76 }
    82 }
    77 
    83 
    78 
    84 
       
    85 
       
    86 // size of a regular expressions - for testing purposes 
       
    87 def size(r: Rexp) : Int = r match {
       
    88   case ZERO => 1
       
    89   case ONE => 1
       
    90   case CHAR(_) => 1
       
    91   case ALT(r1, r2) => 1 + size(r1) + size(r2)
       
    92   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
    93   case STAR(r) => 1 + size(r)
       
    94   case NTIMES(r, _) => 1 + size(r)
       
    95 }
       
    96 
       
    97 // EVIL1(n) has now a constant size, no matter
       
    98 // what n is 
       
    99 
       
   100 size(EVIL1(1))  // 7
       
   101 size(EVIL1(3))  // 7
       
   102 size(EVIL1(5))  // 7
       
   103 size(EVIL1(7))  // 7
       
   104 
       
   105 
       
   106 // but the size of the derivatives can grow 
       
   107 // quite dramatically
       
   108 
       
   109 size(ders("".toList, EVIL2))     // 5
       
   110 size(ders("a".toList, EVIL2))    // 12
       
   111 size(ders("aa".toList, EVIL2))   // 28
       
   112 size(ders("aaa".toList, EVIL2))  // 58
       
   113 size(ders("aaaa".toList, EVIL2)) // 116