progs/re1.scala
changeset 504 3390e863d796
parent 498 cd2d192775a4
child 513 7b9a0782a804
equal deleted inserted replaced
503:3b9496db3fb9 504:3390e863d796
       
     1 // Simple matcher for basic regular expressions
       
     2 
       
     3 object Test {
     1 
     4 
     2 abstract class Rexp
     5 abstract class Rexp
     3 case object ZERO extends Rexp                    // matches nothing
     6 case object ZERO extends Rexp                    // matches nothing
     4 case object ONE extends Rexp                     // matches the empty string
     7 case object ONE extends Rexp                     // matches the empty string
     5 case class CHAR(c: Char) extends Rexp            // matches a character c
     8 case class CHAR(c: Char) extends Rexp            // matches a character c
    14   case ONE => true
    17   case ONE => true
    15   case CHAR(_) => false
    18   case CHAR(_) => false
    16   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    19   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    17   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    20   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    18   case STAR(_) => true
    21   case STAR(_) => true
       
    22 
       
    23 }
    19 
    24 
    20 }
    25 }
    21 
    26 
    22 // derivative of a regular expression w.r.t. a character
    27 // derivative of a regular expression w.r.t. a character
    23 def der (c: Char, r: Rexp) : Rexp = r match {
    28 def der (c: Char, r: Rexp) : Rexp = r match {
    38 }
    43 }
    39 
    44 
    40 // main matcher function
    45 // main matcher function
    41 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    46 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    42 
    47 
       
    48 
    43 //examples from the homework
    49 //examples from the homework
       
    50 
    44 val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
    51 val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
    45 der('a', r)
    52 der('a', r)
    46 der('b', r)
    53 der('b', r)
    47 der('c', r)
    54 der('c', r)
    48 
    55 
    49 //optional (one or zero times)
    56 //optional regular expression (one or zero times)
    50 def OPT(r: Rexp) = ALT(r, ONE)
    57 def OPT(r: Rexp) = ALT(r, ONE)
    51 
    58 
    52 //n-times (explicitly expanded)
    59 //n-times regular expression (explicitly expanded)
    53 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
    60 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
    54   case 0 => ONE
    61   case 0 => ONE
    55   case 1 => r
    62   case 1 => r
    56   case n => SEQ(r, NTIMES(r, n - 1))
    63   case n => SEQ(r, NTIMES(r, n - 1))
    57 }
    64 }
       
    65 
       
    66 
       
    67 // Test Cases
    58 
    68 
    59 // the evil regular expression  a?{n} a{n}
    69 // the evil regular expression  a?{n} a{n}
    60 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    70 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    61 
    71 
    62 // the evil regular expression (a*)*b
    72 // the evil regular expression (a*)*b
    67   val start = System.nanoTime()
    77   val start = System.nanoTime()
    68   for (j <- 1 to i) code
    78   for (j <- 1 to i) code
    69   val end = System.nanoTime()
    79   val end = System.nanoTime()
    70   (end - start)/(i * 1.0e9)
    80   (end - start)/(i * 1.0e9)
    71 }
    81 }
       
    82 
    72 
    83 
    73 //test: (a?{n}) (a{n})
    84 //test: (a?{n}) (a{n})
    74 for (i <- 1 to 20) {
    85 for (i <- 1 to 20) {
    75   println(i + ": " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
    86   println(i + ": " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
    76 }
    87 }
    86 
    97 
    87 for (i <- 1 to 20) {
    98 for (i <- 1 to 20) {
    88   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
    99   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
    89 }
   100 }
    90 
   101 
       
   102 
       
   103 
       
   104 
       
   105 // size of a regular expressions - for testing purposes 
       
   106 def size(r: Rexp) : Int = r match {
       
   107   case ZERO => 1
       
   108   case ONE => 1
       
   109   case CHAR(_) => 1
       
   110   case ALT(r1, r2) => 1 + size(r1) + size(r2)
       
   111   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
   112   case STAR(r) => 1 + size(r)
       
   113 }
       
   114 
       
   115 // the expicit expansion in EVIL1(n) increases
       
   116 // drastically its size
       
   117 
       
   118 size(EVIL1(1))  // 5
       
   119 size(EVIL1(3))  // 17
       
   120 size(EVIL1(5))  // 29
       
   121 size(EVIL1(7))  // 41
       
   122 
       
   123 
       
   124 // given a regular expression and building successive
       
   125 // derivatives might result into bigger and bigger
       
   126 // regular expressions...here is an example for this:
       
   127 
       
   128 val BIG_aux = STAR(ALT(CHAR('a'), CHAR('b')))
       
   129 val BIG = SEQ(BIG_aux, SEQ(CHAR('a'),SEQ(CHAR('b'), BIG_aux)))
       
   130 
       
   131 size(ders("".toList, BIG))              // 13
       
   132 size(ders("ab".toList, BIG))            // 51
       
   133 size(ders("abab".toList, BIG))          // 112
       
   134 size(ders("ababab".toList, BIG))        // 191
       
   135 size(ders("abababab".toList, BIG))      // 288
       
   136 size(ders("ababababab".toList, BIG))    // 403
       
   137 size(ders("abababababab".toList, BIG))  // 536
       
   138 
       
   139 
       
   140 size(ders(("ab" * 200).toList, BIG))    // 366808
       
   141