progs/re4.scala
changeset 623 47a299e7010f
parent 550 71fc4a7a7039
child 631 f618dd4de24a
equal deleted inserted replaced
622:b47e140bcccd 623:47a299e7010f
     1 // Version which attempts to move whole strings, not
     1 // A version which attempts to move whole strings, not
     2 // just characters, under derivatives whenever possible
     2 // just characters, under derivatives whenever possible
     3    
     3    
     4 abstract class Rexp
     4 abstract class Rexp
     5 case object ZERO extends Rexp
     5 case object ZERO extends Rexp
     6 case object ONE extends Rexp
     6 case object ONE extends Rexp
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    11 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    11 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    12 
    12 
    13 // nullable function: tests whether the regular 
    13 // the nullable function: tests whether the regular 
    14 // expression can recognise the empty string
    14 // expression can recognise the empty string
    15 def nullable (r: Rexp) : Boolean = r match {
    15 def nullable (r: Rexp) : Boolean = r match {
    16   case ZERO => false
    16   case ZERO => false
    17   case ONE => true
    17   case ONE => true
    18   case CHAR(_) => false
    18   case CHAR(_) => false
    20   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    20   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    21   case STAR(_) => true
    21   case STAR(_) => true
    22   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    22   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    23 }
    23 }
    24 
    24 
    25 // derivative of a regular expression w.r.t. a character
    25 // the derivative of a regular expression w.r.t. a character
    26 def der (c: Char, r: Rexp) : Rexp = r match {
    26 def der (c: Char, r: Rexp) : Rexp = r match {
    27   case ZERO => ZERO
    27   case ZERO => ZERO
    28   case ONE => ZERO
    28   case ONE => ZERO
    29   case CHAR(d) => if (c == d) ONE else ZERO
    29   case CHAR(d) => if (c == d) ONE else ZERO
    30   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    30   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    50     case (r1s, r2s) => SEQ(r1s, r2s)
    50     case (r1s, r2s) => SEQ(r1s, r2s)
    51   }
    51   }
    52   case r => r
    52   case r => r
    53 }
    53 }
    54 
    54 
    55 //example
    55 // an example
    56 val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
    56 val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
    57 der('x', r)
    57 der('x', r)
    58 der('y', der('x', r))
    58 der('y', der('x', r))
    59 der('z', der('y', der('x', r)))
    59 der('z', der('y', der('x', r)))
    60 simp(der('z', der('y', der('x', r))))
    60 simp(der('z', der('y', der('x', r))))
    61 
    61 
    62 // *new*
    62 // *new*
    63 // derivative w.r.t. a string (iterates der)
    63 // the derivative w.r.t. a string (iterates der)
    64 def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
    64 def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
    65   case (Nil, r) => r
    65   case (Nil, r) => r
    66   case (s, ZERO) => ZERO
    66   case (s, ZERO) => ZERO
    67   case (s, ONE) => if (s == Nil) ONE else ZERO
    67   case (s, ONE) => if (s == Nil) ONE else ZERO
    68   case (s, CHAR(c)) => if (s == List(c)) ONE else 
    68   case (s, CHAR(c)) => if (s == List(c)) ONE else 
    69                        if (s == Nil) CHAR(c) else ZERO
    69                        if (s == Nil) CHAR(c) else ZERO
    70   case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2))
    70   case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2))
    71   case (c::s, r) => ders2(s, simp(der(c, r)))
    71   case (c::s, r) => ders2(s, simp(der(c, r)))
    72 }
    72 }
    73 
    73 
    74 // main matcher function
    74 // the main matcher function
    75 def matcher(r: Rexp, s: String) : Boolean = nullable(ders2(s.toList, r))
    75 def matches(r: Rexp, s: String) : Boolean = nullable(ders2(s.toList, r))
    76 
    76 
    77 
    77 
    78 //one or zero
    78 // one or zero
    79 def OPT(r: Rexp) = ALT(r, ONE)
    79 def OPT(r: Rexp) = ALT(r, ONE)
    80 
    80 
    81 
    81 
    82 // Test Cases
    82 // Test Cases
    83 
    83 
    84 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    84 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    85 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
    85 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
    86 
    86 
    87 
    87 // for measuring time
    88 def time_needed[T](i: Int, code: => T) = {
    88 def time_needed[T](i: Int, code: => T) = {
    89   val start = System.nanoTime()
    89   val start = System.nanoTime()
    90   for (j <- 1 to i) code
    90   for (j <- 1 to i) code
    91   val end = System.nanoTime()
    91   val end = System.nanoTime()
    92   (end - start)/(i * 1.0e9)
    92   "%.5f".format((end - start) / (i * 1.0e9))
    93 }
    93 }
    94 
    94 
    95 //test: (a?{n}) (a{n})
    95 
    96 for (i <- 1 to 7000001 by 500000) {
    96 // test: (a?{n}) (a{n})
    97   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
    97 for (i <- 0 to 7000000 by 500000) {
       
    98   println(s"$i: ${time_needed(2, matches(EVIL1(i), "a" * i))}")
    98 }
    99 }
    99 
   100 
       
   101 
       
   102 // test: (a*)* b
   100 for (i <- 1 to 7000001 by 500000) {
   103 for (i <- 1 to 7000001 by 500000) {
   101   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
   104   println(s"$i: ${time_needed(2, matches(EVIL2, "a" * i))}")
   102 }
       
   103 
       
   104 //test: (a*)* b
       
   105 for (i <- 1 to 7000001 by 500000) {
       
   106   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
       
   107 }
       
   108 
       
   109 for (i <- 1 to 7000001 by 500000) {
       
   110   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
       
   111 }
   105 }
   112 
   106 
   113 
   107 
   114 
   108 
   115 // size of a regular expressions - for testing purposes 
   109 // the size of a regular expressions - for testing purposes 
   116 def size(r: Rexp) : Int = r match {
   110 def size(r: Rexp) : Int = r match {
   117   case ZERO => 1
   111   case ZERO => 1
   118   case ONE => 1
   112   case ONE => 1
   119   case CHAR(_) => 1
   113   case CHAR(_) => 1
   120   case ALT(r1, r2) => 1 + size(r1) + size(r2)
   114   case ALT(r1, r2) => 1 + size(r1) + size(r2)
   138 
   132 
   139 
   133 
   140 // test: ("a" | "aa")*
   134 // test: ("a" | "aa")*
   141 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
   135 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
   142 
   136 
   143 //test: ("" | "a" | "aa")*
   137 // test: ("" | "a" | "aa")*
   144 val EVIL3 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))))
   138 val EVIL3 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))))
   145 
   139 
   146 val t1  = ders2("a".toList, EVIL3)
   140 val t1  = ders2("a".toList, EVIL3)
   147 val t1s = simp(t1) 
   141 val t1s = simp(t1) 
   148 val t2  = ders2("aa".toList, t1s)
   142 val t2  = ders2("aa".toList, t1s)