progs/re3.scala
changeset 564 b5d57d7064bb
parent 556 40e22ad45744
child 566 b153c04834eb
equal deleted inserted replaced
563:bddf14e026b3 564:b5d57d7064bb
     1 // Version with simplification during derivatives;
     1 // A version with simplification of derivatives;
     2 // this keeps the regular expressions small, which
     2 // this keeps the regular expressions small, which
     3 // is good for run-time
     3 // is good for run-time
     4  
     4  
     5 
     5 
     6 abstract class Rexp
     6 abstract class Rexp
     9 case class CHAR(c: Char) extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    12 case class STAR(r: Rexp) extends Rexp 
    12 case class STAR(r: Rexp) extends Rexp 
    13 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    13 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    14 
       
    15 
       
    16 // string of a regular expressions - for testing purposes 
       
    17 def string(r: Rexp) : String = r match {
       
    18   case ZERO => "0"
       
    19   case ONE => "1"
       
    20   case CHAR(c) => c.toString 
       
    21   case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})"
       
    22   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
       
    23   case SEQ(ONE, CHAR(c)) => s"1${c}"
       
    24   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
       
    25   case STAR(r) => s"(${string(r)})*"
       
    26   case NTIMES(r, n) =>  s"(${string(r)}){${n}}"
       
    27 }
       
    28 
       
    29 // size of a regular expressions - for testing purposes 
       
    30 def size(r: Rexp) : Int = r match {
       
    31   case ZERO => 1
       
    32   case ONE => 1
       
    33   case CHAR(_) => 1
       
    34   case ALT(r1, r2) => 1 + size(r1) + size(r2)
       
    35   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
    36   case STAR(r) => 1 + size(r)
       
    37   case NTIMES(r, _) => 1 + size(r)
       
    38 }
       
    39 
    14 
    40 
    15 
    41 
    16 
    42 // nullable function: tests whether the regular 
    17 // nullable function: tests whether the regular 
    43 // expression can recognise the empty string
    18 // expression can recognise the empty string
    63   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    38   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    64   case NTIMES(r, i) => 
    39   case NTIMES(r, i) => 
    65     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
    40     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
    66 }
    41 }
    67 
    42 
    68 
    43 def simp(r: Rexp) : Rexp = r match {
    69 
    44   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    70 def simp(r: Rexp, seen: Set[Rexp]) : (Rexp, Set[Rexp]) = r match {
    45     case (ZERO, r2s) => r2s
    71   case ALT(r1, r2) => {
    46     case (r1s, ZERO) => r1s
    72     val (r1s, seen1) = simp(r1, seen)
    47     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
    73     val (r2s, seen2) = simp(r2, seen1)
       
    74     (r1s, r2s) match {
       
    75       case (ZERO, r2s) => (r2s, seen2)
       
    76       case (r1s, ZERO) => (r1s, seen2)
       
    77       case (r1s, r2s) => (ALT(r1s, r2s), seen2)
       
    78     }
       
    79   }
    48   }
    80   case SEQ(r1, r2) =>  {
    49   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
    81     val (r1s, _) = simp(r1, Set())
    50     case (ZERO, _) => ZERO
    82     val (r2s, _) = simp(r2, Set())
    51     case (_, ZERO) => ZERO
    83     if (seen.contains(SEQ(r1s, r2s))) (ZERO, seen)
    52     case (ONE, r2s) => r2s
    84     else (r1s, r2s) match {
    53     case (r1s, ONE) => r1s
    85       case (ZERO, _) => (ZERO, seen)
    54     case (r1s, r2s) => SEQ(r1s, r2s)
    86       case (_, ZERO) => (ZERO, seen)
       
    87       case (ONE, r2s) => (r2s, seen + r2s)
       
    88       case (r1s, ONE) => (r1s, seen + r1s)
       
    89       case (r1s, r2s) => (SEQ(r1s, r2s), seen + SEQ(r1s, r2s))
       
    90     }
       
    91   }
    55   }
    92   case r => if (seen.contains(r)) (ZERO, seen) else (r, seen + r)
    56   case r => r
    93 }
    57 }
    94 
    58 
    95 
    59 
    96 // derivative w.r.t. a string (iterates der)
    60 // derivative w.r.t. a string (iterates der)
    97 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    61 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    98   case Nil => r
    62   case Nil => r
    99   case c::s => ders(s, simp(der(c, r), Set())._1)
    63   case c::s => ders(s, simp(der(c, r)))
   100 }
    64 }
   101 
    65 
   102 
    66 
   103 // main matcher function
    67 // main matcher function
   104 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    68 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
   105 
    69 
   106 
    70 
   107 //one or zero
    71 //one or zero
   108 def OPT(r: Rexp) = ALT(r, ONE)
    72 def OPT(r: Rexp) = ALT(r, ONE)
   109 
       
   110 
       
   111 
       
   112 
       
   113 
       
   114 
       
   115 
       
   116 
       
   117 
       
   118 def time_needed[T](i: Int, code: => T) = {
       
   119   val start = System.nanoTime()
       
   120   for (j <- 1 to i) code
       
   121   val end = System.nanoTime()
       
   122   (end - start)/(i * 1.0e9)
       
   123 }
       
   124 
       
   125 
       
   126 // star example
       
   127 val Tstar = STAR(STAR(STAR(CHAR('a'))))
       
   128 
       
   129 string(ders("".toList, Tstar))
       
   130 size(ders("".toList, Tstar))      // 4
       
   131 string(ders("a".toList, Tstar))
       
   132 size(ders("a".toList, Tstar))     // 11
       
   133 string(ders("aa".toList, Tstar))
       
   134 size(ders("aa".toList, Tstar))    // 11
       
   135 string(ders("aaa".toList, Tstar))
       
   136 size(ders("aaa".toList, Tstar))   // 11
       
   137 string(ders("aaaa".toList, Tstar))
       
   138 size(ders("aaaa".toList, Tstar))  // 11
       
   139 string(ders("aaaa".toList, Tstar))
       
   140 size(ders("aaaaa".toList, Tstar)) // 11
       
   141 string(ders("aaaab".toList, Tstar))
       
   142 size(ders("aaaaab".toList, Tstar)) // 1
       
   143 
       
   144 // test: ("a" | "aa")*
       
   145 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
       
   146 
       
   147 for (i <- 1 to 100 by 1) {
       
   148   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + 
       
   149 	  " size: " + size(ders(("a" * i).toList, EVIL3)))
       
   150 }
       
   151 
       
   152 
       
   153 println("start " + string(EVIL3) + "    " + size(EVIL3))
       
   154 val t1  = der('a', EVIL3)
       
   155 println(string(t1) + "    " + size(t1))
       
   156 val t1s = simp(t1, Set())._1 
       
   157 println("simplified " + string(t1s) + "    " + size(t1s))
       
   158 val t2  = der('a', t1s)
       
   159 println(string(t2) + "    " + size(t2))
       
   160 val t2s = simp(t2, Set())._1 
       
   161 println("simplified " + string(t2s) + "    " + size(t2s))
       
   162 val t3  = der('a', t2s)
       
   163 println(string(t3) + "    " + size(t3))
       
   164 val t3s = simp(t3, Set())._1 
       
   165 println("simplified " + string(t3s) + "    " + size(t3s))
       
   166 val t4  = der('a', t3s)
       
   167 val t4s = simp(t4, Set())._1 
       
   168 
       
   169 
       
   170 
       
   171 
       
   172 
       
   173 
       
   174 
       
   175 
       
   176 println(string(t4) + "    " + size(t4))
       
   177 println("simplified " + string(t4s) + "    " + size(t4s))
       
   178 
    73 
   179 
    74 
   180 // Test Cases
    75 // Test Cases
   181 
    76 
   182 //evil regular expressions: (a?){n} a{n}  and (a*)* b
    77 //evil regular expressions: (a?){n} a{n}  and (a*)* b
   191   (end - start)/(i * 1.0e9)
    86   (end - start)/(i * 1.0e9)
   192 }
    87 }
   193 
    88 
   194 
    89 
   195 //test: (a?{n}) (a{n})
    90 //test: (a?{n}) (a{n})
   196 for (i <- 1 to 8001 by 1000) {
    91 for (i <- 1 to 7001 by 1000) {
   197   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
    92   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
   198 }
    93 }
   199 
    94 
   200 for (i <- 1 to 8001 by 1000) {
    95 for (i <- 1 to 7001 by 1000) {
   201   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
    96   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
   202 }
    97 }
   203 
    98 
   204 //test: (a*)* b
    99 //test: (a*)* b
   205 for (i <- 1 to 6000001 by 500000) {
   100 for (i <- 1 to 6000001 by 500000) {
   207 }
   102 }
   208 
   103 
   209 for (i <- 1 to 6000001 by 500000) {
   104 for (i <- 1 to 6000001 by 500000) {
   210   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
   105   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
   211 }
   106 }
   212 
       
   213 
   107 
   214 
   108 
   215 // size of a regular expressions - for testing purposes 
   109 // size of a regular expressions - for testing purposes 
   216 def size(r: Rexp) : Int = r match {
   110 def size(r: Rexp) : Int = r match {
   217   case ZERO => 1
   111   case ZERO => 1
   233 size(ders("aaa".toList, EVIL2))   // 8
   127 size(ders("aaa".toList, EVIL2))   // 8
   234 size(ders("aaaa".toList, EVIL2))  // 8
   128 size(ders("aaaa".toList, EVIL2))  // 8
   235 size(ders("aaaaa".toList, EVIL2)) // 8
   129 size(ders("aaaaa".toList, EVIL2)) // 8
   236 
   130 
   237 
   131 
       
   132 // test: ("a" | "aa")*
       
   133 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
       
   134 
       
   135 for (i <- 1 to 29 by 1) {
       
   136   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + 
       
   137 	  " size: " + size(ders(("a" * i).toList, EVIL3)))
       
   138 }
   238 
   139 
   239 
   140 
   240 
   141 
   241 // Examples from the Sulzmann paper
       
   242 val sulzmann = STAR(ALT(ALT(CHAR('a'), CHAR('b')), SEQ(CHAR('a'), CHAR('b'))))
       
   243 
       
   244 
       
   245 for (i <- 1 to 4501 by 500) {
       
   246   println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "a" * i))))
       
   247 }
       
   248 
       
   249 for (i <- 1 to 4501 by 500) {
       
   250   println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "ab" * i))))
       
   251 }
       
   252 
       
   253 size(ders("".toList, EVIL2))      // 5
       
   254 size(ders("a".toList, EVIL2))     // 8
       
   255 size(ders("aa".toList, EVIL2))    // 8
       
   256 size(ders("aaa".toList, EVIL2))   // 8
       
   257 size(ders("aaaa".toList, EVIL2))  // 8
       
   258 size(ders("aaaaa".toList, EVIL2)) // 8
       
   259 
       
   260 
       
   261 
       
   262 (((1 + 1a) ~ ((a + aa))*) + (((0 + 1) ~ ((a + aa))*) + ((1 + 1a) ~ ((a + aa))*)))