progs/re3.scala
changeset 550 71fc4a7a7039
parent 544 748207ad3ef0
child 556 40e22ad45744
equal deleted inserted replaced
549:352d15782d35 550:71fc4a7a7039
     9 case class CHAR(c: Char) extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    12 case class STAR(r: Rexp) extends Rexp 
    12 case class STAR(r: Rexp) extends Rexp 
    13 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    13 case class NTIMES(r: Rexp, n: Int) extends Rexp 
       
    14 
       
    15 
       
    16 // string of a regular expressions - for testing purposes 
       
    17 def string(r: Rexp) : String = r match {
       
    18   case ZERO => "0"
       
    19   case ONE => "1"
       
    20   case CHAR(c) => c.toString 
       
    21   case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})"
       
    22   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
       
    23   case SEQ(ONE, CHAR(c)) => s"1${c}"
       
    24   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
       
    25   case STAR(r) => s"(${string(r)})*"
       
    26   case NTIMES(r, n) =>  s"(${string(r)}){${n}}"
       
    27 }
       
    28 
       
    29 // size of a regular expressions - for testing purposes 
       
    30 def size(r: Rexp) : Int = r match {
       
    31   case ZERO => 1
       
    32   case ONE => 1
       
    33   case CHAR(_) => 1
       
    34   case ALT(r1, r2) => 1 + size(r1) + size(r2)
       
    35   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
    36   case STAR(r) => 1 + size(r)
       
    37   case NTIMES(r, _) => 1 + size(r)
       
    38 }
       
    39 
    14 
    40 
    15 
    41 
    16 // nullable function: tests whether the regular 
    42 // nullable function: tests whether the regular 
    17 // expression can recognise the empty string
    43 // expression can recognise the empty string
    18 def nullable (r: Rexp) : Boolean = r match {
    44 def nullable (r: Rexp) : Boolean = r match {
    37   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    63   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    38   case NTIMES(r, i) => 
    64   case NTIMES(r, i) => 
    39     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
    65     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
    40 }
    66 }
    41 
    67 
    42 def simp(r: Rexp) : Rexp = r match {
    68 
    43   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    69 
    44     case (ZERO, r2s) => r2s
    70 def simp(r: Rexp, seen: Set[Rexp]) : (Rexp, Set[Rexp]) = r match {
    45     case (r1s, ZERO) => r1s
    71   case ALT(r1, r2) => {
    46     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
    72     val (r1s, seen1) = simp(r1, seen)
       
    73     val (r2s, seen2) = simp(r2, seen1)
       
    74     (r1s, r2s) match {
       
    75       case (ZERO, r2s) => (r2s, seen2)
       
    76       case (r1s, ZERO) => (r1s, seen2)
       
    77       case (r1s, r2s) => (ALT(r1s, r2s), seen2)
       
    78     }
    47   }
    79   }
    48   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
    80   case SEQ(r1, r2) =>  {
    49     case (ZERO, _) => ZERO
    81     val (r1s, _) = simp(r1, Set())
    50     case (_, ZERO) => ZERO
    82     val (r2s, _) = simp(r2, Set())
    51     case (ONE, r2s) => r2s
    83     if (seen.contains(SEQ(r1s, r2s))) (ZERO, seen)
    52     case (r1s, ONE) => r1s
    84     else (r1s, r2s) match {
    53     case (r1s, r2s) => SEQ(r1s, r2s)
    85       case (ZERO, _) => (ZERO, seen)
       
    86       case (_, ZERO) => (ZERO, seen)
       
    87       case (ONE, r2s) => (r2s, seen + r2s)
       
    88       case (r1s, ONE) => (r1s, seen + r1s)
       
    89       case (r1s, r2s) => (SEQ(r1s, r2s), seen + SEQ(r1s, r2s))
       
    90     }
    54   }
    91   }
    55   case r => r
    92   case r => if (seen.contains(r)) (ZERO, seen) else (r, seen + r)
    56 }
    93 }
    57 
    94 
    58 
    95 
    59 // derivative w.r.t. a string (iterates der)
    96 // derivative w.r.t. a string (iterates der)
    60 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    97 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    61   case Nil => r
    98   case Nil => r
    62   case c::s => ders(s, simp(der(c, r)))
    99   case c::s => ders(s, simp(der(c, r), Set())._1)
    63 }
   100 }
    64 
   101 
    65 
   102 
    66 // main matcher function
   103 // main matcher function
    67 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
   104 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    68 
   105 
    69 
   106 
    70 //one or zero
   107 //one or zero
    71 def OPT(r: Rexp) = ALT(r, ONE)
   108 def OPT(r: Rexp) = ALT(r, ONE)
       
   109 
       
   110 
       
   111 
       
   112 
       
   113 
       
   114 
       
   115 
       
   116 
       
   117 
       
   118 def time_needed[T](i: Int, code: => T) = {
       
   119   val start = System.nanoTime()
       
   120   for (j <- 1 to i) code
       
   121   val end = System.nanoTime()
       
   122   (end - start)/(i * 1.0e9)
       
   123 }
       
   124 
       
   125 
       
   126 // star example
       
   127 val Tstar = STAR(STAR(STAR(CHAR('a'))))
       
   128 
       
   129 string(ders("".toList, Tstar))
       
   130 size(ders("".toList, Tstar))      // 4
       
   131 string(ders("a".toList, Tstar))
       
   132 size(ders("a".toList, Tstar))     // 11
       
   133 string(ders("aa".toList, Tstar))
       
   134 size(ders("aa".toList, Tstar))    // 11
       
   135 string(ders("aaa".toList, Tstar))
       
   136 size(ders("aaa".toList, Tstar))   // 11
       
   137 string(ders("aaaa".toList, Tstar))
       
   138 size(ders("aaaa".toList, Tstar))  // 11
       
   139 string(ders("aaaa".toList, Tstar))
       
   140 size(ders("aaaaa".toList, Tstar)) // 11
       
   141 string(ders("aaaab".toList, Tstar))
       
   142 size(ders("aaaaab".toList, Tstar)) // 1
       
   143 
       
   144 // test: ("a" | "aa")*
       
   145 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
       
   146 
       
   147 for (i <- 1 to 100 by 1) {
       
   148   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + 
       
   149 	  " size: " + size(ders(("a" * i).toList, EVIL3)))
       
   150 }
       
   151 
       
   152 
       
   153 println("start " + string(EVIL3) + "    " + size(EVIL3))
       
   154 val t1  = der('a', EVIL3)
       
   155 println(string(t1) + "    " + size(t1))
       
   156 val t1s = simp(t1, Set())._1 
       
   157 println("simplified " + string(t1s) + "    " + size(t1s))
       
   158 val t2  = der('a', t1s)
       
   159 println(string(t2) + "    " + size(t2))
       
   160 val t2s = simp(t2, Set())._1 
       
   161 println("simplified " + string(t2s) + "    " + size(t2s))
       
   162 val t3  = der('a', t2s)
       
   163 println(string(t3) + "    " + size(t3))
       
   164 val t3s = simp(t3, Set())._1 
       
   165 println("simplified " + string(t3s) + "    " + size(t3s))
       
   166 val t4  = der('a', t3s)
       
   167 val t4s = simp(t4, Set())._1 
       
   168 
       
   169 
       
   170 
       
   171 
       
   172 
       
   173 
       
   174 
       
   175 
       
   176 println(string(t4) + "    " + size(t4))
       
   177 println("simplified " + string(t4s) + "    " + size(t4s))
    72 
   178 
    73 
   179 
    74 // Test Cases
   180 // Test Cases
    75 
   181 
    76 //evil regular expressions: (a?){n} a{n}  and (a*)* b
   182 //evil regular expressions: (a?){n} a{n}  and (a*)* b
   141 }
   247 }
   142 
   248 
   143 for (i <- 1 to 4501 by 500) {
   249 for (i <- 1 to 4501 by 500) {
   144   println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "ab" * i))))
   250   println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "ab" * i))))
   145 }
   251 }
       
   252 
       
   253 
       
   254 
       
   255 
       
   256 (((1 + 1a) ~ ((a + aa))*) + (((0 + 1) ~ ((a + aa))*) + ((1 + 1a) ~ ((a + aa))*)))