progs/scala/autos.scala
changeset 241 1075bba7b8b1
parent 240 820228ac1d0f
child 242 dcfc9b23b263
equal deleted inserted replaced
240:820228ac1d0f 241:1075bba7b8b1
   251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
   251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
   252 case class STAR(r: Rexp) extends Rexp 
   252 case class STAR(r: Rexp) extends Rexp 
   253 case class NTIMES(r: Rexp, n: Int) extends Rexp 
   253 case class NTIMES(r: Rexp, n: Int) extends Rexp 
   254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp 
   254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp 
   255 case class NOT(r: Rexp) extends Rexp
   255 case class NOT(r: Rexp) extends Rexp
       
   256 case class AND(r1: Rexp, r2: Rexp) extends Rexp 
   256 
   257 
   257 def get_chars(r: Rexp) : Set[Char] = r match {
   258 def get_chars(r: Rexp) : Set[Char] = r match {
   258   case ZERO => Set()
   259   case ZERO => Set()
   259   case ONE => Set()
   260   case ONE => Set()
   260   case CHAR(c) => Set(c)
   261   case CHAR(c) => Set(c)
   263   case STAR(r) => get_chars(r)
   264   case STAR(r) => get_chars(r)
   264   case NTIMES(r, _) => get_chars(r)
   265   case NTIMES(r, _) => get_chars(r)
   265   case UPNTIMES(r, _) => get_chars(r)
   266   case UPNTIMES(r, _) => get_chars(r)
   266   case ALL => ('a' to 'z').toSet ++ Set('*','/','\\')
   267   case ALL => ('a' to 'z').toSet ++ Set('*','/','\\')
   267   case NOT(r) => get_chars(r)
   268   case NOT(r) => get_chars(r)
       
   269   case AND(r1, r2) => get_chars(r1) ++ get_chars(r2)
   268 }
   270 }
   269 
   271 
   270 
   272 
   271 
   273 
   272 import scala.language.implicitConversions    
   274 import scala.language.implicitConversions    
   307     case (r1s, r2s) => SEQ(r1s, r2s)
   309     case (r1s, r2s) => SEQ(r1s, r2s)
   308   }
   310   }
   309   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   311   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   310   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
   312   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
   311   case NOT(r) => NOT(simp(r))
   313   case NOT(r) => NOT(simp(r))
       
   314   case AND(r1, r2) => AND(simp(r1), simp(r2))  
   312   case r => r
   315   case r => r
   313 }
   316 }
   314 
   317 
   315 
   318 
   316 // nullable function: tests whether the regular 
   319 // nullable function: tests whether the regular 
   324   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   327   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   325   case STAR(_) => true
   328   case STAR(_) => true
   326   case NTIMES(r, i) => if (i == 0) true else nullable(r)
   329   case NTIMES(r, i) => if (i == 0) true else nullable(r)
   327   case UPNTIMES(r: Rexp, n: Int) => true
   330   case UPNTIMES(r: Rexp, n: Int) => true
   328   case NOT(r) => !nullable(r)
   331   case NOT(r) => !nullable(r)
       
   332   case AND(r1, r2) => nullable(r1) && nullable(r2)
   329 }
   333 }
   330 
   334 
   331 // derivative of a regular expression w.r.t. a character
   335 // derivative of a regular expression w.r.t. a character
   332 def der(c: Char, r: Rexp) : Rexp = r match {
   336 def der(c: Char, r: Rexp) : Rexp = r match {
   333   case ZERO => ZERO
   337   case ZERO => ZERO
   345     else SEQ(der(c, r1), NTIMES(r1, i - 1))
   349     else SEQ(der(c, r1), NTIMES(r1, i - 1))
   346   case UPNTIMES(r1, i) => 
   350   case UPNTIMES(r1, i) => 
   347     if (i == 0) ZERO
   351     if (i == 0) ZERO
   348     else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) 
   352     else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) 
   349   case NOT(r) => NOT(der(c, r))
   353   case NOT(r) => NOT(der(c, r))
       
   354   // AND case?
   350 }
   355 }
   351 
   356 
   352 
   357 
   353 // partial derivative of a regular expression w.r.t. a character
   358 // partial derivative of a regular expression w.r.t. a character
       
   359 // does not work for NOT and AND ... see below
   354 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   360 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   355   case ZERO => Set()
   361   case ZERO => Set()
   356   case ONE => Set()
   362   case ONE => Set()
   357   case CHAR(d) => if (c == d) Set(ONE) else Set()
   363   case CHAR(d) => if (c == d) Set(ONE) else Set()
   358   case ALL => Set(ONE)
   364   case ALL => Set(ONE)
   370       for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1))
   376       for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1))
   371   case UPNTIMES(r1, i) => 
   377   case UPNTIMES(r1, i) => 
   372     if (i == 0) Set()
   378     if (i == 0) Set()
   373     else 
   379     else 
   374       for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
   380       for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
   375   case NOT(r1) => 
       
   376     for (pr1 <- pder(c, r1)) yield NOT(pr1)
       
   377 }
   381 }
   378 
   382 
   379 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
   383 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
   380   rs.flatMap(pder(c, _))
   384   rs.flatMap(pder(c, _))
   381 
   385 
   387 } 
   391 } 
   388 
   392 
   389 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
   393 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
   390   case Nil => rs.exists(nullable(_))
   394   case Nil => rs.exists(nullable(_))
   391   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
   395   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
       
   396 } 
       
   397 
       
   398 
       
   399 
       
   400 
       
   401 def seq_add(rss: Set[Set[Rexp]], r2: Rexp) : Set[Set[Rexp]] =
       
   402   for (rs <- rss) yield (for (r <- rs) yield (SEQ(r, r2) : Rexp))
       
   403 
       
   404 def not_add(rss: Set[Set[Rexp]]) : Set[Set[Set[Rexp]]] =
       
   405   for (rs <- rss) yield (for (r <- rs) yield (Set(NOT(r)) : Set[Rexp]))
       
   406 
       
   407 def and_compose(rss1: Set[Set[Rexp]], rss2: Set[Set[Rexp]]) : Set[Set[Rexp]] =
       
   408   for (rs1 <- rss1; rs2 <- rss2) yield rs1 ++ rs2
       
   409 
       
   410 def pder2(c: Char, r: Rexp) : Set[Set[Rexp]] = r match {
       
   411   case ZERO => Set()
       
   412   case ONE => Set()
       
   413   case CHAR(d) => if (c == d) Set(Set(ONE)) else Set()
       
   414   case ALL => Set(Set(ONE))
       
   415   case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2)
       
   416   case SEQ(r1, r2) => {
       
   417     val prs1 = seq_add(pder2(c, r1), r2)
       
   418     if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1
       
   419   }
       
   420   case STAR(r1) => 
       
   421     seq_add(pder2(c, r1), STAR(r1))
       
   422   case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2))
       
   423   case NOT(r1) => {
       
   424     val prs1 = not_add(pder2(c, r1))
       
   425     if (prs1.isEmpty) Set() else 
       
   426       prs1.tail.foldRight(prs1.head) { (rs1, rs2) => rs1 ++ rs2 }
       
   427   }
       
   428 }
       
   429 
       
   430 def pmatcher2(rss: Set[Set[Rexp]], s: List[Char]): Boolean = s match {
       
   431   case Nil => rss.exists((rs) => rs.exists((r) => nullable(r)))
       
   432   case c::cs => pmatcher2(rss.flatMap(_.flatMap(pder2(c, _))), cs)
   392 } 
   433 } 
   393 
   434 
   394 
   435 
   395 // quick-and-dirty translation of a pder automaton 
   436 // quick-and-dirty translation of a pder automaton 
   396 
   437 
   551 pmatcher(Set(rnot), "/*aaabaa*/".toList)  // true
   592 pmatcher(Set(rnot), "/*aaabaa*/".toList)  // true
   552 pmatcher(Set(rnot), "/*/**/".toList)      // true
   593 pmatcher(Set(rnot), "/*/**/".toList)      // true
   553 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false  ?????
   594 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false  ?????
   554 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false
   595 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false
   555 
   596 
       
   597 pmatcher2(Set(Set(rnot)), "/**/".toList)        // true
       
   598 pmatcher2(Set(Set(rnot)), "/*aaabaa*/".toList)  // true
       
   599 pmatcher2(Set(Set(rnot)), "/*/**/".toList)      // true
       
   600 pmatcher2(Set(Set(rnot)), "/*aaa*/aa*/".toList) // false  ?????
       
   601 pmatcher2(Set(Set(rnot)), "/aaa*/aa*/".toList) // false
       
   602 
   556 // example from automata paper with counters and backreferences
   603 // example from automata paper with counters and backreferences
   557 
   604 
   558 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
   605 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
   559 mkNFA(r1)     // size = 5
   606 mkNFA(r1)     // size = 5
   560  
   607