progs/scala/autos.scala
changeset 240 820228ac1d0f
parent 239 e59cec211f4f
child 241 1075bba7b8b1
equal deleted inserted replaced
239:e59cec211f4f 240:820228ac1d0f
   243 // Regular expressions fro derivative automata
   243 // Regular expressions fro derivative automata
   244 
   244 
   245 abstract class Rexp
   245 abstract class Rexp
   246 case object ZERO extends Rexp
   246 case object ZERO extends Rexp
   247 case object ONE extends Rexp
   247 case object ONE extends Rexp
   248 case class CHAR(c: Char) extends Rexp {
   248 case class CHAR(c: Char) extends Rexp 
   249   override def toString = c.toString
   249 case object ALL extends Rexp 
   250 }
       
   251 case object ALL extends Rexp {
       
   252   override def toString = "."
       
   253 }
       
   254 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
   250 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
   255 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
   251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
   256   override def toString = r1.toString + " ~ " + r2.toString
   252 case class STAR(r: Rexp) extends Rexp 
   257 }
   253 case class NTIMES(r: Rexp, n: Int) extends Rexp 
   258 case class STAR(r: Rexp) extends Rexp {
       
   259   override def toString = r.toString + "*"
       
   260 } 
       
   261 case class NTIMES(r: Rexp, n: Int) extends Rexp {
       
   262   override def toString = r.toString + "{" + n.toString + "}"
       
   263 }
       
   264 case class UPNTIMES(r: Rexp, n: Int) extends Rexp 
   254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp 
   265 
   255 case class NOT(r: Rexp) extends Rexp
   266 
   256 
   267 def get_chars(r: Rexp) : Set[Char] = r match {
   257 def get_chars(r: Rexp) : Set[Char] = r match {
   268   case ZERO => Set()
   258   case ZERO => Set()
   269   case ONE => Set()
   259   case ONE => Set()
   270   case CHAR(c) => Set(c)
   260   case CHAR(c) => Set(c)
   271   case ALT(r1, r2) => get_chars(r1) ++ get_chars(r2)
   261   case ALT(r1, r2) => get_chars(r1) ++ get_chars(r2)
   272   case SEQ(r1, r2) => get_chars(r1) ++ get_chars(r2)
   262   case SEQ(r1, r2) => get_chars(r1) ++ get_chars(r2)
   273   case STAR(r) => get_chars(r)
   263   case STAR(r) => get_chars(r)
   274   case NTIMES(r, _) => get_chars(r)
   264   case NTIMES(r, _) => get_chars(r)
   275   case UPNTIMES(r, _) => get_chars(r)
   265   case UPNTIMES(r, _) => get_chars(r)
   276   case ALL => ('a' to 'z').toSet
   266   case ALL => ('a' to 'z').toSet ++ Set('*','/','\\')
       
   267   case NOT(r) => get_chars(r)
   277 }
   268 }
   278 
   269 
   279 
   270 
   280 
   271 
   281 import scala.language.implicitConversions    
   272 import scala.language.implicitConversions    
   315     case (r1s, ONE) => r1s
   306     case (r1s, ONE) => r1s
   316     case (r1s, r2s) => SEQ(r1s, r2s)
   307     case (r1s, r2s) => SEQ(r1s, r2s)
   317   }
   308   }
   318   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   309   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   319   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
   310   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
       
   311   case NOT(r) => NOT(simp(r))
   320   case r => r
   312   case r => r
   321 }
   313 }
   322 
   314 
   323 
   315 
   324 // nullable function: tests whether the regular 
   316 // nullable function: tests whether the regular 
   331   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   323   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   332   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   324   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   333   case STAR(_) => true
   325   case STAR(_) => true
   334   case NTIMES(r, i) => if (i == 0) true else nullable(r)
   326   case NTIMES(r, i) => if (i == 0) true else nullable(r)
   335   case UPNTIMES(r: Rexp, n: Int) => true
   327   case UPNTIMES(r: Rexp, n: Int) => true
       
   328   case NOT(r) => !nullable(r)
   336 }
   329 }
   337 
   330 
   338 // derivative of a regular expression w.r.t. a character
   331 // derivative of a regular expression w.r.t. a character
   339 def der(c: Char, r: Rexp) : Rexp = r match {
   332 def der(c: Char, r: Rexp) : Rexp = r match {
   340   case ZERO => ZERO
   333   case ZERO => ZERO
   351     if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1))
   344     if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1))
   352     else SEQ(der(c, r1), NTIMES(r1, i - 1))
   345     else SEQ(der(c, r1), NTIMES(r1, i - 1))
   353   case UPNTIMES(r1, i) => 
   346   case UPNTIMES(r1, i) => 
   354     if (i == 0) ZERO
   347     if (i == 0) ZERO
   355     else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) 
   348     else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) 
       
   349   case NOT(r) => NOT(der(c, r))
   356 }
   350 }
   357 
   351 
   358 
   352 
   359 // partial derivative of a regular expression w.r.t. a character
   353 // partial derivative of a regular expression w.r.t. a character
   360 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   354 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   376       for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1))
   370       for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1))
   377   case UPNTIMES(r1, i) => 
   371   case UPNTIMES(r1, i) => 
   378     if (i == 0) Set()
   372     if (i == 0) Set()
   379     else 
   373     else 
   380       for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
   374       for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
       
   375   case NOT(r1) => 
       
   376     for (pr1 <- pder(c, r1)) yield NOT(pr1)
   381 }
   377 }
   382 
   378 
   383 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
   379 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
   384   rs.flatMap(pder(c, _))
   380   rs.flatMap(pder(c, _))
   385 
   381 
       
   382 
       
   383 // matchers
       
   384 def matcher(r: Rexp, s: List[Char]): Boolean = s match {
       
   385   case Nil => nullable(r)
       
   386   case c::cs => matcher(der(c, r), cs)
       
   387 } 
       
   388 
       
   389 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
       
   390   case Nil => rs.exists(nullable(_))
       
   391   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
       
   392 } 
   386 
   393 
   387 
   394 
   388 // quick-and-dirty translation of a pder automaton 
   395 // quick-and-dirty translation of a pder automaton 
   389 
   396 
   390 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
   397 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
   429 def mkDFA(r: Rexp) = {
   436 def mkDFA(r: Rexp) = {
   430   val sigma = get_chars(r)
   437   val sigma = get_chars(r)
   431   val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r)
   438   val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r)
   432   val fins = qs.filter(nullable(_))
   439   val fins = qs.filter(nullable(_))
   433   val deltaf : (Rexp, Char) :=> Rexp =  { case (q, c) => delta(q, c) }
   440   val deltaf : (Rexp, Char) :=> Rexp =  { case (q, c) => delta(q, c) }
   434   println(s"Automata size: ${qs.size}")
   441   println(s"DFA size: ${qs.size}")
       
   442   println(s"""DFA states\n${qs.mkString("\n")}""")
   435   DFA(r, deltaf, fins)
   443   DFA(r, deltaf, fins)
   436 }
   444 }
   437 
   445 
   438 val re = "ab" | "ac"
   446 val re = "ab" | "ac"
   439 val d1 = mkDFA(re)
   447 val d1 = mkDFA(re)
   487   val sigma = get_chars(r)
   495   val sigma = get_chars(r)
   488   val (qs, delta) = pexplore(sigma, Set(), Set(r), r)
   496   val (qs, delta) = pexplore(sigma, Set(), Set(r), r)
   489   val fins = qs.filter(nullable(_))
   497   val fins = qs.filter(nullable(_))
   490   val deltaf = delta.map(toFun(_))
   498   val deltaf = delta.map(toFun(_))
   491   println(s"NFA size: ${qs.size}")
   499   println(s"NFA size: ${qs.size}")
       
   500   println(s"""NFA states\n${qs.mkString("\n")}""")
   492   NFA(Set(r), deltaf, fins)
   501   NFA(Set(r), deltaf, fins)
   493 }
   502 }
   494 
   503 
   495 
   504 
   496 // simple example from Scott's paper
   505 // simple example from Scott's paper
   518 
   527 
   519 n2.accepts("aaaaaab".toList)   // true
   528 n2.accepts("aaaaaab".toList)   // true
   520 n2.accepts("aaaaaa".toList)    // false
   529 n2.accepts("aaaaaa".toList)    // false
   521 n2.accepts(("a" * 100).toList) // false
   530 n2.accepts(("a" * 100).toList) // false
   522 
   531 
       
   532 
       
   533 // example involving not
       
   534 
       
   535 val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/"
       
   536 val nnot = mkNFA(rnot)  // size 7
       
   537 
       
   538 nnot.accepts("/**/".toList)        // true
       
   539 nnot.accepts("/*aaabaa*/".toList)  // true
       
   540 nnot.accepts("/*/**/".toList)      // true
       
   541 nnot.accepts("/*aaa*/aa*/".toList) // false  ????
       
   542 nnot.accepts("/aaa*/aa*/".toList) // false
       
   543 
       
   544 matcher(rnot, "/**/".toList)        // true
       
   545 matcher(rnot, "/*aaabaa*/".toList)  // true
       
   546 matcher(rnot, "/*/**/".toList)      // true
       
   547 matcher(rnot, "/*aaa*/aa*/".toList) // false
       
   548 matcher(rnot, "/aaa*/aa*/".toList) // false
       
   549 
       
   550 pmatcher(Set(rnot), "/**/".toList)        // true
       
   551 pmatcher(Set(rnot), "/*aaabaa*/".toList)  // true
       
   552 pmatcher(Set(rnot), "/*/**/".toList)      // true
       
   553 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false  ?????
       
   554 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false
       
   555 
       
   556 // example from automata paper with counters and backreferences
       
   557 
   523 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
   558 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
   524 mkNFA(r1)     // size = 5
   559 mkNFA(r1)     // size = 5
   525  
   560  
   526 val n3 = mkNFA(r) // size = 7
   561 val n3 = mkNFA(r) // size = 7
   527 
   562