progs/scala/autos.scala
changeset 242 dcfc9b23b263
parent 241 1075bba7b8b1
child 243 09ab631ce7fa
equal deleted inserted replaced
241:1075bba7b8b1 242:dcfc9b23b263
   245 abstract class Rexp
   245 abstract class Rexp
   246 case object ZERO extends Rexp
   246 case object ZERO extends Rexp
   247 case object ONE extends Rexp
   247 case object ONE extends Rexp
   248 case class CHAR(c: Char) extends Rexp 
   248 case class CHAR(c: Char) extends Rexp 
   249 case object ALL extends Rexp 
   249 case object ALL extends Rexp 
       
   250 case object ALLPlus extends Rexp
   250 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
   251 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
   251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
   252 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
   252 case class STAR(r: Rexp) extends Rexp 
   253 case class STAR(r: Rexp) extends Rexp 
   253 case class NTIMES(r: Rexp, n: Int) extends Rexp 
   254 case class NTIMES(r: Rexp, n: Int) extends Rexp 
   254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp 
   255 case class UPNTIMES(r: Rexp, n: Int) extends Rexp 
   297 
   298 
   298 def simp(r: Rexp) : Rexp = r match {
   299 def simp(r: Rexp) : Rexp = r match {
   299   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
   300   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
   300     case (ZERO, r2s) => r2s
   301     case (ZERO, r2s) => r2s
   301     case (r1s, ZERO) => r1s
   302     case (r1s, ZERO) => r1s
   302     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
   303     case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
   303   }
   304   }
   304   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
   305   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
   305     case (ZERO, _) => ZERO
   306     case (ZERO, _) => ZERO
   306     case (_, ZERO) => ZERO
   307     case (_, ZERO) => ZERO
   307     case (ONE, r2s) => r2s
   308     case (ONE, r2s) => r2s
   308     case (r1s, ONE) => r1s
   309     case (r1s, ONE) => r1s
   309     case (r1s, r2s) => SEQ(r1s, r2s)
   310     case (r1s, r2s) => SEQ(r1s, r2s)
   310   }
   311   }
   311   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   312   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   312   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
   313   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
       
   314   /*
   313   case NOT(r) => NOT(simp(r))
   315   case NOT(r) => NOT(simp(r))
   314   case AND(r1, r2) => AND(simp(r1), simp(r2))  
   316   case AND(r1, r2) => (simp(r1), simp(r2)) match {
       
   317     case (ALL, r2s) => r2s
       
   318     case (r1s, ALL) => r1s
       
   319     case (r1s, r2s) => if (r1s == r2s) r1s else AND(r1s, r2s)  
       
   320   }
       
   321   */
   315   case r => r
   322   case r => r
   316 }
   323 }
   317 
   324 
   318 
   325 
   319 // nullable function: tests whether the regular 
   326 // nullable function: tests whether the regular 
   320 // expression can recognise the empty string
   327 // expression can recognise the empty string
   321 def nullable(r: Rexp) : Boolean = r match {
   328 def nullable(r: Rexp) : Boolean = r match {
   322   case ZERO => false
   329   case ZERO => false
   323   case ONE => true
   330   case ONE => true
   324   case CHAR(_) => false
   331   case CHAR(_) => false
   325   case ALL => false
   332   case ALL => true
       
   333   case ALLPlus => false
   326   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   334   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   327   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   335   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   328   case STAR(_) => true
   336   case STAR(_) => true
   329   case NTIMES(r, i) => if (i == 0) true else nullable(r)
   337   case NTIMES(r, i) => if (i == 0) true else nullable(r)
   330   case UPNTIMES(r: Rexp, n: Int) => true
   338   case UPNTIMES(r: Rexp, n: Int) => true
   335 // derivative of a regular expression w.r.t. a character
   343 // derivative of a regular expression w.r.t. a character
   336 def der(c: Char, r: Rexp) : Rexp = r match {
   344 def der(c: Char, r: Rexp) : Rexp = r match {
   337   case ZERO => ZERO
   345   case ZERO => ZERO
   338   case ONE => ZERO
   346   case ONE => ZERO
   339   case CHAR(d) => if (c == d) ONE else ZERO
   347   case CHAR(d) => if (c == d) ONE else ZERO
   340   case ALL => ONE
   348   case ALL => ALL
       
   349   case ALLPlus => ALL 
   341   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
   350   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
   342   case SEQ(r1, r2) => 
   351   case SEQ(r1, r2) => 
   343     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
   352     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
   344     else SEQ(der(c, r1), r2)
   353     else SEQ(der(c, r1), r2)
   345   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
   354   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
   349     else SEQ(der(c, r1), NTIMES(r1, i - 1))
   358     else SEQ(der(c, r1), NTIMES(r1, i - 1))
   350   case UPNTIMES(r1, i) => 
   359   case UPNTIMES(r1, i) => 
   351     if (i == 0) ZERO
   360     if (i == 0) ZERO
   352     else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) 
   361     else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) 
   353   case NOT(r) => NOT(der(c, r))
   362   case NOT(r) => NOT(der(c, r))
   354   // AND case?
   363   case AND(r1, r2) => AND(der(c, r1), der(c, r2))
   355 }
   364 }
   356 
   365 
   357 
   366 
   358 // partial derivative of a regular expression w.r.t. a character
   367 // partial derivative of a regular expression w.r.t. a character
   359 // does not work for NOT and AND ... see below
   368 // does not work for NOT and AND ... see below
   360 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   369 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   361   case ZERO => Set()
   370   case ZERO => Set()
   362   case ONE => Set()
   371   case ONE => Set()
   363   case CHAR(d) => if (c == d) Set(ONE) else Set()
   372   case CHAR(d) => if (c == d) Set(ONE) else Set()
   364   case ALL => Set(ONE)
   373   case ALL => Set(ALL)
       
   374   case ALLPlus => Set(ALL)
   365   case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2)
   375   case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2)
   366   case SEQ(r1, r2) => 
   376   case SEQ(r1, r2) => 
   367     (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ 
   377     (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ 
   368     (if (nullable(r1)) pder(c, r2) else Set())
   378     (if (nullable(r1)) pder(c, r2) else Set())
   369   case STAR(r1) => 
   379   case STAR(r1) => 
   394   case Nil => rs.exists(nullable(_))
   404   case Nil => rs.exists(nullable(_))
   395   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
   405   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
   396 } 
   406 } 
   397 
   407 
   398 
   408 
   399 
   409 // partial derivatives including NOT and AND according to
   400 
   410 // PhD-thesis: Manipulation of Extended Regular Expressions 
   401 def seq_add(rss: Set[Set[Rexp]], r2: Rexp) : Set[Set[Rexp]] =
   411 // with Derivatives; these partial derivatives are also not
   402   for (rs <- rss) yield (for (r <- rs) yield (SEQ(r, r2) : Rexp))
   412 // finite...for example NOT((a*)a)
   403 
   413 
   404 def not_add(rss: Set[Set[Rexp]]) : Set[Set[Set[Rexp]]] =
   414 def seq_compose(rs: Set[Rexp], r2: Rexp) : Set[Rexp] =
   405   for (rs <- rss) yield (for (r <- rs) yield (Set(NOT(r)) : Set[Rexp]))
   415   for (r <- rs) yield (SEQ(r, r2) : Rexp)
   406 
   416 
   407 def and_compose(rss1: Set[Set[Rexp]], rss2: Set[Set[Rexp]]) : Set[Set[Rexp]] =
   417 def not_compose(rs: Set[Rexp]) : Set[Rexp] =
   408   for (rs1 <- rss1; rs2 <- rss2) yield rs1 ++ rs2
   418   Set(rs.fold(ALL)((r1, r2) => AND(r1, NOT(r2))))
   409 
   419 
   410 def pder2(c: Char, r: Rexp) : Set[Set[Rexp]] = r match {
   420 def and_compose(rs1: Set[Rexp], rs2: Set[Rexp]) : Set[Rexp] =
       
   421   for (r1 <- rs1; r2 <- rs2) yield AND(r1, r2)
       
   422 
       
   423 def pder2(c: Char, r: Rexp) : Set[Rexp] = r match {
   411   case ZERO => Set()
   424   case ZERO => Set()
   412   case ONE => Set()
   425   case ONE => Set()
   413   case CHAR(d) => if (c == d) Set(Set(ONE)) else Set()
   426   case ALL => Set(ALL)
   414   case ALL => Set(Set(ONE))
   427   case ALLPlus => Set(ALL)
       
   428   case CHAR(d) => if (c == d) Set(ONE) else Set()
   415   case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2)
   429   case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2)
   416   case SEQ(r1, r2) => {
   430   case SEQ(r1, r2) => {
   417     val prs1 = seq_add(pder2(c, r1), r2)
   431     val prs1 = seq_compose(pder2(c, r1), r2)
   418     if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1
   432     if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1
   419   }
   433   }
   420   case STAR(r1) => 
   434   case STAR(r1) => seq_compose(pder2(c, r1), STAR(r1))
   421     seq_add(pder2(c, r1), STAR(r1))
       
   422   case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2))
   435   case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2))
   423   case NOT(r1) => {
   436   case NOT(r1) => nder2(c, r1)
   424     val prs1 = not_add(pder2(c, r1))
   437 }
   425     if (prs1.isEmpty) Set() else 
   438 
   426       prs1.tail.foldRight(prs1.head) { (rs1, rs2) => rs1 ++ rs2 }
   439 def nder2(c: Char, r: Rexp) : Set[Rexp] = r match {
   427   }
   440   case ZERO => Set(ALL)
   428 }
   441   case ONE => Set(ALL)
   429 
   442   case ALL => Set()
   430 def pmatcher2(rss: Set[Set[Rexp]], s: List[Char]): Boolean = s match {
   443   case ALLPlus => Set()
   431   case Nil => rss.exists((rs) => rs.exists((r) => nullable(r)))
   444   case CHAR(d) => if (c == d) Set(ALLPlus) else Set(ALL)
   432   case c::cs => pmatcher2(rss.flatMap(_.flatMap(pder2(c, _))), cs)
   445   case ALT(r1, r2) => and_compose(nder2(c, r1), nder2(c, r2))
       
   446   case SEQ(r1, r2) => if (nullable(r1))
       
   447                       and_compose(not_compose(seq_compose(pder2(c, r1), r2)), nder2(c, r2))
       
   448                       else not_compose(seq_compose(pder2(c, r1), r2))
       
   449   case STAR(r1) => not_compose(pder2(c, STAR(r1)))
       
   450   case AND(r1, r2) => nder2(c, r1) ++ nder2(c, r2)
       
   451   case NOT(r1) => pder2(c, r1)
       
   452 }
       
   453 
       
   454 
       
   455 def pmatcher2(rs: Set[Rexp], s: List[Char]): Boolean = s match {
       
   456   case Nil => rs.exists(nullable(_))
       
   457   case c::cs => pmatcher2(rs.flatMap(pder2(c, _)), cs)
   433 } 
   458 } 
   434 
   459 
   435 
   460 // pder2/nder2 example
   436 // quick-and-dirty translation of a pder automaton 
   461 
       
   462 val r_ex = AND("aa" | "a", AND(STAR("a"), NOT(STAR("a") ~ "a"))) 
       
   463 nder2('a', r_ex).map(simp(_)).mkString("\n")
       
   464 
       
   465 
       
   466 
       
   467 
       
   468 
       
   469 // quick-and-dirty translation of a pder-automaton 
   437 
   470 
   438 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
   471 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
   439 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), 
   472 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), 
   440                                     Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), 
   473                                     Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), 
   441                                     _.exists(nullable))
   474                                     _.exists(nullable))
   507 
   540 
   508 //mkDFA(big)   // does not terminate
   541 //mkDFA(big)   // does not terminate
   509 
   542 
   510 
   543 
   511 
   544 
   512 // Antimirov Partial derivative automata construction ... definitely terminates
   545 // Antimirov Partial derivative automata construction ... definitely 
       
   546 // terminates but does not work for extensions of NOT and AND
       
   547 //
       
   548 // for this we have to use the extended partial derivatives
       
   549 // pder2/nder2
   513 
   550 
   514 
   551 
   515 // to transform (concrete) Maps into functions
   552 // to transform (concrete) Maps into functions
   516 def toFun(m: MTrans) : Trans = 
   553 def toFun(m: MTrans) : Trans = 
   517   { case (q, c) => m(q, c) }
   554   { case (q, c) => m(q, c) }
   518 
   555 
   519 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = {
   556 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = {
   520   val qders = pder(c, q).map(simp(_))  // set of simplified partial derivatives
   557   val qders = pder2(c, q).map(simp(_))  // set of simplified partial derivatives
   521   qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) }
   558   qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) }
   522 }
   559 }
   523 
   560 
   524 def padd(sigma: Set[Char], delta: STrans, qs: DStates, 
   561 def padd(sigma: Set[Char], delta: STrans, qs: DStates, 
   525          q: DState, qnew: DState, c: Char) : (DStates, STrans) = {
   562          q: DState, qnew: DState, c: Char) : (DStates, STrans) = {
   584 
   621 
   585 matcher(rnot, "/**/".toList)        // true
   622 matcher(rnot, "/**/".toList)        // true
   586 matcher(rnot, "/*aaabaa*/".toList)  // true
   623 matcher(rnot, "/*aaabaa*/".toList)  // true
   587 matcher(rnot, "/*/**/".toList)      // true
   624 matcher(rnot, "/*/**/".toList)      // true
   588 matcher(rnot, "/*aaa*/aa*/".toList) // false
   625 matcher(rnot, "/*aaa*/aa*/".toList) // false
   589 matcher(rnot, "/aaa*/aa*/".toList) // false
   626 matcher(rnot, "/aaa*/aa*/".toList)  // false
   590 
   627 
   591 pmatcher(Set(rnot), "/**/".toList)        // true
   628 pmatcher2(Set(rnot), "/**/".toList)        // true
   592 pmatcher(Set(rnot), "/*aaabaa*/".toList)  // true
   629 pmatcher2(Set(rnot), "/*aaabaa*/".toList)  // true
   593 pmatcher(Set(rnot), "/*/**/".toList)      // true
   630 pmatcher2(Set(rnot), "/*/**/".toList)      // true
   594 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false  ?????
   631 pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false
   595 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false
   632 pmatcher2(Set(rnot), "/aaa*/aa*/".toList)  // false
   596 
       
   597 pmatcher2(Set(Set(rnot)), "/**/".toList)        // true
       
   598 pmatcher2(Set(Set(rnot)), "/*aaabaa*/".toList)  // true
       
   599 pmatcher2(Set(Set(rnot)), "/*/**/".toList)      // true
       
   600 pmatcher2(Set(Set(rnot)), "/*aaa*/aa*/".toList) // false  ?????
       
   601 pmatcher2(Set(Set(rnot)), "/aaa*/aa*/".toList) // false
       
   602 
   633 
   603 // example from automata paper with counters and backreferences
   634 // example from automata paper with counters and backreferences
   604 
   635 
   605 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
   636 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
   606 mkNFA(r1)     // size = 5
   637 mkNFA(r1)     // size = 5