progs/scala/autos.scala
changeset 243 09ab631ce7fa
parent 242 dcfc9b23b263
child 283 c4d821c6309d
equal deleted inserted replaced
242:dcfc9b23b263 243:09ab631ce7fa
    74                      fins:  A => Boolean) {     // final states 
    74                      fins:  A => Boolean) {     // final states 
    75 
    75 
    76   // given a state and a character, what is the set of next states?
    76   // given a state and a character, what is the set of next states?
    77   // if there is none => empty set
    77   // if there is none => empty set
    78   def next(q: A, c: C) : Set[A] = 
    78   def next(q: A, c: C) : Set[A] = 
    79     delta.flatMap((d) => Try(d(q, c)).toOption)
    79     delta.flatMap(d => Try(d(q, c)).toOption)
    80 
    80 
    81   def nexts(qs: Set[A], c: C) : Set[A] =
    81   def nexts(qs: Set[A], c: C) : Set[A] =
    82     qs.flatMap(next(_, c))
    82     qs.flatMap(next(_, c))
    83 
    83 
    84   // given some states and a string, what is the set of next states?
    84   // given some states and a string, what is the set of next states?
    88   }
    88   }
    89 
    89 
    90   // is a string accepted by an NFA?
    90   // is a string accepted by an NFA?
    91   def accepts(s: List[C]) : Boolean = 
    91   def accepts(s: List[C]) : Boolean = 
    92     deltas(starts, s).exists(fins)
    92     deltas(starts, s).exists(fins)
       
    93 
       
    94   // depth-first search version of accept
       
    95   def search(q: A, s: List[C]) : Boolean = s match {
       
    96     case Nil => fins(q)
       
    97     case c::cs => delta.exists(d => Try(search(d(q, c), cs)) getOrElse false)
       
    98   }
       
    99 
       
   100   def accepts2(s: List[C]) : Boolean = 
       
   101     starts.exists(search(_, s))
       
   102 
    93 }
   103 }
    94 
   104 
    95 
   105 
    96 
   106 
    97 
   107 
    98 // NFA test cases
   108 // NFA test cases
    99 
   109 
   100 // 1 
   110 // 1:  NFA for STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
   101 val trans1 = Set[(State, Char) :=> State](
   111 val trans1 = Set[(State, Char) :=> State](
   102   { case (Q0, 'a') => Q1 },
   112   { case (Q0, 'a') => Q1 },
   103   { case (Q0, _)   => Q0 },
   113   { case (Q0, _)   => Q0 },
   104   { case (Q1, _)   => Q2 },
   114   { case (Q1, _)   => Q2 },
   105   { case (Q2, _)   => Q3 },
   115   { case (Q2, _)   => Q3 },
   112 
   122 
   113 nfa1.accepts("axaybzbc".toList)     // true
   123 nfa1.accepts("axaybzbc".toList)     // true
   114 nfa1.accepts("aaaaxaybzbc".toList)  // true
   124 nfa1.accepts("aaaaxaybzbc".toList)  // true
   115 nfa1.accepts("axaybzbd".toList)     // false
   125 nfa1.accepts("axaybzbd".toList)     // false
   116 
   126 
       
   127 nfa1.accepts2("axaybzbc".toList)     // true
       
   128 nfa1.accepts2("aaaaxaybzbc".toList)  // true
       
   129 nfa1.accepts2("axaybzbd".toList)     // false
       
   130 
   117 
   131 
   118 // 2
   132 // 2
   119 val trans2 = Set[(State, Char) :=> State](
   133 val trans2 = Set[(State, Char) :=> State](
   120   { case (Q0, 'a') => Q0 },
   134   { case (Q0, 'a') => Q0 },
   121   { case (Q0, 'a') => Q1 },
   135   { case (Q0, 'a') => Q1 },
   130 nfa2.accepts("aaaaa".toList)          // false
   144 nfa2.accepts("aaaaa".toList)          // false
   131 nfa2.accepts("aaaaab".toList)         // true
   145 nfa2.accepts("aaaaab".toList)         // true
   132 nfa2.accepts("aaaaabbb".toList)       // true
   146 nfa2.accepts("aaaaabbb".toList)       // true
   133 nfa2.accepts("aaaaabbbaaa".toList)    // false
   147 nfa2.accepts("aaaaabbbaaa".toList)    // false
   134 nfa2.accepts("ac".toList)             // false
   148 nfa2.accepts("ac".toList)             // false
       
   149 
       
   150 nfa2.accepts2("aa".toList)             // false
       
   151 nfa2.accepts2("aaaaa".toList)          // false
       
   152 nfa2.accepts2("aaaaab".toList)         // true
       
   153 nfa2.accepts2("aaaaabbb".toList)       // true
       
   154 nfa2.accepts2("aaaaabbbaaa".toList)    // false
       
   155 nfa2.accepts2("ac".toList)             // false
       
   156 
   135 
   157 
   136 // 3
   158 // 3
   137 val trans3 = Set[(State, Char) :=> State](
   159 val trans3 = Set[(State, Char) :=> State](
   138   { case (Q0, _)   => Q0 },
   160   { case (Q0, _)   => Q0 },
   139   { case (Q0, 'a') => Q1 },
   161   { case (Q0, 'a') => Q1 },
   189   val fx = f(x)
   211   val fx = f(x)
   190   if (fx == x) x else fixpT(f, fx) 
   212   if (fx == x) x else fixpT(f, fx) 
   191 }
   213 }
   192 
   214 
   193 
   215 
   194 case class eNFA[A, C](starts: Set[A],                    // starting state
   216 case class eNFA[A, C](start: A,                          // starting state
   195                       delta: Set[(A, Option[C]) :=> A],  // transition edges
   217                       delta: Set[(A, Option[C]) :=> A],  // transition edges
   196                       fins:  A => Boolean) {             // final states 
   218                       fins:  A => Boolean) {             // final states 
   197 
   219 
   198   // epsilon transitions
   220   // epsilon transitions
   199   def enext(q: A) : Set[A] = 
   221   def enext(q: A) : Set[A] = 
   200     delta.flatMap((d) => Try(d(q, None)).toOption)
   222     delta.flatMap((d) => Try(d(q, None)).toOption)
   201 
   223 
   202   def enexts(qs: Set[A]) : Set[A] = 
   224   def enexts(qs: Set[A]) : Set[A] = 
   203     qs ++ qs.flatMap(enext(_))
   225     qs | qs.flatMap(enext(_))
   204 
   226 
   205   // epsilon closure
   227   // epsilon closure
   206   def ecl(qs: Set[A]) : Set[A] = 
   228   def ecl(qs: Set[A]) : Set[A] = 
   207     fixpT(enexts, qs)
   229     fixpT(enexts, qs)
   208 
   230 
   209   // "normal" transition
   231   // "normal" transition
   210   def next(q: A, c: C) : Set[A] = 
   232   def next(q: A, c: C) : Set[A] = 
   211     delta.flatMap((d) => Try(d(q, Some(c))).toOption)
   233     delta.flatMap((d) => Try(d(q, Some(c))).toOption)
   212 
   234 
   213   def nexts(qs: Set[A], c: C) : Set[A] = 
   235   def nexts(qs: Set[A], c: C) : Set[A] = 
   214     qs.flatMap(next(_, c))
   236     ecl(ecl(qs).flatMap(next(_, c)))
   215 
   237 
   216   def deltas(qs: Set[A], s: List[C]) : Set[A] = s match {
   238   def deltas(qs: Set[A], s: List[C]) : Set[A] = s match {
   217     case Nil => ecl(qs)
   239     case Nil => ecl(qs)
   218     case c::cs => deltas(ecl(nexts(ecl(qs), c)), cs)
   240     case c::cs => deltas(nexts(qs, c), cs)
   219   }
   241   }
   220 
   242 
   221   def accepts(s: List[C]) : Boolean = 
   243   def accepts(s: List[C]) : Boolean = 
   222     deltas(starts, s.toList).exists(fins)
   244     deltas(Set(start), s.toList).exists(fins)
   223 }
   245 }
   224 
   246 
   225 
   247 // test cases for eNFAs
   226 val etrans1 = Set[(State, Option[Char]) :=> State](
   248 val etrans1 = Set[(State, Option[Char]) :=> State](
   227   { case (Q0, Some('a')) => Q1 },
   249   { case (Q0, Some('a')) => Q1 },
   228   { case (Q1, None) => Q0 }
   250   { case (Q1, None) => Q0 }
   229 )
   251 )
   230 
   252 
   231 val enfa = eNFA(Set[State](Q0), etrans1, Set[State](Q1))
   253 val enfa1 = eNFA(Q0, etrans1, Set[State](Q1))
   232 
   254 
   233 enfa.accepts("a".toList)              // true
   255 enfa1.accepts("a".toList)              // true
   234 enfa.accepts("".toList)               // false
   256 enfa1.accepts("".toList)               // false
   235 enfa.accepts("aaaaa".toList)          // true
   257 enfa1.accepts("aaaaa".toList)          // true
   236 enfa.accepts("aaaaab".toList)         // flase
   258 enfa1.accepts("aaaaab".toList)         // false
   237 enfa.accepts("aaaaabbb".toList)       // false
   259 enfa1.accepts("aaaaabbb".toList)       // false
   238 enfa.accepts("aaaaabbbaaa".toList)    // false
   260 enfa1.accepts("aaaaabbbaaa".toList)    // false
   239 enfa.accepts("ac".toList)             // false
   261 enfa1.accepts("ac".toList)             // false
   240 
   262 
   241 
   263 // example from handouts 
       
   264 val etrans2 = Set[(State, Option[Char]) :=> State](
       
   265   { case (Q0, Some('a')) => Q0 },
       
   266   { case (Q0, None) => Q1 },
       
   267   { case (Q0, None) => Q2 },
       
   268   { case (Q1, Some('a')) => Q1 },
       
   269   { case (Q2, Some('b')) => Q2 },
       
   270   { case (Q1, None) => Q0 }
       
   271 )
       
   272 
       
   273 val enfa2 = eNFA(Q0, etrans2, Set[State](Q2))
       
   274 
       
   275 enfa2.accepts("a".toList)              // true
       
   276 enfa2.accepts("".toList)               // true
       
   277 enfa2.accepts("aaaaa".toList)          // true
       
   278 enfa2.accepts("aaaaab".toList)         // true
       
   279 enfa2.accepts("aaaaabbb".toList)       // true
       
   280 enfa2.accepts("aaaaabbbaaa".toList)    // false
       
   281 enfa2.accepts("ac".toList)             // false
       
   282 
       
   283 
       
   284 // states for Thompson construction
       
   285 case class TState(i: Int) extends State
       
   286 
       
   287 object TState {
       
   288   var counter = 0
       
   289   
       
   290   def apply() : TState = {
       
   291     counter += 1;
       
   292     new TState(counter - 1)
       
   293   }
       
   294 }
       
   295 
       
   296 // eNFA that does not accept any string
       
   297 def eNFA_ZERO(): eNFA[TState, Char] = {
       
   298   val Q = TState()
       
   299   eNFA(Q, Set(), Set())
       
   300 }
       
   301 
       
   302 // eNFA that accepts the empty string
       
   303 def eNFA_ONE() : eNFA[TState, Char] = {
       
   304   val Q = TState()
       
   305   eNFA(Q, Set(), Set(Q))
       
   306 }
       
   307 
       
   308 // eNFA that accepts the string "c"
       
   309 def eNFA_CHAR(c: Char) : eNFA[TState, Char] = {
       
   310   val Q1 = TState()
       
   311   val Q2 = TState()
       
   312   eNFA(Q1, 
       
   313        Set({ case (Q1, Some(d)) if (c == d) => Q2 }),
       
   314        Set(Q2))
       
   315 }
       
   316 
       
   317 // alternative of two eNFAs
       
   318 def eNFA_ALT(enfa1: eNFA[TState, Char], enfa2: eNFA[TState, Char]) : eNFA[TState, Char] = {
       
   319   val Q = TState()
       
   320   eNFA(Q,
       
   321        enfa1.delta | enfa2.delta |
       
   322        Set({ case (Q, None) => enfa1.start },
       
   323            { case (Q, None) => enfa2.start }),
       
   324        q => enfa1.fins(q) || enfa2.fins(q))
       
   325 }
       
   326 
       
   327 // sequence of two eNFAs
       
   328 def eNFA_SEQ(enfa1: eNFA[TState, Char], enfa2: eNFA[TState, Char]) : eNFA[TState, Char] = {
       
   329   eNFA(enfa1.start,
       
   330        enfa1.delta | enfa2.delta | 
       
   331        Set({ case (q, None) if enfa1.fins(q) => enfa2.start }),
       
   332        enfa2.fins)
       
   333 }
       
   334 
       
   335 // star of an eNFA
       
   336 def eNFA_STAR(enfa: eNFA[TState, Char]) : eNFA[TState, Char] = {
       
   337   val Q = TState()
       
   338   eNFA(Q,
       
   339        enfa.delta |
       
   340        Set({ case (q, None) if enfa.fins(q) => Q },
       
   341            { case (Q, None) => enfa.start }),
       
   342        Set(Q))
       
   343 }
       
   344 
       
   345 /*
       
   346 def tofunset[A, C](d: (A, Option[C]) :=> Set[A])(q: A, c: C) : Set[(A, C) :=> A] = {
       
   347   d((q, Some(c))).map ((qs: A) => { case (qi, ci) if (qi == q && ci == c) => qs } : (A, C) :=> A)
       
   348 }
       
   349 
       
   350 
       
   351 def eNFA2NFA[A, C](starts: Set[A],                    // starting state
       
   352                    delta: Set[(A, Option[C]) :=> A],  // transition edges
       
   353                    fins:  A => Boolean) {             // final states 
       
   354 
       
   355   // epsilon transitions
       
   356   def enext(q: A) : Set[A] = 
       
   357     delta.flatMap(d => Try(d(q, None)).toOption)
       
   358 
       
   359   def enexts(qs: Set[A]) : Set[A] = 
       
   360     qs | qs.flatMap(enext(_))
       
   361 
       
   362   // epsilon closure
       
   363   def ecl(qs: Set[A]) : Set[A] = 
       
   364     fixpT(enexts, qs)
       
   365 
       
   366   
       
   367   // "normal" transition
       
   368   def next(q: A, c: C) : Set[A] = 
       
   369     delta.flatMap(d => Try(d(q, Some(c))).toOption)    
       
   370 
       
   371   def nexts(qs: Set[A], c: C) : Set[A] = 
       
   372     ecl(ecl(qs).flatMap(next(_, c)))
       
   373         
       
   374 
       
   375   def nfa_delta : Set[(A, C) :=> A] = delta.flatMap(d => tofunset(d))
       
   376 
       
   377   def nfa_starts = ecl(starts)
       
   378 
       
   379   def nfa_fins = (q: A) => ecl(Set[A](q)) exists fins
       
   380 
       
   381   NFA(nfa_starts, nfa_delta, nfa_fins)
       
   382 }
       
   383 
       
   384 */ 
   242 
   385 
   243 // Regular expressions fro derivative automata
   386 // Regular expressions fro derivative automata
   244 
   387 
   245 abstract class Rexp
   388 abstract class Rexp
   246 case object ZERO extends Rexp
   389 case object ZERO extends Rexp
   258 
   401 
   259 def get_chars(r: Rexp) : Set[Char] = r match {
   402 def get_chars(r: Rexp) : Set[Char] = r match {
   260   case ZERO => Set()
   403   case ZERO => Set()
   261   case ONE => Set()
   404   case ONE => Set()
   262   case CHAR(c) => Set(c)
   405   case CHAR(c) => Set(c)
   263   case ALT(r1, r2) => get_chars(r1) ++ get_chars(r2)
   406   case ALT(r1, r2) => get_chars(r1) | get_chars(r2)
   264   case SEQ(r1, r2) => get_chars(r1) ++ get_chars(r2)
   407   case SEQ(r1, r2) => get_chars(r1) | get_chars(r2)
   265   case STAR(r) => get_chars(r)
   408   case STAR(r) => get_chars(r)
   266   case NTIMES(r, _) => get_chars(r)
   409   case NTIMES(r, _) => get_chars(r)
   267   case UPNTIMES(r, _) => get_chars(r)
   410   case UPNTIMES(r, _) => get_chars(r)
   268   case ALL => ('a' to 'z').toSet ++ Set('*','/','\\')
   411   case ALL => ('a' to 'z').toSet | Set('*','/','\\')
   269   case NOT(r) => get_chars(r)
   412   case NOT(r) => get_chars(r)
   270   case AND(r1, r2) => get_chars(r1) ++ get_chars(r2)
   413   case AND(r1, r2) => get_chars(r1) | get_chars(r2)
   271 }
   414 }
   272 
   415 
   273 
   416 
   274 
   417 
   275 import scala.language.implicitConversions    
   418 import scala.language.implicitConversions    
   294   def % = STAR(s)
   437   def % = STAR(s)
   295   def ~ (r: Rexp) = SEQ(s, r)
   438   def ~ (r: Rexp) = SEQ(s, r)
   296   def ~ (r: String) = SEQ(s, r)
   439   def ~ (r: String) = SEQ(s, r)
   297 }
   440 }
   298 
   441 
       
   442 // thompson construction only for basic regular expressions
       
   443 def thompson (r: Rexp) : eNFA[TState, Char] = r match {
       
   444   case ZERO => eNFA_ZERO()
       
   445   case ONE => eNFA_ONE()
       
   446   case CHAR(c) => eNFA_CHAR(c)  
       
   447   case ALT(r1, r2) => eNFA_ALT(thompson(r1), thompson(r2))
       
   448   case SEQ(r1, r2) => eNFA_SEQ(thompson(r1), thompson(r2))
       
   449   case STAR(r1) => eNFA_STAR(thompson(r1))
       
   450 }
       
   451 
       
   452 // regular expression matcher using Thompson's
       
   453 def tmatcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s.toList)
       
   454 
       
   455 
       
   456 // test cases for thompson construction
       
   457 tmatcher(ZERO, "")   // false
       
   458 tmatcher(ZERO, "a")  // false
       
   459 
       
   460 tmatcher(ONE, "")    // true
       
   461 tmatcher(ONE, "a")   // false
       
   462 
       
   463 tmatcher(CHAR('a'), "")    // false
       
   464 tmatcher(CHAR('a'), "a")   // true
       
   465 tmatcher(CHAR('a'), "b")   // false
       
   466 
       
   467 tmatcher("a" | "b", "")    // false
       
   468 tmatcher("a" | "b", "a")   // true
       
   469 tmatcher("a" | "b", "b")   // true
       
   470 tmatcher("a" | "b", "c")   // false
       
   471 tmatcher("a" | "b", "ab")  // false
       
   472 
       
   473 tmatcher("a" ~ "b", "")    // false
       
   474 tmatcher("a" ~ "b", "a")   // false
       
   475 tmatcher("a" ~ "b", "b")   // false
       
   476 tmatcher("a" ~ "b", "c")   // false
       
   477 tmatcher("a" ~ "b", "ab")  // true
       
   478 tmatcher("a" ~ "b", "aba") // false
       
   479 
       
   480 tmatcher(EVIL2, "aaaaaab")   // true
       
   481 tmatcher(EVIL2, "aaaaaa")    // false
       
   482 tmatcher(EVIL2, "a" * 100)   // false
       
   483 
       
   484 
       
   485 //optional
       
   486 def OPT(r: Rexp) = ALT(r, ONE)
       
   487 
       
   488 //n-times
       
   489 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
       
   490   case 0 => ONE
       
   491   case 1 => r
       
   492   case n => SEQ(r, NTIMES(r, n - 1))
       
   493 }
       
   494 
       
   495 // evil regular exproession
       
   496 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
       
   497 
       
   498 
       
   499 val EVIL2 = STAR(STAR("a")) ~ "b"
       
   500 
       
   501 // helper function for recording time
       
   502 def time_needed[T](i: Int, code: => T) = {
       
   503   val start = System.nanoTime()
       
   504   for (j <- 1 to i) code
       
   505   val end = System.nanoTime()
       
   506   (end - start)/(i * 1.0e9)
       
   507 }
       
   508 
       
   509 
       
   510 
       
   511 // test harness for the matcher
       
   512 for (i <- 0 to 35 by 5) {
       
   513   println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i))))
       
   514 }
       
   515 
       
   516 
       
   517 
       
   518 // normalisation of regular expressions
       
   519 // for derivative automata
       
   520 
       
   521 case class ALTs(rs: Set[Rexp]) extends Rexp
       
   522 case class ANDs(rs: Set[Rexp]) extends Rexp
       
   523 case class SEQs(rs: List[Rexp]) extends Rexp 
       
   524 
       
   525 def normalise(r: Rexp) : Rexp = r match {
       
   526   case ALT(r1, r2) => (normalise(r1), normalise(r2)) match {
       
   527     case (ALTs(rs1), ALTs(rs2)) => ALTs(rs1 | rs2)
       
   528     case (r1s, ALTs(rs2)) => ALTs(rs2 + r1s)
       
   529     case (ALTs(rs1), r2s) => ALTs(rs1 + r2s)
       
   530     case (r1s, r2s) => ALTs(Set(r1s, r2s))
       
   531   }
       
   532   case AND(r1, r2) => (normalise(r1), normalise(r2)) match {
       
   533     case (ANDs(rs1), ANDs(rs2)) => ANDs(rs1 | rs2)
       
   534     case (r1s, ANDs(rs2)) => ANDs(rs2 + r1s)
       
   535     case (ANDs(rs1), r2s) => ANDs(rs1 + r2s)
       
   536     case (r1s, r2s) => ANDs(Set(r1s, r2s))
       
   537   }
       
   538   case SEQ(r1, r2) =>  (normalise(r1), normalise(r2)) match {
       
   539     case (SEQs(rs1), SEQs(rs2)) => SEQs(rs1 ++ rs2)
       
   540     case (r1s, SEQs(rs2)) => SEQs(r1s :: rs2)
       
   541     case (SEQs(rs1), r2s) => SEQs(rs1 ++ List(r2s))
       
   542     case (r1s, r2s) => SEQs(List(r1s, r2s))
       
   543   }
       
   544   case STAR(r) => STAR(normalise(r))
       
   545   case NTIMES(r, n) => NTIMES(normalise(r), n)
       
   546   case UPNTIMES(r, n) => UPNTIMES(normalise(r), n)
       
   547   case NOT(r) => NOT(normalise(r))
       
   548   case r => r
       
   549 }
       
   550 
       
   551 
       
   552 // simplification of regular expressions
       
   553 
   299 def simp(r: Rexp) : Rexp = r match {
   554 def simp(r: Rexp) : Rexp = r match {
   300   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
   555   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
   301     case (ZERO, r2s) => r2s
   556     case (ZERO, r2s) => r2s
   302     case (r1s, ZERO) => r1s
   557     case (r1s, ZERO) => r1s
   303     case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
   558     case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
   309     case (r1s, ONE) => r1s
   564     case (r1s, ONE) => r1s
   310     case (r1s, r2s) => SEQ(r1s, r2s)
   565     case (r1s, r2s) => SEQ(r1s, r2s)
   311   }
   566   }
   312   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   567   case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
   313   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
   568   case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
   314   /*
       
   315   case NOT(r) => NOT(simp(r))
   569   case NOT(r) => NOT(simp(r))
   316   case AND(r1, r2) => (simp(r1), simp(r2)) match {
   570   case AND(r1, r2) => (simp(r1), simp(r2)) match {
   317     case (ALL, r2s) => r2s
   571     case (ALL, r2s) => r2s
   318     case (r1s, ALL) => r1s
   572     case (r1s, ALL) => r1s
   319     case (r1s, r2s) => if (r1s == r2s) r1s else AND(r1s, r2s)  
   573     case (r1s, r2s) => if (r1s == r2s) r1s else AND(r1s, r2s)  
   320   }
   574   }
   321   */
       
   322   case r => r
   575   case r => r
   323 }
   576 }
   324 
   577 
   325 
   578 
   326 // nullable function: tests whether the regular 
   579 // nullable function: tests whether the regular 
   339   case NOT(r) => !nullable(r)
   592   case NOT(r) => !nullable(r)
   340   case AND(r1, r2) => nullable(r1) && nullable(r2)
   593   case AND(r1, r2) => nullable(r1) && nullable(r2)
   341 }
   594 }
   342 
   595 
   343 // derivative of a regular expression w.r.t. a character
   596 // derivative of a regular expression w.r.t. a character
       
   597 // they are not finite even for simple regular expressions
   344 def der(c: Char, r: Rexp) : Rexp = r match {
   598 def der(c: Char, r: Rexp) : Rexp = r match {
   345   case ZERO => ZERO
   599   case ZERO => ZERO
   346   case ONE => ZERO
   600   case ONE => ZERO
   347   case CHAR(d) => if (c == d) ONE else ZERO
   601   case CHAR(d) => if (c == d) ONE else ZERO
   348   case ALL => ALL
   602   case ALL => ALL
   362   case NOT(r) => NOT(der(c, r))
   616   case NOT(r) => NOT(der(c, r))
   363   case AND(r1, r2) => AND(der(c, r1), der(c, r2))
   617   case AND(r1, r2) => AND(der(c, r1), der(c, r2))
   364 }
   618 }
   365 
   619 
   366 
   620 
       
   621 // derivative based matcher
       
   622 def matcher(r: Rexp, s: List[Char]): Boolean = s match {
       
   623   case Nil => nullable(r)
       
   624   case c::cs => matcher(der(c, r), cs)
       
   625 } 
       
   626 
       
   627 
       
   628 
   367 // partial derivative of a regular expression w.r.t. a character
   629 // partial derivative of a regular expression w.r.t. a character
   368 // does not work for NOT and AND ... see below
   630 // does not work for NOT and AND ... see below
   369 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   631 def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
   370   case ZERO => Set()
   632   case ZERO => Set()
   371   case ONE => Set()
   633   case ONE => Set()
   372   case CHAR(d) => if (c == d) Set(ONE) else Set()
   634   case CHAR(d) => if (c == d) Set(ONE) else Set()
   373   case ALL => Set(ALL)
   635   case ALL => Set(ALL)
   374   case ALLPlus => Set(ALL)
   636   case ALLPlus => Set(ALL)
   375   case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2)
   637   case ALT(r1, r2) => pder(c, r1) | pder(c, r2)
   376   case SEQ(r1, r2) => 
   638   case SEQ(r1, r2) => 
   377     (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ 
   639     (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) | 
   378     (if (nullable(r1)) pder(c, r2) else Set())
   640     (if (nullable(r1)) pder(c, r2) else Set())
   379   case STAR(r1) => 
   641   case STAR(r1) => 
   380     for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1))
   642     for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1))
   381   case NTIMES(r1, i) => 
   643   case NTIMES(r1, i) => 
   382     if (i == 0) Set() else
   644     if (i == 0) Set() else
   388     if (i == 0) Set()
   650     if (i == 0) Set()
   389     else 
   651     else 
   390       for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
   652       for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
   391 }
   653 }
   392 
   654 
   393 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
   655 // partial derivative matcher (not for NOT and AND)
   394   rs.flatMap(pder(c, _))
       
   395 
       
   396 
       
   397 // matchers
       
   398 def matcher(r: Rexp, s: List[Char]): Boolean = s match {
       
   399   case Nil => nullable(r)
       
   400   case c::cs => matcher(der(c, r), cs)
       
   401 } 
       
   402 
       
   403 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
   656 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
   404   case Nil => rs.exists(nullable(_))
   657   case Nil => rs.exists(nullable(_))
   405   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
   658   case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
   406 } 
   659 } 
       
   660 
       
   661 
       
   662 // quick-and-dirty translation of a pder-automaton 
       
   663 // does not work for NOT and AND
       
   664 
       
   665 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
       
   666 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), 
       
   667                                     Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), 
       
   668                                     _.exists(nullable))
       
   669 
       
   670 
       
   671 
       
   672 pder_nfa.accepts("axaybzbc".toList)     // true
       
   673 pder_nfa.accepts("aaaaxaybzbc".toList)  // true
       
   674 pder_nfa.accepts("axaybzbd".toList)     // false
       
   675 
       
   676 
   407 
   677 
   408 
   678 
   409 // partial derivatives including NOT and AND according to
   679 // partial derivatives including NOT and AND according to
   410 // PhD-thesis: Manipulation of Extended Regular Expressions 
   680 // PhD-thesis: Manipulation of Extended Regular Expressions 
   411 // with Derivatives; these partial derivatives are also not
   681 // with Derivatives; these partial derivatives are also not
   424   case ZERO => Set()
   694   case ZERO => Set()
   425   case ONE => Set()
   695   case ONE => Set()
   426   case ALL => Set(ALL)
   696   case ALL => Set(ALL)
   427   case ALLPlus => Set(ALL)
   697   case ALLPlus => Set(ALL)
   428   case CHAR(d) => if (c == d) Set(ONE) else Set()
   698   case CHAR(d) => if (c == d) Set(ONE) else Set()
   429   case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2)
   699   case ALT(r1, r2) => pder2(c, r1) | pder2(c, r2)
   430   case SEQ(r1, r2) => {
   700   case SEQ(r1, r2) => {
   431     val prs1 = seq_compose(pder2(c, r1), r2)
   701     val prs1 = seq_compose(pder2(c, r1), r2)
   432     if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1
   702     if (nullable(r1)) (prs1 | pder2(c, r2)) else prs1
   433   }
   703   }
   434   case STAR(r1) => seq_compose(pder2(c, r1), STAR(r1))
   704   case STAR(r1) => seq_compose(pder2(c, r1), STAR(r1))
   435   case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2))
   705   case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2))
   436   case NOT(r1) => nder2(c, r1)
   706   case NOT(r1) => nder2(c, r1)
   437 }
   707 }
   445   case ALT(r1, r2) => and_compose(nder2(c, r1), nder2(c, r2))
   715   case ALT(r1, r2) => and_compose(nder2(c, r1), nder2(c, r2))
   446   case SEQ(r1, r2) => if (nullable(r1))
   716   case SEQ(r1, r2) => if (nullable(r1))
   447                       and_compose(not_compose(seq_compose(pder2(c, r1), r2)), nder2(c, r2))
   717                       and_compose(not_compose(seq_compose(pder2(c, r1), r2)), nder2(c, r2))
   448                       else not_compose(seq_compose(pder2(c, r1), r2))
   718                       else not_compose(seq_compose(pder2(c, r1), r2))
   449   case STAR(r1) => not_compose(pder2(c, STAR(r1)))
   719   case STAR(r1) => not_compose(pder2(c, STAR(r1)))
   450   case AND(r1, r2) => nder2(c, r1) ++ nder2(c, r2)
   720   case AND(r1, r2) => nder2(c, r1) | nder2(c, r2)
   451   case NOT(r1) => pder2(c, r1)
   721   case NOT(r1) => pder2(c, r1)
   452 }
   722 }
   453 
   723 
   454 
   724 
   455 def pmatcher2(rs: Set[Rexp], s: List[Char]): Boolean = s match {
   725 def pmatcher2(rs: Set[Rexp], s: List[Char]): Boolean = s match {
   462 val r_ex = AND("aa" | "a", AND(STAR("a"), NOT(STAR("a") ~ "a"))) 
   732 val r_ex = AND("aa" | "a", AND(STAR("a"), NOT(STAR("a") ~ "a"))) 
   463 nder2('a', r_ex).map(simp(_)).mkString("\n")
   733 nder2('a', r_ex).map(simp(_)).mkString("\n")
   464 
   734 
   465 
   735 
   466 
   736 
   467 
       
   468 
       
   469 // quick-and-dirty translation of a pder-automaton 
       
   470 
       
   471 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
       
   472 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), 
       
   473                                     Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), 
       
   474                                     _.exists(nullable))
       
   475 
       
   476 
       
   477 
       
   478 pder_nfa.accepts("axaybzbc".toList)     // true
       
   479 pder_nfa.accepts("aaaaxaybzbc".toList)  // true
       
   480 pder_nfa.accepts("axaybzbd".toList)     // false
       
   481 
   737 
   482 
   738 
   483 
   739 
   484 // Derivative and Partial Derivative Automaton construction
   740 // Derivative and Partial Derivative Automaton construction
   485 
   741 
   494 
   750 
   495 // Brzozoswki Derivative automaton construction ... simple
   751 // Brzozoswki Derivative automaton construction ... simple
   496 // version, might not terminate
   752 // version, might not terminate
   497 
   753 
   498 def goto(sigma: Set[Char], delta: MTrans, qs: DStates, q: DState, c: Char) : (DStates, MTrans) = {
   754 def goto(sigma: Set[Char], delta: MTrans, qs: DStates, q: DState, c: Char) : (DStates, MTrans) = {
   499   val qder = simp(der(c, q))  
   755   val qder = simp(der(c, q)) 
   500   qs.find(_ == qder) match {
   756   qs.find(normalise(_) == normalise(qder)) match {
   501     case Some(qexists) => (qs, delta + ((q, c) -> qexists))
   757     case Some(qexists) => (qs, delta + ((q, c) -> qexists))
   502     case None => explore(sigma, delta + ((q, c) -> qder), qs + qder, qder)
   758     case None => explore(sigma, delta + ((q, c) -> qder), qs + qder, qder)
   503   }
   759   }
   504 }
   760 }
   505  
   761  
   511   val sigma = get_chars(r)
   767   val sigma = get_chars(r)
   512   val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r)
   768   val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r)
   513   val fins = qs.filter(nullable(_))
   769   val fins = qs.filter(nullable(_))
   514   val deltaf : (Rexp, Char) :=> Rexp =  { case (q, c) => delta(q, c) }
   770   val deltaf : (Rexp, Char) :=> Rexp =  { case (q, c) => delta(q, c) }
   515   println(s"DFA size: ${qs.size}")
   771   println(s"DFA size: ${qs.size}")
   516   println(s"""DFA states\n${qs.mkString("\n")}""")
   772   //println(s"""DFA states\n${qs.mkString("\n")}""")
   517   DFA(r, deltaf, fins)
   773   DFA(r, deltaf, fins)
   518 }
   774 }
   519 
   775 
   520 val re = "ab" | "ac"
   776 val re = "ab" | "ac"
   521 val d1 = mkDFA(re)
   777 val d1 = mkDFA(re)
   528 
   784 
   529 d2.accepts("axaybzbc".toList)     // true
   785 d2.accepts("axaybzbc".toList)     // true
   530 d2.accepts("aaaaxaybzbc".toList)  // true
   786 d2.accepts("aaaaxaybzbc".toList)  // true
   531 d2.accepts("axaybzbd".toList)     // false
   787 d2.accepts("axaybzbd".toList)     // false
   532 
   788 
   533 for (n <- (1 to 10).toList) 
   789 
   534   mkDFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc")
   790 //for (n <- (1 to 10).toList) 
   535 
   791 //  mkDFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc")
   536 
   792 
   537 // this is an example where mkDFA does not terminate
   793 
       
   794 // this is an example where mkDFA without normalisation
       
   795 // does not terminate
   538 val big_aux = STAR("a" | "b")
   796 val big_aux = STAR("a" | "b")
   539 val big = SEQ(big_aux, SEQ("a",SEQ("b", big_aux)))
   797 val big = SEQ(big_aux, SEQ("a", SEQ("b", big_aux)))
   540 
   798 
   541 //mkDFA(big)   // does not terminate
   799 mkDFA(big)   // does not terminate without normalisation
   542 
   800 
   543 
   801 
   544 
   802 
   545 // Antimirov Partial derivative automata construction ... definitely 
   803 // Antimirov Partial derivative automata construction ... definitely 
   546 // terminates but does not work for extensions of NOT and AND
   804 // terminates but does not work for extensions of NOT and AND
   547 //
   805 //
   548 // for this we have to use the extended partial derivatives
   806 // for this we have to use the extended partial derivatives
   549 // pder2/nder2
   807 // pder2/nder2...but termination is also not guaranteed
   550 
   808 
   551 
   809 
   552 // to transform (concrete) Maps into functions
   810 // to transform (concrete) Maps into functions
   553 def toFun(m: MTrans) : Trans = 
   811 def toFun(m: MTrans) : Trans = 
   554   { case (q, c) => m(q, c) }
   812   { case (q, c) => m(q, c) }
   555 
   813 
   556 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = {
   814 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = {
   557   val qders = pder2(c, q).map(simp(_))  // set of simplified partial derivatives
   815   val qders = pder(c, q).map(simp(_))  // set of simplified partial derivatives
   558   qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) }
   816   qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) }
   559 }
   817 }
   560 
   818 
   561 def padd(sigma: Set[Char], delta: STrans, qs: DStates, 
   819 def padd(sigma: Set[Char], delta: STrans, qs: DStates, 
   562          q: DState, qnew: DState, c: Char) : (DStates, STrans) = {
   820          q: DState, qnew: DState, c: Char) : (DStates, STrans) = {
   572 def mkNFA(r: Rexp) : NFA[Rexp, Char]= {
   830 def mkNFA(r: Rexp) : NFA[Rexp, Char]= {
   573   val sigma = get_chars(r)
   831   val sigma = get_chars(r)
   574   val (qs, delta) = pexplore(sigma, Set(), Set(r), r)
   832   val (qs, delta) = pexplore(sigma, Set(), Set(r), r)
   575   val fins = qs.filter(nullable(_))
   833   val fins = qs.filter(nullable(_))
   576   val deltaf = delta.map(toFun(_))
   834   val deltaf = delta.map(toFun(_))
   577   println(s"NFA size: ${qs.size}")
   835   //println(s"NFA size: ${qs.size}")
   578   println(s"""NFA states\n${qs.mkString("\n")}""")
   836   //println(s"""NFA states\n${qs.mkString("\n")}""")
       
   837   //println(s"""NFA transitions\n${delta.mkString("\n")} """)
   579   NFA(Set(r), deltaf, fins)
   838   NFA(Set(r), deltaf, fins)
   580 }
   839 }
   581 
   840 
   582 
   841 
   583 // simple example from Scott's paper
   842 // simple example from Scott's paper
   596 val t2 = pder('b', r_test).map(simp(_))
   855 val t2 = pder('b', r_test).map(simp(_))
   597 
   856 
   598 mkNFA(r_test) // size = 3
   857 mkNFA(r_test) // size = 3
   599 
   858 
   600 
   859 
   601 // simple example involving double star 
   860 // helper function for recording time
       
   861 def time_needed[T](i: Int, code: => T) = {
       
   862   val start = System.nanoTime()
       
   863   for (j <- 1 to i) code
       
   864   val end = System.nanoTime()
       
   865   (end - start)/(i * 1.0e9)
       
   866 }
       
   867 
       
   868 // simple example involving double star (a*)* b
   602 // with depth-first search causes catastrophic backtracing
   869 // with depth-first search causes catastrophic backtracing
   603 
   870 
   604 val n2 = mkNFA(STAR(STAR("a")) ~ "b")  // size 3
   871 val n2 = mkNFA(EVIL2)  // size 3
   605 
   872 
   606 n2.accepts("aaaaaab".toList)   // true
   873 n2.accepts("aaaaaab".toList)   // true
   607 n2.accepts("aaaaaa".toList)    // false
   874 n2.accepts("aaaaaa".toList)    // false
   608 n2.accepts(("a" * 100).toList) // false
   875 n2.accepts(("a" * 100).toList) // false
   609 
   876 
       
   877 n2.accepts2("aaaaaab".toList)   // true
       
   878 n2.accepts2("aaaaaa".toList)    // false
       
   879 n2.accepts2(("a" * 100).toList) // false
       
   880 
       
   881 time_needed(2, n2.accepts("aaaaaab".toList)) 
       
   882 time_needed(2, n2.accepts("aaaaaa".toList))   
       
   883 time_needed(2, n2.accepts(("a" * 2000).toList))
       
   884 
       
   885 time_needed(2, n2.accepts2("aaaaaab".toList)) 
       
   886 time_needed(2, n2.accepts2("aaaaaa".toList))  
       
   887 time_needed(2, n2.accepts2(("a" * 2000).toList))
       
   888 
       
   889 
       
   890 // other evil regular expression
       
   891 
       
   892 for (i <- 0 to 100 by 5) {
       
   893   println(i + ": " + "%.5f".format(time_needed(1, mkNFA(EVIL(i)).accepts(("a" * i).toList))))
       
   894 }
       
   895 
   610 
   896 
   611 // example involving not
   897 // example involving not
   612 
   898 
   613 val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/"
   899 val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/"
   614 val nnot = mkNFA(rnot)  // size 7
   900 
   615 
   901 
   616 nnot.accepts("/**/".toList)        // true
   902 val dfa_not = mkDFA(rnot)  // size 10
   617 nnot.accepts("/*aaabaa*/".toList)  // true
   903 
   618 nnot.accepts("/*/**/".toList)      // true
   904 dfa_not.accepts("/**/".toList)        // true
   619 nnot.accepts("/*aaa*/aa*/".toList) // false  ????
   905 dfa_not.accepts("/*aaabaa*/".toList)  // true
   620 nnot.accepts("/aaa*/aa*/".toList) // false
   906 dfa_not.accepts("/*/**/".toList)      // true
   621 
   907 dfa_not.accepts("/*aaa*/aa*/".toList) // false 
       
   908 dfa_not.accepts("/aaa*/aa*/".toList)  // false
       
   909 
       
   910 
       
   911 /* nfa construction according to pder does not work for NOT and AND;
       
   912  * nfa construction according to pder2/nder2 does not necesarily terminate
       
   913  
       
   914 val nfa_not = mkNFA(rnot)  // does not termninate
       
   915 
       
   916 nfa_not.accepts("/**/".toList)        // true
       
   917 nfa_not.accepts("/*aaabaa*/".toList)  // true
       
   918 nfa_not.accepts("/*/**/".toList)      // true
       
   919 nfa_not.accepts("/*aaa*/aa*/".toList) // false  ????
       
   920 nfa_not.accepts("/aaa*/aa*/".toList) // false
       
   921 */
       
   922 
       
   923 // derivative matcher
   622 matcher(rnot, "/**/".toList)        // true
   924 matcher(rnot, "/**/".toList)        // true
   623 matcher(rnot, "/*aaabaa*/".toList)  // true
   925 matcher(rnot, "/*aaabaa*/".toList)  // true
   624 matcher(rnot, "/*/**/".toList)      // true
   926 matcher(rnot, "/*/**/".toList)      // true
   625 matcher(rnot, "/*aaa*/aa*/".toList) // false
   927 matcher(rnot, "/*aaa*/aa*/".toList) // false
   626 matcher(rnot, "/aaa*/aa*/".toList)  // false
   928 matcher(rnot, "/aaa*/aa*/".toList)  // false
   627 
   929 
       
   930 // pder2/nder2 partial derivative matcher
   628 pmatcher2(Set(rnot), "/**/".toList)        // true
   931 pmatcher2(Set(rnot), "/**/".toList)        // true
   629 pmatcher2(Set(rnot), "/*aaabaa*/".toList)  // true
   932 pmatcher2(Set(rnot), "/*aaabaa*/".toList)  // true
   630 pmatcher2(Set(rnot), "/*/**/".toList)      // true
   933 pmatcher2(Set(rnot), "/*/**/".toList)      // true
   631 pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false
   934 pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false
   632 pmatcher2(Set(rnot), "/aaa*/aa*/".toList)  // false
   935 pmatcher2(Set(rnot), "/aaa*/aa*/".toList)  // false