progs/scala/nfas.scala
changeset 243 09ab631ce7fa
parent 242 dcfc9b23b263
equal deleted inserted replaced
242:dcfc9b23b263 243:09ab631ce7fa
       
     1 // NFAs based on Scala's partial functions (returning
       
     2 // sets of states)
       
     3 
       
     4 
       
     5 import scala.util.Try
       
     6 
       
     7 
       
     8 // type abbreviation for partial functions
       
     9 type :=>[A, B] = PartialFunction[A, B]
       
    10 
       
    11 
       
    12 // some states for test cases 
       
    13 abstract class State
       
    14 case object Q0 extends State
       
    15 case object Q1 extends State
       
    16 case object Q2 extends State
       
    17 case object Q3 extends State
       
    18 case object Q4 extends State
       
    19 case object Q5 extends State
       
    20 case object Q6 extends State
       
    21 
       
    22 
       
    23 // return empty set when not defined
       
    24 def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] =
       
    25   Try(f(x)) getOrElse Set[B]()
       
    26 
       
    27 
       
    28 
       
    29 // class for NFAs
       
    30 case class NFA[A, C](starts: Set[A],            // starting states
       
    31                      delta: (A, C) :=> Set[A],  // transitions
       
    32                      fins:  A => Boolean) {     // final states 
       
    33 
       
    34   // given a state and a character, what is the set of next states?
       
    35   // if there is none => empty set
       
    36   def next(q: A, c: C) : Set[A] = 
       
    37     applyOrElse(delta, (q, c))
       
    38 
       
    39   def nexts(qs: Set[A], c: C) : Set[A] =
       
    40     qs.flatMap(next(_, c))
       
    41 
       
    42   // given some states and a string, what is the set of next states?
       
    43   def deltas(qs: Set[A], s: List[C]) : Set[A] = s match {
       
    44     case Nil => qs
       
    45     case c::cs => deltas(nexts(qs, c), cs)
       
    46   }
       
    47 
       
    48   // is a string accepted by an NFA?
       
    49   def accepts(s: List[C]) : Boolean = 
       
    50     deltas(starts, s).exists(fins)
       
    51 
       
    52   // depth-first search version of accept
       
    53   def search(q: A, s: List[C]) : Boolean = s match {
       
    54     case Nil => fins(q)
       
    55     case c::cs => next(q, c).exists(search(_, cs)) 
       
    56   }
       
    57 
       
    58   def accepts2(s: List[C]) : Boolean = 
       
    59     starts.exists(search(_, s))
       
    60 
       
    61 }
       
    62 
       
    63 
       
    64 // NFA test cases
       
    65 
       
    66 val trans2 : (State, Char) :=> Set[State] = 
       
    67  { case (Q0, 'a') => Set(Q0, Q1)
       
    68    case (Q0, 'b') => Set(Q2)
       
    69    case (Q1, 'a') => Set(Q1)
       
    70    case (Q2, 'b') => Set(Q2)
       
    71  }
       
    72 
       
    73 val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2))
       
    74 
       
    75 nfa2.accepts("aa".toList)             // false
       
    76 nfa2.accepts("aaaaa".toList)          // false
       
    77 nfa2.accepts("aaaaab".toList)         // true
       
    78 nfa2.accepts("aaaaabbb".toList)       // true
       
    79 nfa2.accepts("aaaaabbbaaa".toList)    // false
       
    80 nfa2.accepts("ac".toList)             // false
       
    81 
       
    82 nfa2.accepts2("aa".toList)             // false
       
    83 nfa2.accepts2("aaaaa".toList)          // false
       
    84 nfa2.accepts2("aaaaab".toList)         // true
       
    85 nfa2.accepts2("aaaaabbb".toList)       // true
       
    86 nfa2.accepts2("aaaaabbbaaa".toList)    // false
       
    87 nfa2.accepts2("ac".toList)             // false
       
    88 
       
    89 
       
    90 
       
    91 
       
    92 // epsilon NFAs
       
    93 // (not explicitly defined, but immediately translated into NFAs)
       
    94 
       
    95 // fixpoint construction
       
    96 import scala.annotation.tailrec
       
    97 @tailrec
       
    98 def fixpT[A](f: A => A, x: A): A = {
       
    99   val fx = f(x)
       
   100   if (fx == x) x else fixpT(f, fx) 
       
   101 }
       
   102 
       
   103 // translates eNFAs directly into NFAs 
       
   104 def eNFA[A, C](starts: Set[A], 
       
   105 	       delta: (A, Option[C]) :=> Set[A], 
       
   106 	       fins: A => Boolean) : NFA[A, C] = { 
       
   107 
       
   108   // epsilon transitions
       
   109   def enext(q: A) : Set[A] = 
       
   110     applyOrElse(delta, (q, None))
       
   111 
       
   112   def enexts(qs: Set[A]) : Set[A] = 
       
   113     qs | qs.flatMap(enext(_))
       
   114 
       
   115   // epsilon closure
       
   116   def ecl(qs: Set[A]) : Set[A] = 
       
   117     fixpT(enexts, qs)
       
   118 
       
   119   // "normal" transitions
       
   120   def next(q: A, c: C) : Set[A] = 
       
   121     applyOrElse(delta, (q, Some(c)))
       
   122 
       
   123   def nexts(qs: Set[A], c: C) : Set[A] = 
       
   124     ecl(ecl(qs).flatMap(next(_, c)))
       
   125 
       
   126   NFA(ecl(starts), 
       
   127       { case (q, c) => nexts(Set(q), c) }, 
       
   128       q => ecl(Set(q)) exists fins)
       
   129 }
       
   130 
       
   131 
       
   132 
       
   133 
       
   134 
       
   135 // test cases for eNFAs
       
   136 val etrans1 : (State, Option[Char]) :=> Set[State] =
       
   137   { case (Q0, Some('a')) => Set(Q1)
       
   138     case (Q1, None) => Set(Q0)
       
   139   }
       
   140 
       
   141 val enfa1 = eNFA(Set[State](Q0), etrans1, Set[State](Q1))
       
   142 
       
   143 enfa1.accepts("a".toList)              // true
       
   144 enfa1.accepts("".toList)               // false
       
   145 enfa1.accepts("aaaaa".toList)          // true
       
   146 enfa1.accepts("aaaaab".toList)         // false
       
   147 enfa1.accepts("aaaaabbb".toList)       // false
       
   148 enfa1.accepts("aaaaabbbaaa".toList)    // false
       
   149 enfa1.accepts("ac".toList)             // false
       
   150 
       
   151 // example from handouts 
       
   152 val etrans2 : (State, Option[Char]) :=> Set[State] = 
       
   153   { case (Q0, Some('a')) => Set(Q0)
       
   154     case (Q0, None) => Set(Q1, Q2)
       
   155     case (Q1, Some('a')) => Set(Q1)
       
   156     case (Q2, Some('b')) => Set(Q2)
       
   157     case (Q1, None) => Set(Q0)
       
   158   }
       
   159 
       
   160 val enfa2 = eNFA(Set[State](Q0), etrans2, Set[State](Q2))
       
   161 
       
   162 enfa2.accepts("a".toList)              // true
       
   163 enfa2.accepts("".toList)               // true
       
   164 enfa2.accepts("aaaaa".toList)          // true
       
   165 enfa2.accepts("aaaaab".toList)         // true
       
   166 enfa2.accepts("aaaaabbb".toList)       // true
       
   167 enfa2.accepts("aaaaabbbaaa".toList)    // false
       
   168 enfa2.accepts("ac".toList)             // false
       
   169 
       
   170 
       
   171 // states for Thompson construction
       
   172 case class TState(i: Int) extends State
       
   173 
       
   174 object TState {
       
   175   var counter = 0
       
   176   
       
   177   def apply() : TState = {
       
   178     counter += 1;
       
   179     new TState(counter - 1)
       
   180   }
       
   181 }
       
   182 
       
   183 // some types abbreviations
       
   184 type NFAt = NFA[TState, Char]
       
   185 type NFAtrans = (TState, Char) :=> Set[TState]
       
   186 type eNFAtrans = (TState, Option[Char]) :=> Set[TState]
       
   187 
       
   188 
       
   189 // for composing an eNFA transition with a NFA transition
       
   190 implicit class RichPF(val f: eNFAtrans) extends AnyVal {
       
   191   def +++(g: NFAtrans) : eNFAtrans = 
       
   192   { case (q, None) =>  applyOrElse(f, (q, None)) 
       
   193     case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c))  }
       
   194 }
       
   195 
       
   196 
       
   197 // NFA that does not accept any string
       
   198 def NFA_ZERO(): NFAt = {
       
   199   val Q = TState()
       
   200   NFA(Set(Q), { case _ => Set() }, Set())
       
   201 }
       
   202 
       
   203 // NFA that accepts the empty string
       
   204 def NFA_ONE() : NFAt = {
       
   205   val Q = TState()
       
   206   NFA(Set(Q), { case _ => Set() }, Set(Q))
       
   207 }
       
   208 
       
   209 // NFA that accepts the string "c"
       
   210 def NFA_CHAR(c: Char) : NFAt = {
       
   211   val Q1 = TState()
       
   212   val Q2 = TState()
       
   213   NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2))
       
   214 }
       
   215 
       
   216 // sequence of two NFAs
       
   217 def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = {
       
   218   val new_delta : eNFAtrans = 
       
   219     { case (q, None) if enfa1.fins(q) => enfa2.starts }
       
   220   
       
   221   eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta, 
       
   222        enfa2.fins)
       
   223 }
       
   224 
       
   225 // alternative of two NFAs
       
   226 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = {
       
   227   val new_delta : NFAtrans = { 
       
   228     case (q, c) =>  applyOrElse(enfa1.delta, (q, c)) | 
       
   229                     applyOrElse(enfa2.delta, (q, c)) }
       
   230   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)
       
   231 
       
   232   NFA(enfa1.starts | enfa2.starts, new_delta, new_fins)
       
   233 }
       
   234 
       
   235 // star of a NFA
       
   236 def NFA_STAR(enfa: NFAt) : NFAt = {
       
   237   val Q = TState()
       
   238   val new_delta : eNFAtrans = 
       
   239     { case (Q, None) => enfa.starts
       
   240       case (q, None) if enfa.fins(q) => Set(Q) }
       
   241 
       
   242   eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q))
       
   243 }
       
   244 
       
   245 
       
   246 // Regular expressions fro derivative automata
       
   247 
       
   248 abstract class Rexp
       
   249 case object ZERO extends Rexp
       
   250 case object ONE extends Rexp
       
   251 case class CHAR(c: Char) extends Rexp 
       
   252 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
       
   253 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
   254 case class STAR(r: Rexp) extends Rexp 
       
   255 
       
   256 import scala.language.implicitConversions    
       
   257 import scala.language.reflectiveCalls 
       
   258 
       
   259 def charlist2rexp(s: List[Char]): Rexp = s match {
       
   260   case Nil => ONE
       
   261   case c::Nil => CHAR(c)
       
   262   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
   263 }
       
   264 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
       
   265 
       
   266 implicit def RexpOps (r: Rexp) = new {
       
   267   def | (s: Rexp) = ALT(r, s)
       
   268   def % = STAR(r)
       
   269   def ~ (s: Rexp) = SEQ(r, s)
       
   270 }
       
   271 
       
   272 implicit def stringOps (s: String) = new {
       
   273   def | (r: Rexp) = ALT(s, r)
       
   274   def | (r: String) = ALT(s, r)
       
   275   def % = STAR(s)
       
   276   def ~ (r: Rexp) = SEQ(s, r)
       
   277   def ~ (r: String) = SEQ(s, r)
       
   278 }
       
   279 
       
   280 //optional
       
   281 def OPT(r: Rexp) = ALT(r, ONE)
       
   282 
       
   283 //n-times
       
   284 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
       
   285   case 0 => ONE
       
   286   case 1 => r
       
   287   case n => SEQ(r, NTIMES(r, n - 1))
       
   288 }
       
   289 
       
   290 // evil regular exproession
       
   291 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
       
   292 
       
   293 
       
   294 val EVIL2 = STAR(STAR("a")) ~ "b"
       
   295 
       
   296 // thompson construction 
       
   297 def thompson (r: Rexp) : NFAt = r match {
       
   298   case ZERO => NFA_ZERO()
       
   299   case ONE => NFA_ONE()
       
   300   case CHAR(c) => NFA_CHAR(c)  
       
   301   case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
       
   302   case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
       
   303   case STAR(r1) => NFA_STAR(thompson(r1))
       
   304 }
       
   305 
       
   306 // regular expression matcher using Thompson's
       
   307 def tmatcher(r: Rexp, s: String) : Boolean = 
       
   308   thompson(r).accepts(s.toList)
       
   309 
       
   310 def tmatcher2(r: Rexp, s: String) : Boolean = 
       
   311   thompson(r).accepts2(s.toList)
       
   312 
       
   313 // test cases for thompson construction
       
   314 tmatcher(ZERO, "")   // false
       
   315 tmatcher(ZERO, "a")  // false
       
   316 
       
   317 tmatcher(ONE, "")    // true
       
   318 tmatcher(ONE, "a")   // false
       
   319 
       
   320 tmatcher(CHAR('a'), "")    // false
       
   321 tmatcher(CHAR('a'), "a")   // true
       
   322 tmatcher(CHAR('a'), "b")   // false
       
   323 
       
   324 tmatcher("a" | "b", "")    // false
       
   325 tmatcher("a" | "b", "a")   // true
       
   326 tmatcher("a" | "b", "b")   // true
       
   327 tmatcher("a" | "b", "c")   // false
       
   328 tmatcher("a" | "b", "ab")  // false
       
   329 
       
   330 tmatcher("a" ~ "b", "")    // false
       
   331 tmatcher("a" ~ "b", "a")   // false
       
   332 tmatcher("a" ~ "b", "b")   // false
       
   333 tmatcher("a" ~ "b", "c")   // false
       
   334 tmatcher("a" ~ "b", "ab")  // true
       
   335 tmatcher("a" ~ "b", "aba") // false
       
   336 
       
   337 tmatcher(STAR("a"), "")      // true
       
   338 tmatcher(STAR("a"), "a")     // true
       
   339 tmatcher(STAR("a"), "aaaaa") // true
       
   340 tmatcher(STAR("a"), "b")     // false
       
   341 tmatcher(STAR("a"), "aaab")  // false
       
   342 
       
   343 tmatcher(STAR(STAR("a")), "")      // true
       
   344 tmatcher(STAR(STAR("a")), "a")     // true
       
   345 tmatcher(STAR(STAR("a")), "aaaaa") // true
       
   346 tmatcher(STAR(STAR("a")), "b")     // false
       
   347 tmatcher(STAR(STAR("a")), "aaab")  // false
       
   348 
       
   349 tmatcher(EVIL2, "aaaaaab")   // true
       
   350 tmatcher(EVIL2, "aaaaaa")    // false
       
   351 tmatcher(EVIL2, "a" * 100)   // false
       
   352 
       
   353 // helper function for recording time
       
   354 def time_needed[T](i: Int, code: => T) = {
       
   355   val start = System.nanoTime()
       
   356   for (j <- 1 to i) code
       
   357   val end = System.nanoTime()
       
   358   (end - start)/(i * 1.0e9)
       
   359 }
       
   360 
       
   361 
       
   362 
       
   363 // test harness for the matcher
       
   364 for (i <- 0 to 9) {
       
   365   println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i))))
       
   366 }
       
   367 
       
   368 for (i <- 0 to 7) {
       
   369   println(i + ": " + "%.5f".format(time_needed(1, tmatcher2(EVIL(i), "a" * i))))
       
   370 }
       
   371 
       
   372 for (i <- 0 to 100 by 5) {
       
   373   println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL2, "a" * i))))
       
   374 }
       
   375 
       
   376 
       
   377 for (i <- 0 to 8) {
       
   378   println(i + ": " + "%.5f".format(time_needed(1, tmatcher2(EVIL2, "a" * i))))
       
   379 }