progs/nfa.scala
changeset 504 3390e863d796
parent 491 7a0182c66403
child 586 9cb8dfcb7f30
--- a/progs/nfa.scala	Tue Sep 26 14:07:29 2017 +0100
+++ b/progs/nfa.scala	Tue Sep 26 14:08:49 2017 +0100
@@ -1,242 +1,91 @@
-// NFAs and Thompson's construction 
-
-// helper function for recording time
-def time_needed[T](i: Int, code: => T) = {
-  val start = System.nanoTime()
-  for (j <- 1 to i) code
-  val end = System.nanoTime()
-  (end - start)/(i * 1.0e9)
-}
-
-
-// state nodes
-abstract class State
-type States = Set[State]
-
-case class IntState(i: Int) extends State
-
-object NewState {
-  var counter = 0
-  
-  def apply() : IntState = {
-    counter += 1;
-    new IntState(counter - 1)
-  }
-}
+// NFAs in Scala using partial functions (returning
+// sets of states)
+//
+// needs :load dfa.scala   for states
 
 
-case class NFA(states: States, 
-               start: State, 
-               delta: (State, Char) => States, 
-               eps: State => States,
-               fins: States) {
-  
-  def epsclosure(qs: States) : States = {
-    val ps = qs ++ qs.flatMap(eps(_))
-    if (qs == ps) ps else epsclosure(ps)
-  }
-
-  def deltas(qs: States, s: List[Char]) : States = s match {
-    case Nil => epsclosure(qs)
-    case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)
-  }
-
-  def accepts(s: String) : Boolean = 
-    deltas(Set(start), s.toList) exists (fins contains (_))
-}
+// type abbreviation for partial functions
+type :=>[A, B] = PartialFunction[A, B]
 
-// A small example NFA from the lectures 
-val Q0 = NewState()
-val Q1 = NewState()
-val Q2 = NewState()
-
-val delta : (State, Char) => States = {
-  case (Q0, 'a') => Set(Q0)
-  case (Q1, 'a') => Set(Q1)
-  case (Q2, 'b') => Set(Q2)
-  case (_, _) => Set ()
-}
-
-val eps : State => States = {
-  case Q0 => Set(Q1, Q2)
-  case _ => Set()
-}
-
-val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))
-
-NFA1.accepts("aa")
-NFA1.accepts("aaaaa")
-NFA1.accepts("aaaaabbb")
-NFA1.accepts("aaaaabbbaaa")
-NFA1.accepts("ac")
+// return an empty set when not defined
+def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] =
+  Try(f(x)) getOrElse Set[B]()
 
 
-// explicit construction of some NFAs; used in
-// Thompson's construction
-
-// NFA that does not accept any string
-def NFA_NULL() : NFA = {
-  val Q = NewState()
-  NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
-}
-
-// NFA that accepts the empty string
-def NFA_EMPTY() : NFA = {
-  val Q = NewState()
-  NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
-}
-
-// NFA that accepts the string "c"
-def NFA_CHAR(c: Char) : NFA = {
-  val Q1 = NewState()
-  val Q2 = NewState()
-  NFA(Set(Q1, Q2), 
-      Q1, 
-      { case (Q1, d) if (c == d) => Set(Q2)
-        case (_, _) => Set() },
-      { case _ => Set() },
-      Set(Q2))
-}
-
-// alternative of two NFAs
-def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
-  val Q = NewState()
-  NFA(Set(Q) ++ nfa1.states ++ nfa2.states,
-      Q,
-      { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
-        case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
-        case (_, _) => Set() },
-      { case Q => Set(nfa1.start, nfa2.start)
-        case q if (nfa1.states contains q) => nfa1.eps(q)
-        case q if (nfa2.states contains q) => nfa2.eps(q)
-        case _ => Set() },
-      nfa1.fins ++ nfa2.fins)
-}
-
-// sequence of two NFAs
-def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
-  NFA(nfa1.states ++ nfa2.states,
-      nfa1.start,
-      { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
-        case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
-        case (_, _) => Set() },
-      { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
-        case q if (nfa1.states contains q) => nfa1.eps(q)
-        case q if (nfa2.states contains q) => nfa2.eps(q)
-        case _ => Set() },
-      nfa2.fins)
-}
-
-// star of an NFA
-def NFA_STAR(nfa: NFA) : NFA = {
-  val Q = NewState()
-  NFA(Set(Q) ++ nfa.states, 
-      Q,
-      nfa.delta,
-      { case Q => Set(nfa.start)
-        case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)
-        case q if (nfa.states contains q) => nfa.eps(q)
-        case _ => Set() },
-      Set(Q))
-}
-
-
-// regular expressions used for Thompson's construction
-abstract class Rexp
+// NFAs
+case class NFA[A, C](starts: Set[A],           // starting states
+                     delta: (A, C) :=> Set[A], // transition function
+                     fins:  A => Boolean) {    // final states 
 
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp 
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
-case class STAR(r: Rexp) extends Rexp
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
-  case Nil => EMPTY
-  case c::Nil => CHAR(c)
-  case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-
-def thompson (r: Rexp) : NFA = r match {
-  case NULL => NFA_NULL()
-  case EMPTY => NFA_EMPTY()
-  case CHAR(c) => NFA_CHAR(c)  
-  case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
-  case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
-  case STAR(r1) => NFA_STAR(thompson(r1))
-}
-
-// some examples for Thompson's
-val A = thompson(CHAR('a'))
-
-println(A.accepts("a"))
-println(A.accepts("c"))
-println(A.accepts("aa"))
-
-val B = thompson(ALT("ab","ac"))
-
-println(B.accepts("ab"))
-println(B.accepts("ac"))
-println(B.accepts("bb"))
-println(B.accepts("aa"))
+  // given a state and a character, what is the set of 
+  // next states? if there is none => empty set
+  def next(q: A, c: C) : Set[A] = 
+    applyOrElse(delta, (q, c))
 
-val C = thompson(STAR("ab"))
-
-println(C.accepts(""))
-println(C.accepts("a"))
-println(C.accepts("ababab"))
-println(C.accepts("ab"))
-println(C.accepts("ac"))
-println(C.accepts("bb"))
-println(C.accepts("aa"))
-
-// regular expression matcher using Thompson's
-def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)
-
-
-//optional
-def OPT(r: Rexp) = ALT(r, EMPTY)
+  def nexts(qs: Set[A], c: C) : Set[A] =
+    qs.flatMap(next(_, c))
 
-//n-times
-def NTIMES(r: Rexp, n: Int) : Rexp = n match {
-  case 0 => EMPTY
-  case 1 => r
-  case n => SEQ(r, NTIMES(r, n - 1))
-}
-
-// evil regular exproession
-def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
-
-// test harness for the matcher
-for (i <- 0 to 100 by 5) {
-  println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
-}
-
-
-// regular expression matching via search and backtracking
-def accepts2(nfa: NFA, s: String) : Boolean = {
-
-  def search(q: State, s: List[Char]) : Boolean = s match {
-    case Nil => nfa.fins contains (q)
-    case c::cs => 
-      (nfa.delta(q, c) exists (search(_, cs))) || 
-      (nfa.eps(q) exists (search(_, c::cs)))
+  // given some states and a string, what is the set 
+  // of next states?
+  def deltas(qs: Set[A], s: List[C]) : Set[A] = s match {
+    case Nil => qs
+    case c::cs => deltas(nexts(qs, c), cs)
   }
 
-  search(nfa.start, s.toList)
-}
+  // is a string accepted by an NFA?
+  def accepts(s: List[C]) : Boolean = 
+    deltas(starts, s).exists(fins)
 
-def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)
+  // depth-first version of accepts
+  def search(q: A, s: List[C]) : Boolean = s match {
+    case Nil => fins(q)
+    case c::cs => next(q, c).exists(search(_, cs))
+  }
 
-// test harness for the backtracking matcher
-for (i <- 0 to 20 by 1) {
-  println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))
+  def accepts2(s: List[C]) : Boolean =
+    starts.exists(search(_, s))
 }
 
 
 
+// NFA examples
 
+val nfa_trans1 : (State, Char) :=> Set[State] = 
+  { case (Q0, 'a') => Set(Q0, Q1) 
+    case (Q0, 'b') => Set(Q2) 
+    case (Q1, 'a') => Set(Q1) 
+    case (Q2, 'b') => Set(Q2) }
+
+val nfa1 = NFA(Set[State](Q0), nfa_trans1, Set[State](Q2))
+
+nfa1.accepts("aa".toList)             // false
+nfa1.accepts("aaaaa".toList)          // false
+nfa1.accepts("aaaaab".toList)         // true
+nfa1.accepts("aaaaabbb".toList)       // true
+nfa1.accepts("aaaaabbbaaa".toList)    // false
+nfa1.accepts("ac".toList)             // false
+
+nfa1.accepts2("aa".toList)             // false
+nfa1.accepts2("aaaaa".toList)          // false
+nfa1.accepts2("aaaaab".toList)         // true
+nfa1.accepts2("aaaaabbb".toList)       // true
+nfa1.accepts2("aaaaabbbaaa".toList)    // false
+nfa1.accepts2("ac".toList)             // false
+
+
+
+
+// subset constructions
+
+def subset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = {
+  DFA(nfa.starts, 
+      { case (qs, c) => nfa.nexts(qs, c) }, 
+      _.exists(nfa.fins))
+}
+
+subset(nfa1).accepts("aa".toList)             // false
+subset(nfa1).accepts("aaaaa".toList)          // false
+subset(nfa1).accepts("aaaaab".toList)         // true
+subset(nfa1).accepts("aaaaabbb".toList)       // true
+subset(nfa1).accepts("aaaaabbbaaa".toList)    // false
+subset(nfa1).accepts("ac".toList)             // false