progs/scala/nfas.scala
changeset 243 09ab631ce7fa
parent 242 dcfc9b23b263
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/scala/nfas.scala	Wed May 17 09:38:58 2017 +0100
@@ -0,0 +1,379 @@
+// NFAs based on Scala's partial functions (returning
+// sets of states)
+
+
+import scala.util.Try
+
+
+// type abbreviation for partial functions: A :=> B is PartialFunction[A, B]
+type :=>[A, B] = PartialFunction[A, B]
+
+
+// some states for test cases (seven named states Q0 to Q6)
+abstract class State
+case object Q0 extends State
+case object Q1 extends State
+case object Q2 extends State
+case object Q3 extends State
+case object Q4 extends State
+case object Q5 extends State
+case object Q6 extends State
+
+
+// return f(x), or the empty set when f is not defined at x (no exception is caught)
+def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] =
+  f.applyOrElse(x, (_: A) => Set.empty[B])
+
+
+
+// class for NFAs with states of type A and characters of type C
+case class NFA[A, C](starts: Set[A],            // starting states
+                     delta: (A, C) :=> Set[A],  // transitions
+                     fins:  A => Boolean) {     // final states 
+
+  // given a state and a character, what is the set of next states?
+  // if there is none => empty set
+  def next(q: A, c: C) : Set[A] = 
+    applyOrElse(delta, (q, c))
+
+  def nexts(qs: Set[A], c: C) : Set[A] =    // union of next over all states in qs
+    qs.flatMap(next(_, c))
+
+  // given some states and a string, what is the set of next states?
+  // (a fold, not self-recursion: the method is not final, so the recursion
+  // would not be turned into a loop and long strings overflow the stack)
+  def deltas(qs: Set[A], s: List[C]) : Set[A] = 
+    s.foldLeft(qs)(nexts)
+
+  // is a string accepted by an NFA?
+  def accepts(s: List[C]) : Boolean = 
+    deltas(starts, s).exists(fins)
+
+  // depth-first search version of accept
+  def search(q: A, s: List[C]) : Boolean = s match {
+    case Nil => fins(q)
+    case c::cs => next(q, c).exists(search(_, cs)) 
+  }
+
+  def accepts2(s: List[C]) : Boolean =      // search from each starting state
+    starts.exists(search(_, s))
+
+}
+
+
+// NFA test cases (the comment behind each call gives the expected result)
+
+val trans2 : (State, Char) :=> Set[State] = 
+ { case (Q0, 'a') => Set(Q0, Q1)
+   case (Q0, 'b') => Set(Q2)
+   case (Q1, 'a') => Set(Q1)
+   case (Q2, 'b') => Set(Q2)
+ }
+// nfa2 starts in Q0 and accepts in Q2
+val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2))
+
+nfa2.accepts("aa".toList)             // false
+nfa2.accepts("aaaaa".toList)          // false
+nfa2.accepts("aaaaab".toList)         // true
+nfa2.accepts("aaaaabbb".toList)       // true
+nfa2.accepts("aaaaabbbaaa".toList)    // false
+nfa2.accepts("ac".toList)             // false
+// the same inputs through the depth-first search version
+nfa2.accepts2("aa".toList)             // false
+nfa2.accepts2("aaaaa".toList)          // false
+nfa2.accepts2("aaaaab".toList)         // true
+nfa2.accepts2("aaaaabbb".toList)       // true
+nfa2.accepts2("aaaaabbbaaa".toList)    // false
+nfa2.accepts2("ac".toList)             // false
+
+
+
+
+// epsilon NFAs
+// (not explicitly defined, but immediately translated into NFAs)
+
+// fixpoint construction: iterate f from x until a value y with f(y) == y is reached
+import scala.annotation.tailrec
+@tailrec
+def fixpT[A](f: A => A, x: A): A = f(x) match {
+  case same if same == x => x
+  case other => fixpT(f, other)
+}
+
+// translates eNFAs directly into NFAs; the transitions of delta that
+// are labelled None are the epsilon transitions
+def eNFA[A, C](starts: Set[A], 
+               delta: (A, Option[C]) :=> Set[A], 
+               fins: A => Boolean) : NFA[A, C] = { 
+
+  // one round of epsilon steps, keeping the states we came from
+  def step(ps: Set[A]) : Set[A] = 
+    ps.union(ps.flatMap(p => applyOrElse(delta, (p, None))))
+
+  // epsilon closure: repeat step until no new state turns up
+  def closure(ps: Set[A]) : Set[A] = 
+    fixpT(step, ps)
+
+  // reading c: close, follow the c-transitions, close again
+  def move(ps: Set[A], c: C) : Set[A] = {
+    val reached = closure(ps).flatMap(p => applyOrElse(delta, (p, Some(c))))
+    closure(reached)
+  }
+
+  // the resulting NFA starts in the closure of starts; a state is
+  // accepting if its closure contains a state accepted by fins
+  val closedStarts = closure(starts)
+  val closedDelta : (A, C) :=> Set[A] = { case (p, c) => move(Set(p), c) }
+
+  NFA(closedStarts, closedDelta, p => closure(Set(p)).exists(fins))
+}
+
+
+
+
+
+// test cases for eNFAs (None labels the epsilon transitions)
+val etrans1 : (State, Option[Char]) :=> Set[State] =
+  { case (Q0, Some('a')) => Set(Q1)
+    case (Q1, None) => Set(Q0)
+  }
+// enfa1 starts in Q0 and accepts in Q1
+val enfa1 = eNFA(Set[State](Q0), etrans1, Set[State](Q1))
+
+enfa1.accepts("a".toList)              // true
+enfa1.accepts("".toList)               // false
+enfa1.accepts("aaaaa".toList)          // true
+enfa1.accepts("aaaaab".toList)         // false
+enfa1.accepts("aaaaabbb".toList)       // false
+enfa1.accepts("aaaaabbbaaa".toList)    // false
+enfa1.accepts("ac".toList)             // false
+
+// example from handouts (expected results in the comments behind the calls)
+val etrans2 : (State, Option[Char]) :=> Set[State] = 
+  { case (Q0, Some('a')) => Set(Q0)
+    case (Q0, None) => Set(Q1, Q2)
+    case (Q1, Some('a')) => Set(Q1)
+    case (Q2, Some('b')) => Set(Q2)
+    case (Q1, None) => Set(Q0)
+  }
+// enfa2 starts in Q0 and accepts in Q2
+val enfa2 = eNFA(Set[State](Q0), etrans2, Set[State](Q2))
+
+enfa2.accepts("a".toList)              // true
+enfa2.accepts("".toList)               // true
+enfa2.accepts("aaaaa".toList)          // true
+enfa2.accepts("aaaaab".toList)         // true
+enfa2.accepts("aaaaabbb".toList)       // true
+enfa2.accepts("aaaaabbbaaa".toList)    // false
+enfa2.accepts("ac".toList)             // false
+
+
+// states for Thompson construction, told apart by the integer i
+case class TState(i: Int) extends State
+object TState {
+  var counter = 0   // number given to the next state made by apply()
+  // state numbered by the current counter; the counter then moves on by one
+  def apply() : TState = {
+    val fresh = new TState(counter)
+    counter += 1
+    fresh
+  }
+}
+
+// some type abbreviations for the Thompson construction
+type NFAt = NFA[TState, Char]                            // NFAs over TStates and Chars
+type NFAtrans = (TState, Char) :=> Set[TState]           // transitions on a character
+type eNFAtrans = (TState, Option[Char]) :=> Set[TState]  // transitions on an optional character
+
+
+// for composing an eNFA transition f with a NFA transition g
+implicit class RichPF(val f: eNFAtrans) extends AnyVal {
+  def +++(g: NFAtrans) : eNFAtrans = {   // None-labelled moves come from f alone
+    case (p, None) => applyOrElse(f, (p, None))
+    case (p, lbl @ Some(d)) => applyOrElse(f, (p, lbl)).union(applyOrElse(g, (p, d))) }
+}
+
+
+// NFA that does not accept any string: one start state, no accepting state
+def NFA_ZERO(): NFAt = {
+  val lone = TState()
+  NFA(Set(lone), { case _ => Set[TState]() }, Set[TState]())
+}
+
+// NFA that accepts the empty string: the start state is accepting, no moves
+def NFA_ONE() : NFAt = {
+  val lone = TState()
+  NFA(Set(lone), { case _ => Set[TState]() }, Set(lone))
+}
+
+// NFA that accepts the string "c" (backticks match the values src and c)
+def NFA_CHAR(c: Char) : NFAt = {
+  val src = TState()
+  val dst = TState()
+  NFA(Set(src), { case (`src`, `c`) => Set(dst) }, Set(dst))
+}
+
+// sequence of two NFAs: enfa1 followed by enfa2
+def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = {
+  // None-labelled move from each final state of enfa1 to the starts of enfa2
+  val bridge : eNFAtrans = 
+    { case (p, None) if enfa1.fins(p) => enfa2.starts }
+  val glued = bridge +++ enfa1.delta +++ enfa2.delta
+  eNFA(enfa1.starts, glued, enfa2.fins)
+}
+
+// alternative of two NFAs: starts, transitions and final states are combined
+def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = {
+  // a state moves wherever either of the two transition functions sends it
+  val either_delta : NFAtrans = { case (p, d) => 
+    applyOrElse(enfa1.delta, (p, d)).union(applyOrElse(enfa2.delta, (p, d))) }
+  // final as soon as one of the two automata regards the state as final
+  val either_fins : TState => Boolean = p => enfa1.fins(p) || enfa2.fins(p)
+  NFA(enfa1.starts | enfa2.starts, either_delta, either_fins)
+}
+
+// star of a NFA: a fresh state hub is both the start and the accepting state
+def NFA_STAR(enfa: NFAt) : NFAt = {
+  val hub = TState()
+  // None-labelled moves from hub into enfa, and from enfa's final states to hub
+  val loop : eNFAtrans = {
+    case (`hub`, None) => enfa.starts
+    case (p, None) if enfa.fins(p) => Set(hub) }
+  eNFA(Set(hub), loop +++ enfa.delta, Set(hub))
+}
+
+
+// Regular expressions for derivative automata
+// (thompson below translates each of these constructors into an NFA)
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp 
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
+case class STAR(r: Rexp) extends Rexp 
+
+import scala.language.implicitConversions    
+import scala.language.reflectiveCalls 
+
+// right-nested SEQ of the characters in s; ONE when s is empty
+def charlist2rexp(s: List[Char]): Rexp = {
+  val chars : List[Rexp] = s.map(CHAR(_))
+  chars.reduceRightOption(SEQ(_, _)).getOrElse(ONE)
+}
+implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
+
+implicit class RexpOps (r: Rexp) {   // named class, so calls need no reflection
+  def | (s: Rexp) = ALT(r, s)        // alternative
+  def % = STAR(r)                    // star
+  def ~ (s: Rexp) = SEQ(r, s)        // sequence
+}
+
+implicit class stringOps (s: String) {   // named class, so calls need no reflection
+  def | (r: Rexp) = ALT(s, r)            // s is turned into a Rexp by string2rexp
+  def | (r: String) = ALT(s, r)
+  def % = STAR(s)
+  def ~ (r: Rexp) = SEQ(s, r)
+  def ~ (r: String) = SEQ(s, r)
+}
+
+// optional: the alternative of r and ONE
+def OPT(r: Rexp) = ALT(r, ONE)
+
+// n-times: n copies of r in sequence, ONE for n == 0; a negative n is
+// rejected (it used to recurse until the stack overflowed)
+def NTIMES(r: Rexp, n: Int) : Rexp = {
+  require(n >= 0, "NTIMES: n must not be negative")
+  if (n == 0) ONE else if (n == 1) r else SEQ(r, NTIMES(r, n - 1))
+}
+
+// evil regular expression: n optional a's followed by n a's
+def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+
+// second evil regular expression: a star of a star of a, followed by b
+val EVIL2 = STAR(STAR("a")) ~ "b"
+
+// Thompson construction: builds the NFA for r from the NFAs of its parts
+def thompson (r: Rexp) : NFAt = r match {
+  case SEQ(first, second) => NFA_SEQ(thompson(first), thompson(second))
+  case ALT(left, right) => NFA_ALT(thompson(left), thompson(right))
+  case STAR(body) => NFA_STAR(thompson(body))
+  case CHAR(ch) => NFA_CHAR(ch)
+  case ONE => NFA_ONE()
+  case ZERO => NFA_ZERO()
+}
+
+// regular expression matcher using Thompson's construction and accepts
+def tmatcher(r: Rexp, s: String) : Boolean = 
+  thompson(r).accepts(s.toList)
+
+def tmatcher2(r: Rexp, s: String) : Boolean =   // Thompson's construction and accepts2
+  thompson(r).accepts2(s.toList)
+
+// test cases for thompson construction (expected result behind each call)
+tmatcher(ZERO, "")   // false
+tmatcher(ZERO, "a")  // false
+
+tmatcher(ONE, "")    // true
+tmatcher(ONE, "a")   // false
+
+tmatcher(CHAR('a'), "")    // false
+tmatcher(CHAR('a'), "a")   // true
+tmatcher(CHAR('a'), "b")   // false
+
+tmatcher("a" | "b", "")    // false
+tmatcher("a" | "b", "a")   // true
+tmatcher("a" | "b", "b")   // true
+tmatcher("a" | "b", "c")   // false
+tmatcher("a" | "b", "ab")  // false
+
+tmatcher("a" ~ "b", "")    // false
+tmatcher("a" ~ "b", "a")   // false
+tmatcher("a" ~ "b", "b")   // false
+tmatcher("a" ~ "b", "c")   // false
+tmatcher("a" ~ "b", "ab")  // true
+tmatcher("a" ~ "b", "aba") // false
+
+tmatcher(STAR("a"), "")      // true
+tmatcher(STAR("a"), "a")     // true
+tmatcher(STAR("a"), "aaaaa") // true
+tmatcher(STAR("a"), "b")     // false
+tmatcher(STAR("a"), "aaab")  // false
+
+tmatcher(STAR(STAR("a")), "")      // true
+tmatcher(STAR(STAR("a")), "a")     // true
+tmatcher(STAR(STAR("a")), "aaaaa") // true
+tmatcher(STAR(STAR("a")), "b")     // false
+tmatcher(STAR(STAR("a")), "aaab")  // false
+// EVIL2 only matches strings that end in b
+tmatcher(EVIL2, "aaaaaab")   // true
+tmatcher(EVIL2, "aaaaaa")    // false
+tmatcher(EVIL2, "a" * 100)   // false
+
+// helper function for recording time: runs code i times, returns seconds per run
+def time_needed[T](i: Int, code: => T) = {
+  val t0 = System.nanoTime()
+  (1 to i).foreach(_ => code)
+  val elapsed = System.nanoTime() - t0   // nanoseconds for all i runs
+  elapsed / (i * 1.0e9)
+}
+
+
+
+// test harness for the matcher: prints "<i>: <seconds>" for one run per size i
+for (i <- 0 to 9) {
+  println(s"$i: " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i))))
+}
+
+for (i <- 0 to 7) {
+  println(s"$i: " + "%.5f".format(time_needed(1, tmatcher2(EVIL(i), "a" * i))))
+}
+
+for (i <- 0 to 100 by 5) {
+  println(s"$i: " + "%.5f".format(time_needed(1, tmatcher(EVIL2, "a" * i))))
+}
+
+// (s"$i: " replaces i + ": ", since Int + String is deprecated in Scala 2.13)
+for (i <- 0 to 8) {
+  println(s"$i: " + "%.5f".format(time_needed(1, tmatcher2(EVIL2, "a" * i))))
+}