--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/nfa2.scala Mon Apr 03 01:10:54 2017 +0800
@@ -0,0 +1,242 @@
+// NFAs and Thompson's construction
+
+// helper function for recording time
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+// state nodes
+abstract class State
+type States = Set[State]
+
+case class IntState(i: Int) extends State
+
+object NewState {
+ var counter = 0
+
+ def apply() : IntState = {
+ counter += 1;
+ new IntState(counter - 1)
+ }
+}
+
+
+case class NFA(states: States,
+ start: State,
+ delta: (State, Char) => States,
+ eps: State => States,
+ fins: States) {
+
+ def epsclosure(qs: States) : States = {
+ val ps = qs ++ qs.flatMap(eps(_))
+ if (qs == ps) ps else epsclosure(ps)
+ }
+
+ def deltas(qs: States, s: List[Char]) : States = s match {
+ case Nil => epsclosure(qs)
+ case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)
+ }
+
+ def accepts(s: String) : Boolean =
+ deltas(Set(start), s.toList) exists (fins contains (_))
+}
+
+// A small example NFA from the lectures
+val Q0 = NewState()
+val Q1 = NewState()
+val Q2 = NewState()
+
+val delta : (State, Char) => States = {
+ case (Q0, 'a') => Set(Q0)
+ case (Q1, 'a') => Set(Q1)
+ case (Q2, 'b') => Set(Q2)
+ case (_, _) => Set ()
+}
+
+val eps : State => States = {
+ case Q0 => Set(Q1, Q2)
+ case _ => Set()
+}
+
+val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))
+
+NFA1.accepts("aa")
+NFA1.accepts("aaaaa")
+NFA1.accepts("aaaaabbb")
+NFA1.accepts("aaaaabbbaaa")
+NFA1.accepts("ac")
+
+
+// explicit construction of some NFAs; used in
+// Thompson's construction
+
+// NFA that does not accept any string
+def NFA_NULL() : NFA = {
+ val Q = NewState()
+ NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
+}
+
+// NFA that accepts the empty string
+def NFA_EMPTY() : NFA = {
+ val Q = NewState()
+ NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
+}
+
+// NFA that accepts the string "c"
+def NFA_CHAR(c: Char) : NFA = {
+ val Q1 = NewState()
+ val Q2 = NewState()
+ NFA(Set(Q1, Q2),
+ Q1,
+ { case (Q1, d) if (c == d) => Set(Q2)
+ case (_, _) => Set() },
+ { case _ => Set() },
+ Set(Q2))
+}
+
+// alternative of two NFAs
+def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
+ val Q = NewState()
+ NFA(Set(Q) ++ nfa1.states ++ nfa2.states,
+ Q,
+ { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
+ case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
+ case (_, _) => Set() },
+ { case Q => Set(nfa1.start, nfa2.start)
+ case q if (nfa1.states contains q) => nfa1.eps(q)
+ case q if (nfa2.states contains q) => nfa2.eps(q)
+ case _ => Set() },
+ nfa1.fins ++ nfa2.fins)
+}
+
+// sequence of two NFAs
+def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
+ NFA(nfa1.states ++ nfa2.states,
+ nfa1.start,
+ { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
+ case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
+ case (_, _) => Set() },
+ { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
+ case q if (nfa1.states contains q) => nfa1.eps(q)
+ case q if (nfa2.states contains q) => nfa2.eps(q)
+ case _ => Set() },
+ nfa2.fins)
+}
+
+// star of an NFA
+def NFA_STAR(nfa: NFA) : NFA = {
+ val Q = NewState()
+ NFA(Set(Q) ++ nfa.states,
+ Q,
+ nfa.delta,
+ { case Q => Set(nfa.start)
+ case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)
+ case q if (nfa.states contains q) => nfa.eps(q)
+ case _ => Set() },
+ Set(Q))
+}
+
+
+// regular expressions used for Thompson's construction
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+
+def thompson (r: Rexp) : NFA = r match {
+ case NULL => NFA_NULL()
+ case EMPTY => NFA_EMPTY()
+ case CHAR(c) => NFA_CHAR(c)
+ case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
+ case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
+ case STAR(r1) => NFA_STAR(thompson(r1))
+}
+
+// some examples for Thompson's
+val A = thompson(CHAR('a'))
+
+println(A.accepts("a"))
+println(A.accepts("c"))
+println(A.accepts("aa"))
+
+val B = thompson(ALT("ab","ac"))
+
+println(B.accepts("ab"))
+println(B.accepts("ac"))
+println(B.accepts("bb"))
+println(B.accepts("aa"))
+
+val C = thompson(STAR("ab"))
+
+println(C.accepts(""))
+println(C.accepts("a"))
+println(C.accepts("ababab"))
+println(C.accepts("ab"))
+println(C.accepts("ac"))
+println(C.accepts("bb"))
+println(C.accepts("aa"))
+
+// regular expression matcher using Thompson's
+def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)
+
+
+//optional
+def OPT(r: Rexp) = ALT(r, EMPTY)
+
+//n-times
+def NTIMES(r: Rexp, n: Int) : Rexp = n match {
+ case 0 => EMPTY
+ case 1 => r
+ case n => SEQ(r, NTIMES(r, n - 1))
+}
+
+// evil regular exproession
+def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+
+// test harness for the matcher
+for (i <- 0 to 100 by 5) {
+ println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
+}
+
+
+// regular expression matching via search and backtracking
+def accepts2(nfa: NFA, s: String) : Boolean = {
+
+ def search(q: State, s: List[Char]) : Boolean = s match {
+ case Nil => nfa.fins contains (q)
+ case c::cs =>
+ (nfa.delta(q, c) exists (search(_, cs))) ||
+ (nfa.eps(q) exists (search(_, c::cs)))
+ }
+
+ search(nfa.start, s.toList)
+}
+
+def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)
+
+// test harness for the backtracking matcher
+for (i <- 0 to 20 by 1) {
+ println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))
+}
+
+
+
+