diff -r e3fd4c5995ef -r 0cb61bed557d progs/nfa.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/nfa.scala Tue Oct 15 22:14:04 2013 +0100 @@ -0,0 +1,242 @@ +// NFAs and Thompson's construction + +// helper function for recording time +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +// state nodes +abstract class State +type States = Set[State] + +case class IntState(i: Int) extends State + +object NewState { + var counter = 0 + + def apply() : IntState = { + counter += 1; + new IntState(counter - 1) + } +} + + +case class NFA(states: States, + start: State, + delta: (State, Char) => States, + eps: State => States, + fins: States) { + + def epsclosure(qs: States) : States = { + val ps = qs ++ qs.flatMap(eps(_)) + if (qs == ps) ps else epsclosure(ps) + } + + def deltas(qs: States, s: List[Char]) : States = s match { + case Nil => epsclosure(qs) + case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) + } + + def accepts(s: String) : Boolean = + deltas(Set(start), s.toList) exists (fins contains (_)) +} + +// A small example NFA from the lectures +val Q0 = NewState() +val Q1 = NewState() +val Q2 = NewState() + +val delta : (State, Char) => States = { + case (Q0, 'a') => Set(Q0) + case (Q1, 'a') => Set(Q1) + case (Q2, 'b') => Set(Q2) + case (_, _) => Set () +} + +val eps : State => States = { + case Q0 => Set(Q1, Q2) + case _ => Set() +} + +val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) + +NFA1.accepts("aa") +NFA1.accepts("aaaaa") +NFA1.accepts("aaaaabbb") +NFA1.accepts("aaaaabbbaaa") +NFA1.accepts("ac") + + +// explicit construction of some NFAs; used in +// Thompson's construction + +// NFA that does not accept any string +def NFA_NULL() : NFA = { + val Q = NewState() + NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) +} + +// NFA that accepts the empty string +def NFA_EMPTY() : NFA = { + val Q = NewState() + NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) +} + +// NFA that accepts the string "c" +def NFA_CHAR(c: Char) : NFA = { + val Q1 = NewState() + val Q2 = NewState() + NFA(Set(Q1, Q2), + Q1, + { case (Q1, d) if (c == d) => Set(Q2) + case (_, _) => Set() }, + { case _ => Set() }, + Set(Q2)) +} + +// alternative of two NFAs +def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { + val Q = NewState() + NFA(Set(Q) ++ nfa1.states ++ nfa2.states, + Q, + { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) + case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) + case (_, _) => Set() }, + { case Q => Set(nfa1.start, nfa2.start) + case q if (nfa1.states contains q) => nfa1.eps(q) + case q if (nfa2.states contains q) => nfa2.eps(q) + case _ => Set() }, + nfa1.fins ++ nfa2.fins) +} + +// sequence of two NFAs +def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { + NFA(nfa1.states ++ nfa2.states, + nfa1.start, + { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) + case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) + case (_, _) => Set() }, + { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) + case q if (nfa1.states contains q) => nfa1.eps(q) + case q if (nfa2.states contains q) => nfa2.eps(q) + case _ => Set() }, + nfa2.fins) +} + +// star of an NFA +def NFA_STAR(nfa: NFA) : NFA = { + val Q = NewState() + NFA(Set(Q) ++ nfa.states, + Q, + nfa.delta, + { case Q => Set(nfa.start) + case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) + case q if (nfa.states contains q) => nfa.eps(q) + case _ => Set() }, + Set(Q)) +} + + +// regular expressions used for Thompson's construction +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + + +def thompson (r: Rexp) : NFA = r match { + case NULL => NFA_NULL() + case EMPTY => NFA_EMPTY() + case CHAR(c) => NFA_CHAR(c) + case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) + case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) + case STAR(r1) => NFA_STAR(thompson(r1)) +} + +// some examples for Thompson's +val A = thompson(CHAR('a')) + +println(A.accepts("a")) +println(A.accepts("c")) +println(A.accepts("aa")) + +val B = thompson(ALT("ab","ac")) + +println(B.accepts("ab")) +println(B.accepts("ac")) +println(B.accepts("bb")) +println(B.accepts("aa")) + +val C = thompson(STAR("ab")) + +println(C.accepts("")) +println(C.accepts("a")) +println(C.accepts("ababab")) +println(C.accepts("ab")) +println(C.accepts("ac")) +println(C.accepts("bb")) +println(C.accepts("aa")) + +// regular expression matcher using Thompson's +def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) + + +//optional +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => EMPTY + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + +// evil regular exproession +def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +// test harness for the matcher +for (i <- 1 to 100) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) +} + + +// regular expression matching via search and backtracking +def accepts2(nfa: NFA, s: String) : Boolean = { + + def search(q: State, s: List[Char]) : Boolean = s match { + case Nil => nfa.fins contains (q) + case c::cs => + (nfa.delta(q, c) exists (search(_, cs))) || + (nfa.eps(q) exists (search(_, c::cs))) + } + + search(nfa.start, s.toList) +} + +def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) + +// test harness for the backtracking matcher +for (i <- 1 to 21) { + println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) +} + + + +