// NFAs and Thompson's construction // helper function for recording timedef time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() for (j <- 1 to i) code val end = System.nanoTime() (end - start)/(i * 1.0e9)}// state nodesabstract class Statetype States = Set[State]case class IntState(i: Int) extends Stateobject NewState { var counter = 0 def apply() : IntState = { counter += 1; new IntState(counter - 1) }}case class NFA(states: States, start: State, delta: (State, Char) => States, eps: State => States, fins: States) { def epsclosure(qs: States) : States = { val ps = qs ++ qs.flatMap(eps(_)) if (qs == ps) ps else epsclosure(ps) } def deltas(qs: States, s: List[Char]) : States = s match { case Nil => epsclosure(qs) case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) } def accepts(s: String) : Boolean = deltas(Set(start), s.toList) exists (fins contains (_))}// A small example NFA from the lectures val Q0 = NewState()val Q1 = NewState()val Q2 = NewState()val delta : (State, Char) => States = { case (Q0, 'a') => Set(Q0) case (Q1, 'a') => Set(Q1) case (Q2, 'b') => Set(Q2) case (_, _) => Set ()}val eps : State => States = { case Q0 => Set(Q1, Q2) case _ => Set()}val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))NFA1.accepts("aa")NFA1.accepts("aaaaa")NFA1.accepts("aaaaabbb")NFA1.accepts("aaaaabbbaaa")NFA1.accepts("ac")// explicit construction of some NFAs; used in// Thompson's construction// NFA that does not accept any stringdef NFA_NULL() : NFA = { val Q = NewState() NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())}// NFA that accepts the empty stringdef NFA_EMPTY() : NFA = { val Q = NewState() NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))}// NFA that accepts the string "c"def NFA_CHAR(c: Char) : NFA = { val Q1 = NewState() val Q2 = NewState() NFA(Set(Q1, Q2), Q1, { case (Q1, d) if (c == d) => Set(Q2) case (_, _) => Set() }, { case _ => Set() }, Set(Q2))}// alternative of two NFAsdef NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { val Q = NewState() NFA(Set(Q) ++ nfa1.states ++ nfa2.states, Q, { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) case (_, _) => Set() }, { case Q => Set(nfa1.start, nfa2.start) case q if (nfa1.states contains q) => nfa1.eps(q) case q if (nfa2.states contains q) => nfa2.eps(q) case _ => Set() }, nfa1.fins ++ nfa2.fins)}// sequence of two NFAsdef NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { NFA(nfa1.states ++ nfa2.states, nfa1.start, { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) case (_, _) => Set() }, { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) case q if (nfa1.states contains q) => nfa1.eps(q) case q if (nfa2.states contains q) => nfa2.eps(q) case _ => Set() }, nfa2.fins)}// star of an NFAdef NFA_STAR(nfa: NFA) : NFA = { val Q = NewState() NFA(Set(Q) ++ nfa.states, Q, nfa.delta, { case Q => Set(nfa.start) case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) case q if (nfa.states contains q) => nfa.eps(q) case _ => Set() }, Set(Q))}// regular expressions used for Thompson's constructionabstract class Rexpcase object NULL extends Rexpcase object EMPTY extends Rexpcase class CHAR(c: Char) extends Rexp case class ALT(r1: Rexp, r2: Rexp) extends Rexpcase class SEQ(r1: Rexp, r2: Rexp) extends Rexp case class STAR(r: Rexp) extends Rexp// some convenience for typing in regular expressionsdef charlist2rexp(s : List[Char]) : Rexp = s match { case Nil => EMPTY case c::Nil => CHAR(c) case c::s => SEQ(CHAR(c), charlist2rexp(s))}implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)def thompson (r: Rexp) : NFA = r match { case NULL => NFA_NULL() case EMPTY => NFA_EMPTY() case CHAR(c) => NFA_CHAR(c) case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) case STAR(r1) => NFA_STAR(thompson(r1))}// some examples for Thompson'sval A = thompson(CHAR('a'))println(A.accepts("a")) // true println(A.accepts("c")) // false println(A.accepts("aa")) // falseval B = thompson(ALT("ab","ac"))println(B.accepts("ab")) // true println(B.accepts("ac")) // true println(B.accepts("bb")) // false println(B.accepts("aa")) // false val C = thompson(STAR("ab"))println(C.accepts("")) // trueprintln(C.accepts("a")) // false println(C.accepts("ababab")) // trueprintln(C.accepts("ab")) // trueprintln(C.accepts("ac")) // false println(C.accepts("bb")) // false println(C.accepts("aa")) // false // regular expression matcher using Thompson'sdef matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)//optionaldef OPT(r: Rexp) = ALT(r, EMPTY)//n-timesdef NTIMES(r: Rexp, n: Int) : Rexp = n match { case 0 => EMPTY case 1 => r case n => SEQ(r, NTIMES(r, n - 1))}// evil regular exproessiondef EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))// test harness for the matcherfor (i <- 0 to 100 by 5) { println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))}// regular expression matching via search and backtrackingdef accepts2(nfa: NFA, s: String) : Boolean = { def search(q: State, s: List[Char]) : Boolean = s match { case Nil => nfa.fins contains (q) case c::cs => (nfa.delta(q, c) exists (search(_, cs))) || (nfa.eps(q) exists (search(_, c::cs))) } search(nfa.start, s.toList)}def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)// test harness for the backtracking matcherfor (i <- 0 to 20 by 1) { println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))}