// NFAs and Thompson's construction + −
+ −
// helper function for recording time+ −
def time_needed[T](i: Int, code: => T) = {+ −
val start = System.nanoTime()+ −
for (j <- 1 to i) code+ −
val end = System.nanoTime()+ −
(end - start)/(i * 1.0e9)+ −
}+ −
+ −
+ −
// state nodes+ −
abstract class State+ −
type States = Set[State]+ −
+ −
case class IntState(i: Int) extends State+ −
+ −
object NewState {+ −
var counter = 0+ −
+ −
def apply() : IntState = {+ −
counter += 1;+ −
new IntState(counter - 1)+ −
}+ −
}+ −
+ −
+ −
case class NFA(states: States, + −
start: State, + −
delta: (State, Char) => States, + −
eps: State => States,+ −
fins: States) {+ −
+ −
def epsclosure(qs: States) : States = {+ −
val ps = qs ++ qs.flatMap(eps(_))+ −
if (qs == ps) ps else epsclosure(ps)+ −
}+ −
+ −
def deltas(qs: States, s: List[Char]) : States = s match {+ −
case Nil => epsclosure(qs)+ −
case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)+ −
}+ −
+ −
def accepts(s: String) : Boolean = + −
deltas(Set(start), s.toList) exists (fins contains (_))+ −
}+ −
+ −
// A small example NFA from the lectures + −
val Q0 = NewState()+ −
val Q1 = NewState()+ −
val Q2 = NewState()+ −
+ −
val delta : (State, Char) => States = {+ −
case (Q0, 'a') => Set(Q0)+ −
case (Q1, 'a') => Set(Q1)+ −
case (Q2, 'b') => Set(Q2)+ −
case (_, _) => Set ()+ −
}+ −
+ −
val eps : State => States = {+ −
case Q0 => Set(Q1, Q2)+ −
case _ => Set()+ −
}+ −
+ −
val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))+ −
+ −
NFA1.accepts("aa")+ −
NFA1.accepts("aaaaa")+ −
NFA1.accepts("aaaaabbb")+ −
NFA1.accepts("aaaaabbbaaa")+ −
NFA1.accepts("ac")+ −
+ −
+ −
// explicit construction of some NFAs; used in+ −
// Thompson's construction+ −
+ −
// NFA that does not accept any string+ −
def NFA_NULL() : NFA = {+ −
val Q = NewState()+ −
NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())+ −
}+ −
+ −
// NFA that accepts the empty string+ −
def NFA_EMPTY() : NFA = {+ −
val Q = NewState()+ −
NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))+ −
}+ −
+ −
// NFA that accepts the string "c"+ −
def NFA_CHAR(c: Char) : NFA = {+ −
val Q1 = NewState()+ −
val Q2 = NewState()+ −
NFA(Set(Q1, Q2), + −
Q1, + −
{ case (Q1, d) if (c == d) => Set(Q2)+ −
case (_, _) => Set() },+ −
{ case _ => Set() },+ −
Set(Q2))+ −
}+ −
+ −
// alternative of two NFAs+ −
def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {+ −
val Q = NewState()+ −
NFA(Set(Q) ++ nfa1.states ++ nfa2.states,+ −
Q,+ −
{ case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)+ −
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)+ −
case (_, _) => Set() },+ −
{ case Q => Set(nfa1.start, nfa2.start)+ −
case q if (nfa1.states contains q) => nfa1.eps(q)+ −
case q if (nfa2.states contains q) => nfa2.eps(q)+ −
case _ => Set() },+ −
nfa1.fins ++ nfa2.fins)+ −
}+ −
+ −
// sequence of two NFAs+ −
def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {+ −
NFA(nfa1.states ++ nfa2.states,+ −
nfa1.start,+ −
{ case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)+ −
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)+ −
case (_, _) => Set() },+ −
{ case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)+ −
case q if (nfa1.states contains q) => nfa1.eps(q)+ −
case q if (nfa2.states contains q) => nfa2.eps(q)+ −
case _ => Set() },+ −
nfa2.fins)+ −
}+ −
+ −
// star of an NFA+ −
def NFA_STAR(nfa: NFA) : NFA = {+ −
val Q = NewState()+ −
NFA(Set(Q) ++ nfa.states, + −
Q,+ −
nfa.delta,+ −
{ case Q => Set(nfa.start)+ −
case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)+ −
case q if (nfa.states contains q) => nfa.eps(q)+ −
case _ => Set() },+ −
Set(Q))+ −
}+ −
+ −
+ −
// regular expressions used for Thompson's construction+ −
abstract class Rexp+ −
+ −
case object NULL extends Rexp+ −
case object EMPTY extends Rexp+ −
case class CHAR(c: Char) extends Rexp + −
case class ALT(r1: Rexp, r2: Rexp) extends Rexp+ −
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp + −
case class STAR(r: Rexp) extends Rexp+ −
+ −
// some convenience for typing in regular expressions+ −
def charlist2rexp(s : List[Char]) : Rexp = s match {+ −
case Nil => EMPTY+ −
case c::Nil => CHAR(c)+ −
case c::s => SEQ(CHAR(c), charlist2rexp(s))+ −
}+ −
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)+ −
+ −
+ −
+ −
def thompson (r: Rexp) : NFA = r match {+ −
case NULL => NFA_NULL()+ −
case EMPTY => NFA_EMPTY()+ −
case CHAR(c) => NFA_CHAR(c) + −
case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))+ −
case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))+ −
case STAR(r1) => NFA_STAR(thompson(r1))+ −
}+ −
+ −
// some examples for Thompson's+ −
val A = thompson(CHAR('a'))+ −
+ −
println(A.accepts("a")) // true + −
println(A.accepts("c")) // false + −
println(A.accepts("aa")) // false+ −
+ −
val B = thompson(ALT("ab","ac"))+ −
+ −
println(B.accepts("ab")) // true + −
println(B.accepts("ac")) // true + −
println(B.accepts("bb")) // false + −
println(B.accepts("aa")) // false + −
+ −
val C = thompson(STAR("ab"))+ −
+ −
println(C.accepts("")) // true+ −
println(C.accepts("a")) // false + −
println(C.accepts("ababab")) // true+ −
println(C.accepts("ab")) // true+ −
println(C.accepts("ac")) // false + −
println(C.accepts("bb")) // false + −
println(C.accepts("aa")) // false + −
+ −
// regular expression matcher using Thompson's+ −
def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)+ −
+ −
+ −
//optional+ −
def OPT(r: Rexp) = ALT(r, EMPTY)+ −
+ −
//n-times+ −
def NTIMES(r: Rexp, n: Int) : Rexp = n match {+ −
case 0 => EMPTY+ −
case 1 => r+ −
case n => SEQ(r, NTIMES(r, n - 1))+ −
}+ −
+ −
// evil regular exproession+ −
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))+ −
+ −
// test harness for the matcher+ −
for (i <- 0 to 100 by 5) {+ −
println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))+ −
}+ −
+ −
+ −
// regular expression matching via search and backtracking+ −
def accepts2(nfa: NFA, s: String) : Boolean = {+ −
+ −
def search(q: State, s: List[Char]) : Boolean = s match {+ −
case Nil => nfa.fins contains (q)+ −
case c::cs => + −
(nfa.delta(q, c) exists (search(_, cs))) || + −
(nfa.eps(q) exists (search(_, c::cs)))+ −
}+ −
+ −
search(nfa.start, s.toList)+ −
}+ −
+ −
def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)+ −
+ −
// test harness for the backtracking matcher+ −
for (i <- 0 to 20 by 1) {+ −
println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))+ −
}+ −
+ −
+ −
+ −
+ −