progs/nfa.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Tue, 15 Oct 2013 22:14:04 +0100
changeset 144 0cb61bed557d
child 145 920f675b4ed1
permissions -rw-r--r--
added

// NFAs and Thompson's construction 

// helper function for recording time
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  for (j <- 1 to i) code
  val end = System.nanoTime()
  (end - start)/(i * 1.0e9)
}


// state nodes
abstract class State
type States = Set[State]

case class IntState(i: Int) extends State

object NewState {
  var counter = 0
  
  def apply() : IntState = {
    counter += 1;
    new IntState(counter - 1)
  }
}


case class NFA(states: States, 
               start: State, 
               delta: (State, Char) => States, 
               eps: State => States,
               fins: States) {
  
  def epsclosure(qs: States) : States = {
    val ps = qs ++ qs.flatMap(eps(_))
    if (qs == ps) ps else epsclosure(ps)
  }

  def deltas(qs: States, s: List[Char]) : States = s match {
    case Nil => epsclosure(qs)
    case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)
  }

  def accepts(s: String) : Boolean = 
    deltas(Set(start), s.toList) exists (fins contains (_))
}

// A small example NFA from the lectures 
val Q0 = NewState()
val Q1 = NewState()
val Q2 = NewState()

val delta : (State, Char) => States = {
  case (Q0, 'a') => Set(Q0)
  case (Q1, 'a') => Set(Q1)
  case (Q2, 'b') => Set(Q2)
  case (_, _) => Set ()
}

val eps : State => States = {
  case Q0 => Set(Q1, Q2)
  case _ => Set()
}

val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))

NFA1.accepts("aa")
NFA1.accepts("aaaaa")
NFA1.accepts("aaaaabbb")
NFA1.accepts("aaaaabbbaaa")
NFA1.accepts("ac")


// explicit construction of some NFAs; used in
// Thompson's construction

// NFA that does not accept any string
def NFA_NULL() : NFA = {
  val Q = NewState()
  NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
}

// NFA that accepts the empty string
def NFA_EMPTY() : NFA = {
  val Q = NewState()
  NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
}

// NFA that accepts the string "c"
def NFA_CHAR(c: Char) : NFA = {
  val Q1 = NewState()
  val Q2 = NewState()
  NFA(Set(Q1, Q2), 
      Q1, 
      { case (Q1, d) if (c == d) => Set(Q2)
        case (_, _) => Set() },
      { case _ => Set() },
      Set(Q2))
}

// alternative of two NFAs
def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
  val Q = NewState()
  NFA(Set(Q) ++ nfa1.states ++ nfa2.states,
      Q,
      { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
        case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
        case (_, _) => Set() },
      { case Q => Set(nfa1.start, nfa2.start)
        case q if (nfa1.states contains q) => nfa1.eps(q)
        case q if (nfa2.states contains q) => nfa2.eps(q)
        case _ => Set() },
      nfa1.fins ++ nfa2.fins)
}

// sequence of two NFAs
def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
  NFA(nfa1.states ++ nfa2.states,
      nfa1.start,
      { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
        case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
        case (_, _) => Set() },
      { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
        case q if (nfa1.states contains q) => nfa1.eps(q)
        case q if (nfa2.states contains q) => nfa2.eps(q)
        case _ => Set() },
      nfa2.fins)
}

// star of an NFA
def NFA_STAR(nfa: NFA) : NFA = {
  val Q = NewState()
  NFA(Set(Q) ++ nfa.states, 
      Q,
      nfa.delta,
      { case Q => Set(nfa.start)
        case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)
        case q if (nfa.states contains q) => nfa.eps(q)
        case _ => Set() },
      Set(Q))
}


// regular expressions used for Thompson's construction
abstract class Rexp

case object NULL extends Rexp
case object EMPTY extends Rexp
case class CHAR(c: Char) extends Rexp 
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp

// some convenience for typing in regular expressions
def charlist2rexp(s : List[Char]) : Rexp = s match {
  case Nil => EMPTY
  case c::Nil => CHAR(c)
  case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)



def thompson (r: Rexp) : NFA = r match {
  case NULL => NFA_NULL()
  case EMPTY => NFA_EMPTY()
  case CHAR(c) => NFA_CHAR(c)  
  case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
  case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
  case STAR(r1) => NFA_STAR(thompson(r1))
}

// some examples for Thompson's
val A = thompson(CHAR('a'))

println(A.accepts("a"))
println(A.accepts("c"))
println(A.accepts("aa"))

val B = thompson(ALT("ab","ac"))

println(B.accepts("ab"))
println(B.accepts("ac"))
println(B.accepts("bb"))
println(B.accepts("aa"))

val C = thompson(STAR("ab"))

println(C.accepts(""))
println(C.accepts("a"))
println(C.accepts("ababab"))
println(C.accepts("ab"))
println(C.accepts("ac"))
println(C.accepts("bb"))
println(C.accepts("aa"))

// regular expression matcher using Thompson's
def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)


//optional
def OPT(r: Rexp) = ALT(r, EMPTY)

//n-times
def NTIMES(r: Rexp, n: Int) : Rexp = n match {
  case 0 => EMPTY
  case 1 => r
  case n => SEQ(r, NTIMES(r, n - 1))
}

// evil regular exproession
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))

// test harness for the matcher
for (i <- 1 to 100) {
  println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
}


// regular expression matching via search and backtracking
def accepts2(nfa: NFA, s: String) : Boolean = {

  def search(q: State, s: List[Char]) : Boolean = s match {
    case Nil => nfa.fins contains (q)
    case c::cs => 
      (nfa.delta(q, c) exists (search(_, cs))) || 
      (nfa.eps(q) exists (search(_, c::cs)))
  }

  search(nfa.start, s.toList)
}

def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)

// test harness for the backtracking matcher
for (i <- 1 to 21) {
  println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))
}