progs/nfa2.scala
changeset 733 022e2cb1668d
parent 732 c7bdd7eac4cb
child 734 5d860ff01938
equal deleted inserted replaced
732:c7bdd7eac4cb 733:022e2cb1668d
     1 // NFAs and Thompson's construction 
       
     2 
       
     3 // helper function for recording time
       
     4 def time_needed[T](i: Int, code: => T) = {
       
     5   val start = System.nanoTime()
       
     6   for (j <- 1 to i) code
       
     7   val end = System.nanoTime()
       
     8   (end - start)/(i * 1.0e9)
       
     9 }
       
    10 
       
    11 
       
    12 // state nodes
       
    13 abstract class State
       
    14 type States = Set[State]
       
    15 
       
    16 case class IntState(i: Int) extends State
       
    17 
       
    18 object NewState {
       
    19   var counter = 0
       
    20   
       
    21   def apply() : IntState = {
       
    22     counter += 1;
       
    23     new IntState(counter - 1)
       
    24   }
       
    25 }
       
    26 
       
    27 
       
    28 case class NFA(states: States, 
       
    29                start: State, 
       
    30                delta: (State, Char) => States, 
       
    31                eps: State => States,
       
    32                fins: States) {
       
    33   
       
    34   def epsclosure(qs: States) : States = {
       
    35     val ps = qs ++ qs.flatMap(eps(_))
       
    36     if (qs == ps) ps else epsclosure(ps)
       
    37   }
       
    38 
       
    39   def deltas(qs: States, s: List[Char]) : States = s match {
       
    40     case Nil => epsclosure(qs)
       
    41     case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)
       
    42   }
       
    43 
       
    44   def accepts(s: String) : Boolean = 
       
    45     deltas(Set(start), s.toList) exists (fins contains (_))
       
    46 }
       
    47 
       
    48 // A small example NFA from the lectures 
       
    49 val Q0 = NewState()
       
    50 val Q1 = NewState()
       
    51 val Q2 = NewState()
       
    52 
       
    53 val delta : (State, Char) => States = {
       
    54   case (Q0, 'a') => Set(Q0)
       
    55   case (Q1, 'a') => Set(Q1)
       
    56   case (Q2, 'b') => Set(Q2)
       
    57   case (_, _) => Set ()
       
    58 }
       
    59 
       
    60 val eps : State => States = {
       
    61   case Q0 => Set(Q1, Q2)
       
    62   case _ => Set()
       
    63 }
       
    64 
       
    65 val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))
       
    66 
       
    67 NFA1.accepts("aa")
       
    68 NFA1.accepts("aaaaa")
       
    69 NFA1.accepts("aaaaabbb")
       
    70 NFA1.accepts("aaaaabbbaaa")
       
    71 NFA1.accepts("ac")
       
    72 
       
    73 
       
    74 // explicit construction of some NFAs; used in
       
    75 // Thompson's construction
       
    76 
       
    77 // NFA that does not accept any string
       
    78 def NFA_NULL() : NFA = {
       
    79   val Q = NewState()
       
    80   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
       
    81 }
       
    82 
       
    83 // NFA that accepts the empty string
       
    84 def NFA_EMPTY() : NFA = {
       
    85   val Q = NewState()
       
    86   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
       
    87 }
       
    88 
       
    89 // NFA that accepts the string "c"
       
    90 def NFA_CHAR(c: Char) : NFA = {
       
    91   val Q1 = NewState()
       
    92   val Q2 = NewState()
       
    93   NFA(Set(Q1, Q2), 
       
    94       Q1, 
       
    95       { case (Q1, d) if (c == d) => Set(Q2)
       
    96         case (_, _) => Set() },
       
    97       { case _ => Set() },
       
    98       Set(Q2))
       
    99 }
       
   100 
       
   101 // alternative of two NFAs
       
   102 def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
       
   103   val Q = NewState()
       
   104   NFA(Set(Q) ++ nfa1.states ++ nfa2.states,
       
   105       Q,
       
   106       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
       
   107         case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
       
   108         case (_, _) => Set() },
       
   109       { case Q => Set(nfa1.start, nfa2.start)
       
   110         case q if (nfa1.states contains q) => nfa1.eps(q)
       
   111         case q if (nfa2.states contains q) => nfa2.eps(q)
       
   112         case _ => Set() },
       
   113       nfa1.fins ++ nfa2.fins)
       
   114 }
       
   115 
       
   116 // sequence of two NFAs
       
   117 def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
       
   118   NFA(nfa1.states ++ nfa2.states,
       
   119       nfa1.start,
       
   120       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
       
   121         case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
       
   122         case (_, _) => Set() },
       
   123       { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
       
   124         case q if (nfa1.states contains q) => nfa1.eps(q)
       
   125         case q if (nfa2.states contains q) => nfa2.eps(q)
       
   126         case _ => Set() },
       
   127       nfa2.fins)
       
   128 }
       
   129 
       
   130 // star of an NFA
       
   131 def NFA_STAR(nfa: NFA) : NFA = {
       
   132   val Q = NewState()
       
   133   NFA(Set(Q) ++ nfa.states, 
       
   134       Q,
       
   135       nfa.delta,
       
   136       { case Q => Set(nfa.start)
       
   137         case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)
       
   138         case q if (nfa.states contains q) => nfa.eps(q)
       
   139         case _ => Set() },
       
   140       Set(Q))
       
   141 }
       
   142 
       
   143 
       
   144 // regular expressions used for Thompson's construction
       
   145 abstract class Rexp
       
   146 
       
   147 case object NULL extends Rexp
       
   148 case object EMPTY extends Rexp
       
   149 case class CHAR(c: Char) extends Rexp 
       
   150 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
       
   151 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
   152 case class STAR(r: Rexp) extends Rexp
       
   153 
       
   154 // some convenience for typing in regular expressions
       
   155 def charlist2rexp(s : List[Char]) : Rexp = s match {
       
   156   case Nil => EMPTY
       
   157   case c::Nil => CHAR(c)
       
   158   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
   159 }
       
   160 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
   161 
       
   162 
       
   163 
       
   164 def thompson (r: Rexp) : NFA = r match {
       
   165   case NULL => NFA_NULL()
       
   166   case EMPTY => NFA_EMPTY()
       
   167   case CHAR(c) => NFA_CHAR(c)  
       
   168   case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
       
   169   case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
       
   170   case STAR(r1) => NFA_STAR(thompson(r1))
       
   171 }
       
   172 
       
   173 // some examples for Thompson's
       
   174 val A = thompson(CHAR('a'))
       
   175 
       
   176 println(A.accepts("a"))   // true 
       
   177 println(A.accepts("c"))   // false 
       
   178 println(A.accepts("aa"))  // false
       
   179 
       
   180 val B = thompson(ALT("ab","ac"))
       
   181  
       
   182 println(B.accepts("ab"))   // true 
       
   183 println(B.accepts("ac"))   // true 
       
   184 println(B.accepts("bb"))   // false 
       
   185 println(B.accepts("aa"))   // false 
       
   186 
       
   187 val C = thompson(STAR("ab"))
       
   188 
       
   189 println(C.accepts(""))       // true
       
   190 println(C.accepts("a"))      // false 
       
   191 println(C.accepts("ababab")) // true
       
   192 println(C.accepts("ab"))     // true
       
   193 println(C.accepts("ac"))     // false 
       
   194 println(C.accepts("bb"))     // false 
       
   195 println(C.accepts("aa"))     // false 
       
   196 
       
   197 // regular expression matcher using Thompson's
       
   198 def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)
       
   199 
       
   200 
       
   201 //optional
       
   202 def OPT(r: Rexp) = ALT(r, EMPTY)
       
   203 
       
   204 //n-times
       
   205 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
       
   206   case 0 => EMPTY
       
   207   case 1 => r
       
   208   case n => SEQ(r, NTIMES(r, n - 1))
       
   209 }
       
   210 
       
   211 // evil regular exproession
       
   212 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
       
   213 
       
   214 // test harness for the matcher
       
   215 for (i <- 0 to 100 by 5) {
       
   216   println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
       
   217 }
       
   218 
       
   219 
       
   220 // regular expression matching via search and backtracking
       
   221 def accepts2(nfa: NFA, s: String) : Boolean = {
       
   222 
       
   223   def search(q: State, s: List[Char]) : Boolean = s match {
       
   224     case Nil => nfa.fins contains (q)
       
   225     case c::cs => 
       
   226       (nfa.delta(q, c) exists (search(_, cs))) || 
       
   227       (nfa.eps(q) exists (search(_, c::cs)))
       
   228   }
       
   229 
       
   230   search(nfa.start, s.toList)
       
   231 }
       
   232 
       
   233 def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)
       
   234 
       
   235 // test harness for the backtracking matcher
       
   236 for (i <- 0 to 20 by 1) {
       
   237   println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))
       
   238 }
       
   239 
       
   240 
       
   241 
       
   242