|      1 // NFAs and Thompson's construction  |      1 // NFAs in Scala using partial functions (returning | 
|      2  |      2 // sets of states) | 
|      3 // helper function for recording time |      3 // | 
|      4 def time_needed[T](i: Int, code: => T) = { |      4 // needs :load dfa.scala   for states | 
|      5   val start = System.nanoTime() |         | 
|      6   for (j <- 1 to i) code |         | 
|      7   val end = System.nanoTime() |         | 
|      8   (end - start)/(i * 1.0e9) |         | 
|      9 } |         | 
|     10  |      5  | 
|     11  |      6  | 
|     12 // state nodes |      7 // type abbreviation for partial functions | 
|     13 abstract class State |      8 type :=>[A, B] = PartialFunction[A, B] | 
|     14 type States = Set[State] |         | 
|     15  |      9  | 
|     16 case class IntState(i: Int) extends State |     10 // return an empty set when not defined | 
|     17  |     11 def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] = | 
|     18 object NewState { |     12   Try(f(x)) getOrElse Set[B]() | 
|     19   var counter = 0 |         | 
|     20    |         | 
|     21   def apply() : IntState = { |         | 
|     22     counter += 1; |         | 
|     23     new IntState(counter - 1) |         | 
|     24   } |         | 
|     25 } |         | 
|     26  |     13  | 
|     27  |     14  | 
|     28 case class NFA(states: States,  |     15 // NFAs | 
|     29                start: State,  |     16 case class NFA[A, C](starts: Set[A],           // starting states | 
|     30                delta: (State, Char) => States,  |     17                      delta: (A, C) :=> Set[A], // transition function | 
|     31                eps: State => States, |     18                      fins:  A => Boolean) {    // final states  | 
|     32                fins: States) { |     19  | 
|     33    |     20   // given a state and a character, what is the set of  | 
|     34   def epsclosure(qs: States) : States = { |     21   // next states? if there is none => empty set | 
|     35     val ps = qs ++ qs.flatMap(eps(_)) |     22   def next(q: A, c: C) : Set[A] =  | 
|     36     if (qs == ps) ps else epsclosure(ps) |     23     applyOrElse(delta, (q, c)) | 
|         |     24  | 
|         |     25   def nexts(qs: Set[A], c: C) : Set[A] = | 
|         |     26     qs.flatMap(next(_, c)) | 
|         |     27  | 
|         |     28   // given some states and a string, what is the set  | 
|         |     29   // of next states? | 
|         |     30   def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { | 
|         |     31     case Nil => qs | 
|         |     32     case c::cs => deltas(nexts(qs, c), cs) | 
|     37   } |     33   } | 
|     38  |     34  | 
|     39   def deltas(qs: States, s: List[Char]) : States = s match { |     35   // is a string accepted by an NFA? | 
|     40     case Nil => epsclosure(qs) |     36   def accepts(s: List[C]) : Boolean =  | 
|     41     case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) |     37     deltas(starts, s).exists(fins) | 
|         |     38  | 
|         |     39   // depth-first version of accepts | 
|         |     40   def search(q: A, s: List[C]) : Boolean = s match { | 
|         |     41     case Nil => fins(q) | 
|         |     42     case c::cs => next(q, c).exists(search(_, cs)) | 
|     42   } |     43   } | 
|     43  |     44  | 
|     44   def accepts(s: String) : Boolean =  |     45   def accepts2(s: List[C]) : Boolean = | 
|     45     deltas(Set(start), s.toList) exists (fins contains (_)) |     46     starts.exists(search(_, s)) | 
|     46 } |         | 
|     47  |         | 
|     48 // A small example NFA from the lectures  |         | 
|     49 val Q0 = NewState() |         | 
|     50 val Q1 = NewState() |         | 
|     51 val Q2 = NewState() |         | 
|     52  |         | 
|     53 val delta : (State, Char) => States = { |         | 
|     54   case (Q0, 'a') => Set(Q0) |         | 
|     55   case (Q1, 'a') => Set(Q1) |         | 
|     56   case (Q2, 'b') => Set(Q2) |         | 
|     57   case (_, _) => Set () |         | 
|     58 } |         | 
|     59  |         | 
|     60 val eps : State => States = { |         | 
|     61   case Q0 => Set(Q1, Q2) |         | 
|     62   case _ => Set() |         | 
|     63 } |         | 
|     64  |         | 
|     65 val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) |         | 
|     66  |         | 
|     67 NFA1.accepts("aa") |         | 
|     68 NFA1.accepts("aaaaa") |         | 
|     69 NFA1.accepts("aaaaabbb") |         | 
|     70 NFA1.accepts("aaaaabbbaaa") |         | 
|     71 NFA1.accepts("ac") |         | 
|     72  |         | 
|     73  |         | 
|     74 // explicit construction of some NFAs; used in |         | 
|     75 // Thompson's construction |         | 
|     76  |         | 
|     77 // NFA that does not accept any string |         | 
|     78 def NFA_NULL() : NFA = { |         | 
|     79   val Q = NewState() |         | 
|     80   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) |         | 
|     81 } |         | 
|     82  |         | 
|     83 // NFA that accepts the empty string |         | 
|     84 def NFA_EMPTY() : NFA = { |         | 
|     85   val Q = NewState() |         | 
|     86   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) |         | 
|     87 } |         | 
|     88  |         | 
|     89 // NFA that accepts the string "c" |         | 
|     90 def NFA_CHAR(c: Char) : NFA = { |         | 
|     91   val Q1 = NewState() |         | 
|     92   val Q2 = NewState() |         | 
|     93   NFA(Set(Q1, Q2),  |         | 
|     94       Q1,  |         | 
|     95       { case (Q1, d) if (c == d) => Set(Q2) |         | 
|     96         case (_, _) => Set() }, |         | 
|     97       { case _ => Set() }, |         | 
|     98       Set(Q2)) |         | 
|     99 } |         | 
|    100  |         | 
|    101 // alternative of two NFAs |         | 
|    102 def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { |         | 
|    103   val Q = NewState() |         | 
|    104   NFA(Set(Q) ++ nfa1.states ++ nfa2.states, |         | 
|    105       Q, |         | 
|    106       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) |         | 
|    107         case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) |         | 
|    108         case (_, _) => Set() }, |         | 
|    109       { case Q => Set(nfa1.start, nfa2.start) |         | 
|    110         case q if (nfa1.states contains q) => nfa1.eps(q) |         | 
|    111         case q if (nfa2.states contains q) => nfa2.eps(q) |         | 
|    112         case _ => Set() }, |         | 
|    113       nfa1.fins ++ nfa2.fins) |         | 
|    114 } |         | 
|    115  |         | 
|    116 // sequence of two NFAs |         | 
|    117 def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { |         | 
|    118   NFA(nfa1.states ++ nfa2.states, |         | 
|    119       nfa1.start, |         | 
|    120       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) |         | 
|    121         case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) |         | 
|    122         case (_, _) => Set() }, |         | 
|    123       { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) |         | 
|    124         case q if (nfa1.states contains q) => nfa1.eps(q) |         | 
|    125         case q if (nfa2.states contains q) => nfa2.eps(q) |         | 
|    126         case _ => Set() }, |         | 
|    127       nfa2.fins) |         | 
|    128 } |         | 
|    129  |         | 
|    130 // star of an NFA |         | 
|    131 def NFA_STAR(nfa: NFA) : NFA = { |         | 
|    132   val Q = NewState() |         | 
|    133   NFA(Set(Q) ++ nfa.states,  |         | 
|    134       Q, |         | 
|    135       nfa.delta, |         | 
|    136       { case Q => Set(nfa.start) |         | 
|    137         case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) |         | 
|    138         case q if (nfa.states contains q) => nfa.eps(q) |         | 
|    139         case _ => Set() }, |         | 
|    140       Set(Q)) |         | 
|    141 } |         | 
|    142  |         | 
|    143  |         | 
|    144 // regular expressions used for Thompson's construction |         | 
|    145 abstract class Rexp |         | 
|    146  |         | 
|    147 case object NULL extends Rexp |         | 
|    148 case object EMPTY extends Rexp |         | 
|    149 case class CHAR(c: Char) extends Rexp  |         | 
|    150 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |         | 
|    151 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  |         | 
|    152 case class STAR(r: Rexp) extends Rexp |         | 
|    153  |         | 
|    154 // some convenience for typing in regular expressions |         | 
|    155 def charlist2rexp(s : List[Char]) : Rexp = s match { |         | 
|    156   case Nil => EMPTY |         | 
|    157   case c::Nil => CHAR(c) |         | 
|    158   case c::s => SEQ(CHAR(c), charlist2rexp(s)) |         | 
|    159 } |         | 
|    160 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |         | 
|    161  |         | 
|    162  |         | 
|    163  |         | 
|    164 def thompson (r: Rexp) : NFA = r match { |         | 
|    165   case NULL => NFA_NULL() |         | 
|    166   case EMPTY => NFA_EMPTY() |         | 
|    167   case CHAR(c) => NFA_CHAR(c)   |         | 
|    168   case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) |         | 
|    169   case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) |         | 
|    170   case STAR(r1) => NFA_STAR(thompson(r1)) |         | 
|    171 } |         | 
|    172  |         | 
|    173 // some examples for Thompson's |         | 
|    174 val A = thompson(CHAR('a')) |         | 
|    175  |         | 
|    176 println(A.accepts("a")) |         | 
|    177 println(A.accepts("c")) |         | 
|    178 println(A.accepts("aa")) |         | 
|    179  |         | 
|    180 val B = thompson(ALT("ab","ac")) |         | 
|    181  |         | 
|    182 println(B.accepts("ab")) |         | 
|    183 println(B.accepts("ac")) |         | 
|    184 println(B.accepts("bb")) |         | 
|    185 println(B.accepts("aa")) |         | 
|    186  |         | 
|    187 val C = thompson(STAR("ab")) |         | 
|    188  |         | 
|    189 println(C.accepts("")) |         | 
|    190 println(C.accepts("a")) |         | 
|    191 println(C.accepts("ababab")) |         | 
|    192 println(C.accepts("ab")) |         | 
|    193 println(C.accepts("ac")) |         | 
|    194 println(C.accepts("bb")) |         | 
|    195 println(C.accepts("aa")) |         | 
|    196  |         | 
|    197 // regular expression matcher using Thompson's |         | 
|    198 def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) |         | 
|    199  |         | 
|    200  |         | 
|    201 //optional |         | 
|    202 def OPT(r: Rexp) = ALT(r, EMPTY) |         | 
|    203  |         | 
|    204 //n-times |         | 
|    205 def NTIMES(r: Rexp, n: Int) : Rexp = n match { |         | 
|    206   case 0 => EMPTY |         | 
|    207   case 1 => r |         | 
|    208   case n => SEQ(r, NTIMES(r, n - 1)) |         | 
|    209 } |         | 
|    210  |         | 
|    211 // evil regular exproession |         | 
|    212 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |         | 
|    213  |         | 
|    214 // test harness for the matcher |         | 
|    215 for (i <- 0 to 100 by 5) { |         | 
|    216   println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) |         | 
|    217 } |         | 
|    218  |         | 
|    219  |         | 
|    220 // regular expression matching via search and backtracking |         | 
|    221 def accepts2(nfa: NFA, s: String) : Boolean = { |         | 
|    222  |         | 
|    223   def search(q: State, s: List[Char]) : Boolean = s match { |         | 
|    224     case Nil => nfa.fins contains (q) |         | 
|    225     case c::cs =>  |         | 
|    226       (nfa.delta(q, c) exists (search(_, cs))) ||  |         | 
|    227       (nfa.eps(q) exists (search(_, c::cs))) |         | 
|    228   } |         | 
|    229  |         | 
|    230   search(nfa.start, s.toList) |         | 
|    231 } |         | 
|    232  |         | 
|    233 def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) |         | 
|    234  |         | 
|    235 // test harness for the backtracking matcher |         | 
|    236 for (i <- 0 to 20 by 1) { |         | 
|    237   println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) |         | 
|    238 } |     47 } | 
|    239  |     48  | 
|    240  |     49  | 
|    241  |     50  | 
|         |     51 // NFA examples | 
|    242  |     52  | 
|         |     53 val nfa_trans1 : (State, Char) :=> Set[State] =  | 
|         |     54   { case (Q0, 'a') => Set(Q0, Q1)  | 
|         |     55     case (Q0, 'b') => Set(Q2)  | 
|         |     56     case (Q1, 'a') => Set(Q1)  | 
|         |     57     case (Q2, 'b') => Set(Q2) } | 
|         |     58  | 
|         |     59 val nfa1 = NFA(Set[State](Q0), nfa_trans1, Set[State](Q2)) | 
|         |     60  | 
|         |     61 nfa1.accepts("aa".toList)             // false | 
|         |     62 nfa1.accepts("aaaaa".toList)          // false | 
|         |     63 nfa1.accepts("aaaaab".toList)         // true | 
|         |     64 nfa1.accepts("aaaaabbb".toList)       // true | 
|         |     65 nfa1.accepts("aaaaabbbaaa".toList)    // false | 
|         |     66 nfa1.accepts("ac".toList)             // false | 
|         |     67  | 
|         |     68 nfa1.accepts2("aa".toList)             // false | 
|         |     69 nfa1.accepts2("aaaaa".toList)          // false | 
|         |     70 nfa1.accepts2("aaaaab".toList)         // true | 
|         |     71 nfa1.accepts2("aaaaabbb".toList)       // true | 
|         |     72 nfa1.accepts2("aaaaabbbaaa".toList)    // false | 
|         |     73 nfa1.accepts2("ac".toList)             // false | 
|         |     74  | 
|         |     75  | 
|         |     76  | 
|         |     77  | 
|         |     78 // subset constructions | 
|         |     79  | 
|         |     80 def subset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = { | 
|         |     81   DFA(nfa.starts,  | 
|         |     82       { case (qs, c) => nfa.nexts(qs, c) },  | 
|         |     83       _.exists(nfa.fins)) | 
|         |     84 } | 
|         |     85  | 
|         |     86 subset(nfa1).accepts("aa".toList)             // false | 
|         |     87 subset(nfa1).accepts("aaaaa".toList)          // false | 
|         |     88 subset(nfa1).accepts("aaaaab".toList)         // true | 
|         |     89 subset(nfa1).accepts("aaaaabbb".toList)       // true | 
|         |     90 subset(nfa1).accepts("aaaaabbbaaa".toList)    // false | 
|         |     91 subset(nfa1).accepts("ac".toList)             // false |