|         |      1 // NFAs and Thompson's construction  | 
|         |      2  | 
|         |      3 // helper function for recording time | 
|         |      4 def time_needed[T](i: Int, code: => T) = { | 
|         |      5   val start = System.nanoTime() | 
|         |      6   for (j <- 1 to i) code | 
|         |      7   val end = System.nanoTime() | 
|         |      8   (end - start)/(i * 1.0e9) | 
|         |      9 } | 
|         |     10  | 
|         |     11  | 
|         |     12 // state nodes | 
|         |     13 abstract class State | 
|         |     14 type States = Set[State] | 
|         |     15  | 
|         |     16 case class IntState(i: Int) extends State | 
|         |     17  | 
|         |     18 object NewState { | 
|         |     19   var counter = 0 | 
|         |     20    | 
|         |     21   def apply() : IntState = { | 
|         |     22     counter += 1; | 
|         |     23     new IntState(counter - 1) | 
|         |     24   } | 
|         |     25 } | 
|         |     26  | 
|         |     27  | 
|         |     28 case class NFA(states: States,  | 
|         |     29                start: State,  | 
|         |     30                delta: (State, Char) => States,  | 
|         |     31                eps: State => States, | 
|         |     32                fins: States) { | 
|         |     33    | 
|         |     34   def epsclosure(qs: States) : States = { | 
|         |     35     val ps = qs ++ qs.flatMap(eps(_)) | 
|         |     36     if (qs == ps) ps else epsclosure(ps) | 
|         |     37   } | 
|         |     38  | 
|         |     39   def deltas(qs: States, s: List[Char]) : States = s match { | 
|         |     40     case Nil => epsclosure(qs) | 
|         |     41     case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) | 
|         |     42   } | 
|         |     43  | 
|         |     44   def accepts(s: String) : Boolean =  | 
|         |     45     deltas(Set(start), s.toList) exists (fins contains (_)) | 
|         |     46 } | 
|         |     47  | 
|         |     48 // A small example NFA from the lectures  | 
|         |     49 val Q0 = NewState() | 
|         |     50 val Q1 = NewState() | 
|         |     51 val Q2 = NewState() | 
|         |     52  | 
|         |     53 val delta : (State, Char) => States = { | 
|         |     54   case (Q0, 'a') => Set(Q0) | 
|         |     55   case (Q1, 'a') => Set(Q1) | 
|         |     56   case (Q2, 'b') => Set(Q2) | 
|         |     57   case (_, _) => Set () | 
|         |     58 } | 
|         |     59  | 
|         |     60 val eps : State => States = { | 
|         |     61   case Q0 => Set(Q1, Q2) | 
|         |     62   case _ => Set() | 
|         |     63 } | 
|         |     64  | 
|         |     65 val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) | 
|         |     66  | 
|         |     67 NFA1.accepts("aa") | 
|         |     68 NFA1.accepts("aaaaa") | 
|         |     69 NFA1.accepts("aaaaabbb") | 
|         |     70 NFA1.accepts("aaaaabbbaaa") | 
|         |     71 NFA1.accepts("ac") | 
|         |     72  | 
|         |     73  | 
|         |     74 // explicit construction of some NFAs; used in | 
|         |     75 // Thompson's construction | 
|         |     76  | 
|         |     77 // NFA that does not accept any string | 
|         |     78 def NFA_NULL() : NFA = { | 
|         |     79   val Q = NewState() | 
|         |     80   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) | 
|         |     81 } | 
|         |     82  | 
|         |     83 // NFA that accepts the empty string | 
|         |     84 def NFA_EMPTY() : NFA = { | 
|         |     85   val Q = NewState() | 
|         |     86   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) | 
|         |     87 } | 
|         |     88  | 
|         |     89 // NFA that accepts the string "c" | 
|         |     90 def NFA_CHAR(c: Char) : NFA = { | 
|         |     91   val Q1 = NewState() | 
|         |     92   val Q2 = NewState() | 
|         |     93   NFA(Set(Q1, Q2),  | 
|         |     94       Q1,  | 
|         |     95       { case (Q1, d) if (c == d) => Set(Q2) | 
|         |     96         case (_, _) => Set() }, | 
|         |     97       { case _ => Set() }, | 
|         |     98       Set(Q2)) | 
|         |     99 } | 
|         |    100  | 
|         |    101 // alternative of two NFAs | 
|         |    102 def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { | 
|         |    103   val Q = NewState() | 
|         |    104   NFA(Set(Q) ++ nfa1.states ++ nfa2.states, | 
|         |    105       Q, | 
|         |    106       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) | 
|         |    107         case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) | 
|         |    108         case (_, _) => Set() }, | 
|         |    109       { case Q => Set(nfa1.start, nfa2.start) | 
|         |    110         case q if (nfa1.states contains q) => nfa1.eps(q) | 
|         |    111         case q if (nfa2.states contains q) => nfa2.eps(q) | 
|         |    112         case _ => Set() }, | 
|         |    113       nfa1.fins ++ nfa2.fins) | 
|         |    114 } | 
|         |    115  | 
|         |    116 // sequence of two NFAs | 
|         |    117 def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { | 
|         |    118   NFA(nfa1.states ++ nfa2.states, | 
|         |    119       nfa1.start, | 
|         |    120       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) | 
|         |    121         case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) | 
|         |    122         case (_, _) => Set() }, | 
|         |    123       { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) | 
|         |    124         case q if (nfa1.states contains q) => nfa1.eps(q) | 
|         |    125         case q if (nfa2.states contains q) => nfa2.eps(q) | 
|         |    126         case _ => Set() }, | 
|         |    127       nfa2.fins) | 
|         |    128 } | 
|         |    129  | 
|         |    130 // star of an NFA | 
|         |    131 def NFA_STAR(nfa: NFA) : NFA = { | 
|         |    132   val Q = NewState() | 
|         |    133   NFA(Set(Q) ++ nfa.states,  | 
|         |    134       Q, | 
|         |    135       nfa.delta, | 
|         |    136       { case Q => Set(nfa.start) | 
|         |    137         case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) | 
|         |    138         case q if (nfa.states contains q) => nfa.eps(q) | 
|         |    139         case _ => Set() }, | 
|         |    140       Set(Q)) | 
|         |    141 } | 
|         |    142  | 
|         |    143  | 
|         |    144 // regular expressions used for Thompson's construction | 
|         |    145 abstract class Rexp | 
|         |    146  | 
|         |    147 case object NULL extends Rexp | 
|         |    148 case object EMPTY extends Rexp | 
|         |    149 case class CHAR(c: Char) extends Rexp  | 
|         |    150 case class ALT(r1: Rexp, r2: Rexp) extends Rexp | 
|         |    151 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  | 
|         |    152 case class STAR(r: Rexp) extends Rexp | 
|         |    153  | 
|         |    154 // some convenience for typing in regular expressions | 
|         |    155 def charlist2rexp(s : List[Char]) : Rexp = s match { | 
|         |    156   case Nil => EMPTY | 
|         |    157   case c::Nil => CHAR(c) | 
|         |    158   case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
|         |    159 } | 
|         |    160 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) | 
|         |    161  | 
|         |    162  | 
|         |    163  | 
|         |    164 def thompson (r: Rexp) : NFA = r match { | 
|         |    165   case NULL => NFA_NULL() | 
|         |    166   case EMPTY => NFA_EMPTY() | 
|         |    167   case CHAR(c) => NFA_CHAR(c)   | 
|         |    168   case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) | 
|         |    169   case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) | 
|         |    170   case STAR(r1) => NFA_STAR(thompson(r1)) | 
|         |    171 } | 
|         |    172  | 
|         |    173 // some examples for Thompson's | 
|         |    174 val A = thompson(CHAR('a')) | 
|         |    175  | 
|         |    176 println(A.accepts("a"))   // true  | 
|         |    177 println(A.accepts("c"))   // false  | 
|         |    178 println(A.accepts("aa"))  // false | 
|         |    179  | 
|         |    180 val B = thompson(ALT("ab","ac")) | 
|         |    181   | 
|         |    182 println(B.accepts("ab"))   // true  | 
|         |    183 println(B.accepts("ac"))   // true  | 
|         |    184 println(B.accepts("bb"))   // false  | 
|         |    185 println(B.accepts("aa"))   // false  | 
|         |    186  | 
|         |    187 val C = thompson(STAR("ab")) | 
|         |    188  | 
|         |    189 println(C.accepts(""))       // true | 
|         |    190 println(C.accepts("a"))      // false  | 
|         |    191 println(C.accepts("ababab")) // true | 
|         |    192 println(C.accepts("ab"))     // true | 
|         |    193 println(C.accepts("ac"))     // false  | 
|         |    194 println(C.accepts("bb"))     // false  | 
|         |    195 println(C.accepts("aa"))     // false  | 
|         |    196  | 
|         |    197 // regular expression matcher using Thompson's | 
|         |    198 def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) | 
|         |    199  | 
|         |    200  | 
|         |    201 //optional | 
|         |    202 def OPT(r: Rexp) = ALT(r, EMPTY) | 
|         |    203  | 
|         |    204 //n-times | 
|         |    205 def NTIMES(r: Rexp, n: Int) : Rexp = n match { | 
|         |    206   case 0 => EMPTY | 
|         |    207   case 1 => r | 
|         |    208   case n => SEQ(r, NTIMES(r, n - 1)) | 
|         |    209 } | 
|         |    210  | 
|         |    211 // evil regular exproession | 
|         |    212 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) | 
|         |    213  | 
|         |    214 // test harness for the matcher | 
|         |    215 for (i <- 0 to 100 by 5) { | 
|         |    216   println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) | 
|         |    217 } | 
|         |    218  | 
|         |    219  | 
|         |    220 // regular expression matching via search and backtracking | 
|         |    221 def accepts2(nfa: NFA, s: String) : Boolean = { | 
|         |    222  | 
|         |    223   def search(q: State, s: List[Char]) : Boolean = s match { | 
|         |    224     case Nil => nfa.fins contains (q) | 
|         |    225     case c::cs =>  | 
|         |    226       (nfa.delta(q, c) exists (search(_, cs))) ||  | 
|         |    227       (nfa.eps(q) exists (search(_, c::cs))) | 
|         |    228   } | 
|         |    229  | 
|         |    230   search(nfa.start, s.toList) | 
|         |    231 } | 
|         |    232  | 
|         |    233 def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) | 
|         |    234  | 
|         |    235 // test harness for the backtracking matcher | 
|         |    236 for (i <- 0 to 20 by 1) { | 
|         |    237   println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) | 
|         |    238 } | 
|         |    239  | 
|         |    240  | 
|         |    241  | 
|         |    242  |