1 // NFAs and Thompson's construction |
|
2 |
|
3 // helper function for recording time |
|
4 def time_needed[T](i: Int, code: => T) = { |
|
5 val start = System.nanoTime() |
|
6 for (j <- 1 to i) code |
|
7 val end = System.nanoTime() |
|
8 (end - start)/(i * 1.0e9) |
|
9 } |
|
10 |
|
11 |
|
12 // state nodes |
|
13 abstract class State |
|
14 type States = Set[State] |
|
15 |
|
16 case class IntState(i: Int) extends State |
|
17 |
|
18 object NewState { |
|
19 var counter = 0 |
|
20 |
|
21 def apply() : IntState = { |
|
22 counter += 1; |
|
23 new IntState(counter - 1) |
|
24 } |
|
25 } |
|
26 |
|
27 |
|
28 case class NFA(states: States, |
|
29 start: State, |
|
30 delta: (State, Char) => States, |
|
31 eps: State => States, |
|
32 fins: States) { |
|
33 |
|
34 def epsclosure(qs: States) : States = { |
|
35 val ps = qs ++ qs.flatMap(eps(_)) |
|
36 if (qs == ps) ps else epsclosure(ps) |
|
37 } |
|
38 |
|
39 def deltas(qs: States, s: List[Char]) : States = s match { |
|
40 case Nil => epsclosure(qs) |
|
41 case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) |
|
42 } |
|
43 |
|
44 def accepts(s: String) : Boolean = |
|
45 deltas(Set(start), s.toList) exists (fins contains (_)) |
|
46 } |
|
47 |
|
48 // A small example NFA from the lectures |
|
49 val Q0 = NewState() |
|
50 val Q1 = NewState() |
|
51 val Q2 = NewState() |
|
52 |
|
53 val delta : (State, Char) => States = { |
|
54 case (Q0, 'a') => Set(Q0) |
|
55 case (Q1, 'a') => Set(Q1) |
|
56 case (Q2, 'b') => Set(Q2) |
|
57 case (_, _) => Set () |
|
58 } |
|
59 |
|
60 val eps : State => States = { |
|
61 case Q0 => Set(Q1, Q2) |
|
62 case _ => Set() |
|
63 } |
|
64 |
|
65 val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) |
|
66 |
|
67 NFA1.accepts("aa") |
|
68 NFA1.accepts("aaaaa") |
|
69 NFA1.accepts("aaaaabbb") |
|
70 NFA1.accepts("aaaaabbbaaa") |
|
71 NFA1.accepts("ac") |
|
72 |
|
73 |
|
74 // explicit construction of some NFAs; used in |
|
75 // Thompson's construction |
|
76 |
|
77 // NFA that does not accept any string |
|
78 def NFA_NULL() : NFA = { |
|
79 val Q = NewState() |
|
80 NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) |
|
81 } |
|
82 |
|
83 // NFA that accepts the empty string |
|
84 def NFA_EMPTY() : NFA = { |
|
85 val Q = NewState() |
|
86 NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) |
|
87 } |
|
88 |
|
89 // NFA that accepts the string "c" |
|
90 def NFA_CHAR(c: Char) : NFA = { |
|
91 val Q1 = NewState() |
|
92 val Q2 = NewState() |
|
93 NFA(Set(Q1, Q2), |
|
94 Q1, |
|
95 { case (Q1, d) if (c == d) => Set(Q2) |
|
96 case (_, _) => Set() }, |
|
97 { case _ => Set() }, |
|
98 Set(Q2)) |
|
99 } |
|
100 |
|
101 // alternative of two NFAs |
|
102 def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { |
|
103 val Q = NewState() |
|
104 NFA(Set(Q) ++ nfa1.states ++ nfa2.states, |
|
105 Q, |
|
106 { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) |
|
107 case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) |
|
108 case (_, _) => Set() }, |
|
109 { case Q => Set(nfa1.start, nfa2.start) |
|
110 case q if (nfa1.states contains q) => nfa1.eps(q) |
|
111 case q if (nfa2.states contains q) => nfa2.eps(q) |
|
112 case _ => Set() }, |
|
113 nfa1.fins ++ nfa2.fins) |
|
114 } |
|
115 |
|
116 // sequence of two NFAs |
|
117 def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { |
|
118 NFA(nfa1.states ++ nfa2.states, |
|
119 nfa1.start, |
|
120 { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) |
|
121 case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) |
|
122 case (_, _) => Set() }, |
|
123 { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) |
|
124 case q if (nfa1.states contains q) => nfa1.eps(q) |
|
125 case q if (nfa2.states contains q) => nfa2.eps(q) |
|
126 case _ => Set() }, |
|
127 nfa2.fins) |
|
128 } |
|
129 |
|
130 // star of an NFA |
|
131 def NFA_STAR(nfa: NFA) : NFA = { |
|
132 val Q = NewState() |
|
133 NFA(Set(Q) ++ nfa.states, |
|
134 Q, |
|
135 nfa.delta, |
|
136 { case Q => Set(nfa.start) |
|
137 case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) |
|
138 case q if (nfa.states contains q) => nfa.eps(q) |
|
139 case _ => Set() }, |
|
140 Set(Q)) |
|
141 } |
|
142 |
|
143 |
|
144 // regular expressions used for Thompson's construction |
|
145 abstract class Rexp |
|
146 |
|
147 case object NULL extends Rexp |
|
148 case object EMPTY extends Rexp |
|
149 case class CHAR(c: Char) extends Rexp |
|
150 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
151 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
152 case class STAR(r: Rexp) extends Rexp |
|
153 |
|
154 // some convenience for typing in regular expressions |
|
155 def charlist2rexp(s : List[Char]) : Rexp = s match { |
|
156 case Nil => EMPTY |
|
157 case c::Nil => CHAR(c) |
|
158 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
159 } |
|
160 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
|
161 |
|
162 |
|
163 |
|
164 def thompson (r: Rexp) : NFA = r match { |
|
165 case NULL => NFA_NULL() |
|
166 case EMPTY => NFA_EMPTY() |
|
167 case CHAR(c) => NFA_CHAR(c) |
|
168 case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) |
|
169 case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) |
|
170 case STAR(r1) => NFA_STAR(thompson(r1)) |
|
171 } |
|
172 |
|
173 // some examples for Thompson's |
|
174 val A = thompson(CHAR('a')) |
|
175 |
|
176 println(A.accepts("a")) // true |
|
177 println(A.accepts("c")) // false |
|
178 println(A.accepts("aa")) // false |
|
179 |
|
180 val B = thompson(ALT("ab","ac")) |
|
181 |
|
182 println(B.accepts("ab")) // true |
|
183 println(B.accepts("ac")) // true |
|
184 println(B.accepts("bb")) // false |
|
185 println(B.accepts("aa")) // false |
|
186 |
|
187 val C = thompson(STAR("ab")) |
|
188 |
|
189 println(C.accepts("")) // true |
|
190 println(C.accepts("a")) // false |
|
191 println(C.accepts("ababab")) // true |
|
192 println(C.accepts("ab")) // true |
|
193 println(C.accepts("ac")) // false |
|
194 println(C.accepts("bb")) // false |
|
195 println(C.accepts("aa")) // false |
|
196 |
|
197 // regular expression matcher using Thompson's |
|
198 def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) |
|
199 |
|
200 |
|
201 //optional |
|
202 def OPT(r: Rexp) = ALT(r, EMPTY) |
|
203 |
|
204 //n-times |
|
205 def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
|
206 case 0 => EMPTY |
|
207 case 1 => r |
|
208 case n => SEQ(r, NTIMES(r, n - 1)) |
|
209 } |
|
210 |
|
211 // evil regular exproession |
|
212 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |
|
213 |
|
214 // test harness for the matcher |
|
215 for (i <- 0 to 100 by 5) { |
|
216 println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) |
|
217 } |
|
218 |
|
219 |
|
220 // regular expression matching via search and backtracking |
|
221 def accepts2(nfa: NFA, s: String) : Boolean = { |
|
222 |
|
223 def search(q: State, s: List[Char]) : Boolean = s match { |
|
224 case Nil => nfa.fins contains (q) |
|
225 case c::cs => |
|
226 (nfa.delta(q, c) exists (search(_, cs))) || |
|
227 (nfa.eps(q) exists (search(_, c::cs))) |
|
228 } |
|
229 |
|
230 search(nfa.start, s.toList) |
|
231 } |
|
232 |
|
233 def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) |
|
234 |
|
235 // test harness for the backtracking matcher |
|
236 for (i <- 0 to 20 by 1) { |
|
237 println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) |
|
238 } |
|
239 |
|
240 |
|
241 |
|
242 |
|