|
1 // NFAs based on Scala's partial functions (returning |
|
2 // sets of states) |
|
3 |
|
4 |
|
5 import scala.util.Try |
|
6 |
|
7 |
|
8 // type abbreviation for partial functions |
|
9 type :=>[A, B] = PartialFunction[A, B] |
|
10 |
|
11 |
|
12 // some states for test cases |
|
13 abstract class State |
|
14 case object Q0 extends State |
|
15 case object Q1 extends State |
|
16 case object Q2 extends State |
|
17 case object Q3 extends State |
|
18 case object Q4 extends State |
|
19 case object Q5 extends State |
|
20 case object Q6 extends State |
|
21 |
|
22 |
|
23 // return empty set when not defined |
|
24 def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] = |
|
25 Try(f(x)) getOrElse Set[B]() |
|
26 |
|
27 |
|
28 |
|
29 // class for NFAs |
|
30 case class NFA[A, C](starts: Set[A], // starting states |
|
31 delta: (A, C) :=> Set[A], // transitions |
|
32 fins: A => Boolean) { // final states |
|
33 |
|
34 // given a state and a character, what is the set of next states? |
|
35 // if there is none => empty set |
|
36 def next(q: A, c: C) : Set[A] = |
|
37 applyOrElse(delta, (q, c)) |
|
38 |
|
39 def nexts(qs: Set[A], c: C) : Set[A] = |
|
40 qs.flatMap(next(_, c)) |
|
41 |
|
42 // given some states and a string, what is the set of next states? |
|
43 def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { |
|
44 case Nil => qs |
|
45 case c::cs => deltas(nexts(qs, c), cs) |
|
46 } |
|
47 |
|
48 // is a string accepted by an NFA? |
|
49 def accepts(s: List[C]) : Boolean = |
|
50 deltas(starts, s).exists(fins) |
|
51 |
|
52 // depth-first search version of accept |
|
53 def search(q: A, s: List[C]) : Boolean = s match { |
|
54 case Nil => fins(q) |
|
55 case c::cs => next(q, c).exists(search(_, cs)) |
|
56 } |
|
57 |
|
58 def accepts2(s: List[C]) : Boolean = |
|
59 starts.exists(search(_, s)) |
|
60 |
|
61 } |
|
62 |
|
63 |
|
64 // NFA test cases |
|
65 |
|
66 val trans2 : (State, Char) :=> Set[State] = |
|
67 { case (Q0, 'a') => Set(Q0, Q1) |
|
68 case (Q0, 'b') => Set(Q2) |
|
69 case (Q1, 'a') => Set(Q1) |
|
70 case (Q2, 'b') => Set(Q2) |
|
71 } |
|
72 |
|
73 val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2)) |
|
74 |
|
75 nfa2.accepts("aa".toList) // false |
|
76 nfa2.accepts("aaaaa".toList) // false |
|
77 nfa2.accepts("aaaaab".toList) // true |
|
78 nfa2.accepts("aaaaabbb".toList) // true |
|
79 nfa2.accepts("aaaaabbbaaa".toList) // false |
|
80 nfa2.accepts("ac".toList) // false |
|
81 |
|
82 nfa2.accepts2("aa".toList) // false |
|
83 nfa2.accepts2("aaaaa".toList) // false |
|
84 nfa2.accepts2("aaaaab".toList) // true |
|
85 nfa2.accepts2("aaaaabbb".toList) // true |
|
86 nfa2.accepts2("aaaaabbbaaa".toList) // false |
|
87 nfa2.accepts2("ac".toList) // false |
|
88 |
|
89 |
|
90 |
|
91 |
|
92 // epsilon NFAs |
|
93 // (not explicitly defined, but immediately translated into NFAs) |
|
94 |
|
95 // fixpoint construction |
|
96 import scala.annotation.tailrec |
|
97 @tailrec |
|
98 def fixpT[A](f: A => A, x: A): A = { |
|
99 val fx = f(x) |
|
100 if (fx == x) x else fixpT(f, fx) |
|
101 } |
|
102 |
|
103 // translates eNFAs directly into NFAs |
|
104 def eNFA[A, C](starts: Set[A], |
|
105 delta: (A, Option[C]) :=> Set[A], |
|
106 fins: A => Boolean) : NFA[A, C] = { |
|
107 |
|
108 // epsilon transitions |
|
109 def enext(q: A) : Set[A] = |
|
110 applyOrElse(delta, (q, None)) |
|
111 |
|
112 def enexts(qs: Set[A]) : Set[A] = |
|
113 qs | qs.flatMap(enext(_)) |
|
114 |
|
115 // epsilon closure |
|
116 def ecl(qs: Set[A]) : Set[A] = |
|
117 fixpT(enexts, qs) |
|
118 |
|
119 // "normal" transitions |
|
120 def next(q: A, c: C) : Set[A] = |
|
121 applyOrElse(delta, (q, Some(c))) |
|
122 |
|
123 def nexts(qs: Set[A], c: C) : Set[A] = |
|
124 ecl(ecl(qs).flatMap(next(_, c))) |
|
125 |
|
126 NFA(ecl(starts), |
|
127 { case (q, c) => nexts(Set(q), c) }, |
|
128 q => ecl(Set(q)) exists fins) |
|
129 } |
|
130 |
|
131 |
|
132 |
|
133 |
|
134 |
|
135 // test cases for eNFAs |
|
136 val etrans1 : (State, Option[Char]) :=> Set[State] = |
|
137 { case (Q0, Some('a')) => Set(Q1) |
|
138 case (Q1, None) => Set(Q0) |
|
139 } |
|
140 |
|
141 val enfa1 = eNFA(Set[State](Q0), etrans1, Set[State](Q1)) |
|
142 |
|
143 enfa1.accepts("a".toList) // true |
|
144 enfa1.accepts("".toList) // false |
|
145 enfa1.accepts("aaaaa".toList) // true |
|
146 enfa1.accepts("aaaaab".toList) // false |
|
147 enfa1.accepts("aaaaabbb".toList) // false |
|
148 enfa1.accepts("aaaaabbbaaa".toList) // false |
|
149 enfa1.accepts("ac".toList) // false |
|
150 |
|
151 // example from handouts |
|
152 val etrans2 : (State, Option[Char]) :=> Set[State] = |
|
153 { case (Q0, Some('a')) => Set(Q0) |
|
154 case (Q0, None) => Set(Q1, Q2) |
|
155 case (Q1, Some('a')) => Set(Q1) |
|
156 case (Q2, Some('b')) => Set(Q2) |
|
157 case (Q1, None) => Set(Q0) |
|
158 } |
|
159 |
|
160 val enfa2 = eNFA(Set[State](Q0), etrans2, Set[State](Q2)) |
|
161 |
|
162 enfa2.accepts("a".toList) // true |
|
163 enfa2.accepts("".toList) // true |
|
164 enfa2.accepts("aaaaa".toList) // true |
|
165 enfa2.accepts("aaaaab".toList) // true |
|
166 enfa2.accepts("aaaaabbb".toList) // true |
|
167 enfa2.accepts("aaaaabbbaaa".toList) // false |
|
168 enfa2.accepts("ac".toList) // false |
|
169 |
|
170 |
|
171 // states for Thompson construction |
|
172 case class TState(i: Int) extends State |
|
173 |
|
174 object TState { |
|
175 var counter = 0 |
|
176 |
|
177 def apply() : TState = { |
|
178 counter += 1; |
|
179 new TState(counter - 1) |
|
180 } |
|
181 } |
|
182 |
|
183 // some types abbreviations |
|
184 type NFAt = NFA[TState, Char] |
|
185 type NFAtrans = (TState, Char) :=> Set[TState] |
|
186 type eNFAtrans = (TState, Option[Char]) :=> Set[TState] |
|
187 |
|
188 |
|
189 // for composing an eNFA transition with a NFA transition |
|
190 implicit class RichPF(val f: eNFAtrans) extends AnyVal { |
|
191 def +++(g: NFAtrans) : eNFAtrans = |
|
192 { case (q, None) => applyOrElse(f, (q, None)) |
|
193 case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c)) } |
|
194 } |
|
195 |
|
196 |
|
197 // NFA that does not accept any string |
|
198 def NFA_ZERO(): NFAt = { |
|
199 val Q = TState() |
|
200 NFA(Set(Q), { case _ => Set() }, Set()) |
|
201 } |
|
202 |
|
203 // NFA that accepts the empty string |
|
204 def NFA_ONE() : NFAt = { |
|
205 val Q = TState() |
|
206 NFA(Set(Q), { case _ => Set() }, Set(Q)) |
|
207 } |
|
208 |
|
209 // NFA that accepts the string "c" |
|
210 def NFA_CHAR(c: Char) : NFAt = { |
|
211 val Q1 = TState() |
|
212 val Q2 = TState() |
|
213 NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) |
|
214 } |
|
215 |
|
216 // sequence of two NFAs |
|
217 def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = { |
|
218 val new_delta : eNFAtrans = |
|
219 { case (q, None) if enfa1.fins(q) => enfa2.starts } |
|
220 |
|
221 eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta, |
|
222 enfa2.fins) |
|
223 } |
|
224 |
|
225 // alternative of two NFAs |
|
226 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = { |
|
227 val new_delta : NFAtrans = { |
|
228 case (q, c) => applyOrElse(enfa1.delta, (q, c)) | |
|
229 applyOrElse(enfa2.delta, (q, c)) } |
|
230 val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q) |
|
231 |
|
232 NFA(enfa1.starts | enfa2.starts, new_delta, new_fins) |
|
233 } |
|
234 |
|
235 // star of a NFA |
|
236 def NFA_STAR(enfa: NFAt) : NFAt = { |
|
237 val Q = TState() |
|
238 val new_delta : eNFAtrans = |
|
239 { case (Q, None) => enfa.starts |
|
240 case (q, None) if enfa.fins(q) => Set(Q) } |
|
241 |
|
242 eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q)) |
|
243 } |
|
244 |
|
245 |
|
246 // Regular expressions fro derivative automata |
|
247 |
|
248 abstract class Rexp |
|
249 case object ZERO extends Rexp |
|
250 case object ONE extends Rexp |
|
251 case class CHAR(c: Char) extends Rexp |
|
252 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
253 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
254 case class STAR(r: Rexp) extends Rexp |
|
255 |
|
256 import scala.language.implicitConversions |
|
257 import scala.language.reflectiveCalls |
|
258 |
|
259 def charlist2rexp(s: List[Char]): Rexp = s match { |
|
260 case Nil => ONE |
|
261 case c::Nil => CHAR(c) |
|
262 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
263 } |
|
264 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) |
|
265 |
|
266 implicit def RexpOps (r: Rexp) = new { |
|
267 def | (s: Rexp) = ALT(r, s) |
|
268 def % = STAR(r) |
|
269 def ~ (s: Rexp) = SEQ(r, s) |
|
270 } |
|
271 |
|
272 implicit def stringOps (s: String) = new { |
|
273 def | (r: Rexp) = ALT(s, r) |
|
274 def | (r: String) = ALT(s, r) |
|
275 def % = STAR(s) |
|
276 def ~ (r: Rexp) = SEQ(s, r) |
|
277 def ~ (r: String) = SEQ(s, r) |
|
278 } |
|
279 |
|
280 //optional |
|
281 def OPT(r: Rexp) = ALT(r, ONE) |
|
282 |
|
283 //n-times |
|
284 def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
|
285 case 0 => ONE |
|
286 case 1 => r |
|
287 case n => SEQ(r, NTIMES(r, n - 1)) |
|
288 } |
|
289 |
|
290 // evil regular exproession |
|
291 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |
|
292 |
|
293 |
|
294 val EVIL2 = STAR(STAR("a")) ~ "b" |
|
295 |
|
296 // thompson construction |
|
297 def thompson (r: Rexp) : NFAt = r match { |
|
298 case ZERO => NFA_ZERO() |
|
299 case ONE => NFA_ONE() |
|
300 case CHAR(c) => NFA_CHAR(c) |
|
301 case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) |
|
302 case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) |
|
303 case STAR(r1) => NFA_STAR(thompson(r1)) |
|
304 } |
|
305 |
|
306 // regular expression matcher using Thompson's |
|
307 def tmatcher(r: Rexp, s: String) : Boolean = |
|
308 thompson(r).accepts(s.toList) |
|
309 |
|
310 def tmatcher2(r: Rexp, s: String) : Boolean = |
|
311 thompson(r).accepts2(s.toList) |
|
312 |
|
313 // test cases for thompson construction |
|
314 tmatcher(ZERO, "") // false |
|
315 tmatcher(ZERO, "a") // false |
|
316 |
|
317 tmatcher(ONE, "") // true |
|
318 tmatcher(ONE, "a") // false |
|
319 |
|
320 tmatcher(CHAR('a'), "") // false |
|
321 tmatcher(CHAR('a'), "a") // true |
|
322 tmatcher(CHAR('a'), "b") // false |
|
323 |
|
324 tmatcher("a" | "b", "") // false |
|
325 tmatcher("a" | "b", "a") // true |
|
326 tmatcher("a" | "b", "b") // true |
|
327 tmatcher("a" | "b", "c") // false |
|
328 tmatcher("a" | "b", "ab") // false |
|
329 |
|
330 tmatcher("a" ~ "b", "") // false |
|
331 tmatcher("a" ~ "b", "a") // false |
|
332 tmatcher("a" ~ "b", "b") // false |
|
333 tmatcher("a" ~ "b", "c") // false |
|
334 tmatcher("a" ~ "b", "ab") // true |
|
335 tmatcher("a" ~ "b", "aba") // false |
|
336 |
|
337 tmatcher(STAR("a"), "") // true |
|
338 tmatcher(STAR("a"), "a") // true |
|
339 tmatcher(STAR("a"), "aaaaa") // true |
|
340 tmatcher(STAR("a"), "b") // false |
|
341 tmatcher(STAR("a"), "aaab") // false |
|
342 |
|
343 tmatcher(STAR(STAR("a")), "") // true |
|
344 tmatcher(STAR(STAR("a")), "a") // true |
|
345 tmatcher(STAR(STAR("a")), "aaaaa") // true |
|
346 tmatcher(STAR(STAR("a")), "b") // false |
|
347 tmatcher(STAR(STAR("a")), "aaab") // false |
|
348 |
|
349 tmatcher(EVIL2, "aaaaaab") // true |
|
350 tmatcher(EVIL2, "aaaaaa") // false |
|
351 tmatcher(EVIL2, "a" * 100) // false |
|
352 |
|
353 // helper function for recording time |
|
354 def time_needed[T](i: Int, code: => T) = { |
|
355 val start = System.nanoTime() |
|
356 for (j <- 1 to i) code |
|
357 val end = System.nanoTime() |
|
358 (end - start)/(i * 1.0e9) |
|
359 } |
|
360 |
|
361 |
|
362 |
|
363 // test harness for the matcher |
|
364 for (i <- 0 to 9) { |
|
365 println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i)))) |
|
366 } |
|
367 |
|
368 for (i <- 0 to 7) { |
|
369 println(i + ": " + "%.5f".format(time_needed(1, tmatcher2(EVIL(i), "a" * i)))) |
|
370 } |
|
371 |
|
372 for (i <- 0 to 100 by 5) { |
|
373 println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL2, "a" * i)))) |
|
374 } |
|
375 |
|
376 |
|
377 for (i <- 0 to 8) { |
|
378 println(i + ": " + "%.5f".format(time_needed(1, tmatcher2(EVIL2, "a" * i)))) |
|
379 } |