189 val fx = f(x) |
211 val fx = f(x) |
190 if (fx == x) x else fixpT(f, fx) |
212 if (fx == x) x else fixpT(f, fx) |
191 } |
213 } |
192 |
214 |
193 |
215 |
194 case class eNFA[A, C](starts: Set[A], // starting state |
216 case class eNFA[A, C](start: A, // starting state |
195 delta: Set[(A, Option[C]) :=> A], // transition edges |
217 delta: Set[(A, Option[C]) :=> A], // transition edges |
196 fins: A => Boolean) { // final states |
218 fins: A => Boolean) { // final states |
197 |
219 |
198 // epsilon transitions |
220 // epsilon transitions |
199 def enext(q: A) : Set[A] = |
221 def enext(q: A) : Set[A] = |
200 delta.flatMap((d) => Try(d(q, None)).toOption) |
222 delta.flatMap((d) => Try(d(q, None)).toOption) |
201 |
223 |
202 def enexts(qs: Set[A]) : Set[A] = |
224 def enexts(qs: Set[A]) : Set[A] = |
203 qs ++ qs.flatMap(enext(_)) |
225 qs | qs.flatMap(enext(_)) |
204 |
226 |
205 // epsilon closure |
227 // epsilon closure |
206 def ecl(qs: Set[A]) : Set[A] = |
228 def ecl(qs: Set[A]) : Set[A] = |
207 fixpT(enexts, qs) |
229 fixpT(enexts, qs) |
208 |
230 |
209 // "normal" transition |
231 // "normal" transition |
210 def next(q: A, c: C) : Set[A] = |
232 def next(q: A, c: C) : Set[A] = |
211 delta.flatMap((d) => Try(d(q, Some(c))).toOption) |
233 delta.flatMap((d) => Try(d(q, Some(c))).toOption) |
212 |
234 |
213 def nexts(qs: Set[A], c: C) : Set[A] = |
235 def nexts(qs: Set[A], c: C) : Set[A] = |
214 qs.flatMap(next(_, c)) |
236 ecl(ecl(qs).flatMap(next(_, c))) |
215 |
237 |
216 def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { |
238 def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { |
217 case Nil => ecl(qs) |
239 case Nil => ecl(qs) |
218 case c::cs => deltas(ecl(nexts(ecl(qs), c)), cs) |
240 case c::cs => deltas(nexts(qs, c), cs) |
219 } |
241 } |
220 |
242 |
221 def accepts(s: List[C]) : Boolean = |
243 def accepts(s: List[C]) : Boolean = |
222 deltas(starts, s.toList).exists(fins) |
244 deltas(Set(start), s.toList).exists(fins) |
223 } |
245 } |
224 |
246 |
225 |
247 // test cases for eNFAs |
226 val etrans1 = Set[(State, Option[Char]) :=> State]( |
248 val etrans1 = Set[(State, Option[Char]) :=> State]( |
227 { case (Q0, Some('a')) => Q1 }, |
249 { case (Q0, Some('a')) => Q1 }, |
228 { case (Q1, None) => Q0 } |
250 { case (Q1, None) => Q0 } |
229 ) |
251 ) |
230 |
252 |
231 val enfa = eNFA(Set[State](Q0), etrans1, Set[State](Q1)) |
253 val enfa1 = eNFA(Q0, etrans1, Set[State](Q1)) |
232 |
254 |
233 enfa.accepts("a".toList) // true |
255 enfa1.accepts("a".toList) // true |
234 enfa.accepts("".toList) // false |
256 enfa1.accepts("".toList) // false |
235 enfa.accepts("aaaaa".toList) // true |
257 enfa1.accepts("aaaaa".toList) // true |
236 enfa.accepts("aaaaab".toList) // flase |
258 enfa1.accepts("aaaaab".toList) // false |
237 enfa.accepts("aaaaabbb".toList) // false |
259 enfa1.accepts("aaaaabbb".toList) // false |
238 enfa.accepts("aaaaabbbaaa".toList) // false |
260 enfa1.accepts("aaaaabbbaaa".toList) // false |
239 enfa.accepts("ac".toList) // false |
261 enfa1.accepts("ac".toList) // false |
240 |
262 |
241 |
263 // example from handouts |
|
264 val etrans2 = Set[(State, Option[Char]) :=> State]( |
|
265 { case (Q0, Some('a')) => Q0 }, |
|
266 { case (Q0, None) => Q1 }, |
|
267 { case (Q0, None) => Q2 }, |
|
268 { case (Q1, Some('a')) => Q1 }, |
|
269 { case (Q2, Some('b')) => Q2 }, |
|
270 { case (Q1, None) => Q0 } |
|
271 ) |
|
272 |
|
273 val enfa2 = eNFA(Q0, etrans2, Set[State](Q2)) |
|
274 |
|
275 enfa2.accepts("a".toList) // true |
|
276 enfa2.accepts("".toList) // true |
|
277 enfa2.accepts("aaaaa".toList) // true |
|
278 enfa2.accepts("aaaaab".toList) // true |
|
279 enfa2.accepts("aaaaabbb".toList) // true |
|
280 enfa2.accepts("aaaaabbbaaa".toList) // false |
|
281 enfa2.accepts("ac".toList) // false |
|
282 |
|
283 |
|
284 // states for Thompson construction |
|
285 case class TState(i: Int) extends State |
|
286 |
|
287 object TState { |
|
288 var counter = 0 |
|
289 |
|
290 def apply() : TState = { |
|
291 counter += 1; |
|
292 new TState(counter - 1) |
|
293 } |
|
294 } |
|
295 |
|
296 // eNFA that does not accept any string |
|
297 def eNFA_ZERO(): eNFA[TState, Char] = { |
|
298 val Q = TState() |
|
299 eNFA(Q, Set(), Set()) |
|
300 } |
|
301 |
|
302 // eNFA that accepts the empty string |
|
303 def eNFA_ONE() : eNFA[TState, Char] = { |
|
304 val Q = TState() |
|
305 eNFA(Q, Set(), Set(Q)) |
|
306 } |
|
307 |
|
308 // eNFA that accepts the string "c" |
|
309 def eNFA_CHAR(c: Char) : eNFA[TState, Char] = { |
|
310 val Q1 = TState() |
|
311 val Q2 = TState() |
|
312 eNFA(Q1, |
|
313 Set({ case (Q1, Some(d)) if (c == d) => Q2 }), |
|
314 Set(Q2)) |
|
315 } |
|
316 |
|
317 // alternative of two eNFAs |
|
318 def eNFA_ALT(enfa1: eNFA[TState, Char], enfa2: eNFA[TState, Char]) : eNFA[TState, Char] = { |
|
319 val Q = TState() |
|
320 eNFA(Q, |
|
321 enfa1.delta | enfa2.delta | |
|
322 Set({ case (Q, None) => enfa1.start }, |
|
323 { case (Q, None) => enfa2.start }), |
|
324 q => enfa1.fins(q) || enfa2.fins(q)) |
|
325 } |
|
326 |
|
327 // sequence of two eNFAs |
|
328 def eNFA_SEQ(enfa1: eNFA[TState, Char], enfa2: eNFA[TState, Char]) : eNFA[TState, Char] = { |
|
329 eNFA(enfa1.start, |
|
330 enfa1.delta | enfa2.delta | |
|
331 Set({ case (q, None) if enfa1.fins(q) => enfa2.start }), |
|
332 enfa2.fins) |
|
333 } |
|
334 |
|
335 // star of an eNFA |
|
336 def eNFA_STAR(enfa: eNFA[TState, Char]) : eNFA[TState, Char] = { |
|
337 val Q = TState() |
|
338 eNFA(Q, |
|
339 enfa.delta | |
|
340 Set({ case (q, None) if enfa.fins(q) => Q }, |
|
341 { case (Q, None) => enfa.start }), |
|
342 Set(Q)) |
|
343 } |
|
344 |
|
345 /* |
|
346 def tofunset[A, C](d: (A, Option[C]) :=> Set[A])(q: A, c: C) : Set[(A, C) :=> A] = { |
|
347 d((q, Some(c))).map ((qs: A) => { case (qi, ci) if (qi == q && ci == c) => qs } : (A, C) :=> A) |
|
348 } |
|
349 |
|
350 |
|
351 def eNFA2NFA[A, C](starts: Set[A], // starting state |
|
352 delta: Set[(A, Option[C]) :=> A], // transition edges |
|
353 fins: A => Boolean) { // final states |
|
354 |
|
355 // epsilon transitions |
|
356 def enext(q: A) : Set[A] = |
|
357 delta.flatMap(d => Try(d(q, None)).toOption) |
|
358 |
|
359 def enexts(qs: Set[A]) : Set[A] = |
|
360 qs | qs.flatMap(enext(_)) |
|
361 |
|
362 // epsilon closure |
|
363 def ecl(qs: Set[A]) : Set[A] = |
|
364 fixpT(enexts, qs) |
|
365 |
|
366 |
|
367 // "normal" transition |
|
368 def next(q: A, c: C) : Set[A] = |
|
369 delta.flatMap(d => Try(d(q, Some(c))).toOption) |
|
370 |
|
371 def nexts(qs: Set[A], c: C) : Set[A] = |
|
372 ecl(ecl(qs).flatMap(next(_, c))) |
|
373 |
|
374 |
|
375 def nfa_delta : Set[(A, C) :=> A] = delta.flatMap(d => tofunset(d)) |
|
376 |
|
377 def nfa_starts = ecl(starts) |
|
378 |
|
379 def nfa_fins = (q: A) => ecl(Set[A](q)) exists fins |
|
380 |
|
381 NFA(nfa_starts, nfa_delta, nfa_fins) |
|
382 } |
|
383 |
|
384 */ |
242 |
385 |
243 // Regular expressions fro derivative automata |
386 // Regular expressions fro derivative automata |
244 |
387 |
245 abstract class Rexp |
388 abstract class Rexp |
246 case object ZERO extends Rexp |
389 case object ZERO extends Rexp |
294 def % = STAR(s) |
437 def % = STAR(s) |
295 def ~ (r: Rexp) = SEQ(s, r) |
438 def ~ (r: Rexp) = SEQ(s, r) |
296 def ~ (r: String) = SEQ(s, r) |
439 def ~ (r: String) = SEQ(s, r) |
297 } |
440 } |
298 |
441 |
|
442 // thompson construction only for basic regular expressions |
|
443 def thompson (r: Rexp) : eNFA[TState, Char] = r match { |
|
444 case ZERO => eNFA_ZERO() |
|
445 case ONE => eNFA_ONE() |
|
446 case CHAR(c) => eNFA_CHAR(c) |
|
447 case ALT(r1, r2) => eNFA_ALT(thompson(r1), thompson(r2)) |
|
448 case SEQ(r1, r2) => eNFA_SEQ(thompson(r1), thompson(r2)) |
|
449 case STAR(r1) => eNFA_STAR(thompson(r1)) |
|
450 } |
|
451 |
|
452 // regular expression matcher using Thompson's |
|
453 def tmatcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s.toList) |
|
454 |
|
455 |
|
456 // test cases for thompson construction |
|
457 tmatcher(ZERO, "") // false |
|
458 tmatcher(ZERO, "a") // false |
|
459 |
|
460 tmatcher(ONE, "") // true |
|
461 tmatcher(ONE, "a") // false |
|
462 |
|
463 tmatcher(CHAR('a'), "") // false |
|
464 tmatcher(CHAR('a'), "a") // true |
|
465 tmatcher(CHAR('a'), "b") // false |
|
466 |
|
467 tmatcher("a" | "b", "") // false |
|
468 tmatcher("a" | "b", "a") // true |
|
469 tmatcher("a" | "b", "b") // true |
|
470 tmatcher("a" | "b", "c") // false |
|
471 tmatcher("a" | "b", "ab") // false |
|
472 |
|
473 tmatcher("a" ~ "b", "") // false |
|
474 tmatcher("a" ~ "b", "a") // false |
|
475 tmatcher("a" ~ "b", "b") // false |
|
476 tmatcher("a" ~ "b", "c") // false |
|
477 tmatcher("a" ~ "b", "ab") // true |
|
478 tmatcher("a" ~ "b", "aba") // false |
|
479 |
|
480 tmatcher(EVIL2, "aaaaaab") // true |
|
481 tmatcher(EVIL2, "aaaaaa") // false |
|
482 tmatcher(EVIL2, "a" * 100) // false |
|
483 |
|
484 |
|
485 //optional |
|
486 def OPT(r: Rexp) = ALT(r, ONE) |
|
487 |
|
488 //n-times |
|
489 def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
|
490 case 0 => ONE |
|
491 case 1 => r |
|
492 case n => SEQ(r, NTIMES(r, n - 1)) |
|
493 } |
|
494 |
|
495 // evil regular exproession |
|
496 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |
|
497 |
|
498 |
|
499 val EVIL2 = STAR(STAR("a")) ~ "b" |
|
500 |
|
501 // helper function for recording time |
|
502 def time_needed[T](i: Int, code: => T) = { |
|
503 val start = System.nanoTime() |
|
504 for (j <- 1 to i) code |
|
505 val end = System.nanoTime() |
|
506 (end - start)/(i * 1.0e9) |
|
507 } |
|
508 |
|
509 |
|
510 |
|
511 // test harness for the matcher |
|
512 for (i <- 0 to 35 by 5) { |
|
513 println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i)))) |
|
514 } |
|
515 |
|
516 |
|
517 |
|
518 // normalisation of regular expressions |
|
519 // for derivative automata |
|
520 |
|
521 case class ALTs(rs: Set[Rexp]) extends Rexp |
|
522 case class ANDs(rs: Set[Rexp]) extends Rexp |
|
523 case class SEQs(rs: List[Rexp]) extends Rexp |
|
524 |
|
525 def normalise(r: Rexp) : Rexp = r match { |
|
526 case ALT(r1, r2) => (normalise(r1), normalise(r2)) match { |
|
527 case (ALTs(rs1), ALTs(rs2)) => ALTs(rs1 | rs2) |
|
528 case (r1s, ALTs(rs2)) => ALTs(rs2 + r1s) |
|
529 case (ALTs(rs1), r2s) => ALTs(rs1 + r2s) |
|
530 case (r1s, r2s) => ALTs(Set(r1s, r2s)) |
|
531 } |
|
532 case AND(r1, r2) => (normalise(r1), normalise(r2)) match { |
|
533 case (ANDs(rs1), ANDs(rs2)) => ANDs(rs1 | rs2) |
|
534 case (r1s, ANDs(rs2)) => ANDs(rs2 + r1s) |
|
535 case (ANDs(rs1), r2s) => ANDs(rs1 + r2s) |
|
536 case (r1s, r2s) => ANDs(Set(r1s, r2s)) |
|
537 } |
|
538 case SEQ(r1, r2) => (normalise(r1), normalise(r2)) match { |
|
539 case (SEQs(rs1), SEQs(rs2)) => SEQs(rs1 ++ rs2) |
|
540 case (r1s, SEQs(rs2)) => SEQs(r1s :: rs2) |
|
541 case (SEQs(rs1), r2s) => SEQs(rs1 ++ List(r2s)) |
|
542 case (r1s, r2s) => SEQs(List(r1s, r2s)) |
|
543 } |
|
544 case STAR(r) => STAR(normalise(r)) |
|
545 case NTIMES(r, n) => NTIMES(normalise(r), n) |
|
546 case UPNTIMES(r, n) => UPNTIMES(normalise(r), n) |
|
547 case NOT(r) => NOT(normalise(r)) |
|
548 case r => r |
|
549 } |
|
550 |
|
551 |
|
552 // simplification of regular expressions |
|
553 |
299 def simp(r: Rexp) : Rexp = r match { |
554 def simp(r: Rexp) : Rexp = r match { |
300 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
555 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
301 case (ZERO, r2s) => r2s |
556 case (ZERO, r2s) => r2s |
302 case (r1s, ZERO) => r1s |
557 case (r1s, ZERO) => r1s |
303 case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) |
558 case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) |
362 case NOT(r) => NOT(der(c, r)) |
616 case NOT(r) => NOT(der(c, r)) |
363 case AND(r1, r2) => AND(der(c, r1), der(c, r2)) |
617 case AND(r1, r2) => AND(der(c, r1), der(c, r2)) |
364 } |
618 } |
365 |
619 |
366 |
620 |
|
621 // derivative based matcher |
|
622 def matcher(r: Rexp, s: List[Char]): Boolean = s match { |
|
623 case Nil => nullable(r) |
|
624 case c::cs => matcher(der(c, r), cs) |
|
625 } |
|
626 |
|
627 |
|
628 |
367 // partial derivative of a regular expression w.r.t. a character |
629 // partial derivative of a regular expression w.r.t. a character |
368 // does not work for NOT and AND ... see below |
630 // does not work for NOT and AND ... see below |
369 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
631 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
370 case ZERO => Set() |
632 case ZERO => Set() |
371 case ONE => Set() |
633 case ONE => Set() |
372 case CHAR(d) => if (c == d) Set(ONE) else Set() |
634 case CHAR(d) => if (c == d) Set(ONE) else Set() |
373 case ALL => Set(ALL) |
635 case ALL => Set(ALL) |
374 case ALLPlus => Set(ALL) |
636 case ALLPlus => Set(ALL) |
375 case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2) |
637 case ALT(r1, r2) => pder(c, r1) | pder(c, r2) |
376 case SEQ(r1, r2) => |
638 case SEQ(r1, r2) => |
377 (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ |
639 (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) | |
378 (if (nullable(r1)) pder(c, r2) else Set()) |
640 (if (nullable(r1)) pder(c, r2) else Set()) |
379 case STAR(r1) => |
641 case STAR(r1) => |
380 for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1)) |
642 for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1)) |
381 case NTIMES(r1, i) => |
643 case NTIMES(r1, i) => |
382 if (i == 0) Set() else |
644 if (i == 0) Set() else |
388 if (i == 0) Set() |
650 if (i == 0) Set() |
389 else |
651 else |
390 for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
652 for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
391 } |
653 } |
392 |
654 |
393 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] = |
655 // partial derivative matcher (not for NOT and AND) |
394 rs.flatMap(pder(c, _)) |
|
395 |
|
396 |
|
397 // matchers |
|
398 def matcher(r: Rexp, s: List[Char]): Boolean = s match { |
|
399 case Nil => nullable(r) |
|
400 case c::cs => matcher(der(c, r), cs) |
|
401 } |
|
402 |
|
403 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
656 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
404 case Nil => rs.exists(nullable(_)) |
657 case Nil => rs.exists(nullable(_)) |
405 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
658 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
406 } |
659 } |
|
660 |
|
661 |
|
662 // quick-and-dirty translation of a pder-automaton |
|
663 // does not work for NOT and AND |
|
664 |
|
665 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
|
666 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), |
|
667 Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), |
|
668 _.exists(nullable)) |
|
669 |
|
670 |
|
671 |
|
672 pder_nfa.accepts("axaybzbc".toList) // true |
|
673 pder_nfa.accepts("aaaaxaybzbc".toList) // true |
|
674 pder_nfa.accepts("axaybzbd".toList) // false |
|
675 |
|
676 |
407 |
677 |
408 |
678 |
409 // partial derivatives including NOT and AND according to |
679 // partial derivatives including NOT and AND according to |
410 // PhD-thesis: Manipulation of Extended Regular Expressions |
680 // PhD-thesis: Manipulation of Extended Regular Expressions |
411 // with Derivatives; these partial derivatives are also not |
681 // with Derivatives; these partial derivatives are also not |
528 |
784 |
529 d2.accepts("axaybzbc".toList) // true |
785 d2.accepts("axaybzbc".toList) // true |
530 d2.accepts("aaaaxaybzbc".toList) // true |
786 d2.accepts("aaaaxaybzbc".toList) // true |
531 d2.accepts("axaybzbd".toList) // false |
787 d2.accepts("axaybzbd".toList) // false |
532 |
788 |
533 for (n <- (1 to 10).toList) |
789 |
534 mkDFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc") |
790 //for (n <- (1 to 10).toList) |
535 |
791 // mkDFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc") |
536 |
792 |
537 // this is an example where mkDFA does not terminate |
793 |
|
794 // this is an example where mkDFA without normalisation |
|
795 // does not terminate |
538 val big_aux = STAR("a" | "b") |
796 val big_aux = STAR("a" | "b") |
539 val big = SEQ(big_aux, SEQ("a",SEQ("b", big_aux))) |
797 val big = SEQ(big_aux, SEQ("a", SEQ("b", big_aux))) |
540 |
798 |
541 //mkDFA(big) // does not terminate |
799 mkDFA(big) // does not terminate without normalisation |
542 |
800 |
543 |
801 |
544 |
802 |
545 // Antimirov Partial derivative automata construction ... definitely |
803 // Antimirov Partial derivative automata construction ... definitely |
546 // terminates but does not work for extensions of NOT and AND |
804 // terminates but does not work for extensions of NOT and AND |
547 // |
805 // |
548 // for this we have to use the extended partial derivatives |
806 // for this we have to use the extended partial derivatives |
549 // pder2/nder2 |
807 // pder2/nder2...but termination is also not guaranteed |
550 |
808 |
551 |
809 |
552 // to transform (concrete) Maps into functions |
810 // to transform (concrete) Maps into functions |
553 def toFun(m: MTrans) : Trans = |
811 def toFun(m: MTrans) : Trans = |
554 { case (q, c) => m(q, c) } |
812 { case (q, c) => m(q, c) } |
555 |
813 |
556 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = { |
814 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = { |
557 val qders = pder2(c, q).map(simp(_)) // set of simplified partial derivatives |
815 val qders = pder(c, q).map(simp(_)) // set of simplified partial derivatives |
558 qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) } |
816 qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) } |
559 } |
817 } |
560 |
818 |
561 def padd(sigma: Set[Char], delta: STrans, qs: DStates, |
819 def padd(sigma: Set[Char], delta: STrans, qs: DStates, |
562 q: DState, qnew: DState, c: Char) : (DStates, STrans) = { |
820 q: DState, qnew: DState, c: Char) : (DStates, STrans) = { |
596 val t2 = pder('b', r_test).map(simp(_)) |
855 val t2 = pder('b', r_test).map(simp(_)) |
597 |
856 |
598 mkNFA(r_test) // size = 3 |
857 mkNFA(r_test) // size = 3 |
599 |
858 |
600 |
859 |
601 // simple example involving double star |
860 // helper function for recording time |
|
861 def time_needed[T](i: Int, code: => T) = { |
|
862 val start = System.nanoTime() |
|
863 for (j <- 1 to i) code |
|
864 val end = System.nanoTime() |
|
865 (end - start)/(i * 1.0e9) |
|
866 } |
|
867 |
|
868 // simple example involving double star (a*)* b |
602 // with depth-first search causes catastrophic backtracing |
869 // with depth-first search causes catastrophic backtracing |
603 |
870 |
604 val n2 = mkNFA(STAR(STAR("a")) ~ "b") // size 3 |
871 val n2 = mkNFA(EVIL2) // size 3 |
605 |
872 |
606 n2.accepts("aaaaaab".toList) // true |
873 n2.accepts("aaaaaab".toList) // true |
607 n2.accepts("aaaaaa".toList) // false |
874 n2.accepts("aaaaaa".toList) // false |
608 n2.accepts(("a" * 100).toList) // false |
875 n2.accepts(("a" * 100).toList) // false |
609 |
876 |
|
877 n2.accepts2("aaaaaab".toList) // true |
|
878 n2.accepts2("aaaaaa".toList) // false |
|
879 n2.accepts2(("a" * 100).toList) // false |
|
880 |
|
881 time_needed(2, n2.accepts("aaaaaab".toList)) |
|
882 time_needed(2, n2.accepts("aaaaaa".toList)) |
|
883 time_needed(2, n2.accepts(("a" * 2000).toList)) |
|
884 |
|
885 time_needed(2, n2.accepts2("aaaaaab".toList)) |
|
886 time_needed(2, n2.accepts2("aaaaaa".toList)) |
|
887 time_needed(2, n2.accepts2(("a" * 2000).toList)) |
|
888 |
|
889 |
|
890 // other evil regular expression |
|
891 |
|
892 for (i <- 0 to 100 by 5) { |
|
893 println(i + ": " + "%.5f".format(time_needed(1, mkNFA(EVIL(i)).accepts(("a" * i).toList)))) |
|
894 } |
|
895 |
610 |
896 |
611 // example involving not |
897 // example involving not |
612 |
898 |
613 val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/" |
899 val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/" |
614 val nnot = mkNFA(rnot) // size 7 |
900 |
615 |
901 |
616 nnot.accepts("/**/".toList) // true |
902 val dfa_not = mkDFA(rnot) // size 10 |
617 nnot.accepts("/*aaabaa*/".toList) // true |
903 |
618 nnot.accepts("/*/**/".toList) // true |
904 dfa_not.accepts("/**/".toList) // true |
619 nnot.accepts("/*aaa*/aa*/".toList) // false ???? |
905 dfa_not.accepts("/*aaabaa*/".toList) // true |
620 nnot.accepts("/aaa*/aa*/".toList) // false |
906 dfa_not.accepts("/*/**/".toList) // true |
621 |
907 dfa_not.accepts("/*aaa*/aa*/".toList) // false |
|
908 dfa_not.accepts("/aaa*/aa*/".toList) // false |
|
909 |
|
910 |
|
911 /* nfa construction according to pder does not work for NOT and AND; |
|
912 * nfa construction according to pder2/nder2 does not necesarily terminate |
|
913 |
|
914 val nfa_not = mkNFA(rnot) // does not termninate |
|
915 |
|
916 nfa_not.accepts("/**/".toList) // true |
|
917 nfa_not.accepts("/*aaabaa*/".toList) // true |
|
918 nfa_not.accepts("/*/**/".toList) // true |
|
919 nfa_not.accepts("/*aaa*/aa*/".toList) // false ???? |
|
920 nfa_not.accepts("/aaa*/aa*/".toList) // false |
|
921 */ |
|
922 |
|
923 // derivative matcher |
622 matcher(rnot, "/**/".toList) // true |
924 matcher(rnot, "/**/".toList) // true |
623 matcher(rnot, "/*aaabaa*/".toList) // true |
925 matcher(rnot, "/*aaabaa*/".toList) // true |
624 matcher(rnot, "/*/**/".toList) // true |
926 matcher(rnot, "/*/**/".toList) // true |
625 matcher(rnot, "/*aaa*/aa*/".toList) // false |
927 matcher(rnot, "/*aaa*/aa*/".toList) // false |
626 matcher(rnot, "/aaa*/aa*/".toList) // false |
928 matcher(rnot, "/aaa*/aa*/".toList) // false |
627 |
929 |
|
930 // pder2/nder2 partial derivative matcher |
628 pmatcher2(Set(rnot), "/**/".toList) // true |
931 pmatcher2(Set(rnot), "/**/".toList) // true |
629 pmatcher2(Set(rnot), "/*aaabaa*/".toList) // true |
932 pmatcher2(Set(rnot), "/*aaabaa*/".toList) // true |
630 pmatcher2(Set(rnot), "/*/**/".toList) // true |
933 pmatcher2(Set(rnot), "/*/**/".toList) // true |
631 pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false |
934 pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false |
632 pmatcher2(Set(rnot), "/aaa*/aa*/".toList) // false |
935 pmatcher2(Set(rnot), "/aaa*/aa*/".toList) // false |