245 abstract class Rexp |
245 abstract class Rexp |
246 case object ZERO extends Rexp |
246 case object ZERO extends Rexp |
247 case object ONE extends Rexp |
247 case object ONE extends Rexp |
248 case class CHAR(c: Char) extends Rexp |
248 case class CHAR(c: Char) extends Rexp |
249 case object ALL extends Rexp |
249 case object ALL extends Rexp |
|
250 case object ALLPlus extends Rexp |
250 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
251 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
252 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
252 case class STAR(r: Rexp) extends Rexp |
253 case class STAR(r: Rexp) extends Rexp |
253 case class NTIMES(r: Rexp, n: Int) extends Rexp |
254 case class NTIMES(r: Rexp, n: Int) extends Rexp |
254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
255 case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
297 |
298 |
298 def simp(r: Rexp) : Rexp = r match { |
299 def simp(r: Rexp) : Rexp = r match { |
299 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
300 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
300 case (ZERO, r2s) => r2s |
301 case (ZERO, r2s) => r2s |
301 case (r1s, ZERO) => r1s |
302 case (r1s, ZERO) => r1s |
302 case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) |
303 case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) |
303 } |
304 } |
304 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
305 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
305 case (ZERO, _) => ZERO |
306 case (ZERO, _) => ZERO |
306 case (_, ZERO) => ZERO |
307 case (_, ZERO) => ZERO |
307 case (ONE, r2s) => r2s |
308 case (ONE, r2s) => r2s |
308 case (r1s, ONE) => r1s |
309 case (r1s, ONE) => r1s |
309 case (r1s, r2s) => SEQ(r1s, r2s) |
310 case (r1s, r2s) => SEQ(r1s, r2s) |
310 } |
311 } |
311 case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
312 case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
312 case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
313 case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
|
314 /* |
313 case NOT(r) => NOT(simp(r)) |
315 case NOT(r) => NOT(simp(r)) |
314 case AND(r1, r2) => AND(simp(r1), simp(r2)) |
316 case AND(r1, r2) => (simp(r1), simp(r2)) match { |
|
317 case (ALL, r2s) => r2s |
|
318 case (r1s, ALL) => r1s |
|
319 case (r1s, r2s) => if (r1s == r2s) r1s else AND(r1s, r2s) |
|
320 } |
|
321 */ |
315 case r => r |
322 case r => r |
316 } |
323 } |
317 |
324 |
318 |
325 |
319 // nullable function: tests whether the regular |
326 // nullable function: tests whether the regular |
320 // expression can recognise the empty string |
327 // expression can recognise the empty string |
321 def nullable(r: Rexp) : Boolean = r match { |
328 def nullable(r: Rexp) : Boolean = r match { |
322 case ZERO => false |
329 case ZERO => false |
323 case ONE => true |
330 case ONE => true |
324 case CHAR(_) => false |
331 case CHAR(_) => false |
325 case ALL => false |
332 case ALL => true |
|
333 case ALLPlus => false |
326 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
334 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
327 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
335 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
328 case STAR(_) => true |
336 case STAR(_) => true |
329 case NTIMES(r, i) => if (i == 0) true else nullable(r) |
337 case NTIMES(r, i) => if (i == 0) true else nullable(r) |
330 case UPNTIMES(r: Rexp, n: Int) => true |
338 case UPNTIMES(r: Rexp, n: Int) => true |
335 // derivative of a regular expression w.r.t. a character |
343 // derivative of a regular expression w.r.t. a character |
336 def der(c: Char, r: Rexp) : Rexp = r match { |
344 def der(c: Char, r: Rexp) : Rexp = r match { |
337 case ZERO => ZERO |
345 case ZERO => ZERO |
338 case ONE => ZERO |
346 case ONE => ZERO |
339 case CHAR(d) => if (c == d) ONE else ZERO |
347 case CHAR(d) => if (c == d) ONE else ZERO |
340 case ALL => ONE |
348 case ALL => ALL |
|
349 case ALLPlus => ALL |
341 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
350 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
342 case SEQ(r1, r2) => |
351 case SEQ(r1, r2) => |
343 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
352 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
344 else SEQ(der(c, r1), r2) |
353 else SEQ(der(c, r1), r2) |
345 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
354 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
349 else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
358 else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
350 case UPNTIMES(r1, i) => |
359 case UPNTIMES(r1, i) => |
351 if (i == 0) ZERO |
360 if (i == 0) ZERO |
352 else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
361 else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
353 case NOT(r) => NOT(der(c, r)) |
362 case NOT(r) => NOT(der(c, r)) |
354 // AND case? |
363 case AND(r1, r2) => AND(der(c, r1), der(c, r2)) |
355 } |
364 } |
356 |
365 |
357 |
366 |
358 // partial derivative of a regular expression w.r.t. a character |
367 // partial derivative of a regular expression w.r.t. a character |
359 // does not work for NOT and AND ... see below |
368 // does not work for NOT and AND ... see below |
360 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
369 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
361 case ZERO => Set() |
370 case ZERO => Set() |
362 case ONE => Set() |
371 case ONE => Set() |
363 case CHAR(d) => if (c == d) Set(ONE) else Set() |
372 case CHAR(d) => if (c == d) Set(ONE) else Set() |
364 case ALL => Set(ONE) |
373 case ALL => Set(ALL) |
|
374 case ALLPlus => Set(ALL) |
365 case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2) |
375 case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2) |
366 case SEQ(r1, r2) => |
376 case SEQ(r1, r2) => |
367 (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ |
377 (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ |
368 (if (nullable(r1)) pder(c, r2) else Set()) |
378 (if (nullable(r1)) pder(c, r2) else Set()) |
369 case STAR(r1) => |
379 case STAR(r1) => |
394 case Nil => rs.exists(nullable(_)) |
404 case Nil => rs.exists(nullable(_)) |
395 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
405 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
396 } |
406 } |
397 |
407 |
398 |
408 |
399 |
409 // partial derivatives including NOT and AND according to |
400 |
410 // PhD-thesis: Manipulation of Extended Regular Expressions |
401 def seq_add(rss: Set[Set[Rexp]], r2: Rexp) : Set[Set[Rexp]] = |
411 // with Derivatives; these partial derivatives are also not |
402 for (rs <- rss) yield (for (r <- rs) yield (SEQ(r, r2) : Rexp)) |
412 // finite...for example NOT((a*)a) |
403 |
413 |
404 def not_add(rss: Set[Set[Rexp]]) : Set[Set[Set[Rexp]]] = |
414 def seq_compose(rs: Set[Rexp], r2: Rexp) : Set[Rexp] = |
405 for (rs <- rss) yield (for (r <- rs) yield (Set(NOT(r)) : Set[Rexp])) |
415 for (r <- rs) yield (SEQ(r, r2) : Rexp) |
406 |
416 |
407 def and_compose(rss1: Set[Set[Rexp]], rss2: Set[Set[Rexp]]) : Set[Set[Rexp]] = |
417 def not_compose(rs: Set[Rexp]) : Set[Rexp] = |
408 for (rs1 <- rss1; rs2 <- rss2) yield rs1 ++ rs2 |
418 Set(rs.fold(ALL)((r1, r2) => AND(r1, NOT(r2)))) |
409 |
419 |
410 def pder2(c: Char, r: Rexp) : Set[Set[Rexp]] = r match { |
420 def and_compose(rs1: Set[Rexp], rs2: Set[Rexp]) : Set[Rexp] = |
|
421 for (r1 <- rs1; r2 <- rs2) yield AND(r1, r2) |
|
422 |
|
423 def pder2(c: Char, r: Rexp) : Set[Rexp] = r match { |
411 case ZERO => Set() |
424 case ZERO => Set() |
412 case ONE => Set() |
425 case ONE => Set() |
413 case CHAR(d) => if (c == d) Set(Set(ONE)) else Set() |
426 case ALL => Set(ALL) |
414 case ALL => Set(Set(ONE)) |
427 case ALLPlus => Set(ALL) |
|
428 case CHAR(d) => if (c == d) Set(ONE) else Set() |
415 case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2) |
429 case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2) |
416 case SEQ(r1, r2) => { |
430 case SEQ(r1, r2) => { |
417 val prs1 = seq_add(pder2(c, r1), r2) |
431 val prs1 = seq_compose(pder2(c, r1), r2) |
418 if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1 |
432 if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1 |
419 } |
433 } |
420 case STAR(r1) => |
434 case STAR(r1) => seq_compose(pder2(c, r1), STAR(r1)) |
421 seq_add(pder2(c, r1), STAR(r1)) |
|
422 case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2)) |
435 case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2)) |
423 case NOT(r1) => { |
436 case NOT(r1) => nder2(c, r1) |
424 val prs1 = not_add(pder2(c, r1)) |
437 } |
425 if (prs1.isEmpty) Set() else |
438 |
426 prs1.tail.foldRight(prs1.head) { (rs1, rs2) => rs1 ++ rs2 } |
439 def nder2(c: Char, r: Rexp) : Set[Rexp] = r match { |
427 } |
440 case ZERO => Set(ALL) |
428 } |
441 case ONE => Set(ALL) |
429 |
442 case ALL => Set() |
430 def pmatcher2(rss: Set[Set[Rexp]], s: List[Char]): Boolean = s match { |
443 case ALLPlus => Set() |
431 case Nil => rss.exists((rs) => rs.exists((r) => nullable(r))) |
444 case CHAR(d) => if (c == d) Set(ALLPlus) else Set(ALL) |
432 case c::cs => pmatcher2(rss.flatMap(_.flatMap(pder2(c, _))), cs) |
445 case ALT(r1, r2) => and_compose(nder2(c, r1), nder2(c, r2)) |
|
446 case SEQ(r1, r2) => if (nullable(r1)) |
|
447 and_compose(not_compose(seq_compose(pder2(c, r1), r2)), nder2(c, r2)) |
|
448 else not_compose(seq_compose(pder2(c, r1), r2)) |
|
449 case STAR(r1) => not_compose(pder2(c, STAR(r1))) |
|
450 case AND(r1, r2) => nder2(c, r1) ++ nder2(c, r2) |
|
451 case NOT(r1) => pder2(c, r1) |
|
452 } |
|
453 |
|
454 |
|
455 def pmatcher2(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
|
456 case Nil => rs.exists(nullable(_)) |
|
457 case c::cs => pmatcher2(rs.flatMap(pder2(c, _)), cs) |
433 } |
458 } |
434 |
459 |
435 |
460 // pder2/nder2 example |
436 // quick-and-dirty translation of a pder automaton |
461 |
|
462 val r_ex = AND("aa" | "a", AND(STAR("a"), NOT(STAR("a") ~ "a"))) |
|
463 nder2('a', r_ex).map(simp(_)).mkString("\n") |
|
464 |
|
465 |
|
466 |
|
467 |
|
468 |
|
469 // quick-and-dirty translation of a pder-automaton |
437 |
470 |
438 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
471 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
439 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), |
472 val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), |
440 Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), |
473 Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), |
441 _.exists(nullable)) |
474 _.exists(nullable)) |
507 |
540 |
508 //mkDFA(big) // does not terminate |
541 //mkDFA(big) // does not terminate |
509 |
542 |
510 |
543 |
511 |
544 |
512 // Antimirov Partial derivative automata construction ... definitely terminates |
545 // Antimirov Partial derivative automata construction ... definitely |
|
546 // terminates but does not work for extensions of NOT and AND |
|
547 // |
|
548 // for this we have to use the extended partial derivatives |
|
549 // pder2/nder2 |
513 |
550 |
514 |
551 |
515 // to transform (concrete) Maps into functions |
552 // to transform (concrete) Maps into functions |
516 def toFun(m: MTrans) : Trans = |
553 def toFun(m: MTrans) : Trans = |
517 { case (q, c) => m(q, c) } |
554 { case (q, c) => m(q, c) } |
518 |
555 |
519 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = { |
556 def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = { |
520 val qders = pder(c, q).map(simp(_)) // set of simplified partial derivatives |
557 val qders = pder2(c, q).map(simp(_)) // set of simplified partial derivatives |
521 qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) } |
558 qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) } |
522 } |
559 } |
523 |
560 |
524 def padd(sigma: Set[Char], delta: STrans, qs: DStates, |
561 def padd(sigma: Set[Char], delta: STrans, qs: DStates, |
525 q: DState, qnew: DState, c: Char) : (DStates, STrans) = { |
562 q: DState, qnew: DState, c: Char) : (DStates, STrans) = { |
584 |
621 |
585 matcher(rnot, "/**/".toList) // true |
622 matcher(rnot, "/**/".toList) // true |
586 matcher(rnot, "/*aaabaa*/".toList) // true |
623 matcher(rnot, "/*aaabaa*/".toList) // true |
587 matcher(rnot, "/*/**/".toList) // true |
624 matcher(rnot, "/*/**/".toList) // true |
588 matcher(rnot, "/*aaa*/aa*/".toList) // false |
625 matcher(rnot, "/*aaa*/aa*/".toList) // false |
589 matcher(rnot, "/aaa*/aa*/".toList) // false |
626 matcher(rnot, "/aaa*/aa*/".toList) // false |
590 |
627 |
591 pmatcher(Set(rnot), "/**/".toList) // true |
628 pmatcher2(Set(rnot), "/**/".toList) // true |
592 pmatcher(Set(rnot), "/*aaabaa*/".toList) // true |
629 pmatcher2(Set(rnot), "/*aaabaa*/".toList) // true |
593 pmatcher(Set(rnot), "/*/**/".toList) // true |
630 pmatcher2(Set(rnot), "/*/**/".toList) // true |
594 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ????? |
631 pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false |
595 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false |
632 pmatcher2(Set(rnot), "/aaa*/aa*/".toList) // false |
596 |
|
597 pmatcher2(Set(Set(rnot)), "/**/".toList) // true |
|
598 pmatcher2(Set(Set(rnot)), "/*aaabaa*/".toList) // true |
|
599 pmatcher2(Set(Set(rnot)), "/*/**/".toList) // true |
|
600 pmatcher2(Set(Set(rnot)), "/*aaa*/aa*/".toList) // false ????? |
|
601 pmatcher2(Set(Set(rnot)), "/aaa*/aa*/".toList) // false |
|
602 |
633 |
603 // example from automata paper with counters and backreferences |
634 // example from automata paper with counters and backreferences |
604 |
635 |
605 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
636 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
606 mkNFA(r1) // size = 5 |
637 mkNFA(r1) // size = 5 |