251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
252 case class STAR(r: Rexp) extends Rexp |
252 case class STAR(r: Rexp) extends Rexp |
253 case class NTIMES(r: Rexp, n: Int) extends Rexp |
253 case class NTIMES(r: Rexp, n: Int) extends Rexp |
254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
255 case class NOT(r: Rexp) extends Rexp |
255 case class NOT(r: Rexp) extends Rexp |
|
256 case class AND(r1: Rexp, r2: Rexp) extends Rexp |
256 |
257 |
257 def get_chars(r: Rexp) : Set[Char] = r match { |
258 def get_chars(r: Rexp) : Set[Char] = r match { |
258 case ZERO => Set() |
259 case ZERO => Set() |
259 case ONE => Set() |
260 case ONE => Set() |
260 case CHAR(c) => Set(c) |
261 case CHAR(c) => Set(c) |
263 case STAR(r) => get_chars(r) |
264 case STAR(r) => get_chars(r) |
264 case NTIMES(r, _) => get_chars(r) |
265 case NTIMES(r, _) => get_chars(r) |
265 case UPNTIMES(r, _) => get_chars(r) |
266 case UPNTIMES(r, _) => get_chars(r) |
266 case ALL => ('a' to 'z').toSet ++ Set('*','/','\\') |
267 case ALL => ('a' to 'z').toSet ++ Set('*','/','\\') |
267 case NOT(r) => get_chars(r) |
268 case NOT(r) => get_chars(r) |
|
269 case AND(r1, r2) => get_chars(r1) ++ get_chars(r2) |
268 } |
270 } |
269 |
271 |
270 |
272 |
271 |
273 |
272 import scala.language.implicitConversions |
274 import scala.language.implicitConversions |
307 case (r1s, r2s) => SEQ(r1s, r2s) |
309 case (r1s, r2s) => SEQ(r1s, r2s) |
308 } |
310 } |
309 case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
311 case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
310 case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
312 case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
311 case NOT(r) => NOT(simp(r)) |
313 case NOT(r) => NOT(simp(r)) |
|
314 case AND(r1, r2) => AND(simp(r1), simp(r2)) |
312 case r => r |
315 case r => r |
313 } |
316 } |
314 |
317 |
315 |
318 |
316 // nullable function: tests whether the regular |
319 // nullable function: tests whether the regular |
324 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
327 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
325 case STAR(_) => true |
328 case STAR(_) => true |
326 case NTIMES(r, i) => if (i == 0) true else nullable(r) |
329 case NTIMES(r, i) => if (i == 0) true else nullable(r) |
327 case UPNTIMES(r: Rexp, n: Int) => true |
330 case UPNTIMES(r: Rexp, n: Int) => true |
328 case NOT(r) => !nullable(r) |
331 case NOT(r) => !nullable(r) |
|
332 case AND(r1, r2) => nullable(r1) && nullable(r2) |
329 } |
333 } |
330 |
334 |
331 // derivative of a regular expression w.r.t. a character |
335 // derivative of a regular expression w.r.t. a character |
332 def der(c: Char, r: Rexp) : Rexp = r match { |
336 def der(c: Char, r: Rexp) : Rexp = r match { |
333 case ZERO => ZERO |
337 case ZERO => ZERO |
345 else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
349 else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
346 case UPNTIMES(r1, i) => |
350 case UPNTIMES(r1, i) => |
347 if (i == 0) ZERO |
351 if (i == 0) ZERO |
348 else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
352 else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
349 case NOT(r) => NOT(der(c, r)) |
353 case NOT(r) => NOT(der(c, r)) |
|
354 // AND case? |
350 } |
355 } |
351 |
356 |
352 |
357 |
353 // partial derivative of a regular expression w.r.t. a character |
358 // partial derivative of a regular expression w.r.t. a character |
|
359 // does not work for NOT and AND ... see below |
354 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
360 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
355 case ZERO => Set() |
361 case ZERO => Set() |
356 case ONE => Set() |
362 case ONE => Set() |
357 case CHAR(d) => if (c == d) Set(ONE) else Set() |
363 case CHAR(d) => if (c == d) Set(ONE) else Set() |
358 case ALL => Set(ONE) |
364 case ALL => Set(ONE) |
370 for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1)) |
376 for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1)) |
371 case UPNTIMES(r1, i) => |
377 case UPNTIMES(r1, i) => |
372 if (i == 0) Set() |
378 if (i == 0) Set() |
373 else |
379 else |
374 for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
380 for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
375 case NOT(r1) => |
|
376 for (pr1 <- pder(c, r1)) yield NOT(pr1) |
|
377 } |
381 } |
378 |
382 |
379 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] = |
383 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] = |
380 rs.flatMap(pder(c, _)) |
384 rs.flatMap(pder(c, _)) |
381 |
385 |
387 } |
391 } |
388 |
392 |
389 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
393 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
390 case Nil => rs.exists(nullable(_)) |
394 case Nil => rs.exists(nullable(_)) |
391 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
395 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
|
396 } |
|
397 |
|
398 |
|
399 |
|
400 |
|
401 def seq_add(rss: Set[Set[Rexp]], r2: Rexp) : Set[Set[Rexp]] = |
|
402 for (rs <- rss) yield (for (r <- rs) yield (SEQ(r, r2) : Rexp)) |
|
403 |
|
404 def not_add(rss: Set[Set[Rexp]]) : Set[Set[Set[Rexp]]] = |
|
405 for (rs <- rss) yield (for (r <- rs) yield (Set(NOT(r)) : Set[Rexp])) |
|
406 |
|
407 def and_compose(rss1: Set[Set[Rexp]], rss2: Set[Set[Rexp]]) : Set[Set[Rexp]] = |
|
408 for (rs1 <- rss1; rs2 <- rss2) yield rs1 ++ rs2 |
|
409 |
|
410 def pder2(c: Char, r: Rexp) : Set[Set[Rexp]] = r match { |
|
411 case ZERO => Set() |
|
412 case ONE => Set() |
|
413 case CHAR(d) => if (c == d) Set(Set(ONE)) else Set() |
|
414 case ALL => Set(Set(ONE)) |
|
415 case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2) |
|
416 case SEQ(r1, r2) => { |
|
417 val prs1 = seq_add(pder2(c, r1), r2) |
|
418 if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1 |
|
419 } |
|
420 case STAR(r1) => |
|
421 seq_add(pder2(c, r1), STAR(r1)) |
|
422 case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2)) |
|
423 case NOT(r1) => { |
|
424 val prs1 = not_add(pder2(c, r1)) |
|
425 if (prs1.isEmpty) Set() else |
|
426 prs1.tail.foldRight(prs1.head) { (rs1, rs2) => rs1 ++ rs2 } |
|
427 } |
|
428 } |
|
429 |
|
430 def pmatcher2(rss: Set[Set[Rexp]], s: List[Char]): Boolean = s match { |
|
431 case Nil => rss.exists((rs) => rs.exists((r) => nullable(r))) |
|
432 case c::cs => pmatcher2(rss.flatMap(_.flatMap(pder2(c, _))), cs) |
392 } |
433 } |
393 |
434 |
394 |
435 |
395 // quick-and-dirty translation of a pder automaton |
436 // quick-and-dirty translation of a pder automaton |
396 |
437 |
551 pmatcher(Set(rnot), "/*aaabaa*/".toList) // true |
592 pmatcher(Set(rnot), "/*aaabaa*/".toList) // true |
552 pmatcher(Set(rnot), "/*/**/".toList) // true |
593 pmatcher(Set(rnot), "/*/**/".toList) // true |
553 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ????? |
594 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ????? |
554 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false |
595 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false |
555 |
596 |
|
597 pmatcher2(Set(Set(rnot)), "/**/".toList) // true |
|
598 pmatcher2(Set(Set(rnot)), "/*aaabaa*/".toList) // true |
|
599 pmatcher2(Set(Set(rnot)), "/*/**/".toList) // true |
|
600 pmatcher2(Set(Set(rnot)), "/*aaa*/aa*/".toList) // false ????? |
|
601 pmatcher2(Set(Set(rnot)), "/aaa*/aa*/".toList) // false |
|
602 |
556 // example from automata paper with counters and backreferences |
603 // example from automata paper with counters and backreferences |
557 |
604 |
558 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
605 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
559 mkNFA(r1) // size = 5 |
606 mkNFA(r1) // size = 5 |
560 |
607 |