243 // Regular expressions fro derivative automata |
243 // Regular expressions fro derivative automata |
244 |
244 |
245 abstract class Rexp |
245 abstract class Rexp |
246 case object ZERO extends Rexp |
246 case object ZERO extends Rexp |
247 case object ONE extends Rexp |
247 case object ONE extends Rexp |
248 case class CHAR(c: Char) extends Rexp { |
248 case class CHAR(c: Char) extends Rexp |
249 override def toString = c.toString |
249 case object ALL extends Rexp |
250 } |
|
251 case object ALL extends Rexp { |
|
252 override def toString = "." |
|
253 } |
|
254 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
250 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
255 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { |
251 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
256 override def toString = r1.toString + " ~ " + r2.toString |
252 case class STAR(r: Rexp) extends Rexp |
257 } |
253 case class NTIMES(r: Rexp, n: Int) extends Rexp |
258 case class STAR(r: Rexp) extends Rexp { |
|
259 override def toString = r.toString + "*" |
|
260 } |
|
261 case class NTIMES(r: Rexp, n: Int) extends Rexp { |
|
262 override def toString = r.toString + "{" + n.toString + "}" |
|
263 } |
|
264 case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
254 case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
265 |
255 case class NOT(r: Rexp) extends Rexp |
266 |
256 |
267 def get_chars(r: Rexp) : Set[Char] = r match { |
257 def get_chars(r: Rexp) : Set[Char] = r match { |
268 case ZERO => Set() |
258 case ZERO => Set() |
269 case ONE => Set() |
259 case ONE => Set() |
270 case CHAR(c) => Set(c) |
260 case CHAR(c) => Set(c) |
271 case ALT(r1, r2) => get_chars(r1) ++ get_chars(r2) |
261 case ALT(r1, r2) => get_chars(r1) ++ get_chars(r2) |
272 case SEQ(r1, r2) => get_chars(r1) ++ get_chars(r2) |
262 case SEQ(r1, r2) => get_chars(r1) ++ get_chars(r2) |
273 case STAR(r) => get_chars(r) |
263 case STAR(r) => get_chars(r) |
274 case NTIMES(r, _) => get_chars(r) |
264 case NTIMES(r, _) => get_chars(r) |
275 case UPNTIMES(r, _) => get_chars(r) |
265 case UPNTIMES(r, _) => get_chars(r) |
276 case ALL => ('a' to 'z').toSet |
266 case ALL => ('a' to 'z').toSet ++ Set('*','/','\\') |
|
267 case NOT(r) => get_chars(r) |
277 } |
268 } |
278 |
269 |
279 |
270 |
280 |
271 |
281 import scala.language.implicitConversions |
272 import scala.language.implicitConversions |
315 case (r1s, ONE) => r1s |
306 case (r1s, ONE) => r1s |
316 case (r1s, r2s) => SEQ(r1s, r2s) |
307 case (r1s, r2s) => SEQ(r1s, r2s) |
317 } |
308 } |
318 case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
309 case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
319 case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
310 case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
|
311 case NOT(r) => NOT(simp(r)) |
320 case r => r |
312 case r => r |
321 } |
313 } |
322 |
314 |
323 |
315 |
324 // nullable function: tests whether the regular |
316 // nullable function: tests whether the regular |
331 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
323 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
332 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
324 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
333 case STAR(_) => true |
325 case STAR(_) => true |
334 case NTIMES(r, i) => if (i == 0) true else nullable(r) |
326 case NTIMES(r, i) => if (i == 0) true else nullable(r) |
335 case UPNTIMES(r: Rexp, n: Int) => true |
327 case UPNTIMES(r: Rexp, n: Int) => true |
|
328 case NOT(r) => !nullable(r) |
336 } |
329 } |
337 |
330 |
338 // derivative of a regular expression w.r.t. a character |
331 // derivative of a regular expression w.r.t. a character |
339 def der(c: Char, r: Rexp) : Rexp = r match { |
332 def der(c: Char, r: Rexp) : Rexp = r match { |
340 case ZERO => ZERO |
333 case ZERO => ZERO |
351 if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
344 if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
352 else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
345 else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
353 case UPNTIMES(r1, i) => |
346 case UPNTIMES(r1, i) => |
354 if (i == 0) ZERO |
347 if (i == 0) ZERO |
355 else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
348 else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
|
349 case NOT(r) => NOT(der(c, r)) |
356 } |
350 } |
357 |
351 |
358 |
352 |
359 // partial derivative of a regular expression w.r.t. a character |
353 // partial derivative of a regular expression w.r.t. a character |
360 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
354 def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
376 for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1)) |
370 for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1)) |
377 case UPNTIMES(r1, i) => |
371 case UPNTIMES(r1, i) => |
378 if (i == 0) Set() |
372 if (i == 0) Set() |
379 else |
373 else |
380 for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
374 for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
|
375 case NOT(r1) => |
|
376 for (pr1 <- pder(c, r1)) yield NOT(pr1) |
381 } |
377 } |
382 |
378 |
383 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] = |
379 def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] = |
384 rs.flatMap(pder(c, _)) |
380 rs.flatMap(pder(c, _)) |
385 |
381 |
|
382 |
|
383 // matchers |
|
384 def matcher(r: Rexp, s: List[Char]): Boolean = s match { |
|
385 case Nil => nullable(r) |
|
386 case c::cs => matcher(der(c, r), cs) |
|
387 } |
|
388 |
|
389 def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
|
390 case Nil => rs.exists(nullable(_)) |
|
391 case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
|
392 } |
386 |
393 |
387 |
394 |
388 // quick-and-dirty translation of a pder automaton |
395 // quick-and-dirty translation of a pder automaton |
389 |
396 |
390 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
397 val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
429 def mkDFA(r: Rexp) = { |
436 def mkDFA(r: Rexp) = { |
430 val sigma = get_chars(r) |
437 val sigma = get_chars(r) |
431 val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r) |
438 val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r) |
432 val fins = qs.filter(nullable(_)) |
439 val fins = qs.filter(nullable(_)) |
433 val deltaf : (Rexp, Char) :=> Rexp = { case (q, c) => delta(q, c) } |
440 val deltaf : (Rexp, Char) :=> Rexp = { case (q, c) => delta(q, c) } |
434 println(s"Automata size: ${qs.size}") |
441 println(s"DFA size: ${qs.size}") |
|
442 println(s"""DFA states\n${qs.mkString("\n")}""") |
435 DFA(r, deltaf, fins) |
443 DFA(r, deltaf, fins) |
436 } |
444 } |
437 |
445 |
438 val re = "ab" | "ac" |
446 val re = "ab" | "ac" |
439 val d1 = mkDFA(re) |
447 val d1 = mkDFA(re) |
487 val sigma = get_chars(r) |
495 val sigma = get_chars(r) |
488 val (qs, delta) = pexplore(sigma, Set(), Set(r), r) |
496 val (qs, delta) = pexplore(sigma, Set(), Set(r), r) |
489 val fins = qs.filter(nullable(_)) |
497 val fins = qs.filter(nullable(_)) |
490 val deltaf = delta.map(toFun(_)) |
498 val deltaf = delta.map(toFun(_)) |
491 println(s"NFA size: ${qs.size}") |
499 println(s"NFA size: ${qs.size}") |
|
500 println(s"""NFA states\n${qs.mkString("\n")}""") |
492 NFA(Set(r), deltaf, fins) |
501 NFA(Set(r), deltaf, fins) |
493 } |
502 } |
494 |
503 |
495 |
504 |
496 // simple example from Scott's paper |
505 // simple example from Scott's paper |
518 |
527 |
519 n2.accepts("aaaaaab".toList) // true |
528 n2.accepts("aaaaaab".toList) // true |
520 n2.accepts("aaaaaa".toList) // false |
529 n2.accepts("aaaaaa".toList) // false |
521 n2.accepts(("a" * 100).toList) // false |
530 n2.accepts(("a" * 100).toList) // false |
522 |
531 |
|
532 |
|
533 // example involving not |
|
534 |
|
535 val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/" |
|
536 val nnot = mkNFA(rnot) // size 7 |
|
537 |
|
538 nnot.accepts("/**/".toList) // true |
|
539 nnot.accepts("/*aaabaa*/".toList) // true |
|
540 nnot.accepts("/*/**/".toList) // true |
|
541 nnot.accepts("/*aaa*/aa*/".toList) // false ???? |
|
542 nnot.accepts("/aaa*/aa*/".toList) // false |
|
543 |
|
544 matcher(rnot, "/**/".toList) // true |
|
545 matcher(rnot, "/*aaabaa*/".toList) // true |
|
546 matcher(rnot, "/*/**/".toList) // true |
|
547 matcher(rnot, "/*aaa*/aa*/".toList) // false |
|
548 matcher(rnot, "/aaa*/aa*/".toList) // false |
|
549 |
|
550 pmatcher(Set(rnot), "/**/".toList) // true |
|
551 pmatcher(Set(rnot), "/*aaabaa*/".toList) // true |
|
552 pmatcher(Set(rnot), "/*/**/".toList) // true |
|
553 pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ????? |
|
554 pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false |
|
555 |
|
556 // example from automata paper with counters and backreferences |
|
557 |
523 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
558 val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
524 mkNFA(r1) // size = 5 |
559 mkNFA(r1) // size = 5 |
525 |
560 |
526 val n3 = mkNFA(r) // size = 7 |
561 val n3 = mkNFA(r) // size = 7 |
527 |
562 |