diff -r e59cec211f4f -r 820228ac1d0f progs/scala/autos.scala --- a/progs/scala/autos.scala Mon Mar 20 15:13:17 2017 +0000 +++ b/progs/scala/autos.scala Tue Mar 21 11:40:22 2017 +0000 @@ -245,24 +245,14 @@ abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp { - override def toString = c.toString -} -case object ALL extends Rexp { - override def toString = "." -} +case class CHAR(c: Char) extends Rexp +case object ALL extends Rexp case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { - override def toString = r1.toString + " ~ " + r2.toString -} -case class STAR(r: Rexp) extends Rexp { - override def toString = r.toString + "*" -} -case class NTIMES(r: Rexp, n: Int) extends Rexp { - override def toString = r.toString + "{" + n.toString + "}" -} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp case class UPNTIMES(r: Rexp, n: Int) extends Rexp - +case class NOT(r: Rexp) extends Rexp def get_chars(r: Rexp) : Set[Char] = r match { case ZERO => Set() @@ -273,7 +263,8 @@ case STAR(r) => get_chars(r) case NTIMES(r, _) => get_chars(r) case UPNTIMES(r, _) => get_chars(r) - case ALL => ('a' to 'z').toSet + case ALL => ('a' to 'z').toSet ++ Set('*','/','\\') + case NOT(r) => get_chars(r) } @@ -317,6 +308,7 @@ } case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) + case NOT(r) => NOT(simp(r)) case r => r } @@ -333,6 +325,7 @@ case STAR(_) => true case NTIMES(r, i) => if (i == 0) true else nullable(r) case UPNTIMES(r: Rexp, n: Int) => true + case NOT(r) => !nullable(r) } // derivative of a regular expression w.r.t. a character @@ -353,6 +346,7 @@ case UPNTIMES(r1, i) => if (i == 0) ZERO else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) + case NOT(r) => NOT(der(c, r)) } @@ -378,12 +372,25 @@ if (i == 0) Set() else for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) + case NOT(r1) => + for (pr1 <- pder(c, r1)) yield NOT(pr1) } def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] = rs.flatMap(pder(c, _)) +// matchers +def matcher(r: Rexp, s: List[Char]): Boolean = s match { + case Nil => nullable(r) + case c::cs => matcher(der(c, r), cs) +} + +def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { + case Nil => rs.exists(nullable(_)) + case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) +} + // quick-and-dirty translation of a pder automaton @@ -431,7 +438,8 @@ val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r) val fins = qs.filter(nullable(_)) val deltaf : (Rexp, Char) :=> Rexp = { case (q, c) => delta(q, c) } - println(s"Automata size: ${qs.size}") + println(s"DFA size: ${qs.size}") + println(s"""DFA states\n${qs.mkString("\n")}""") DFA(r, deltaf, fins) } @@ -489,6 +497,7 @@ val fins = qs.filter(nullable(_)) val deltaf = delta.map(toFun(_)) println(s"NFA size: ${qs.size}") + println(s"""NFA states\n${qs.mkString("\n")}""") NFA(Set(r), deltaf, fins) } @@ -520,6 +529,32 @@ n2.accepts("aaaaaa".toList) // false n2.accepts(("a" * 100).toList) // false + +// example involving not + +val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/" +val nnot = mkNFA(rnot) // size 7 + +nnot.accepts("/**/".toList) // true +nnot.accepts("/*aaabaa*/".toList) // true +nnot.accepts("/*/**/".toList) // true +nnot.accepts("/*aaa*/aa*/".toList) // false ???? +nnot.accepts("/aaa*/aa*/".toList) // false + +matcher(rnot, "/**/".toList) // true +matcher(rnot, "/*aaabaa*/".toList) // true +matcher(rnot, "/*/**/".toList) // true +matcher(rnot, "/*aaa*/aa*/".toList) // false +matcher(rnot, "/aaa*/aa*/".toList) // false + +pmatcher(Set(rnot), "/**/".toList) // true +pmatcher(Set(rnot), "/*aaabaa*/".toList) // true +pmatcher(Set(rnot), "/*/**/".toList) // true +pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ????? +pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false + +// example from automata paper with counters and backreferences + val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" mkNFA(r1) // size = 5