--- a/progs/scala/autos.scala Mon Mar 20 15:13:17 2017 +0000
+++ b/progs/scala/autos.scala Tue Mar 21 11:40:22 2017 +0000
@@ -245,24 +245,14 @@
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp {
- override def toString = c.toString
-}
-case object ALL extends Rexp {
- override def toString = "."
-}
+case class CHAR(c: Char) extends Rexp
+case object ALL extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
- override def toString = r1.toString + " ~ " + r2.toString
-}
-case class STAR(r: Rexp) extends Rexp {
- override def toString = r.toString + "*"
-}
-case class NTIMES(r: Rexp, n: Int) extends Rexp {
- override def toString = r.toString + "{" + n.toString + "}"
-}
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
case class UPNTIMES(r: Rexp, n: Int) extends Rexp
-
+case class NOT(r: Rexp) extends Rexp
def get_chars(r: Rexp) : Set[Char] = r match {
case ZERO => Set()
@@ -273,7 +263,8 @@
case STAR(r) => get_chars(r)
case NTIMES(r, _) => get_chars(r)
case UPNTIMES(r, _) => get_chars(r)
- case ALL => ('a' to 'z').toSet
+ case ALL => ('a' to 'z').toSet ++ Set('*','/','\\')
+ case NOT(r) => get_chars(r)
}
@@ -317,6 +308,7 @@
}
case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
+ case NOT(r) => NOT(simp(r))
case r => r
}
@@ -333,6 +325,7 @@
case STAR(_) => true
case NTIMES(r, i) => if (i == 0) true else nullable(r)
case UPNTIMES(r: Rexp, n: Int) => true
+ case NOT(r) => !nullable(r)
}
// derivative of a regular expression w.r.t. a character
@@ -353,6 +346,7 @@
case UPNTIMES(r1, i) =>
if (i == 0) ZERO
else SEQ(der(c, r1), UPNTIMES(r1, i - 1))
+ case NOT(r) => NOT(der(c, r))
}
@@ -378,12 +372,25 @@
if (i == 0) Set()
else
for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
+ case NOT(r1) =>
+ for (pr1 <- pder(c, r1)) yield NOT(pr1)
}
def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
rs.flatMap(pder(c, _))
+// matchers
+def matcher(r: Rexp, s: List[Char]): Boolean = s match {
+ case Nil => nullable(r)
+ case c::cs => matcher(der(c, r), cs)
+}
+
+def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
+ case Nil => rs.exists(nullable(_))
+ case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
+}
+
// quick-and-dirty translation of a pder automaton
@@ -431,7 +438,8 @@
val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r)
val fins = qs.filter(nullable(_))
val deltaf : (Rexp, Char) :=> Rexp = { case (q, c) => delta(q, c) }
- println(s"Automata size: ${qs.size}")
+ println(s"DFA size: ${qs.size}")
+ println(s"""DFA states\n${qs.mkString("\n")}""")
DFA(r, deltaf, fins)
}
@@ -489,6 +497,7 @@
val fins = qs.filter(nullable(_))
val deltaf = delta.map(toFun(_))
println(s"NFA size: ${qs.size}")
+ println(s"""NFA states\n${qs.mkString("\n")}""")
NFA(Set(r), deltaf, fins)
}
@@ -520,6 +529,32 @@
n2.accepts("aaaaaa".toList) // false
n2.accepts(("a" * 100).toList) // false
+
+// example involving not
+
+val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/"
+val nnot = mkNFA(rnot) // size 7
+
+nnot.accepts("/**/".toList) // true
+nnot.accepts("/*aaabaa*/".toList) // true
+nnot.accepts("/*/**/".toList) // true
+nnot.accepts("/*aaa*/aa*/".toList) // false ????
+nnot.accepts("/aaa*/aa*/".toList) // false
+
+matcher(rnot, "/**/".toList) // true
+matcher(rnot, "/*aaabaa*/".toList) // true
+matcher(rnot, "/*/**/".toList) // true
+matcher(rnot, "/*aaa*/aa*/".toList) // false
+matcher(rnot, "/aaa*/aa*/".toList) // false
+
+pmatcher(Set(rnot), "/**/".toList) // true
+pmatcher(Set(rnot), "/*aaabaa*/".toList) // true
+pmatcher(Set(rnot), "/*/**/".toList) // true
+pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ?????
+pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false
+
+// example from automata paper with counters and backreferences
+
val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
mkNFA(r1) // size = 5