// DFAs and NFAs based on Scala's partial functions
import scala.util.Try
// type abbreviation for partial functions
type :=>[A, B] = PartialFunction[A, B]
// some states for test cases
abstract class State
case object Q0 extends State
case object Q1 extends State
case object Q2 extends State
case object Q3 extends State
case object Q4 extends State
case object Q5 extends State
case object Q6 extends State
// class for DFAs
case class DFA[A, C](start: A, // starting state
delta: (A, C) :=> A, // transition
fins: A => Boolean) { // final states
// given a state and a "string", what is the next
// state, if there is any?
def deltas(q: A, s: List[C]) : A = s match {
case Nil => q
case c::cs => deltas(delta(q, c), cs)
}
// is a "string" accepted by an DFA?
def accepts(s: List[C]) : Boolean =
Try(fins(deltas(start, s))) getOrElse false
}
// DFA 1
val dtrans1 : (State, Char) :=> State =
{ case (Q0, 'a') => Q0
case (Q0, 'b') => Q1
}
val dfa1 = DFA(Q0, dtrans1, Set[State](Q1))
dfa1.accepts("aaab".toList) // true
dfa1.accepts("aacb".toList) // false
// another DFA test
abstract class S
case object S0 extends S
case object S1 extends S
case object S2 extends S
case object Sink extends S
// transition function with a sink state
// S0 --a--> S1 --a--> S2
val sigma : (S, Char) :=> S =
{ case (S0, 'a') => S1
case (S1, 'a') => S2
case _ => Sink
}
val dfa1a = DFA(S0, sigma, Set[S](S2))
dfa1a.accepts("aa".toList) //true
dfa1a.accepts("".toList) //false
dfa1a.accepts("ab".toList) //false
// class for NFAs
case class NFA[A, C](starts: Set[A], // starting states
delta: Set[(A, C) :=> A], // transitions
fins: A => Boolean) { // final states
// given a state and a character, what is the set of next states?
// if there is none => empty set
def next(q: A, c: C) : Set[A] =
delta.flatMap((d) => Try(d(q, c)).toOption)
def nexts(qs: Set[A], c: C) : Set[A] =
qs.flatMap(next(_, c))
// given some states and a string, what is the set of next states?
def deltas(qs: Set[A], s: List[C]) : Set[A] = s match {
case Nil => qs
case c::cs => deltas(nexts(qs, c), cs)
}
// is a string accepted by an NFA?
def accepts(s: List[C]) : Boolean =
deltas(starts, s).exists(fins)
}
// NFA test cases
// 1
val trans1 = Set[(State, Char) :=> State](
{ case (Q0, 'a') => Q1 },
{ case (Q0, _) => Q0 },
{ case (Q1, _) => Q2 },
{ case (Q2, _) => Q3 },
{ case (Q3, _) => Q4 },
{ case (Q4, 'b') => Q5 },
{ case (Q5, 'c') => Q6 }
)
val nfa1 = NFA(Set[State](Q0), trans1, Set[State](Q6))
nfa1.accepts("axaybzbc".toList) // true
nfa1.accepts("aaaaxaybzbc".toList) // true
nfa1.accepts("axaybzbd".toList) // false
// 2
val trans2 = Set[(State, Char) :=> State](
{ case (Q0, 'a') => Q0 },
{ case (Q0, 'a') => Q1 },
{ case (Q0, 'b') => Q2 },
{ case (Q1, 'a') => Q1 },
{ case (Q2, 'b') => Q2 }
)
val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2))
nfa2.accepts("aa".toList) // false
nfa2.accepts("aaaaa".toList) // false
nfa2.accepts("aaaaab".toList) // true
nfa2.accepts("aaaaabbb".toList) // true
nfa2.accepts("aaaaabbbaaa".toList) // false
nfa2.accepts("ac".toList) // false
// 3
val trans3 = Set[(State, Char) :=> State](
{ case (Q0, _) => Q0 },
{ case (Q0, 'a') => Q1 },
{ case (Q0, 'b') => Q3 },
{ case (Q1, 'b') => Q2 },
{ case (Q2, 'c') => Q5 },
{ case (Q3, 'c') => Q4 },
{ case (Q4, 'd') => Q5 }
)
val nfa3 = NFA(Set[State](Q0), trans3, Set[State](Q5))
nfa3.accepts("aaaaabc".toList) // true
nfa3.accepts("aaaabcd".toList) // true
nfa3.accepts("aaaaab".toList) // false
nfa3.accepts("aaaabc".toList) // true
nfa3.accepts("aaaaabbbaaa".toList) // false
// subset, or powerset, construction
def powerset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = {
DFA(nfa.starts,
{ case (qs, c) => nfa.nexts(qs, c) },
_.exists(nfa.fins))
}
val dfa2 = powerset(nfa1)
dfa2.accepts("axaybzbc".toList) // true
dfa2.accepts("aaaaxaybzbc".toList) // true
dfa2.accepts("axaybzbd".toList) // false
val dfa3 = powerset(nfa2)
dfa3.accepts("aa".toList) // false
dfa3.accepts("aaaaa".toList) // false
dfa3.accepts("aaaaab".toList) // true
dfa3.accepts("aaaaabbb".toList) // true
dfa3.accepts("aaaaabbbaaa".toList) // false
dfa3.accepts("ac".toList) // false
// epsilon NFA
// fixpoint construction
import scala.annotation.tailrec
@tailrec
def fixpT[A](f: A => A, x: A): A = {
val fx = f(x)
if (fx == x) x else fixpT(f, fx)
}
case class eNFA[A, C](starts: Set[A], // starting state
delta: Set[(A, Option[C]) :=> A], // transition edges
fins: A => Boolean) { // final states
// epsilon transitions
def enext(q: A) : Set[A] =
delta.flatMap((d) => Try(d(q, None)).toOption)
def enexts(qs: Set[A]) : Set[A] =
qs ++ qs.flatMap(enext(_))
// epsilon closure
def ecl(qs: Set[A]) : Set[A] =
fixpT(enexts, qs)
// "normal" transition
def next(q: A, c: C) : Set[A] =
delta.flatMap((d) => Try(d(q, Some(c))).toOption)
def nexts(qs: Set[A], c: C) : Set[A] =
qs.flatMap(next(_, c))
def deltas(qs: Set[A], s: List[C]) : Set[A] = s match {
case Nil => ecl(qs)
case c::cs => deltas(ecl(nexts(ecl(qs), c)), cs)
}
def accepts(s: List[C]) : Boolean =
deltas(starts, s.toList).exists(fins)
}
val etrans1 = Set[(State, Option[Char]) :=> State](
{ case (Q0, Some('a')) => Q1 },
{ case (Q1, None) => Q0 }
)
val enfa = eNFA(Set[State](Q0), etrans1, Set[State](Q1))
enfa.accepts("a".toList) // true
enfa.accepts("".toList) // false
enfa.accepts("aaaaa".toList) // true
enfa.accepts("aaaaab".toList) // flase
enfa.accepts("aaaaabbb".toList) // false
enfa.accepts("aaaaabbbaaa".toList) // false
enfa.accepts("ac".toList) // false
// Regular expressions fro derivative automata
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
case object ALL extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp
case class NTIMES(r: Rexp, n: Int) extends Rexp
case class UPNTIMES(r: Rexp, n: Int) extends Rexp
case class NOT(r: Rexp) extends Rexp
case class AND(r1: Rexp, r2: Rexp) extends Rexp
def get_chars(r: Rexp) : Set[Char] = r match {
case ZERO => Set()
case ONE => Set()
case CHAR(c) => Set(c)
case ALT(r1, r2) => get_chars(r1) ++ get_chars(r2)
case SEQ(r1, r2) => get_chars(r1) ++ get_chars(r2)
case STAR(r) => get_chars(r)
case NTIMES(r, _) => get_chars(r)
case UPNTIMES(r, _) => get_chars(r)
case ALL => ('a' to 'z').toSet ++ Set('*','/','\\')
case NOT(r) => get_chars(r)
case AND(r1, r2) => get_chars(r1) ++ get_chars(r2)
}
import scala.language.implicitConversions
import scala.language.reflectiveCalls
def charlist2rexp(s: List[Char]): Rexp = s match {
case Nil => ONE
case c::Nil => CHAR(c)
case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
implicit def RexpOps (r: Rexp) = new {
def | (s: Rexp) = ALT(r, s)
def % = STAR(r)
def ~ (s: Rexp) = SEQ(r, s)
}
implicit def stringOps (s: String) = new {
def | (r: Rexp) = ALT(s, r)
def | (r: String) = ALT(s, r)
def % = STAR(s)
def ~ (r: Rexp) = SEQ(s, r)
def ~ (r: String) = SEQ(s, r)
}
def simp(r: Rexp) : Rexp = r match {
case ALT(r1, r2) => (simp(r1), simp(r2)) match {
case (ZERO, r2s) => r2s
case (r1s, ZERO) => r1s
case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
}
case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
case (ZERO, _) => ZERO
case (_, ZERO) => ZERO
case (ONE, r2s) => r2s
case (r1s, ONE) => r1s
case (r1s, r2s) => SEQ(r1s, r2s)
}
case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n)
case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n)
case NOT(r) => NOT(simp(r))
case AND(r1, r2) => AND(simp(r1), simp(r2))
case r => r
}
// nullable function: tests whether the regular
// expression can recognise the empty string
def nullable(r: Rexp) : Boolean = r match {
case ZERO => false
case ONE => true
case CHAR(_) => false
case ALL => false
case ALT(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
case STAR(_) => true
case NTIMES(r, i) => if (i == 0) true else nullable(r)
case UPNTIMES(r: Rexp, n: Int) => true
case NOT(r) => !nullable(r)
case AND(r1, r2) => nullable(r1) && nullable(r2)
}
// derivative of a regular expression w.r.t. a character
def der(c: Char, r: Rexp) : Rexp = r match {
case ZERO => ZERO
case ONE => ZERO
case CHAR(d) => if (c == d) ONE else ZERO
case ALL => ONE
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
else SEQ(der(c, r1), r2)
case STAR(r1) => SEQ(der(c, r1), STAR(r1))
case NTIMES(r1, i) =>
if (i == 0) ZERO else
if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1))
else SEQ(der(c, r1), NTIMES(r1, i - 1))
case UPNTIMES(r1, i) =>
if (i == 0) ZERO
else SEQ(der(c, r1), UPNTIMES(r1, i - 1))
case NOT(r) => NOT(der(c, r))
// AND case?
}
// partial derivative of a regular expression w.r.t. a character
// does not work for NOT and AND ... see below
def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
case ZERO => Set()
case ONE => Set()
case CHAR(d) => if (c == d) Set(ONE) else Set()
case ALL => Set(ONE)
case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2)
case SEQ(r1, r2) =>
(for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++
(if (nullable(r1)) pder(c, r2) else Set())
case STAR(r1) =>
for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1))
case NTIMES(r1, i) =>
if (i == 0) Set() else
if (nullable(r1))
for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
else
for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1))
case UPNTIMES(r1, i) =>
if (i == 0) Set()
else
for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1))
}
def ppder(c: Char, rs: Set[Rexp]) : Set[Rexp] =
rs.flatMap(pder(c, _))
// matchers
def matcher(r: Rexp, s: List[Char]): Boolean = s match {
case Nil => nullable(r)
case c::cs => matcher(der(c, r), cs)
}
def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match {
case Nil => rs.exists(nullable(_))
case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs)
}
def seq_add(rss: Set[Set[Rexp]], r2: Rexp) : Set[Set[Rexp]] =
for (rs <- rss) yield (for (r <- rs) yield (SEQ(r, r2) : Rexp))
def not_add(rss: Set[Set[Rexp]]) : Set[Set[Set[Rexp]]] =
for (rs <- rss) yield (for (r <- rs) yield (Set(NOT(r)) : Set[Rexp]))
def and_compose(rss1: Set[Set[Rexp]], rss2: Set[Set[Rexp]]) : Set[Set[Rexp]] =
for (rs1 <- rss1; rs2 <- rss2) yield rs1 ++ rs2
def pder2(c: Char, r: Rexp) : Set[Set[Rexp]] = r match {
case ZERO => Set()
case ONE => Set()
case CHAR(d) => if (c == d) Set(Set(ONE)) else Set()
case ALL => Set(Set(ONE))
case ALT(r1, r2) => pder2(c, r1) ++ pder2(c, r2)
case SEQ(r1, r2) => {
val prs1 = seq_add(pder2(c, r1), r2)
if (nullable(r1)) (prs1 ++ pder2(c, r2)) else prs1
}
case STAR(r1) =>
seq_add(pder2(c, r1), STAR(r1))
case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2))
case NOT(r1) => {
val prs1 = not_add(pder2(c, r1))
if (prs1.isEmpty) Set() else
prs1.tail.foldRight(prs1.head) { (rs1, rs2) => rs1 ++ rs2 }
}
}
def pmatcher2(rss: Set[Set[Rexp]], s: List[Char]): Boolean = s match {
case Nil => rss.exists((rs) => rs.exists((r) => nullable(r)))
case c::cs => pmatcher2(rss.flatMap(_.flatMap(pder2(c, _))), cs)
}
// quick-and-dirty translation of a pder automaton
val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc"
val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)),
Set( { case (rs, c) => rs.flatMap(pder(c, _))} ),
_.exists(nullable))
pder_nfa.accepts("axaybzbc".toList) // true
pder_nfa.accepts("aaaaxaybzbc".toList) // true
pder_nfa.accepts("axaybzbd".toList) // false
// Derivative and Partial Derivative Automaton construction
type DState = Rexp // state type of the derivative automaton
type DStates = Set[Rexp]
type Trans = (DState, Char) :=> DState // transition functions of the der/pder auto
type MTrans = Map[(DState, Char), DState] // transition Maps
type STrans = Set[MTrans] // set of transition Maps
// Brzozoswki Derivative automaton construction ... simple
// version, might not terminate
def goto(sigma: Set[Char], delta: MTrans, qs: DStates, q: DState, c: Char) : (DStates, MTrans) = {
val qder = simp(der(c, q))
qs.find(_ == qder) match {
case Some(qexists) => (qs, delta + ((q, c) -> qexists))
case None => explore(sigma, delta + ((q, c) -> qder), qs + qder, qder)
}
}
def explore(sigma: Set[Char], delta: MTrans, qs: DStates, q: DState) : (DStates, MTrans) =
sigma.foldLeft((qs, delta)) { case ((qs, delta), c) => goto(sigma, delta, qs, q, c) }
def mkDFA(r: Rexp) = {
val sigma = get_chars(r)
val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r)
val fins = qs.filter(nullable(_))
val deltaf : (Rexp, Char) :=> Rexp = { case (q, c) => delta(q, c) }
println(s"DFA size: ${qs.size}")
println(s"""DFA states\n${qs.mkString("\n")}""")
DFA(r, deltaf, fins)
}
val re = "ab" | "ac"
val d1 = mkDFA(re)
d1.accepts("ab".toList) // true
d1.accepts("ac".toList) // true
d1.accepts("aa".toList) // false
val d2 = mkDFA(r)
d2.accepts("axaybzbc".toList) // true
d2.accepts("aaaaxaybzbc".toList) // true
d2.accepts("axaybzbd".toList) // false
for (n <- (1 to 10).toList)
mkDFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc")
// this is an example where mkDFA does not terminate
val big_aux = STAR("a" | "b")
val big = SEQ(big_aux, SEQ("a",SEQ("b", big_aux)))
//mkDFA(big) // does not terminate
// Antimirov Partial derivative automata construction ... definitely terminates
// to transform (concrete) Maps into functions
def toFun(m: MTrans) : Trans =
{ case (q, c) => m(q, c) }
def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = {
val qders = pder(c, q).map(simp(_)) // set of simplified partial derivatives
qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) }
}
def padd(sigma: Set[Char], delta: STrans, qs: DStates,
q: DState, qnew: DState, c: Char) : (DStates, STrans) = {
qs.find(_ == qnew) match {
case Some(qexists) => (qs, delta + Map((q, c) -> qexists))
case None => pexplore(sigma, delta + Map((q, c) -> qnew), qs + qnew, qnew)
}
}
def pexplore(sigma: Set[Char], delta: STrans, qs: DStates, q: DState) : (DStates, STrans) =
sigma.foldLeft((qs, delta)) { case ((qs, delta), c) => pgoto(sigma, delta, qs, q, c) }
def mkNFA(r: Rexp) : NFA[Rexp, Char]= {
val sigma = get_chars(r)
val (qs, delta) = pexplore(sigma, Set(), Set(r), r)
val fins = qs.filter(nullable(_))
val deltaf = delta.map(toFun(_))
println(s"NFA size: ${qs.size}")
println(s"""NFA states\n${qs.mkString("\n")}""")
NFA(Set(r), deltaf, fins)
}
// simple example from Scott's paper
val n1 = mkNFA(re) // size = 4
n1.accepts("ab".toList) // true
n1.accepts("ac".toList) // true
n1.accepts("aa".toList) // false
// example from: Partial Derivative and Position Bisimilarity
// Automata, Eva Maia, Nelma Moreira, Rogerio Reis
val r_test = STAR(("a" ~ STAR("b")) | "b") ~ "a"
val t1 = pder('a', r_test).map(simp(_))
val t2 = pder('b', r_test).map(simp(_))
mkNFA(r_test) // size = 3
// simple example involving double star
// with depth-first search causes catastrophic backtracing
val n2 = mkNFA(STAR(STAR("a")) ~ "b") // size 3
n2.accepts("aaaaaab".toList) // true
n2.accepts("aaaaaa".toList) // false
n2.accepts(("a" * 100).toList) // false
// example involving not
val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/"
val nnot = mkNFA(rnot) // size 7
nnot.accepts("/**/".toList) // true
nnot.accepts("/*aaabaa*/".toList) // true
nnot.accepts("/*/**/".toList) // true
nnot.accepts("/*aaa*/aa*/".toList) // false ????
nnot.accepts("/aaa*/aa*/".toList) // false
matcher(rnot, "/**/".toList) // true
matcher(rnot, "/*aaabaa*/".toList) // true
matcher(rnot, "/*/**/".toList) // true
matcher(rnot, "/*aaa*/aa*/".toList) // false
matcher(rnot, "/aaa*/aa*/".toList) // false
pmatcher(Set(rnot), "/**/".toList) // true
pmatcher(Set(rnot), "/*aaabaa*/".toList) // true
pmatcher(Set(rnot), "/*/**/".toList) // true
pmatcher(Set(rnot), "/*aaa*/aa*/".toList) // false ?????
pmatcher(Set(rnot), "/aaa*/aa*/".toList) // false
pmatcher2(Set(Set(rnot)), "/**/".toList) // true
pmatcher2(Set(Set(rnot)), "/*aaabaa*/".toList) // true
pmatcher2(Set(Set(rnot)), "/*/**/".toList) // true
pmatcher2(Set(Set(rnot)), "/*aaa*/aa*/".toList) // false ?????
pmatcher2(Set(Set(rnot)), "/aaa*/aa*/".toList) // false
// example from automata paper with counters and backreferences
val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc"
mkNFA(r1) // size = 5
val n3 = mkNFA(r) // size = 7
n3.accepts("axaybzbc".toList) // true
n3.accepts("aaaaxaybzbc".toList) // true
n3.accepts("axaybzbd".toList) // false
for (n <- (1 to 100).toList)
mkNFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc")