// progs/automata.scala (hg revision 4794759139ea, Sat Jun 15 09:23:18 2013 -0400)

import scala.language.implicitConversions

// A class for deterministic finite automata;
// the type of states is kept polymorphic.
//
//   start  - the initial state
//   states - all states of the automaton
//   delta  - the (possibly partial) transition function, given as a map
//   fins   - the accepting states
case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) {

  // The transition function lifted to lists of characters.
  // Returns Left(q') with the state reached after reading cs from q,
  // or Right(message) if q is not a state of the automaton or a
  // transition is missing. Note that Left is the success side here.
  @annotation.tailrec
  final def deltas(q: A, cs: List[Char]): Either[A, String] =
    if (!states.contains(q)) Right(s"$q is not a state of the automaton")
    else cs match {
      case Nil => Left(q)
      case c :: rest =>
        delta.get((q, c)) match {
          case Some(next) => deltas(next, rest)
          case None       => Right(s"$q does not have a transition for $c")
        }
    }

  // Whether a string is accepted by the automaton: running it from the
  // start state must succeed and end in an accepting state.
  def accepts(s: String): Boolean = deltas(start, s.toList) match {
    case Left(q)  => fins.contains(q)
    case Right(_) => false
  }
}


// Translating a regular expression into a finite automaton.

abstract class Rexp

case object NULL extends Rexp                      // matches nothing
case object EMPTY extends Rexp                     // matches the empty string
case class CHAR(c: Char) extends Rexp              // matches the character c
case class ALT(r1: Rexp, r2: Rexp) extends Rexp    // alternative
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp    // sequence
case class STAR(r: Rexp) extends Rexp              // zero or more repetitions

// Lets string literals be used where a Rexp is expected:
// "" becomes EMPTY, "c" becomes CHAR(c), longer strings become nested SEQs.
implicit def string2rexp(s: String): Rexp = {
  def chars2rexp(cs: List[Char]): Rexp = cs match {
    case Nil         => EMPTY
    case c :: Nil    => CHAR(c)
    case c :: rest   => SEQ(CHAR(c), chars2rexp(rest))
  }
  chars2rexp(s.toList)
}

// Whether the regular expression can match the empty string.
def nullable(r: Rexp): Boolean = r match {
  case NULL        => false
  case EMPTY       => true
  case CHAR(_)     => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_)     => true
}

// The derivative of r with respect to the character c: a regular
// expression matching exactly the strings s for which r matches c followed by s.
def der(r: Rexp, c: Char): Rexp = r match {
  case NULL        => NULL
  case EMPTY       => NULL
  case CHAR(d)     => if (c == d) EMPTY else NULL
  case ALT(r1, r2) => ALT(der(r1, c), der(r2, c))
  case SEQ(r1, r2) =>
    if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c))
    else SEQ(der(r1, c), r2)
  case STAR(r1)    => SEQ(der(r1, c), STAR(r1))
}

// Flattens nested alternatives into the list of their summands,
// dropping NULL summands (NULL is the unit of ALT).
def alts(r: Rexp): List[Rexp] = r match {
  case ALT(r1, r2) => alts(r1) ::: alts(r2)
  case NULL        => Nil
  case _           => List(r)
}

// Language-preserving normalisation of a regular expression:
//   - alternatives are flattened, de-duplicated and re-nested to the right,
//   - NULL annihilates a sequence, EMPTY is dropped from a sequence.
// The body of a STAR is left untouched.
// Raw derivatives grow syntactically with every step (for example
// STAR(ALT("a","b")) yields ALT(x, ALT(x, ...)) chains), so without this
// the state exploration below keeps finding "new" states forever.
def simp(r: Rexp): Rexp = r match {
  case ALT(r1, r2) =>
    (alts(simp(r1)) ::: alts(simp(r2))).distinct match {
      case Nil => NULL
      case rs  => rs.reduceRight[Rexp](ALT(_, _))
    }
  case SEQ(r1, r2) =>
    (simp(r1), simp(r2)) match {
      case (NULL, _) | (_, NULL) => NULL
      case (EMPTY, s2)           => s2
      case (s1, EMPTY)           => s1
      case (s1, s2)              => SEQ(s1, s2)
    }
  case _ => r
}


// Here we construct an automaton whose
// states are regular expressions.
type State = Rexp
type States = Set[State]
type Transition = Map[(State, Char), State]

// We use as an alphabet all lowercase letters.
val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet

// Records the transition from q on c and, if the (simplified) derivative
// is a state not seen before, explores it as well.
// Returns the extended set of states and transition map.
def goto(q: State, c: Char, qs: States, delta: Transition): (States, Transition) = {
  val q_der: State = simp(der(q, c))
  // the transition always leads to the derivative, also when that
  // state is already known
  val delta_ext = delta + ((q, c) -> q_der)
  if (qs.contains(q_der)) (qs, delta_ext)
  else explore(qs + q_der, delta_ext, q_der)
}

// Follows every character of the alphabet from state q, threading the
// states and transitions found so far through the fold.
def explore(qs: States, delta: Transition, q: State): (States, Transition) =
  alphabet.foldLeft((qs, delta)) {
    case ((seen, trans), c) => goto(q, c, seen, trans)
  }

// Builds the automaton for r: the start state is r itself, the other
// states are the simplified derivatives reachable from it, and the
// accepting states are the nullable ones.
def mk_automaton(r: Rexp): Automaton[Rexp] = {
  val (qs, delta) = explore(Set[State](r), Map.empty, r)
  Automaton[Rexp](r, qs, delta, qs.filter(nullable))
}

val A = mk_automaton(ALT("ab", "ac"))

A.start
A.states.size

println(A.accepts("bd"))
println(A.accepts("ab"))
println(A.accepts("ac"))

val r1 = STAR(ALT("a", "b"))
val r2 = SEQ("b", "b")
val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a")
val B = mk_automaton(r3)

B.start
B.states.size