# HG changeset patch # User Christian Urban # Date 1488663312 0 # Node ID 81c85f2746f50521b4c7c93eae58cd5cc57d71b8 # Parent 8b432cef3805fa1a8415d9f18a0d01fc13a7ad8c updated diff -r 8b432cef3805 -r 81c85f2746f5 progs/automata/nfa2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata/nfa2.scala Sat Mar 04 21:35:12 2017 +0000 @@ -0,0 +1,356 @@ +// DFAs and NFAs based on Scala's partial functions + + +// edges of the DFAs and NFAs +type Edge[A, C] = PartialFunction[(A, C), A] + + + + +// some states for test cases +abstract class State +case object Q0 extends State +case object Q1 extends State +case object Q2 extends State +case object Q3 extends State +case object Q4 extends State +case object Q5 extends State +case object Q6 extends State + + +// class for DFAs +case class DFA[A, C](start: A, // starting state + trans: Edge[A, C], // transition + fins: A => Boolean) { // final states + + // given a state and a character, + // what is the next state, if there is any? + def next(q: A, c: C) : Option[A] = + trans.lift.apply((q, c)) + + // given a state and a string, what is the next state? + def nexts(q: A, s: List[C]) : Option[A] = s match { + case Nil => Some(q) + case c::cs => next(q, c).flatMap(nexts(_, cs)) + } + + // is a string accepted by an DFA? + def accepts(s: List[C]) : Boolean = nexts(start, s) match { + case None => false + case Some(q) => fins(q) + } + +} + +// DFA 1 +val dtrans1 : Edge[State, Char] = + { case (Q0, 'a') => Q0 + case (Q0, 'b') => Q1 + } + +val dfa1 = DFA(Q0, dtrans1, Set[State](Q1)) + +dfa1.accepts("aaab".toList) // true +dfa1.accepts("aacb".toList) // false + +// another DFA test +abstract class S +case object S0 extends S +case object S1 extends S +case object S2 extends S +case object Sink extends S + +// transition function +// S0 --a--> S1 --a--> S2 +val sigma : Edge[S, Char] = + { case (S0, 'a') => S1 + case (S1, 'a') => S2 + case _ => Sink + } + +val dfa1a = DFA(S0, sigma, Set[S](S2)) + +dfa1a.accepts("aa".toList) //true +dfa1a.accepts("".toList) //false +dfa1a.accepts("ab".toList) //false + + +// class for NFAs +case class NFA[A, C](starts: Set[A], // starting states + trans: Set[Edge[A, C]], // transition edges + fins: A => Boolean) { // final states + + // given a state and a character, what is the set of next states? + def next(q: A, c: C) : Set[A] = + trans.flatMap(_.lift.apply((q, c))) + + // given some states and a character, what is the set of next states? + def nexts(qs: Set[A], c: C) : Set[A] = + qs.flatMap(next(_, c)) + + // given some states and a string, what is the set of next states? + def nextss(qs: Set[A], s: List[C]) : Set[A] = s match { + case Nil => qs + case c::cs => nextss(nexts(qs, c), cs) + } + + // is a string accepted by an NFA? + def accepts(s: List[C]) : Boolean = + nextss(starts, s.toList).exists(fins) + +} + + + + +// NFA test cases + +// 1 +val trans1 = Set[Edge[State, Char]]( + { case (Q0, 'a') => Q1 }, + { case (Q0, _) => Q0 }, + { case (Q1, _) => Q2 }, + { case (Q2, _) => Q3 }, + { case (Q3, _) => Q4 }, + { case (Q4, 'b') => Q5 }, + { case (Q5, 'c') => Q6 } +) + +val nfa1 = NFA(Set[State](Q0), trans1, Set[State](Q6)) + +nfa1.accepts("axaybzbc".toList) // true +nfa1.accepts("aaaaxaybzbc".toList) // true +nfa1.accepts("axaybzbd".toList) // false + + + +// 2 +val trans2 = Set[Edge[State, Char]]( + { case (Q0, 'a') => Q0 }, + { case (Q0, 'a') => Q1 }, + { case (Q0, 'b') => Q2 }, + { case (Q1, 'a') => Q1 }, + { case (Q2, 'b') => Q2 } +) + +val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2)) + +nfa2.accepts("aa".toList) // false +nfa2.accepts("aaaaa".toList) // false +nfa2.accepts("aaaaab".toList) // true +nfa2.accepts("aaaaabbb".toList) // true +nfa2.accepts("aaaaabbbaaa".toList) // false +nfa2.accepts("ac".toList) // false + +// 3 +val trans3 = Set[Edge[State, Char]]( + { case (Q0, _) => Q0 }, + { case (Q0, 'a') => Q1 }, + { case (Q0, 'b') => Q3 }, + { case (Q1, 'b') => Q2 }, + { case (Q2, 'c') => Q5 }, + { case (Q3, 'c') => Q4 }, + { case (Q4, 'd') => Q5 } +) + +val nfa3 = NFA(Set[State](Q0), trans3, Set[State](Q5)) + +nfa3.accepts("aaaaabc".toList) // true +nfa3.accepts("aaaabcd".toList) // true +nfa3.accepts("aaaaab".toList) // false +nfa3.accepts("aaaabc".toList) // true +nfa3.accepts("aaaaabbbaaa".toList) // false + + + +// subset, or powerset, construction +def powerset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = { + DFA(nfa.starts, + { case (qs, c) => nfa.nexts(qs, c) }, + _.exists(nfa.fins)) +} + +val dfa2 = powerset(nfa1) + +dfa2.accepts("axaybzbc".toList) // true +dfa2.accepts("aaaaxaybzbc".toList) // true +dfa2.accepts("axaybzbd".toList) // false + +val dfa3 = powerset(nfa2) + +dfa3.accepts("aa".toList) // false +dfa3.accepts("aaaaa".toList) // false +dfa3.accepts("aaaaab".toList) // true +dfa3.accepts("aaaaabbb".toList) // true +dfa3.accepts("aaaaabbbaaa".toList) // false +dfa3.accepts("ac".toList) // false + + +import scala.annotation.tailrec +@tailrec +def fixpT[A](f: A => A, x: A): A = { + val fx = f(x) + if (fx == x) x else fixpT(f, fx) +} + + +case class eNFA[A, C](starts: Set[A], // starting state + trans: Set[Edge[A, Option[C]]], // transition edges + fins: A => Boolean) { // final states + + def enext(q: A) : Set[A] = + trans.flatMap(_.lift.apply((q, None))) + + def enexts(qs: Set[A]) : Set[A] = + qs ++ qs.flatMap(enext(_)) + + // epsilon closure + def ecl(qs: Set[A]) : Set[A] = + fixpT(enexts, qs) + + def next(q: A, c: C) : Set[A] = + trans.flatMap(_.lift.apply((q, Some(c)))) + + def nexts(qs: Set[A], c: C) : Set[A] = + qs.flatMap(next(_, c)) + + def nextss(qs: Set[A], s: List[C]) : Set[A] = s match { + case Nil => ecl(qs) + case c::cs => nextss(ecl(nexts(ecl(qs), c)), cs) + } + + def accepts(s: List[C]) : Boolean = + nextss(starts, s.toList).exists(fins) + +} + +val etrans1 = Set[Edge[State, Option[Char]]]( + { case (Q0, Some('a')) => Q1 }, + { case (Q1, None) => Q0 } +) + +val enfa = eNFA(Set[State](Q0), etrans1, Set[State](Q1)) + +enfa.accepts("a".toList) // true +enfa.accepts("".toList) // false +enfa.accepts("aaaaa".toList) // true +enfa.accepts("aaaaab".toList) // flase +enfa.accepts("aaaaabbb".toList) // false +enfa.accepts("aaaaabbbaaa".toList) // false +enfa.accepts("ac".toList) // false + + + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case object ALL extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp +case class UPNTIMES(r: Rexp, n: Int) extends Rexp + +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +def charlist2rexp(s: List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALL => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) + case UPNTIMES(r: Rexp, n: Int) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALL => ONE + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) + case NTIMES(r1, i) => + if (i == 0) ZERO else + if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1)) + else SEQ(der(c, r1), NTIMES(r1, i - 1)) + case UPNTIMES(r1, i) => + if (i == 0) ZERO + else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) +} + + +// partial derivative of a regular expression w.r.t. a character +def pder (c: Char, r: Rexp) : Set[Rexp] = r match { + case ZERO => Set() + case ONE => Set() + case CHAR(d) => if (c == d) Set(ONE) else Set() + case ALL => Set(ONE) + case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2) + case SEQ(r1, r2) => + (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ + (if (nullable(r1)) pder(c, r2) else Set()) + case STAR(r1) => + for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1)) + case NTIMES(r1, i) => + if (i == 0) Set() else + if (nullable(r1)) + for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) + else + for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1)) + case UPNTIMES(r1, i) => + if (i == 0) Set() + else + for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) +} + +val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" +val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), + Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), + _.exists(nullable)) + + + +pder_nfa.accepts("axaybzbc".toList) // true +pder_nfa.accepts("aaaaxaybzbc".toList) // true +pder_nfa.accepts("axaybzbd".toList) // false + + + + +/// +type Trans[A, C] = Map[(A, C), A] + diff -r 8b432cef3805 -r 81c85f2746f5 progs/scala/re-ext.scala --- a/progs/scala/re-ext.scala Thu Mar 02 12:39:27 2017 +0000 +++ b/progs/scala/re-ext.scala Sat Mar 04 21:35:12 2017 +0000 @@ -6,12 +6,11 @@ abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp +case class CHARS(f: Char => Boolean) extends Rexp case class ALT(r1: Rexp, r2: Rexp) extends Rexp case class SEQ(r1: Rexp, r2: Rexp) extends Rexp case class STAR(r: Rexp) extends Rexp case class RECD(x: String, r: Rexp) extends Rexp -case class CRANGE(cs: String) extends Rexp case class PLUS(r: Rexp) extends Rexp case class OPT(r: Rexp) extends Rexp case class NTIMES(r: Rexp, n: Int) extends Rexp @@ -28,8 +27,8 @@ // some convenience for typing in regular expressions def charlist2rexp(s : List[Char]): Rexp = s match { case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) + case c::Nil => CHARS(_ == c) + case c::s => SEQ(CHARS(_ == c), charlist2rexp(s)) } implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) @@ -68,12 +67,11 @@ def nullable (r: Rexp) : Boolean = r match { case ZERO => false case ONE => true - case CHAR(_) => false + case CHARS(_) => false case ALT(r1, r2) => nullable(r1) || nullable(r2) case SEQ(r1, r2) => nullable(r1) && nullable(r2) case STAR(_) => true case RECD(_, r1) => nullable(r1) - case CRANGE(_) => false case PLUS(r) => nullable(r) case OPT(_) => true case NTIMES(r, n) => if (n == 0) true else nullable(r) @@ -83,17 +81,16 @@ def der (c: Char, r: Rexp) : Rexp = r match { case ZERO => ZERO case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO + case CHARS(f) => if (f(c)) ONE else ZERO case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r) => SEQ(der(c, r), STAR(r)) case RECD(_, r1) => der(c, r1) - case CRANGE(cs) => if (cs.contains(c)) ONE else ZERO case PLUS(r) => SEQ(der(c, r), STAR(r)) case OPT(r) => ALT(der(c, r), ZERO) - case NTIMES(r, n) => if (n == 0) ZERO else der(c, SEQ(r, NTIMES(r, n - 1))) + case NTIMES(r, n) => if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1)) } // derivative w.r.t. a string (iterates der) @@ -136,7 +133,7 @@ case RECD(x, r) => Rec(x, mkeps(r)) case PLUS(r) => Stars(List(mkeps(r))) case OPT(_) => Right(Empty) - case NTIMES(r, n) => if (n == 0) Stars(Nil) else Stars(Nil.padTo(n, mkeps(r))) + case NTIMES(r, n) => Stars(List.fill(n)(mkeps(r))) } @@ -147,14 +144,11 @@ case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) + case (CHARS(_), Empty) => Chr(c) case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) - case (CRANGE(_), Empty) => Chr(c) case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) case (OPT(r), Left(v1)) => Left(inj(r, c, v1)) case (NTIMES(r, n), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (NTIMES(r, n), Left(Sequ(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs) - case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs) } // main unsimplified lexing function (produces a value) @@ -368,7 +362,7 @@ // first benchmark regex //======================= -val reWord = CRANGE("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") +val reWord = CHARS("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" contains _) val reWordStar = STAR(reWord) val reWordPlus = reWord ~ reWordStar diff -r 8b432cef3805 -r 81c85f2746f5 thys/LexerExt.thy --- a/thys/LexerExt.thy Thu Mar 02 12:39:27 2017 +0000 +++ b/thys/LexerExt.thy Sat Mar 04 21:35:12 2017 +0000 @@ -62,16 +62,16 @@ unfolding Der_def by auto +lemma Der_Sequ [simp]: + shows "Der c (A ;; B) = (Der c A) ;; B \ (if [] \ A then Der c B else {})" +unfolding Der_def Sequ_def +by (auto simp add: Cons_eq_append_conv) + lemma Der_union [simp]: shows "Der c (A \ B) = Der c A \ Der c B" unfolding Der_def by auto -lemma Der_Sequ [simp]: - shows "Der c (A ;; B) = (Der c A) ;; B \ (if [] \ A then Der c B else {})" -unfolding Der_def Sequ_def -by (auto simp add: Cons_eq_append_conv) - lemma Der_UNION: shows "Der c (\x\A. B x) = (\x\A. Der c (B x))" by (auto simp add: Der_def)