# HG changeset patch # User Christian Urban # Date 1593896253 -3600 # Node ID 022e2cb1668dbb4a69907e5f7c2d03be09d1d2ce # Parent c7bdd7eac4cb09edeb231431e6c675d04fb6f9a7 updated diff -r c7bdd7eac4cb -r 022e2cb1668d progs/automata/dfa.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata/dfa.sc Sat Jul 04 21:57:33 2020 +0100 @@ -0,0 +1,67 @@ +// DFAs in Scala using partial functions +import scala.util.Try + +// a type abbreviation for partial functions +type :=>[A, B] = PartialFunction[A, B] + +// main DFA class +case class DFA[A, C](start: A, // the starting state + delta: (A, C) :=> A, // transitions (partial fun) + fins: A => Boolean) { // the final states + + def deltas(q: A, s: List[C]) : A = s match { + case Nil => q + case c::cs => deltas(delta(q, c), cs) + } + + def accepts(s: List[C]) : Boolean = + Try(fins(deltas(start, s))).getOrElse(false) +} + +// the example shown in the handout +abstract class State +case object Q0 extends State +case object Q1 extends State +case object Q2 extends State +case object Q3 extends State +case object Q4 extends State + +val delta : (State, Char) :=> State = + { case (Q0, 'a') => Q1 + case (Q0, 'b') => Q2 + case (Q1, 'a') => Q4 + case (Q1, 'b') => Q2 + case (Q2, 'a') => Q3 + case (Q2, 'b') => Q2 + case (Q3, 'a') => Q4 + case (Q3, 'b') => Q0 + case (Q4, 'a') => Q4 + case (Q4, 'b') => Q4 } + +val dfa = DFA(Q0, delta, Set[State](Q4)) +dfa.accepts("aaabbb".toList) + +dfa.accepts("bbabaab".toList) // true +dfa.accepts("baba".toList) // false +dfa.accepts("abc".toList) // false + +// another DFA test with a Sink state +abstract class S +case object S0 extends S +case object S1 extends S +case object S2 extends S +case object Sink extends S + +// a transition function with a sink state +val sigma : (S, Char) :=> S = + { case (S0, 'a') => S1 + case (S1, 'a') => S2 + case _ => Sink + } + +val dfa1a = DFA(S0, sigma, Set[S](S2)) + +dfa1a.accepts("aa".toList) // true +dfa1a.accepts("".toList) // false +dfa1a.accepts("ab".toList) // false + diff -r c7bdd7eac4cb -r 022e2cb1668d progs/automata/enfa.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata/enfa.sc Sat Jul 04 21:57:33 2020 +0100 @@ -0,0 +1,69 @@ +// epsilon NFAs...immediately translated into NFAs + +import $file.dfa, dfa._ +import $file.nfa, nfa._ + + + +// a fixpoint construction +import scala.annotation.tailrec +@tailrec +def fixpT[A](f: A => A, x: A): A = { + val fx = f(x) + if (fx == x) x else fixpT(f, fx) +} + +// translates eNFAs directly into NFAs +def eNFA[A, C](starts: Set[A], // the starting states + delta: (A, Option[C]) :=> Set[A], // the epsilon-transitions + fins: A => Boolean) : NFA[A, C] = { // the final states + + // epsilon transitions + def enext(q: A) : Set[A] = + applyOrElse(delta, (q, None)) + + def enexts(qs: Set[A]) : Set[A] = + qs | qs.flatMap(enext(_)) // | is the set-union in Scala + + // epsilon closure + def ecl(qs: Set[A]) : Set[A] = + fixpT(enexts, qs) + + // "normal" transitions + def next(q: A, c: C) : Set[A] = + applyOrElse(delta, (q, Some(c))) + + def nexts(qs: Set[A], c: C) : Set[A] = + ecl(ecl(qs).flatMap(next(_, c))) + + // result NFA + NFA(ecl(starts), + { case (q, c) => nexts(Set(q), c) }, + q => ecl(Set(q)) exists fins) +} + + +// eNFA examples +val enfa_trans1 : (State, Option[Char]) :=> Set[State] = + { case (Q0, Some('a')) => Set(Q0) + case (Q0, None) => Set(Q1, Q2) + case (Q1, Some('a')) => Set(Q1) + case (Q2, Some('b')) => Set(Q2) + } + +val enfa1 = eNFA(Set[State](Q0), enfa_trans1, Set[State](Q2)) + + +// +case object R1 extends State +case object R2 extends State +case object R3 extends State + +val enfa_trans2 : (State, Option[Char]) :=> Set[State] = + { case (R1, Some('b')) => Set(R3) + case (R1, None) => Set(R2) + case (R2, Some('a')) => Set(R1, R3) + } + + +val enfa2 = eNFA(Set[State](R1), enfa_trans1, Set[State](R3)) diff -r c7bdd7eac4cb -r 022e2cb1668d progs/automata/nfa.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata/nfa.sc Sat Jul 04 21:57:33 2020 +0100 @@ -0,0 +1,93 @@ +// NFAs in Scala using partial functions (returning +// sets of states) +// + +// load dfas +import $file.dfa, dfa._ + + + +// return an empty set, when a parial function is not defined +import scala.util.Try +def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] = + Try(f(x)).getOrElse(Set[B]()) + + +// NFAs +case class NFA[A, C](starts: Set[A], // the starting states + delta: (A, C) :=> Set[A], // the transition function + fins: A => Boolean) { // the final states + + // given a state and a character, what is the set of + // next states? if there is none => empty set + def next(q: A, c: C) : Set[A] = + applyOrElse(delta, (q, c)) + + def nexts(qs: Set[A], c: C) : Set[A] = + qs.flatMap(next(_, c)) + + // given some states and a string, what is the set + // of next states? + def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { + case Nil => qs + case c::cs => deltas(nexts(qs, c), cs) + } + + // is a string accepted by an NFA? + def accepts(s: List[C]) : Boolean = + deltas(starts, s).exists(fins) + + // depth-first version of accepts + def search(q: A, s: List[C]) : Boolean = s match { + case Nil => fins(q) + case c::cs => next(q, c).exists(search(_, cs)) + } + + def accepts2(s: List[C]) : Boolean = + starts.exists(search(_, s)) +} + + + +// NFA examples + +val nfa_trans1 : (State, Char) :=> Set[State] = + { case (Q0, 'a') => Set(Q0, Q1) + case (Q0, 'b') => Set(Q2) + case (Q1, 'a') => Set(Q1) + case (Q2, 'b') => Set(Q2) } + +val nfa1 = NFA(Set[State](Q0), nfa_trans1, Set[State](Q2)) + +println(nfa1.accepts("aa".toList)) // false +println(nfa1.accepts("aaaaa".toList)) // false +println(nfa1.accepts("aaaaab".toList)) // true +println(nfa1.accepts("aaaaabbb".toList)) // true +println(nfa1.accepts("aaaaabbbaaa".toList)) // false +println(nfa1.accepts("ac".toList)) // false + +println(nfa1.accepts2("aa".toList)) // false +println(nfa1.accepts2("aaaaa".toList)) // false +println(nfa1.accepts2("aaaaab".toList)) // true +println(nfa1.accepts2("aaaaabbb".toList)) // true +println(nfa1.accepts2("aaaaabbbaaa".toList)) // false +println(nfa1.accepts2("ac".toList)) // false + + + + +// subset constructions + +def subset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = { + DFA(nfa.starts, + { case (qs, c) => nfa.nexts(qs, c) }, + _.exists(nfa.fins)) +} + +println(subset(nfa1).accepts("aa".toList)) // false +println(subset(nfa1).accepts("aaaaa".toList)) // false +println(subset(nfa1).accepts("aaaaab".toList)) // true +println(subset(nfa1).accepts("aaaaabbb".toList)) // true +println(subset(nfa1).accepts("aaaaabbbaaa".toList)) // false +println(subset(nfa1).accepts("ac".toList)) // false + diff -r c7bdd7eac4cb -r 022e2cb1668d progs/automata/thompson.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata/thompson.sc Sat Jul 04 21:57:33 2020 +0100 @@ -0,0 +1,180 @@ +// Thompson Construction +//======================= + +import $file.dfa, dfa._ +import $file.nfa, nfa._ +import $file.enfa, enfa._ + + +// states for Thompson construction +case class TState(i: Int) extends State + +object TState { + var counter = 0 + + def apply() : TState = { + counter += 1; + new TState(counter) + } +} + + +// some types abbreviations +type NFAt = NFA[TState, Char] +type NFAtrans = (TState, Char) :=> Set[TState] +type eNFAtrans = (TState, Option[Char]) :=> Set[TState] + + +// for composing an eNFA transition with an NFA transition +// | is for set union +implicit def nfaOps(f: eNFAtrans) = new { + def +++(g: NFAtrans) : eNFAtrans = + { case (q, None) => applyOrElse(f, (q, None)) + case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c)) } +} + + +// NFA that does not accept any string +def NFA_ZERO(): NFAt = { + val Q = TState() + NFA(Set(Q), { case _ => Set() }, Set()) +} + +// NFA that accepts the empty string +def NFA_ONE() : NFAt = { + val Q = TState() + NFA(Set(Q), { case _ => Set() }, Set(Q)) +} + +// NFA that accepts the string "c" +def NFA_CHAR(c: Char) : NFAt = { + val Q1 = TState() + val Q2 = TState() + NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) +} + +// sequence of two NFAs +def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = { + val new_delta : eNFAtrans = + { case (q, None) if enfa1.fins(q) => enfa2.starts } + + eNFA(enfa1.starts, + new_delta +++ enfa1.delta +++ enfa2.delta, + enfa2.fins) +} + +// alternative of two NFAs +def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = { + val new_delta : NFAtrans = { + case (q, c) => applyOrElse(enfa1.delta, (q, c)) | + applyOrElse(enfa2.delta, (q, c)) } + val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q) + + NFA(enfa1.starts | enfa2.starts, new_delta, new_fins) +} + +// star of a NFA +def NFA_STAR(enfa: NFAt) : NFAt = { + val Q = TState() + val new_delta : eNFAtrans = + { case (Q, None) => enfa.starts + case (q, None) if enfa.fins(q) => Set(Q) } + + eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q)) +} + + +// We are now ready to translate regular expressions +// into DFAs (via eNFAs and NFAs, and the subset construction) + +// regular expressions +abstract class Rexp +case object ZERO extends Rexp // matches nothing +case object ONE extends Rexp // matches the empty string +case class CHAR(c: Char) extends Rexp // matches a character c +case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star + +// thompson construction +def thompson (r: Rexp) : NFAt = r match { + case ZERO => NFA_ZERO() + case ONE => NFA_ONE() + case CHAR(c) => NFA_CHAR(c) + case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) + case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) + case STAR(r1) => NFA_STAR(thompson(r1)) +} + +//optional regular expression (one or zero times) +def OPT(r: Rexp) = ALT(r, ONE) + +//n-times regular expression (explicitly expanded) +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => ONE + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + + +def tmatches_nfa(r: Rexp, s: String) : Boolean = + thompson(r).accepts(s.toList) + +def tmatches_nfa2(r: Rexp, s: String) : Boolean = + thompson(r).accepts2(s.toList) + +// dfas via subset construction +def tmatches_dfa(r: Rexp, s: String) : Boolean = + subset(thompson(r)).accepts(s.toList) + +// Test Cases +//============ + +// the evil regular expression a?{n} a{n} +def EVIL1(n: Int) : Rexp = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) + +// the evil regular expression (a*)*b +val EVIL2 : Rexp = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) + +//for measuring time +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// the size of the NFA can be large, +// thus slowing down the breadth-first search + +for (i <- 1 to 13) { + println(i + ": " + "%.5f".format(time_needed(2, tmatches_nfa(EVIL1(i), "a" * i)))) +} + +for (i <- 1 to 100 by 5) { + println(i + " " + "%.5f".format(time_needed(2, tmatches_nfa(EVIL2, "a" * i)))) +} + + +// the backtracking that is needed in depth-first +// search can be painfully slow + +for (i <- 1 to 8) { + println(i + " " + "%.5f".format(time_needed(2, tmatches_nfa2(EVIL2, "a" * i)))) +} + + + +// while my thompson->enfa->subset->partial-function-chain +// is probably not the most effcient way to obtain a fast DFA +// (the test below should be much faster with a more direct +// construction), in general the DFAs can be slow because of +// the state explosion in the subset construction + +for (i <- 1 to 13) { + println(i + ": " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL1(i), "a" * i)))) +} + +for (i <- 1 to 100 by 5) { + println(i + " " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL2, "a" * i)))) +} diff -r c7bdd7eac4cb -r 022e2cb1668d progs/c.scala --- a/progs/c.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,253 +0,0 @@ -// A parser and interpreter for the While language -// - -import scala.language.implicitConversions -import scala.language.reflectiveCalls -import scala.collection.immutable._ - -// more convenience for the semantic actions later on -case class ~[+A, +B](_1: A, _2: B) - - -type IsSeq[A] = A => Seq[_] - -abstract class Parser[I : IsSeq, T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if tail.isEmpty) yield head -} - - - - -class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) -} - -class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class MapParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - -case class StrParser(s: String) extends Parser[String, String] { - def parse(sb: String) = { - val (prefix, suffix) = sb.splitAt(s.length) - if (prefix == s) Set((prefix, suffix)) else Set() - } -} - -case object NumParser extends Parser[String, Int] { - val reg = "[0-9]+".r - def parse(sb: String) = reg.findPrefixOf(sb) match { - case None => Set() - case Some(s) => { - val (head, tail) = sb.splitAt(s.length) - Set((head.toInt, tail)) - } - } -} - - -implicit def parser_interpolation(sc: StringContext) = new { - def p(args: Any*) = StrParser(sc.s(args:_*)) -} - -// this string interpolation allows us to write -// things like the following for a StrParser -// -// p"<_some_string_>" -// -// instead of StrParser(<_some_string_>) - - -implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { - def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) - def ~[S](q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) - def map[S](f: => T => S) = new MapParser[I, T, S](p, f) -} - -// these implicits allow us to use infic notation for -// sequences and alternatives; we also can write map -// for a parser - - -// the abstract syntax trees for the WHILE language -abstract class Stmt -abstract class AExp -abstract class BExp - -type Block = List[Stmt] - -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp -case class And(b1: BExp, b2: BExp) extends BExp -case class Or(b1: BExp, b2: BExp) extends BExp - -case object IdParser extends Parser[String, String] { - val reg = "[a-z][a-z,0-9]*".r - def parse(sb: String) = reg.findPrefixOf(sb) match { - case None => Set() - case Some(s) => Set(sb.splitAt(s.length)) - } -} - -// arithmetic expressions -lazy val AExp: Parser[String, AExp] = - (Te ~ p"+" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("+", x, z) } || - (Te ~ p"-" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("-", x, z) } || Te -lazy val Te: Parser[String, AExp] = - (Fa ~ p"*" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("*", x, z) } || - (Fa ~ p"/" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("/", x, z) } || Fa -lazy val Fa: Parser[String, AExp] = - (p"(" ~ AExp ~ p")").map{ case _ ~ y ~ _ => y } || - IdParser.map(Var) || - NumParser.map(Num) - -// boolean expressions with some simple nesting -lazy val BExp: Parser[String, BExp] = - (AExp ~ p"==" ~ AExp).map { case x ~ _ ~ z => Bop("==", x, z): BExp } || - (AExp ~ p"!=" ~ AExp).map { case x ~ _ ~ z => Bop("!=", x, z): BExp } || - (AExp ~ p"<" ~ AExp).map { case x ~ _ ~ z => Bop("<", x, z): BExp } || - (AExp ~ p">" ~ AExp).map { case x ~ _ ~ z => Bop(">", x, z): BExp } || - (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).map { case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } || - (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).map { case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } || - (p"true".map { _ => True: BExp }) || - (p"false".map { _ => False: BExp }) || - (p"(" ~ BExp ~ p")").map { case _ ~ x ~ _ => x } - -// statement / statements -lazy val Stmt: Parser[String, Stmt] = - ((p"skip".map {_ => Skip: Stmt}) || - (IdParser ~ p":=" ~ AExp).map { case x ~ _ ~ z => Assign(x, z): Stmt } || - (p"write(" ~ IdParser ~ p")").map { case _ ~ y ~ _ => Write(y): Stmt } || - (p"if" ~ BExp ~ p"then" ~ Block ~ p"else" ~ Block) - .map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } || - (p"while" ~ BExp ~ p"do" ~ Block).map { case _ ~ y ~ _ ~ w => While(y, w) }) - -lazy val Stmts: Parser[String, Block] = - (Stmt ~ p";" ~ Stmts).map { case x ~ _ ~ z => x :: z : Block } || - (Stmt.map { s => List(s) : Block}) - -// blocks (enclosed in curly braces) -lazy val Block: Parser[String, Block] = - ((p"{" ~ Stmts ~ p"}").map{ case _ ~ y ~ _ => y } || - (Stmt.map(s => List(s)))) - - -Stmts.parse_all("x2:=5+3;") -Block.parse_all("{x:=5;y:=8}") -Block.parse_all("if(false)then{x:=5}else{x:=10}") - -val fib = """n := 10; - minus1 := 0; - minus2 := 1; - temp := 0; - while (n > 0) do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 - }; - result := minus2""".replaceAll("\\s+", "") - -Stmts.parse_all(fib) - - -// an interpreter for the WHILE language -type Env = Map[String, Int] - -def eval_aexp(a: AExp, env: Env) : Int = a match { - case Num(i) => i - case Var(s) => env(s) - case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) - case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) - case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) - case Aop("/", a1, a2) => eval_aexp(a1, env) / eval_aexp(a2, env) -} - -def eval_bexp(b: BExp, env: Env) : Boolean = b match { - case True => true - case False => false - case Bop("==", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) - case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) - case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) - case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) - case And(b1, b2) => eval_bexp(b1, env) && eval_bexp(b2, env) - case Or(b1, b2) => eval_bexp(b1, env) || eval_bexp(b2, env) -} - -def eval_stmt(s: Stmt, env: Env) : Env = s match { - case Skip => env - case Assign(x, a) => env + (x -> eval_aexp(a, env)) - case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) - case While(b, bl) => - if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) - else env - case Write(x) => { println(env(x)) ; env } -} - -def eval_bl(bl: Block, env: Env) : Env = bl match { - case Nil => env - case s::bl => eval_bl(bl, eval_stmt(s, env)) -} - -def eval(bl: Block) : Env = eval_bl(bl, Map()) - -// parse + evaluate fib program; then lookup what is -// stored under the variable result -println(eval(Stmts.parse_all(fib).head)("result")) - - -// more examles - -// calculate and print all factors bigger -// than 1 and smaller than n -println("Factors") - -val factors = - """n := 12; - f := 2; - while (f < n / 2 + 1) do { - if ((n / f) * f == n) then { write(f) } else { skip }; - f := f + 1 - }""".replaceAll("\\s+", "") - -eval(Stmts.parse_all(factors).head) - -// calculate all prime numbers up to a number -println("Primes") - -val primes = - """end := 100; - n := 2; - while (n < end) do { - f := 2; - tmp := 0; - while ((f < n / 2 + 1) && (tmp == 0)) do { - if ((n / f) * f == n) then { tmp := 1 } else { skip }; - f := f + 1 - }; - if (tmp == 0) then { write(n) } else { skip }; - n := n + 1 - }""".replaceAll("\\s+", "") - -eval(Stmts.parse_all(primes).head) diff -r c7bdd7eac4cb -r 022e2cb1668d progs/dfa.scala --- a/progs/dfa.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -// DFAs in Scala using partial functions -import scala.util.Try - -// a type abbreviation for partial functions -type :=>[A, B] = PartialFunction[A, B] - - -case class DFA[A, C](start: A, // the starting state - delta: (A, C) :=> A, // transitions (partial fun) - fins: A => Boolean) { // the final states - - def deltas(q: A, s: List[C]) : A = s match { - case Nil => q - case c::cs => deltas(delta(q, c), cs) - } - - def accepts(s: List[C]) : Boolean = - Try(fins(deltas(start, s))) getOrElse false -} - -// the example shown in the handout -abstract class State -case object Q0 extends State -case object Q1 extends State -case object Q2 extends State -case object Q3 extends State -case object Q4 extends State - -val delta : (State, Char) :=> State = - { case (Q0, 'a') => Q1 - case (Q0, 'b') => Q2 - case (Q1, 'a') => Q4 - case (Q1, 'b') => Q2 - case (Q2, 'a') => Q3 - case (Q2, 'b') => Q2 - case (Q3, 'a') => Q4 - case (Q3, 'b') => Q0 - case (Q4, 'a') => Q4 - case (Q4, 'b') => Q4 } - -val dfa = DFA(Q0, delta, Set[State](Q4)) -dfa.accepts("aaabbb".toList) - -dfa.accepts("bbabaab".toList) // true -dfa.accepts("baba".toList) // false -dfa.accepts("abc".toList) // false - -// another DFA test with a Sink state -abstract class S -case object S0 extends S -case object S1 extends S -case object S2 extends S -case object Sink extends S - -// a transition function with a sink state -val sigma : (S, Char) :=> S = - { case (S0, 'a') => S1 - case (S1, 'a') => S2 - case _ => Sink - } - -val dfa1a = DFA(S0, sigma, Set[S](S2)) - -dfa1a.accepts("aa".toList) // true -dfa1a.accepts("".toList) // false -dfa1a.accepts("ab".toList) // false - diff -r c7bdd7eac4cb -r 022e2cb1668d progs/enfa.scala --- a/progs/enfa.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -// epsilon NFAs...immediately translated into NFAs -// (needs :load dfa.scala -// :load nfa.scala in REPL) - -// a fixpoint construction -import scala.annotation.tailrec -@tailrec -def fixpT[A](f: A => A, x: A): A = { - val fx = f(x) - if (fx == x) x else fixpT(f, fx) -} - -// translates eNFAs directly into NFAs -def eNFA[A, C](starts: Set[A], // the starting states - delta: (A, Option[C]) :=> Set[A], // the epsilon-transitions - fins: A => Boolean) : NFA[A, C] = { // the final states - - // epsilon transitions - def enext(q: A) : Set[A] = - applyOrElse(delta, (q, None)) - - def enexts(qs: Set[A]) : Set[A] = - qs | qs.flatMap(enext(_)) // | is the set-union in Scala - - // epsilon closure - def ecl(qs: Set[A]) : Set[A] = - fixpT(enexts, qs) - - // "normal" transitions - def next(q: A, c: C) : Set[A] = - applyOrElse(delta, (q, Some(c))) - - def nexts(qs: Set[A], c: C) : Set[A] = - ecl(ecl(qs).flatMap(next(_, c))) - - // result NFA - NFA(ecl(starts), - { case (q, c) => nexts(Set(q), c) }, - q => ecl(Set(q)) exists fins) -} - - -// eNFA examples -val enfa_trans1 : (State, Option[Char]) :=> Set[State] = - { case (Q0, Some('a')) => Set(Q0) - case (Q0, None) => Set(Q1, Q2) - case (Q1, Some('a')) => Set(Q1) - case (Q2, Some('b')) => Set(Q2) - } - -val enfa1 = eNFA(Set[State](Q0), enfa_trans1, Set[State](Q2)) - - -// -case object R1 extends State -case object R2 extends State -case object R3 extends State - -val enfa_trans2 : (State, Option[Char]) :=> Set[State] = - { case (R1, Some('b')) => Set(R3) - case (R1, None) => Set(R2) - case (R2, Some('a')) => Set(R1, R3) - } - - -val enfa2 = eNFA(Set[State](R1), enfa_trans1, Set[State](R3)) diff -r c7bdd7eac4cb -r 022e2cb1668d progs/nfa.scala --- a/progs/nfa.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ -// NFAs in Scala using partial functions (returning -// sets of states) -// -// needs :load dfa.scala for states - - -// a type abbreviation for partial functions -type :=>[A, B] = PartialFunction[A, B] - -// return an empty set, when a parial function is not defined -def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] = - Try(f(x)) getOrElse Set[B]() - - -// NFAs -case class NFA[A, C](starts: Set[A], // the starting states - delta: (A, C) :=> Set[A], // the transition function - fins: A => Boolean) { // the final states - - // given a state and a character, what is the set of - // next states? if there is none => empty set - def next(q: A, c: C) : Set[A] = - applyOrElse(delta, (q, c)) - - def nexts(qs: Set[A], c: C) : Set[A] = - qs.flatMap(next(_, c)) - - // given some states and a string, what is the set - // of next states? - def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { - case Nil => qs - case c::cs => deltas(nexts(qs, c), cs) - } - - // is a string accepted by an NFA? - def accepts(s: List[C]) : Boolean = - deltas(starts, s).exists(fins) - - // depth-first version of accepts - def search(q: A, s: List[C]) : Boolean = s match { - case Nil => fins(q) - case c::cs => next(q, c).exists(search(_, cs)) - } - - def accepts2(s: List[C]) : Boolean = - starts.exists(search(_, s)) -} - - - -// NFA examples - -val nfa_trans1 : (State, Char) :=> Set[State] = - { case (Q0, 'a') => Set(Q0, Q1) - case (Q0, 'b') => Set(Q2) - case (Q1, 'a') => Set(Q1) - case (Q2, 'b') => Set(Q2) } - -val nfa1 = NFA(Set[State](Q0), nfa_trans1, Set[State](Q2)) - -nfa1.accepts("aa".toList) // false -nfa1.accepts("aaaaa".toList) // false -nfa1.accepts("aaaaab".toList) // true -nfa1.accepts("aaaaabbb".toList) // true -nfa1.accepts("aaaaabbbaaa".toList) // false -nfa1.accepts("ac".toList) // false - -nfa1.accepts2("aa".toList) // false -nfa1.accepts2("aaaaa".toList) // false -nfa1.accepts2("aaaaab".toList) // true -nfa1.accepts2("aaaaabbb".toList) // true -nfa1.accepts2("aaaaabbbaaa".toList) // false -nfa1.accepts2("ac".toList) // false - - - - -// subset constructions - -def subset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = { - DFA(nfa.starts, - { case (qs, c) => nfa.nexts(qs, c) }, - _.exists(nfa.fins)) -} - -subset(nfa1).accepts("aa".toList) // false -subset(nfa1).accepts("aaaaa".toList) // false -subset(nfa1).accepts("aaaaab".toList) // true -subset(nfa1).accepts("aaaaabbb".toList) // true -subset(nfa1).accepts("aaaaabbbaaa".toList) // false -subset(nfa1).accepts("ac".toList) // false - diff -r c7bdd7eac4cb -r 022e2cb1668d progs/nfa2.scala --- a/progs/nfa2.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,242 +0,0 @@ -// NFAs and Thompson's construction - -// helper function for recording time -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -// state nodes -abstract class State -type States = Set[State] - -case class IntState(i: Int) extends State - -object NewState { - var counter = 0 - - def apply() : IntState = { - counter += 1; - new IntState(counter - 1) - } -} - - -case class NFA(states: States, - start: State, - delta: (State, Char) => States, - eps: State => States, - fins: States) { - - def epsclosure(qs: States) : States = { - val ps = qs ++ qs.flatMap(eps(_)) - if (qs == ps) ps else epsclosure(ps) - } - - def deltas(qs: States, s: List[Char]) : States = s match { - case Nil => epsclosure(qs) - case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) - } - - def accepts(s: String) : Boolean = - deltas(Set(start), s.toList) exists (fins contains (_)) -} - -// A small example NFA from the lectures -val Q0 = NewState() -val Q1 = NewState() -val Q2 = NewState() - -val delta : (State, Char) => States = { - case (Q0, 'a') => Set(Q0) - case (Q1, 'a') => Set(Q1) - case (Q2, 'b') => Set(Q2) - case (_, _) => Set () -} - -val eps : State => States = { - case Q0 => Set(Q1, Q2) - case _ => Set() -} - -val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) - -NFA1.accepts("aa") -NFA1.accepts("aaaaa") -NFA1.accepts("aaaaabbb") -NFA1.accepts("aaaaabbbaaa") -NFA1.accepts("ac") - - -// explicit construction of some NFAs; used in -// Thompson's construction - -// NFA that does not accept any string -def NFA_NULL() : NFA = { - val Q = NewState() - NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) -} - -// NFA that accepts the empty string -def NFA_EMPTY() : NFA = { - val Q = NewState() - NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) -} - -// NFA that accepts the string "c" -def NFA_CHAR(c: Char) : NFA = { - val Q1 = NewState() - val Q2 = NewState() - NFA(Set(Q1, Q2), - Q1, - { case (Q1, d) if (c == d) => Set(Q2) - case (_, _) => Set() }, - { case _ => Set() }, - Set(Q2)) -} - -// alternative of two NFAs -def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { - val Q = NewState() - NFA(Set(Q) ++ nfa1.states ++ nfa2.states, - Q, - { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) - case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) - case (_, _) => Set() }, - { case Q => Set(nfa1.start, nfa2.start) - case q if (nfa1.states contains q) => nfa1.eps(q) - case q if (nfa2.states contains q) => nfa2.eps(q) - case _ => Set() }, - nfa1.fins ++ nfa2.fins) -} - -// sequence of two NFAs -def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { - NFA(nfa1.states ++ nfa2.states, - nfa1.start, - { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) - case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) - case (_, _) => Set() }, - { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) - case q if (nfa1.states contains q) => nfa1.eps(q) - case q if (nfa2.states contains q) => nfa2.eps(q) - case _ => Set() }, - nfa2.fins) -} - -// star of an NFA -def NFA_STAR(nfa: NFA) : NFA = { - val Q = NewState() - NFA(Set(Q) ++ nfa.states, - Q, - nfa.delta, - { case Q => Set(nfa.start) - case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) - case q if (nfa.states contains q) => nfa.eps(q) - case _ => Set() }, - Set(Q)) -} - - -// regular expressions used for Thompson's construction -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - - -def thompson (r: Rexp) : NFA = r match { - case NULL => NFA_NULL() - case EMPTY => NFA_EMPTY() - case CHAR(c) => NFA_CHAR(c) - case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) - case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) - case STAR(r1) => NFA_STAR(thompson(r1)) -} - -// some examples for Thompson's -val A = thompson(CHAR('a')) - -println(A.accepts("a")) // true -println(A.accepts("c")) // false -println(A.accepts("aa")) // false - -val B = thompson(ALT("ab","ac")) - -println(B.accepts("ab")) // true -println(B.accepts("ac")) // true -println(B.accepts("bb")) // false -println(B.accepts("aa")) // false - -val C = thompson(STAR("ab")) - -println(C.accepts("")) // true -println(C.accepts("a")) // false -println(C.accepts("ababab")) // true -println(C.accepts("ab")) // true -println(C.accepts("ac")) // false -println(C.accepts("bb")) // false -println(C.accepts("aa")) // false - -// regular expression matcher using Thompson's -def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) - - -//optional -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => EMPTY - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - -// evil regular exproession -def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -// test harness for the matcher -for (i <- 0 to 100 by 5) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) -} - - -// regular expression matching via search and backtracking -def accepts2(nfa: NFA, s: String) : Boolean = { - - def search(q: State, s: List[Char]) : Boolean = s match { - case Nil => nfa.fins contains (q) - case c::cs => - (nfa.delta(q, c) exists (search(_, cs))) || - (nfa.eps(q) exists (search(_, c::cs))) - } - - search(nfa.start, s.toList) -} - -def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) - -// test harness for the backtracking matcher -for (i <- 0 to 20 by 1) { - println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) -} - - - - diff -r c7bdd7eac4cb -r 022e2cb1668d progs/thompson.scala --- a/progs/thompson.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -// Thompson Construction -// (needs :load dfa.scala -// :load nfa.scala -// :load enfa.scala) - - -// states for Thompson construction -case class TState(i: Int) extends State - -object TState { - var counter = 0 - - def apply() : TState = { - counter += 1; - new TState(counter - 1) - } -} - - -// some types abbreviations -type NFAt = NFA[TState, Char] -type NFAtrans = (TState, Char) :=> Set[TState] -type eNFAtrans = (TState, Option[Char]) :=> Set[TState] - - -// for composing an eNFA transition with a NFA transition -implicit class RichPF(val f: eNFAtrans) extends AnyVal { - def +++(g: NFAtrans) : eNFAtrans = - { case (q, None) => applyOrElse(f, (q, None)) - case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c)) } -} - - -// NFA that does not accept any string -def NFA_ZERO(): NFAt = { - val Q = TState() - NFA(Set(Q), { case _ => Set() }, Set()) -} - -// NFA that accepts the empty string -def NFA_ONE() : NFAt = { - val Q = TState() - NFA(Set(Q), { case _ => Set() }, Set(Q)) -} - -// NFA that accepts the string "c" -def NFA_CHAR(c: Char) : NFAt = { - val Q1 = TState() - val Q2 = TState() - NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) -} - -// sequence of two NFAs -def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = { - val new_delta : eNFAtrans = - { case (q, None) if enfa1.fins(q) => enfa2.starts } - - eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta, - enfa2.fins) -} - -// alternative of two NFAs -def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = { - val new_delta : NFAtrans = { - case (q, c) => applyOrElse(enfa1.delta, (q, c)) | - applyOrElse(enfa2.delta, (q, c)) } - val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q) - - NFA(enfa1.starts | enfa2.starts, new_delta, new_fins) -} - -// star of a NFA -def NFA_STAR(enfa: NFAt) : NFAt = { - val Q = TState() - val new_delta : eNFAtrans = - { case (Q, None) => enfa.starts - case (q, None) if enfa.fins(q) => Set(Q) } - - eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q)) -} - - - -// regular expressions -abstract class Rexp -case object ZERO extends Rexp // matches nothing -case object ONE extends Rexp // matches the empty string -case class CHAR(c: Char) extends Rexp // matches a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star - - - - -// thompson construction -def thompson (r: Rexp) : NFAt = r match { - case ZERO => NFA_ZERO() - case ONE => NFA_ONE() - case CHAR(c) => NFA_CHAR(c) - case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) - case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) - case STAR(r1) => NFA_STAR(thompson(r1)) -} - -//optional regular expression (one or zero times) -def OPT(r: Rexp) = ALT(r, ONE) - -//n-times regular expression (explicitly expanded) -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => ONE - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - - -def tmatches(r: Rexp, s: String) : Boolean = - thompson(r).accepts(s.toList) - -def tmatches2(r: Rexp, s: String) : Boolean = - thompson(r).accepts2(s.toList) - -// dfa via subset construction -def tmatches_dfa(r: Rexp, s: String) : Boolean = - subset(thompson(r)).accepts(s.toList) - -// Test Cases - - -// the evil regular expression a?{n} a{n} -def EVIL1(n: Int) : Rexp = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) - -// the evil regular expression (a*)*b -val EVIL2 : Rexp = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) - -//for measuring time -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// the size of the NFA can be large, -// thus slowing down the breadth-first search - -for (i <- 1 to 13) { - println(i + ": " + "%.5f".format(time_needed(2, tmatches(EVIL1(i), "a" * i)))) -} - -for (i <- 1 to 100 by 5) { - println(i + " " + "%.5f".format(time_needed(2, tmatches(EVIL2, "a" * i)))) -} - - -// the backtracking needed in depth-first search -// can be painfully slow - -for (i <- 1 to 8) { - println(i + " " + "%.5f".format(time_needed(2, tmatches2(EVIL2, "a" * i)))) -} - - - -// while my thompson->enfa->subset->partial-function-chain -// is probably not the most effcient way to obtain a fast DFA -// (the test below should be much faster with a more direct -// construction), in general the DFAs can be slow because of -// the state explosion in the subset construction - -for (i <- 1 to 13) { - println(i + ": " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL1(i), "a" * i)))) -} - -for (i <- 1 to 100 by 5) { - println(i + " " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL2, "a" * i)))) -}