diff -r c7bdd7eac4cb -r 022e2cb1668d progs/nfa2.scala --- a/progs/nfa2.scala Sat Jul 04 16:58:12 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,242 +0,0 @@ -// NFAs and Thompson's construction - -// helper function for recording time -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -// state nodes -abstract class State -type States = Set[State] - -case class IntState(i: Int) extends State - -object NewState { - var counter = 0 - - def apply() : IntState = { - counter += 1; - new IntState(counter - 1) - } -} - - -case class NFA(states: States, - start: State, - delta: (State, Char) => States, - eps: State => States, - fins: States) { - - def epsclosure(qs: States) : States = { - val ps = qs ++ qs.flatMap(eps(_)) - if (qs == ps) ps else epsclosure(ps) - } - - def deltas(qs: States, s: List[Char]) : States = s match { - case Nil => epsclosure(qs) - case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) - } - - def accepts(s: String) : Boolean = - deltas(Set(start), s.toList) exists (fins contains (_)) -} - -// A small example NFA from the lectures -val Q0 = NewState() -val Q1 = NewState() -val Q2 = NewState() - -val delta : (State, Char) => States = { - case (Q0, 'a') => Set(Q0) - case (Q1, 'a') => Set(Q1) - case (Q2, 'b') => Set(Q2) - case (_, _) => Set () -} - -val eps : State => States = { - case Q0 => Set(Q1, Q2) - case _ => Set() -} - -val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) - -NFA1.accepts("aa") -NFA1.accepts("aaaaa") -NFA1.accepts("aaaaabbb") -NFA1.accepts("aaaaabbbaaa") -NFA1.accepts("ac") - - -// explicit construction of some NFAs; used in -// Thompson's construction - -// NFA that does not accept any string -def NFA_NULL() : NFA = { - val Q = NewState() - NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) -} - -// NFA that accepts the empty string -def NFA_EMPTY() : NFA = { - val Q = NewState() - NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) -} - -// NFA that accepts the string "c" -def NFA_CHAR(c: Char) : NFA = { - val Q1 = NewState() - val Q2 = NewState() - NFA(Set(Q1, Q2), - Q1, - { case (Q1, d) if (c == d) => Set(Q2) - case (_, _) => Set() }, - { case _ => Set() }, - Set(Q2)) -} - -// alternative of two NFAs -def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { - val Q = NewState() - NFA(Set(Q) ++ nfa1.states ++ nfa2.states, - Q, - { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) - case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) - case (_, _) => Set() }, - { case Q => Set(nfa1.start, nfa2.start) - case q if (nfa1.states contains q) => nfa1.eps(q) - case q if (nfa2.states contains q) => nfa2.eps(q) - case _ => Set() }, - nfa1.fins ++ nfa2.fins) -} - -// sequence of two NFAs -def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { - NFA(nfa1.states ++ nfa2.states, - nfa1.start, - { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) - case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) - case (_, _) => Set() }, - { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) - case q if (nfa1.states contains q) => nfa1.eps(q) - case q if (nfa2.states contains q) => nfa2.eps(q) - case _ => Set() }, - nfa2.fins) -} - -// star of an NFA -def NFA_STAR(nfa: NFA) : NFA = { - val Q = NewState() - NFA(Set(Q) ++ nfa.states, - Q, - nfa.delta, - { case Q => Set(nfa.start) - case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) - case q if (nfa.states contains q) => nfa.eps(q) - case _ => Set() }, - Set(Q)) -} - - -// regular expressions used for Thompson's construction -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - - -def thompson (r: Rexp) : NFA = r match { - case NULL => NFA_NULL() - case EMPTY => NFA_EMPTY() - case CHAR(c) => NFA_CHAR(c) - case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) - case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) - case STAR(r1) => NFA_STAR(thompson(r1)) -} - -// some examples for Thompson's -val A = thompson(CHAR('a')) - -println(A.accepts("a")) // true -println(A.accepts("c")) // false -println(A.accepts("aa")) // false - -val B = thompson(ALT("ab","ac")) - -println(B.accepts("ab")) // true -println(B.accepts("ac")) // true -println(B.accepts("bb")) // false -println(B.accepts("aa")) // false - -val C = thompson(STAR("ab")) - -println(C.accepts("")) // true -println(C.accepts("a")) // false -println(C.accepts("ababab")) // true -println(C.accepts("ab")) // true -println(C.accepts("ac")) // false -println(C.accepts("bb")) // false -println(C.accepts("aa")) // false - -// regular expression matcher using Thompson's -def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) - - -//optional -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => EMPTY - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - -// evil regular exproession -def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -// test harness for the matcher -for (i <- 0 to 100 by 5) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) -} - - -// regular expression matching via search and backtracking -def accepts2(nfa: NFA, s: String) : Boolean = { - - def search(q: State, s: List[Char]) : Boolean = s match { - case Nil => nfa.fins contains (q) - case c::cs => - (nfa.delta(q, c) exists (search(_, cs))) || - (nfa.eps(q) exists (search(_, c::cs))) - } - - search(nfa.start, s.toList) -} - -def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) - -// test harness for the backtracking matcher -for (i <- 0 to 20 by 1) { - println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) -} - - - -