| author | Christian Urban <urbanc@in.tum.de> | 
| Wed, 22 Nov 2017 18:27:59 +0000 | |
| changeset 537 | 55c9f30d0ed2 | 
| parent 484 | 8182eb3278e0 | 
| permissions | -rw-r--r-- | 
| 144 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | // NFAs and Thompson's construction | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | // helper function for recording time | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 4 | def time_needed[T](i: Int, code: => T) = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 5 | val start = System.nanoTime() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 6 | for (j <- 1 to i) code | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | val end = System.nanoTime() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 8 | (end - start)/(i * 1.0e9) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 9 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 10 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 11 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | // state nodes | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 13 | abstract class State | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 14 | type States = Set[State] | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 16 | case class IntState(i: Int) extends State | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 17 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 18 | object NewState {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 19 | var counter = 0 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 20 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 |   def apply() : IntState = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | counter += 1; | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 23 | new IntState(counter - 1) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 24 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 25 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | case class NFA(states: States, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 29 | start: State, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 30 | delta: (State, Char) => States, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | eps: State => States, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 |                fins: States) {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 34 |   def epsclosure(qs: States) : States = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 35 | val ps = qs ++ qs.flatMap(eps(_)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 36 | if (qs == ps) ps else epsclosure(ps) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 |   def deltas(qs: States, s: List[Char]) : States = s match {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 40 | case Nil => epsclosure(qs) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 41 | case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 43 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 44 | def accepts(s: String) : Boolean = | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 45 | deltas(Set(start), s.toList) exists (fins contains (_)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | // A small example NFA from the lectures | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 | val Q0 = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | val Q1 = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 51 | val Q2 = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | val delta : (State, Char) => States = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | case (Q0, 'a') => Set(Q0) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | case (Q1, 'a') => Set(Q1) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 56 | case (Q2, 'b') => Set(Q2) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 57 | case (_, _) => Set () | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 58 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 59 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 60 | val eps : State => States = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 61 | case Q0 => Set(Q1, Q2) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | case _ => Set() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 63 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 65 | val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 67 | NFA1.accepts("aa")
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 68 | NFA1.accepts("aaaaa")
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 69 | NFA1.accepts("aaaaabbb")
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 70 | NFA1.accepts("aaaaabbbaaa")
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 | NFA1.accepts("ac")
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | // explicit construction of some NFAs; used in | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | // Thompson's construction | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 76 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | // NFA that does not accept any string | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 78 | def NFA_NULL() : NFA = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 79 | val Q = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 80 |   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 82 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 83 | // NFA that accepts the empty string | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 84 | def NFA_EMPTY() : NFA = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 85 | val Q = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 |   NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | // NFA that accepts the string "c" | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 | def NFA_CHAR(c: Char) : NFA = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | val Q1 = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | val Q2 = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 93 | NFA(Set(Q1, Q2), | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | Q1, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 |       { case (Q1, d) if (c == d) => Set(Q2)
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | case (_, _) => Set() }, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 |       { case _ => Set() },
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 | Set(Q2)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 100 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 101 | // alternative of two NFAs | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 102 | def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 103 | val Q = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 104 | NFA(Set(Q) ++ nfa1.states ++ nfa2.states, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 105 | Q, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 106 |       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 107 | case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 108 | case (_, _) => Set() }, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 109 |       { case Q => Set(nfa1.start, nfa2.start)
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 110 | case q if (nfa1.states contains q) => nfa1.eps(q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 111 | case q if (nfa2.states contains q) => nfa2.eps(q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 112 | case _ => Set() }, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 113 | nfa1.fins ++ nfa2.fins) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 114 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 115 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 116 | // sequence of two NFAs | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 117 | def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 118 | NFA(nfa1.states ++ nfa2.states, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 119 | nfa1.start, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 120 |       { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 121 | case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 122 | case (_, _) => Set() }, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 123 |       { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 124 | case q if (nfa1.states contains q) => nfa1.eps(q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 125 | case q if (nfa2.states contains q) => nfa2.eps(q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 126 | case _ => Set() }, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 127 | nfa2.fins) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 128 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 129 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 130 | // star of an NFA | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 131 | def NFA_STAR(nfa: NFA) : NFA = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 132 | val Q = NewState() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 133 | NFA(Set(Q) ++ nfa.states, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 134 | Q, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 135 | nfa.delta, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 136 |       { case Q => Set(nfa.start)
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 137 | case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 138 | case q if (nfa.states contains q) => nfa.eps(q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 139 | case _ => Set() }, | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 140 | Set(Q)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 141 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 142 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 143 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 144 | // regular expressions used for Thompson's construction | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 145 | abstract class Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 146 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 147 | case object NULL extends Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 148 | case object EMPTY extends Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 149 | case class CHAR(c: Char) extends Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 150 | case class ALT(r1: Rexp, r2: Rexp) extends Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 151 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 152 | case class STAR(r: Rexp) extends Rexp | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 153 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 154 | // some convenience for typing in regular expressions | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 155 | def charlist2rexp(s : List[Char]) : Rexp = s match {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 156 | case Nil => EMPTY | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 157 | case c::Nil => CHAR(c) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 158 | case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 159 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 160 | implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 161 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 162 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 163 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 164 | def thompson (r: Rexp) : NFA = r match {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 165 | case NULL => NFA_NULL() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 166 | case EMPTY => NFA_EMPTY() | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 167 | case CHAR(c) => NFA_CHAR(c) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 168 | case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 169 | case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 170 | case STAR(r1) => NFA_STAR(thompson(r1)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 171 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 172 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 173 | // some examples for Thompson's | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 174 | val A = thompson(CHAR('a'))
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 175 | |
| 484 | 176 | println(A.accepts("a"))   // true 
 | 
| 177 | println(A.accepts("c"))   // false 
 | |
| 178 | println(A.accepts("aa"))  // false
 | |
| 144 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 179 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 180 | val B = thompson(ALT("ab","ac"))
 | 
| 484 | 181 | |
| 182 | println(B.accepts("ab"))   // true 
 | |
| 183 | println(B.accepts("ac"))   // true 
 | |
| 184 | println(B.accepts("bb"))   // false 
 | |
| 185 | println(B.accepts("aa"))   // false 
 | |
| 144 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 186 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 187 | val C = thompson(STAR("ab"))
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 188 | |
| 484 | 189 | println(C.accepts(""))       // true
 | 
| 190 | println(C.accepts("a"))      // false 
 | |
| 191 | println(C.accepts("ababab")) // true
 | |
| 192 | println(C.accepts("ab"))     // true
 | |
| 193 | println(C.accepts("ac"))     // false 
 | |
| 194 | println(C.accepts("bb"))     // false 
 | |
| 195 | println(C.accepts("aa"))     // false 
 | |
| 144 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 196 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 197 | // regular expression matcher using Thompson's | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 198 | def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 199 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 200 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 201 | //optional | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 202 | def OPT(r: Rexp) = ALT(r, EMPTY) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 203 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 204 | //n-times | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 205 | def NTIMES(r: Rexp, n: Int) : Rexp = n match {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 206 | case 0 => EMPTY | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 207 | case 1 => r | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 208 | case n => SEQ(r, NTIMES(r, n - 1)) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 209 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 210 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 211 | // evil regular exproession | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 212 | def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 213 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 214 | // test harness for the matcher | 
| 145 
920f675b4ed1
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
144diff
changeset | 215 | for (i <- 0 to 100 by 5) {
 | 
| 144 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 216 | println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 217 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 218 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 219 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 220 | // regular expression matching via search and backtracking | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 221 | def accepts2(nfa: NFA, s: String) : Boolean = {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 222 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 223 |   def search(q: State, s: List[Char]) : Boolean = s match {
 | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 224 | case Nil => nfa.fins contains (q) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 225 | case c::cs => | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 226 | (nfa.delta(q, c) exists (search(_, cs))) || | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 227 | (nfa.eps(q) exists (search(_, c::cs))) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 228 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 229 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 230 | search(nfa.start, s.toList) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 231 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 232 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 233 | def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 234 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 235 | // test harness for the backtracking matcher | 
| 145 
920f675b4ed1
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
144diff
changeset | 236 | for (i <- 0 to 20 by 1) {
 | 
| 144 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 237 | println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 238 | } | 
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 239 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 240 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 241 | |
| 
0cb61bed557d
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 242 |