author | Chengsong |
Mon, 09 May 2022 17:17:52 +0100 | |
changeset 511 | 47618d607bbf |
parent 243 | 09ab631ce7fa |
permissions | -rw-r--r-- |
243 | 1 |
// NFAs based on Scala's partial functions (returning |
2 |
// sets of states) |
|
3 |
||
4 |
||
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
5 |
import scala.util.Try |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
6 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
7 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
8 |
// type abbreviation for partial functions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
9 |
type :=>[A, B] = PartialFunction[A, B] |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
10 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
11 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
12 |
// some states for test cases |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
13 |
abstract class State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
14 |
case object Q0 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
15 |
case object Q1 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
16 |
case object Q2 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
17 |
case object Q3 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
18 |
case object Q4 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
19 |
case object Q5 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
20 |
case object Q6 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
21 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
22 |
|
243 | 23 |
// return empty set when not defined |
24 |
def applyOrElse[A, B](f: A :=> Set[B], x: A) : Set[B] = |
|
25 |
Try(f(x)) getOrElse Set[B]() |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
26 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
27 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
28 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
29 |
// class for NFAs |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
30 |
case class NFA[A, C](starts: Set[A], // starting states |
243 | 31 |
delta: (A, C) :=> Set[A], // transitions |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
32 |
fins: A => Boolean) { // final states |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
33 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
34 |
// given a state and a character, what is the set of next states? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
35 |
// if there is none => empty set |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
36 |
def next(q: A, c: C) : Set[A] = |
243 | 37 |
applyOrElse(delta, (q, c)) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
38 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
39 |
def nexts(qs: Set[A], c: C) : Set[A] = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
40 |
qs.flatMap(next(_, c)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
41 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
42 |
// given some states and a string, what is the set of next states? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
43 |
def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
44 |
case Nil => qs |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
45 |
case c::cs => deltas(nexts(qs, c), cs) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
46 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
47 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
48 |
// is a string accepted by an NFA? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
49 |
def accepts(s: List[C]) : Boolean = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
50 |
deltas(starts, s).exists(fins) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
51 |
|
243 | 52 |
// depth-first search version of accept |
53 |
def search(q: A, s: List[C]) : Boolean = s match { |
|
54 |
case Nil => fins(q) |
|
55 |
case c::cs => next(q, c).exists(search(_, cs)) |
|
56 |
} |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
57 |
|
243 | 58 |
def accepts2(s: List[C]) : Boolean = |
59 |
starts.exists(search(_, s)) |
|
60 |
||
61 |
} |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
62 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
63 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
64 |
// NFA test cases |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
65 |
|
243 | 66 |
val trans2 : (State, Char) :=> Set[State] = |
67 |
{ case (Q0, 'a') => Set(Q0, Q1) |
|
68 |
case (Q0, 'b') => Set(Q2) |
|
69 |
case (Q1, 'a') => Set(Q1) |
|
70 |
case (Q2, 'b') => Set(Q2) |
|
71 |
} |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
72 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
73 |
val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
74 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
75 |
nfa2.accepts("aa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
76 |
nfa2.accepts("aaaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
77 |
nfa2.accepts("aaaaab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
78 |
nfa2.accepts("aaaaabbb".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
79 |
nfa2.accepts("aaaaabbbaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
80 |
nfa2.accepts("ac".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
81 |
|
243 | 82 |
nfa2.accepts2("aa".toList) // false |
83 |
nfa2.accepts2("aaaaa".toList) // false |
|
84 |
nfa2.accepts2("aaaaab".toList) // true |
|
85 |
nfa2.accepts2("aaaaabbb".toList) // true |
|
86 |
nfa2.accepts2("aaaaabbbaaa".toList) // false |
|
87 |
nfa2.accepts2("ac".toList) // false |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
88 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
89 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
90 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
91 |
|
243 | 92 |
// epsilon NFAs |
93 |
// (not explicitly defined, but immediately translated into NFAs) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
94 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
95 |
// fixpoint construction |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
96 |
import scala.annotation.tailrec |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
97 |
@tailrec |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
98 |
def fixpT[A](f: A => A, x: A): A = { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
99 |
val fx = f(x) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
100 |
if (fx == x) x else fixpT(f, fx) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
101 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
102 |
|
243 | 103 |
// translates eNFAs directly into NFAs |
104 |
def eNFA[A, C](starts: Set[A], |
|
105 |
delta: (A, Option[C]) :=> Set[A], |
|
106 |
fins: A => Boolean) : NFA[A, C] = { |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
107 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
108 |
// epsilon transitions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
109 |
def enext(q: A) : Set[A] = |
243 | 110 |
applyOrElse(delta, (q, None)) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
111 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
112 |
def enexts(qs: Set[A]) : Set[A] = |
243 | 113 |
qs | qs.flatMap(enext(_)) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
114 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
115 |
// epsilon closure |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
116 |
def ecl(qs: Set[A]) : Set[A] = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
117 |
fixpT(enexts, qs) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
118 |
|
243 | 119 |
// "normal" transitions |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
120 |
def next(q: A, c: C) : Set[A] = |
243 | 121 |
applyOrElse(delta, (q, Some(c))) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
122 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
123 |
def nexts(qs: Set[A], c: C) : Set[A] = |
243 | 124 |
ecl(ecl(qs).flatMap(next(_, c))) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
125 |
|
243 | 126 |
NFA(ecl(starts), |
127 |
{ case (q, c) => nexts(Set(q), c) }, |
|
128 |
q => ecl(Set(q)) exists fins) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
129 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
130 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
131 |
|
243 | 132 |
|
133 |
||
134 |
||
135 |
// test cases for eNFAs |
|
136 |
val etrans1 : (State, Option[Char]) :=> Set[State] = |
|
137 |
{ case (Q0, Some('a')) => Set(Q1) |
|
138 |
case (Q1, None) => Set(Q0) |
|
139 |
} |
|
140 |
||
141 |
val enfa1 = eNFA(Set[State](Q0), etrans1, Set[State](Q1)) |
|
142 |
||
143 |
enfa1.accepts("a".toList) // true |
|
144 |
enfa1.accepts("".toList) // false |
|
145 |
enfa1.accepts("aaaaa".toList) // true |
|
146 |
enfa1.accepts("aaaaab".toList) // false |
|
147 |
enfa1.accepts("aaaaabbb".toList) // false |
|
148 |
enfa1.accepts("aaaaabbbaaa".toList) // false |
|
149 |
enfa1.accepts("ac".toList) // false |
|
150 |
||
151 |
// example from handouts |
|
152 |
val etrans2 : (State, Option[Char]) :=> Set[State] = |
|
153 |
{ case (Q0, Some('a')) => Set(Q0) |
|
154 |
case (Q0, None) => Set(Q1, Q2) |
|
155 |
case (Q1, Some('a')) => Set(Q1) |
|
156 |
case (Q2, Some('b')) => Set(Q2) |
|
157 |
case (Q1, None) => Set(Q0) |
|
158 |
} |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
159 |
|
243 | 160 |
val enfa2 = eNFA(Set[State](Q0), etrans2, Set[State](Q2)) |
161 |
||
162 |
enfa2.accepts("a".toList) // true |
|
163 |
enfa2.accepts("".toList) // true |
|
164 |
enfa2.accepts("aaaaa".toList) // true |
|
165 |
enfa2.accepts("aaaaab".toList) // true |
|
166 |
enfa2.accepts("aaaaabbb".toList) // true |
|
167 |
enfa2.accepts("aaaaabbbaaa".toList) // false |
|
168 |
enfa2.accepts("ac".toList) // false |
|
169 |
||
170 |
||
171 |
// states for Thompson construction |
|
172 |
case class TState(i: Int) extends State |
|
173 |
||
174 |
object TState { |
|
175 |
var counter = 0 |
|
176 |
||
177 |
def apply() : TState = { |
|
178 |
counter += 1; |
|
179 |
new TState(counter - 1) |
|
180 |
} |
|
181 |
} |
|
182 |
||
183 |
// some types abbreviations |
|
184 |
type NFAt = NFA[TState, Char] |
|
185 |
type NFAtrans = (TState, Char) :=> Set[TState] |
|
186 |
type eNFAtrans = (TState, Option[Char]) :=> Set[TState] |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
187 |
|
243 | 188 |
|
189 |
// for composing an eNFA transition with a NFA transition |
|
190 |
implicit class RichPF(val f: eNFAtrans) extends AnyVal { |
|
191 |
def +++(g: NFAtrans) : eNFAtrans = |
|
192 |
{ case (q, None) => applyOrElse(f, (q, None)) |
|
193 |
case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c)) } |
|
194 |
} |
|
195 |
||
196 |
||
197 |
// NFA that does not accept any string |
|
198 |
def NFA_ZERO(): NFAt = { |
|
199 |
val Q = TState() |
|
200 |
NFA(Set(Q), { case _ => Set() }, Set()) |
|
201 |
} |
|
202 |
||
203 |
// NFA that accepts the empty string |
|
204 |
def NFA_ONE() : NFAt = { |
|
205 |
val Q = TState() |
|
206 |
NFA(Set(Q), { case _ => Set() }, Set(Q)) |
|
207 |
} |
|
208 |
||
209 |
// NFA that accepts the string "c" |
|
210 |
def NFA_CHAR(c: Char) : NFAt = { |
|
211 |
val Q1 = TState() |
|
212 |
val Q2 = TState() |
|
213 |
NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) |
|
214 |
} |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
215 |
|
243 | 216 |
// sequence of two NFAs |
217 |
def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = { |
|
218 |
val new_delta : eNFAtrans = |
|
219 |
{ case (q, None) if enfa1.fins(q) => enfa2.starts } |
|
220 |
||
221 |
eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta, |
|
222 |
enfa2.fins) |
|
223 |
} |
|
224 |
||
225 |
// alternative of two NFAs |
|
226 |
def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = { |
|
227 |
val new_delta : NFAtrans = { |
|
228 |
case (q, c) => applyOrElse(enfa1.delta, (q, c)) | |
|
229 |
applyOrElse(enfa2.delta, (q, c)) } |
|
230 |
val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q) |
|
231 |
||
232 |
NFA(enfa1.starts | enfa2.starts, new_delta, new_fins) |
|
233 |
} |
|
234 |
||
235 |
// star of a NFA |
|
236 |
def NFA_STAR(enfa: NFAt) : NFAt = { |
|
237 |
val Q = TState() |
|
238 |
val new_delta : eNFAtrans = |
|
239 |
{ case (Q, None) => enfa.starts |
|
240 |
case (q, None) if enfa.fins(q) => Set(Q) } |
|
241 |
||
242 |
eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q)) |
|
243 |
} |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
244 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
245 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
246 |
// Regular expressions fro derivative automata |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
247 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
248 |
abstract class Rexp |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
249 |
case object ZERO extends Rexp |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
250 |
case object ONE extends Rexp |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
251 |
case class CHAR(c: Char) extends Rexp |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
252 |
case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
253 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
254 |
case class STAR(r: Rexp) extends Rexp |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
255 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
256 |
import scala.language.implicitConversions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
257 |
import scala.language.reflectiveCalls |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
258 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
259 |
def charlist2rexp(s: List[Char]): Rexp = s match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
260 |
case Nil => ONE |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
261 |
case c::Nil => CHAR(c) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
262 |
case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
263 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
264 |
implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
265 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
266 |
implicit def RexpOps (r: Rexp) = new { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
267 |
def | (s: Rexp) = ALT(r, s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
268 |
def % = STAR(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
269 |
def ~ (s: Rexp) = SEQ(r, s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
270 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
271 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
272 |
implicit def stringOps (s: String) = new { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
273 |
def | (r: Rexp) = ALT(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
274 |
def | (r: String) = ALT(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
275 |
def % = STAR(s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
276 |
def ~ (r: Rexp) = SEQ(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
277 |
def ~ (r: String) = SEQ(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
278 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
279 |
|
243 | 280 |
//optional |
281 |
def OPT(r: Rexp) = ALT(r, ONE) |
|
282 |
||
283 |
//n-times |
|
284 |
def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
|
285 |
case 0 => ONE |
|
286 |
case 1 => r |
|
287 |
case n => SEQ(r, NTIMES(r, n - 1)) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
288 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
289 |
|
243 | 290 |
// evil regular exproession |
291 |
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
292 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
293 |
|
243 | 294 |
val EVIL2 = STAR(STAR("a")) ~ "b" |
295 |
||
296 |
// thompson construction |
|
297 |
def thompson (r: Rexp) : NFAt = r match { |
|
298 |
case ZERO => NFA_ZERO() |
|
299 |
case ONE => NFA_ONE() |
|
300 |
case CHAR(c) => NFA_CHAR(c) |
|
301 |
case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) |
|
302 |
case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) |
|
303 |
case STAR(r1) => NFA_STAR(thompson(r1)) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
304 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
305 |
|
243 | 306 |
// regular expression matcher using Thompson's |
307 |
def tmatcher(r: Rexp, s: String) : Boolean = |
|
308 |
thompson(r).accepts(s.toList) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
309 |
|
243 | 310 |
def tmatcher2(r: Rexp, s: String) : Boolean = |
311 |
thompson(r).accepts2(s.toList) |
|
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
312 |
|
243 | 313 |
// test cases for thompson construction |
314 |
tmatcher(ZERO, "") // false |
|
315 |
tmatcher(ZERO, "a") // false |
|
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
316 |
|
243 | 317 |
tmatcher(ONE, "") // true |
318 |
tmatcher(ONE, "a") // false |
|
241 | 319 |
|
243 | 320 |
tmatcher(CHAR('a'), "") // false |
321 |
tmatcher(CHAR('a'), "a") // true |
|
322 |
tmatcher(CHAR('a'), "b") // false |
|
241 | 323 |
|
243 | 324 |
tmatcher("a" | "b", "") // false |
325 |
tmatcher("a" | "b", "a") // true |
|
326 |
tmatcher("a" | "b", "b") // true |
|
327 |
tmatcher("a" | "b", "c") // false |
|
328 |
tmatcher("a" | "b", "ab") // false |
|
241 | 329 |
|
243 | 330 |
tmatcher("a" ~ "b", "") // false |
331 |
tmatcher("a" ~ "b", "a") // false |
|
332 |
tmatcher("a" ~ "b", "b") // false |
|
333 |
tmatcher("a" ~ "b", "c") // false |
|
334 |
tmatcher("a" ~ "b", "ab") // true |
|
335 |
tmatcher("a" ~ "b", "aba") // false |
|
336 |
||
337 |
tmatcher(STAR("a"), "") // true |
|
338 |
tmatcher(STAR("a"), "a") // true |
|
339 |
tmatcher(STAR("a"), "aaaaa") // true |
|
340 |
tmatcher(STAR("a"), "b") // false |
|
341 |
tmatcher(STAR("a"), "aaab") // false |
|
241 | 342 |
|
243 | 343 |
tmatcher(STAR(STAR("a")), "") // true |
344 |
tmatcher(STAR(STAR("a")), "a") // true |
|
345 |
tmatcher(STAR(STAR("a")), "aaaaa") // true |
|
346 |
tmatcher(STAR(STAR("a")), "b") // false |
|
347 |
tmatcher(STAR(STAR("a")), "aaab") // false |
|
348 |
||
349 |
tmatcher(EVIL2, "aaaaaab") // true |
|
350 |
tmatcher(EVIL2, "aaaaaa") // false |
|
351 |
tmatcher(EVIL2, "a" * 100) // false |
|
352 |
||
353 |
// helper function for recording time |
|
354 |
def time_needed[T](i: Int, code: => T) = { |
|
355 |
val start = System.nanoTime() |
|
356 |
for (j <- 1 to i) code |
|
357 |
val end = System.nanoTime() |
|
358 |
(end - start)/(i * 1.0e9) |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
359 |
} |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
360 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
361 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
362 |
|
243 | 363 |
// test harness for the matcher |
364 |
for (i <- 0 to 9) { |
|
365 |
println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i)))) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
366 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
367 |
|
243 | 368 |
for (i <- 0 to 7) { |
369 |
println(i + ": " + "%.5f".format(time_needed(1, tmatcher2(EVIL(i), "a" * i)))) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
370 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
371 |
|
243 | 372 |
for (i <- 0 to 100 by 5) { |
373 |
println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL2, "a" * i)))) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
374 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
375 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
376 |
|
243 | 377 |
for (i <- 0 to 8) { |
378 |
println(i + ": " + "%.5f".format(time_needed(1, tmatcher2(EVIL2, "a" * i)))) |
|
379 |
} |