144
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
// NFAs and Thompson's construction
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
// helper function for recording time
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
def time_needed[T](i: Int, code: => T) = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
val start = System.nanoTime()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
for (j <- 1 to i) code
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
val end = System.nanoTime()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
(end - start)/(i * 1.0e9)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
10 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
// state nodes
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
abstract class State
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
14 |
type States = Set[State]
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
15 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
16 |
case class IntState(i: Int) extends State
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
18 |
object NewState {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
19 |
var counter = 0
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
def apply() : IntState = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
22 |
counter += 1;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
23 |
new IntState(counter - 1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
24 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
27 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
case class NFA(states: States,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
start: State,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
delta: (State, Char) => States,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
eps: State => States,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
fins: States) {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
def epsclosure(qs: States) : States = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
35 |
val ps = qs ++ qs.flatMap(eps(_))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
36 |
if (qs == ps) ps else epsclosure(ps)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
def deltas(qs: States, s: List[Char]) : States = s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
case Nil => epsclosure(qs)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
def accepts(s: String) : Boolean =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
45 |
deltas(Set(start), s.toList) exists (fins contains (_))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
46 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
47 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
// A small example NFA from the lectures
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
val Q0 = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
val Q1 = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
val Q2 = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
53 |
val delta : (State, Char) => States = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
54 |
case (Q0, 'a') => Set(Q0)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
55 |
case (Q1, 'a') => Set(Q1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
56 |
case (Q2, 'b') => Set(Q2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
57 |
case (_, _) => Set ()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
60 |
val eps : State => States = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
61 |
case Q0 => Set(Q1, Q2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
62 |
case _ => Set()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
63 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
65 |
val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
66 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
NFA1.accepts("aa")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
NFA1.accepts("aaaaa")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
NFA1.accepts("aaaaabbb")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
NFA1.accepts("aaaaabbbaaa")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
71 |
NFA1.accepts("ac")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
72 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
73 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
74 |
// explicit construction of some NFAs; used in
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
75 |
// Thompson's construction
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
76 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
77 |
// NFA that does not accept any string
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
78 |
def NFA_NULL() : NFA = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
79 |
val Q = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
80 |
NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
81 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
82 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
83 |
// NFA that accepts the empty string
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
84 |
def NFA_EMPTY() : NFA = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
85 |
val Q = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
86 |
NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
87 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
88 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
89 |
// NFA that accepts the string "c"
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
90 |
def NFA_CHAR(c: Char) : NFA = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
91 |
val Q1 = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
92 |
val Q2 = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
93 |
NFA(Set(Q1, Q2),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
94 |
Q1,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
95 |
{ case (Q1, d) if (c == d) => Set(Q2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
case (_, _) => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
97 |
{ case _ => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
98 |
Set(Q2))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
99 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
100 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
101 |
// alternative of two NFAs
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
102 |
def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
103 |
val Q = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
104 |
NFA(Set(Q) ++ nfa1.states ++ nfa2.states,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
105 |
Q,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
106 |
{ case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
107 |
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
108 |
case (_, _) => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
109 |
{ case Q => Set(nfa1.start, nfa2.start)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
110 |
case q if (nfa1.states contains q) => nfa1.eps(q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
111 |
case q if (nfa2.states contains q) => nfa2.eps(q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
112 |
case _ => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
113 |
nfa1.fins ++ nfa2.fins)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
114 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
115 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
116 |
// sequence of two NFAs
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
117 |
def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
118 |
NFA(nfa1.states ++ nfa2.states,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
119 |
nfa1.start,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
120 |
{ case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
121 |
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
122 |
case (_, _) => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
123 |
{ case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
124 |
case q if (nfa1.states contains q) => nfa1.eps(q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
125 |
case q if (nfa2.states contains q) => nfa2.eps(q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
126 |
case _ => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
127 |
nfa2.fins)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
128 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
129 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
130 |
// star of an NFA
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
131 |
def NFA_STAR(nfa: NFA) : NFA = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
132 |
val Q = NewState()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
133 |
NFA(Set(Q) ++ nfa.states,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
134 |
Q,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
135 |
nfa.delta,
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
136 |
{ case Q => Set(nfa.start)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
137 |
case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
138 |
case q if (nfa.states contains q) => nfa.eps(q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
139 |
case _ => Set() },
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
140 |
Set(Q))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
141 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
142 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
143 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
144 |
// regular expressions used for Thompson's construction
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
145 |
abstract class Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
146 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
147 |
case object NULL extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
148 |
case object EMPTY extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
149 |
case class CHAR(c: Char) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
150 |
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
151 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
152 |
case class STAR(r: Rexp) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
153 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
154 |
// some convenience for typing in regular expressions
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
155 |
def charlist2rexp(s : List[Char]) : Rexp = s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
156 |
case Nil => EMPTY
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
157 |
case c::Nil => CHAR(c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
158 |
case c::s => SEQ(CHAR(c), charlist2rexp(s))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
159 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
160 |
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
161 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
162 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
163 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
164 |
def thompson (r: Rexp) : NFA = r match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
165 |
case NULL => NFA_NULL()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
166 |
case EMPTY => NFA_EMPTY()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
167 |
case CHAR(c) => NFA_CHAR(c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
168 |
case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
169 |
case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
170 |
case STAR(r1) => NFA_STAR(thompson(r1))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
171 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
172 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
173 |
// some examples for Thompson's
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
174 |
val A = thompson(CHAR('a'))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
175 |
|
484
|
176 |
println(A.accepts("a")) // true
|
|
177 |
println(A.accepts("c")) // false
|
|
178 |
println(A.accepts("aa")) // false
|
144
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
179 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
180 |
val B = thompson(ALT("ab","ac"))
|
484
|
181 |
|
|
182 |
println(B.accepts("ab")) // true
|
|
183 |
println(B.accepts("ac")) // true
|
|
184 |
println(B.accepts("bb")) // false
|
|
185 |
println(B.accepts("aa")) // false
|
144
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
186 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
187 |
val C = thompson(STAR("ab"))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
188 |
|
484
|
189 |
println(C.accepts("")) // true
|
|
190 |
println(C.accepts("a")) // false
|
|
191 |
println(C.accepts("ababab")) // true
|
|
192 |
println(C.accepts("ab")) // true
|
|
193 |
println(C.accepts("ac")) // false
|
|
194 |
println(C.accepts("bb")) // false
|
|
195 |
println(C.accepts("aa")) // false
|
144
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
196 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
197 |
// regular expression matcher using Thompson's
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
198 |
def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
199 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
200 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
201 |
//optional
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
202 |
def OPT(r: Rexp) = ALT(r, EMPTY)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
203 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
204 |
//n-times
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
205 |
def NTIMES(r: Rexp, n: Int) : Rexp = n match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
206 |
case 0 => EMPTY
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
207 |
case 1 => r
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
208 |
case n => SEQ(r, NTIMES(r, n - 1))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
209 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
210 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
211 |
// evil regular exproession
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
212 |
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
213 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
214 |
// test harness for the matcher
|
145
Christian Urban <christian dot urban at kcl dot ac dot uk>
diff
changeset
|
215 |
for (i <- 0 to 100 by 5) {
|
144
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
216 |
println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
217 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
218 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
219 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
220 |
// regular expression matching via search and backtracking
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
221 |
def accepts2(nfa: NFA, s: String) : Boolean = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
222 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
223 |
def search(q: State, s: List[Char]) : Boolean = s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
224 |
case Nil => nfa.fins contains (q)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
225 |
case c::cs =>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
226 |
(nfa.delta(q, c) exists (search(_, cs))) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
227 |
(nfa.eps(q) exists (search(_, c::cs)))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
228 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
229 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
230 |
search(nfa.start, s.toList)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
231 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
232 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
233 |
def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
234 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
235 |
// test harness for the backtracking matcher
|
145
Christian Urban <christian dot urban at kcl dot ac dot uk>
diff
changeset
|
236 |
for (i <- 0 to 20 by 1) {
|
144
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
237 |
println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
238 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
239 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
240 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
241 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
242 |
|