author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Tue, 17 Nov 2015 13:31:19 +0000 | |
changeset 376 | af65ffff9cdd |
parent 145 | 920f675b4ed1 |
child 482 | 0f6e3c5a1751 |
permissions | -rw-r--r-- |
144
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
// NFAs and Thompson's construction |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
// helper function for recording time |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
def time_needed[T](i: Int, code: => T) = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
val start = System.nanoTime() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
for (j <- 1 to i) code |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
val end = System.nanoTime() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
(end - start)/(i * 1.0e9) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
10 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
// state nodes |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
abstract class State |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
14 |
type States = Set[State] |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
15 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
16 |
case class IntState(i: Int) extends State |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
18 |
object NewState { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
19 |
var counter = 0 |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
def apply() : IntState = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
22 |
counter += 1; |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
23 |
new IntState(counter - 1) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
24 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
27 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
case class NFA(states: States, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
start: State, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
delta: (State, Char) => States, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
eps: State => States, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
fins: States) { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
def epsclosure(qs: States) : States = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
35 |
val ps = qs ++ qs.flatMap(eps(_)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
36 |
if (qs == ps) ps else epsclosure(ps) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
def deltas(qs: States, s: List[Char]) : States = s match { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
case Nil => epsclosure(qs) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
def accepts(s: String) : Boolean = |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
45 |
deltas(Set(start), s.toList) exists (fins contains (_)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
46 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
47 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
// A small example NFA from the lectures |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
val Q0 = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
val Q1 = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
val Q2 = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
53 |
val delta : (State, Char) => States = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
54 |
case (Q0, 'a') => Set(Q0) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
55 |
case (Q1, 'a') => Set(Q1) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
56 |
case (Q2, 'b') => Set(Q2) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
57 |
case (_, _) => Set () |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
60 |
val eps : State => States = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
61 |
case Q0 => Set(Q1, Q2) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
62 |
case _ => Set() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
63 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
65 |
val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
66 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
NFA1.accepts("aa") |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
NFA1.accepts("aaaaa") |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
NFA1.accepts("aaaaabbb") |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
NFA1.accepts("aaaaabbbaaa") |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
71 |
NFA1.accepts("ac") |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
72 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
73 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
74 |
// explicit construction of some NFAs; used in |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
75 |
// Thompson's construction |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
76 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
77 |
// NFA that does not accept any string |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
78 |
def NFA_NULL() : NFA = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
79 |
val Q = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
80 |
NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set()) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
81 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
82 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
83 |
// NFA that accepts the empty string |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
84 |
def NFA_EMPTY() : NFA = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
85 |
val Q = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
86 |
NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
87 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
88 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
89 |
// NFA that accepts the string "c" |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
90 |
def NFA_CHAR(c: Char) : NFA = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
91 |
val Q1 = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
92 |
val Q2 = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
93 |
NFA(Set(Q1, Q2), |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
94 |
Q1, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
95 |
{ case (Q1, d) if (c == d) => Set(Q2) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
case (_, _) => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
97 |
{ case _ => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
98 |
Set(Q2)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
99 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
100 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
101 |
// alternative of two NFAs |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
102 |
def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
103 |
val Q = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
104 |
NFA(Set(Q) ++ nfa1.states ++ nfa2.states, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
105 |
Q, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
106 |
{ case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
107 |
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
108 |
case (_, _) => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
109 |
{ case Q => Set(nfa1.start, nfa2.start) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
110 |
case q if (nfa1.states contains q) => nfa1.eps(q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
111 |
case q if (nfa2.states contains q) => nfa2.eps(q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
112 |
case _ => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
113 |
nfa1.fins ++ nfa2.fins) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
114 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
115 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
116 |
// sequence of two NFAs |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
117 |
def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
118 |
NFA(nfa1.states ++ nfa2.states, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
119 |
nfa1.start, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
120 |
{ case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
121 |
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
122 |
case (_, _) => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
123 |
{ case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
124 |
case q if (nfa1.states contains q) => nfa1.eps(q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
125 |
case q if (nfa2.states contains q) => nfa2.eps(q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
126 |
case _ => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
127 |
nfa2.fins) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
128 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
129 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
130 |
// star of an NFA |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
131 |
def NFA_STAR(nfa: NFA) : NFA = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
132 |
val Q = NewState() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
133 |
NFA(Set(Q) ++ nfa.states, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
134 |
Q, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
135 |
nfa.delta, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
136 |
{ case Q => Set(nfa.start) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
137 |
case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
138 |
case q if (nfa.states contains q) => nfa.eps(q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
139 |
case _ => Set() }, |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
140 |
Set(Q)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
141 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
142 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
143 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
144 |
// regular expressions used for Thompson's construction |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
145 |
abstract class Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
146 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
147 |
case object NULL extends Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
148 |
case object EMPTY extends Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
149 |
case class CHAR(c: Char) extends Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
150 |
case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
151 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
152 |
case class STAR(r: Rexp) extends Rexp |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
153 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
154 |
// some convenience for typing in regular expressions |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
155 |
def charlist2rexp(s : List[Char]) : Rexp = s match { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
156 |
case Nil => EMPTY |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
157 |
case c::Nil => CHAR(c) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
158 |
case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
159 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
160 |
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
161 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
162 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
163 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
164 |
def thompson (r: Rexp) : NFA = r match { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
165 |
case NULL => NFA_NULL() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
166 |
case EMPTY => NFA_EMPTY() |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
167 |
case CHAR(c) => NFA_CHAR(c) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
168 |
case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
169 |
case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
170 |
case STAR(r1) => NFA_STAR(thompson(r1)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
171 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
172 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
173 |
// some examples for Thompson's |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
174 |
val A = thompson(CHAR('a')) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
175 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
176 |
println(A.accepts("a")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
177 |
println(A.accepts("c")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
178 |
println(A.accepts("aa")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
179 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
180 |
val B = thompson(ALT("ab","ac")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
181 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
182 |
println(B.accepts("ab")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
183 |
println(B.accepts("ac")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
184 |
println(B.accepts("bb")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
185 |
println(B.accepts("aa")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
186 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
187 |
val C = thompson(STAR("ab")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
188 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
189 |
println(C.accepts("")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
190 |
println(C.accepts("a")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
191 |
println(C.accepts("ababab")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
192 |
println(C.accepts("ab")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
193 |
println(C.accepts("ac")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
194 |
println(C.accepts("bb")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
195 |
println(C.accepts("aa")) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
196 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
197 |
// regular expression matcher using Thompson's |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
198 |
def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
199 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
200 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
201 |
//optional |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
202 |
def OPT(r: Rexp) = ALT(r, EMPTY) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
203 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
204 |
//n-times |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
205 |
def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
206 |
case 0 => EMPTY |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
207 |
case 1 => r |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
208 |
case n => SEQ(r, NTIMES(r, n - 1)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
209 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
210 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
211 |
// evil regular exproession |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
212 |
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
213 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
214 |
// test harness for the matcher |
145
920f675b4ed1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
144
diff
changeset
|
215 |
for (i <- 0 to 100 by 5) { |
144
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
216 |
println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
217 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
218 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
219 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
220 |
// regular expression matching via search and backtracking |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
221 |
def accepts2(nfa: NFA, s: String) : Boolean = { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
222 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
223 |
def search(q: State, s: List[Char]) : Boolean = s match { |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
224 |
case Nil => nfa.fins contains (q) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
225 |
case c::cs => |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
226 |
(nfa.delta(q, c) exists (search(_, cs))) || |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
227 |
(nfa.eps(q) exists (search(_, c::cs))) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
228 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
229 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
230 |
search(nfa.start, s.toList) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
231 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
232 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
233 |
def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
234 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
235 |
// test harness for the backtracking matcher |
145
920f675b4ed1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
144
diff
changeset
|
236 |
for (i <- 0 to 20 by 1) { |
144
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
237 |
println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i)))) |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
238 |
} |
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
239 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
240 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
241 |
|
0cb61bed557d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
242 |