| author | Christian Urban <christian dot urban at kcl dot ac dot uk> | 
| Tue, 26 Sep 2017 14:08:49 +0100 | |
| changeset 504 | 3390e863d796 | 
| parent 484 | 8182eb3278e0 | 
| permissions | -rw-r--r-- | 
| 
144
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
1  | 
// NFAs and Thompson's construction  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
2  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
3  | 
// helper function for recording time  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
4  | 
def time_needed[T](i: Int, code: => T) = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
5  | 
val start = System.nanoTime()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
6  | 
for (j <- 1 to i) code  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
7  | 
val end = System.nanoTime()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
8  | 
(end - start)/(i * 1.0e9)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
9  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
10  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
11  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
12  | 
// state nodes  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
13  | 
abstract class State  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
14  | 
type States = Set[State]  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
15  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
16  | 
case class IntState(i: Int) extends State  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
17  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
18  | 
object NewState {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
19  | 
var counter = 0  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
20  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
21  | 
  def apply() : IntState = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
22  | 
counter += 1;  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
23  | 
new IntState(counter - 1)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
24  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
25  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
26  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
27  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
28  | 
case class NFA(states: States,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
29  | 
start: State,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
30  | 
delta: (State, Char) => States,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
31  | 
eps: State => States,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
32  | 
               fins: States) {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
33  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
34  | 
  def epsclosure(qs: States) : States = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
35  | 
val ps = qs ++ qs.flatMap(eps(_))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
36  | 
if (qs == ps) ps else epsclosure(ps)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
37  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
38  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
39  | 
  def deltas(qs: States, s: List[Char]) : States = s match {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
40  | 
case Nil => epsclosure(qs)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
41  | 
case c::cs => deltas(epsclosure(epsclosure(qs).flatMap (delta (_, c))), cs)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
42  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
43  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
44  | 
def accepts(s: String) : Boolean =  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
45  | 
deltas(Set(start), s.toList) exists (fins contains (_))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
46  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
47  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
48  | 
// A small example NFA from the lectures  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
49  | 
val Q0 = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
50  | 
val Q1 = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
51  | 
val Q2 = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
52  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
53  | 
val delta : (State, Char) => States = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
54  | 
case (Q0, 'a') => Set(Q0)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
55  | 
case (Q1, 'a') => Set(Q1)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
56  | 
case (Q2, 'b') => Set(Q2)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
57  | 
case (_, _) => Set ()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
58  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
59  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
60  | 
val eps : State => States = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
61  | 
case Q0 => Set(Q1, Q2)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
62  | 
case _ => Set()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
63  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
64  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
65  | 
val NFA1 = NFA(Set(Q0, Q1, Q2), Q0, delta, eps, Set(Q2))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
66  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
67  | 
NFA1.accepts("aa")
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
68  | 
NFA1.accepts("aaaaa")
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
69  | 
NFA1.accepts("aaaaabbb")
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
70  | 
NFA1.accepts("aaaaabbbaaa")
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
71  | 
NFA1.accepts("ac")
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
72  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
73  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
74  | 
// explicit construction of some NFAs; used in  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
75  | 
// Thompson's construction  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
76  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
77  | 
// NFA that does not accept any string  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
78  | 
def NFA_NULL() : NFA = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
79  | 
val Q = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
80  | 
  NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set())
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
81  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
82  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
83  | 
// NFA that accepts the empty string  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
84  | 
def NFA_EMPTY() : NFA = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
85  | 
val Q = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
86  | 
  NFA(Set(Q), Q, { case (_, _) => Set() }, { case _ => Set() }, Set(Q))
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
87  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
88  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
89  | 
// NFA that accepts the string "c"  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
90  | 
def NFA_CHAR(c: Char) : NFA = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
91  | 
val Q1 = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
92  | 
val Q2 = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
93  | 
NFA(Set(Q1, Q2),  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
94  | 
Q1,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
95  | 
      { case (Q1, d) if (c == d) => Set(Q2)
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
96  | 
case (_, _) => Set() },  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
97  | 
      { case _ => Set() },
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
98  | 
Set(Q2))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
99  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
100  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
101  | 
// alternative of two NFAs  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
102  | 
def NFA_ALT(nfa1: NFA, nfa2: NFA) : NFA = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
103  | 
val Q = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
104  | 
NFA(Set(Q) ++ nfa1.states ++ nfa2.states,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
105  | 
Q,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
106  | 
      { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
107  | 
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
108  | 
case (_, _) => Set() },  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
109  | 
      { case Q => Set(nfa1.start, nfa2.start)
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
110  | 
case q if (nfa1.states contains q) => nfa1.eps(q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
111  | 
case q if (nfa2.states contains q) => nfa2.eps(q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
112  | 
case _ => Set() },  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
113  | 
nfa1.fins ++ nfa2.fins)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
114  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
115  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
116  | 
// sequence of two NFAs  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
117  | 
def NFA_SEQ(nfa1: NFA, nfa2: NFA) : NFA = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
118  | 
NFA(nfa1.states ++ nfa2.states,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
119  | 
nfa1.start,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
120  | 
      { case (q, c) if (nfa1.states contains q) => nfa1.delta(q, c)
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
121  | 
case (q, c) if (nfa2.states contains q) => nfa2.delta(q, c)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
122  | 
case (_, _) => Set() },  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
123  | 
      { case q if (nfa1.fins contains q) => nfa1.eps(q) ++ Set(nfa2.start)
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
124  | 
case q if (nfa1.states contains q) => nfa1.eps(q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
125  | 
case q if (nfa2.states contains q) => nfa2.eps(q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
126  | 
case _ => Set() },  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
127  | 
nfa2.fins)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
128  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
129  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
130  | 
// star of an NFA  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
131  | 
def NFA_STAR(nfa: NFA) : NFA = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
132  | 
val Q = NewState()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
133  | 
NFA(Set(Q) ++ nfa.states,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
134  | 
Q,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
135  | 
nfa.delta,  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
136  | 
      { case Q => Set(nfa.start)
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
137  | 
case q if (nfa.fins contains q) => nfa.eps(q) ++ Set(Q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
138  | 
case q if (nfa.states contains q) => nfa.eps(q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
139  | 
case _ => Set() },  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
140  | 
Set(Q))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
141  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
142  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
143  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
144  | 
// regular expressions used for Thompson's construction  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
145  | 
abstract class Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
146  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
147  | 
case object NULL extends Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
148  | 
case object EMPTY extends Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
149  | 
case class CHAR(c: Char) extends Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
150  | 
case class ALT(r1: Rexp, r2: Rexp) extends Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
151  | 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
152  | 
case class STAR(r: Rexp) extends Rexp  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
153  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
154  | 
// some convenience for typing in regular expressions  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
155  | 
def charlist2rexp(s : List[Char]) : Rexp = s match {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
156  | 
case Nil => EMPTY  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
157  | 
case c::Nil => CHAR(c)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
158  | 
case c::s => SEQ(CHAR(c), charlist2rexp(s))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
159  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
160  | 
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
161  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
162  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
163  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
164  | 
def thompson (r: Rexp) : NFA = r match {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
165  | 
case NULL => NFA_NULL()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
166  | 
case EMPTY => NFA_EMPTY()  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
167  | 
case CHAR(c) => NFA_CHAR(c)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
168  | 
case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
169  | 
case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
170  | 
case STAR(r1) => NFA_STAR(thompson(r1))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
171  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
172  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
173  | 
// some examples for Thompson's  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
174  | 
val A = thompson(CHAR('a'))
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
175  | 
|
| 484 | 176  | 
println(A.accepts("a"))   // true 
 | 
177  | 
println(A.accepts("c"))   // false 
 | 
|
178  | 
println(A.accepts("aa"))  // false
 | 
|
| 
144
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
179  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
180  | 
val B = thompson(ALT("ab","ac"))
 | 
| 484 | 181  | 
|
182  | 
println(B.accepts("ab"))   // true 
 | 
|
183  | 
println(B.accepts("ac"))   // true 
 | 
|
184  | 
println(B.accepts("bb"))   // false 
 | 
|
185  | 
println(B.accepts("aa"))   // false 
 | 
|
| 
144
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
186  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
187  | 
val C = thompson(STAR("ab"))
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
188  | 
|
| 484 | 189  | 
println(C.accepts(""))       // true
 | 
190  | 
println(C.accepts("a"))      // false 
 | 
|
191  | 
println(C.accepts("ababab")) // true
 | 
|
192  | 
println(C.accepts("ab"))     // true
 | 
|
193  | 
println(C.accepts("ac"))     // false 
 | 
|
194  | 
println(C.accepts("bb"))     // false 
 | 
|
195  | 
println(C.accepts("aa"))     // false 
 | 
|
| 
144
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
196  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
197  | 
// regular expression matcher using Thompson's  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
198  | 
def matcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
199  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
200  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
201  | 
//optional  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
202  | 
def OPT(r: Rexp) = ALT(r, EMPTY)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
203  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
204  | 
//n-times  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
205  | 
def NTIMES(r: Rexp, n: Int) : Rexp = n match {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
206  | 
case 0 => EMPTY  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
207  | 
case 1 => r  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
208  | 
case n => SEQ(r, NTIMES(r, n - 1))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
209  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
210  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
211  | 
// evil regular exproession  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
212  | 
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
213  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
214  | 
// test harness for the matcher  | 
| 
145
 
920f675b4ed1
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
144 
diff
changeset
 | 
215  | 
for (i <- 0 to 100 by 5) {
 | 
| 
144
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
216  | 
println(i + ": " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
217  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
218  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
219  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
220  | 
// regular expression matching via search and backtracking  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
221  | 
def accepts2(nfa: NFA, s: String) : Boolean = {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
222  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
223  | 
  def search(q: State, s: List[Char]) : Boolean = s match {
 | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
224  | 
case Nil => nfa.fins contains (q)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
225  | 
case c::cs =>  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
226  | 
(nfa.delta(q, c) exists (search(_, cs))) ||  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
227  | 
(nfa.eps(q) exists (search(_, c::cs)))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
228  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
229  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
230  | 
search(nfa.start, s.toList)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
231  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
232  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
233  | 
def matcher2(r: Rexp, s: String) : Boolean = accepts2(thompson(r), s)  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
234  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
235  | 
// test harness for the backtracking matcher  | 
| 
145
 
920f675b4ed1
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
144 
diff
changeset
 | 
236  | 
for (i <- 0 to 20 by 1) {
 | 
| 
144
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
237  | 
println(i + ": " + "%.5f".format(time_needed(1, matcher2(EVIL(i), "a" * i))))  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
238  | 
}  | 
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
239  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
240  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
241  | 
|
| 
 
0cb61bed557d
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
242  |