author | Chengsong |
Mon, 09 May 2022 17:17:52 +0100 | |
changeset 511 | 47618d607bbf |
parent 283 | c4d821c6309d |
permissions | -rw-r--r-- |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
1 |
// DFAs and NFAs based on Scala's partial functions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
2 |
import scala.util.Try |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
3 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
4 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
5 |
// type abbreviation for partial functions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
6 |
type :=>[A, B] = PartialFunction[A, B] |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
7 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
8 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
9 |
// some states for test cases |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
10 |
abstract class State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
11 |
case object Q0 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
12 |
case object Q1 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
13 |
case object Q2 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
14 |
case object Q3 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
15 |
case object Q4 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
16 |
case object Q5 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
17 |
case object Q6 extends State |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
18 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
19 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
20 |
// class for DFAs |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
21 |
case class DFA[A, C](start: A, // starting state |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
22 |
delta: (A, C) :=> A, // transition |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
23 |
fins: A => Boolean) { // final states |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
24 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
25 |
// given a state and a "string", what is the next |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
26 |
// state, if there is any? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
27 |
def deltas(q: A, s: List[C]) : A = s match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
28 |
case Nil => q |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
29 |
case c::cs => deltas(delta(q, c), cs) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
30 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
31 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
32 |
// is a "string" accepted by an DFA? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
33 |
def accepts(s: List[C]) : Boolean = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
34 |
Try(fins(deltas(start, s))) getOrElse false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
35 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
36 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
37 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
38 |
// DFA 1 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
39 |
val dtrans1 : (State, Char) :=> State = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
40 |
{ case (Q0, 'a') => Q0 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
41 |
case (Q0, 'b') => Q1 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
42 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
43 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
44 |
val dfa1 = DFA(Q0, dtrans1, Set[State](Q1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
45 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
46 |
dfa1.accepts("aaab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
47 |
dfa1.accepts("aacb".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
48 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
49 |
// another DFA test |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
50 |
abstract class S |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
51 |
case object S0 extends S |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
52 |
case object S1 extends S |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
53 |
case object S2 extends S |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
54 |
case object Sink extends S |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
55 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
56 |
// transition function with a sink state |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
57 |
// S0 --a--> S1 --a--> S2 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
58 |
val sigma : (S, Char) :=> S = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
59 |
{ case (S0, 'a') => S1 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
60 |
case (S1, 'a') => S2 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
61 |
case _ => Sink |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
62 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
63 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
64 |
val dfa1a = DFA(S0, sigma, Set[S](S2)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
65 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
66 |
dfa1a.accepts("aa".toList) //true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
67 |
dfa1a.accepts("".toList) //false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
68 |
dfa1a.accepts("ab".toList) //false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
69 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
70 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
71 |
// class for NFAs |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
72 |
case class NFA[A, C](starts: Set[A], // starting states |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
73 |
delta: Set[(A, C) :=> A], // transitions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
74 |
fins: A => Boolean) { // final states |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
75 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
76 |
// given a state and a character, what is the set of next states? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
77 |
// if there is none => empty set |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
78 |
def next(q: A, c: C) : Set[A] = |
243 | 79 |
delta.flatMap(d => Try(d(q, c)).toOption) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
80 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
81 |
def nexts(qs: Set[A], c: C) : Set[A] = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
82 |
qs.flatMap(next(_, c)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
83 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
84 |
// given some states and a string, what is the set of next states? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
85 |
def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
86 |
case Nil => qs |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
87 |
case c::cs => deltas(nexts(qs, c), cs) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
88 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
89 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
90 |
// is a string accepted by an NFA? |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
91 |
def accepts(s: List[C]) : Boolean = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
92 |
deltas(starts, s).exists(fins) |
243 | 93 |
|
94 |
// depth-first search version of accept |
|
95 |
def search(q: A, s: List[C]) : Boolean = s match { |
|
96 |
case Nil => fins(q) |
|
97 |
case c::cs => delta.exists(d => Try(search(d(q, c), cs)) getOrElse false) |
|
98 |
} |
|
99 |
||
100 |
def accepts2(s: List[C]) : Boolean = |
|
101 |
starts.exists(search(_, s)) |
|
102 |
||
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
103 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
104 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
105 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
106 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
107 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
108 |
// NFA test cases |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
109 |
|
243 | 110 |
// 1: NFA for STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
111 |
val trans1 = Set[(State, Char) :=> State]( |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
112 |
{ case (Q0, 'a') => Q1 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
113 |
{ case (Q0, _) => Q0 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
114 |
{ case (Q1, _) => Q2 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
115 |
{ case (Q2, _) => Q3 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
116 |
{ case (Q3, _) => Q4 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
117 |
{ case (Q4, 'b') => Q5 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
118 |
{ case (Q5, 'c') => Q6 } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
119 |
) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
120 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
121 |
val nfa1 = NFA(Set[State](Q0), trans1, Set[State](Q6)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
122 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
123 |
nfa1.accepts("axaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
124 |
nfa1.accepts("aaaaxaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
125 |
nfa1.accepts("axaybzbd".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
126 |
|
243 | 127 |
nfa1.accepts2("axaybzbc".toList) // true |
128 |
nfa1.accepts2("aaaaxaybzbc".toList) // true |
|
129 |
nfa1.accepts2("axaybzbd".toList) // false |
|
130 |
||
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
131 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
132 |
// 2 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
133 |
val trans2 = Set[(State, Char) :=> State]( |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
134 |
{ case (Q0, 'a') => Q0 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
135 |
{ case (Q0, 'a') => Q1 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
136 |
{ case (Q0, 'b') => Q2 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
137 |
{ case (Q1, 'a') => Q1 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
138 |
{ case (Q2, 'b') => Q2 } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
139 |
) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
140 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
141 |
val nfa2 = NFA(Set[State](Q0), trans2, Set[State](Q2)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
142 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
143 |
nfa2.accepts("aa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
144 |
nfa2.accepts("aaaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
145 |
nfa2.accepts("aaaaab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
146 |
nfa2.accepts("aaaaabbb".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
147 |
nfa2.accepts("aaaaabbbaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
148 |
nfa2.accepts("ac".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
149 |
|
243 | 150 |
nfa2.accepts2("aa".toList) // false |
151 |
nfa2.accepts2("aaaaa".toList) // false |
|
152 |
nfa2.accepts2("aaaaab".toList) // true |
|
153 |
nfa2.accepts2("aaaaabbb".toList) // true |
|
154 |
nfa2.accepts2("aaaaabbbaaa".toList) // false |
|
155 |
nfa2.accepts2("ac".toList) // false |
|
156 |
||
157 |
||
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
158 |
// 3 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
159 |
val trans3 = Set[(State, Char) :=> State]( |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
160 |
{ case (Q0, _) => Q0 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
161 |
{ case (Q0, 'a') => Q1 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
162 |
{ case (Q0, 'b') => Q3 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
163 |
{ case (Q1, 'b') => Q2 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
164 |
{ case (Q2, 'c') => Q5 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
165 |
{ case (Q3, 'c') => Q4 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
166 |
{ case (Q4, 'd') => Q5 } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
167 |
) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
168 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
169 |
val nfa3 = NFA(Set[State](Q0), trans3, Set[State](Q5)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
170 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
171 |
nfa3.accepts("aaaaabc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
172 |
nfa3.accepts("aaaabcd".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
173 |
nfa3.accepts("aaaaab".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
174 |
nfa3.accepts("aaaabc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
175 |
nfa3.accepts("aaaaabbbaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
176 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
177 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
178 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
179 |
// subset, or powerset, construction |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
180 |
def powerset[A, C](nfa: NFA[A, C]) : DFA[Set[A], C] = { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
181 |
DFA(nfa.starts, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
182 |
{ case (qs, c) => nfa.nexts(qs, c) }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
183 |
_.exists(nfa.fins)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
184 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
185 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
186 |
val dfa2 = powerset(nfa1) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
187 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
188 |
dfa2.accepts("axaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
189 |
dfa2.accepts("aaaaxaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
190 |
dfa2.accepts("axaybzbd".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
191 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
192 |
val dfa3 = powerset(nfa2) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
193 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
194 |
dfa3.accepts("aa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
195 |
dfa3.accepts("aaaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
196 |
dfa3.accepts("aaaaab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
197 |
dfa3.accepts("aaaaabbb".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
198 |
dfa3.accepts("aaaaabbbaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
199 |
dfa3.accepts("ac".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
200 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
201 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
202 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
203 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
204 |
// epsilon NFA |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
205 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
206 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
207 |
// fixpoint construction |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
208 |
import scala.annotation.tailrec |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
209 |
@tailrec |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
210 |
def fixpT[A](f: A => A, x: A): A = { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
211 |
val fx = f(x) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
212 |
if (fx == x) x else fixpT(f, fx) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
213 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
214 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
215 |
|
243 | 216 |
case class eNFA[A, C](start: A, // starting state |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
217 |
delta: Set[(A, Option[C]) :=> A], // transition edges |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
218 |
fins: A => Boolean) { // final states |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
219 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
220 |
// epsilon transitions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
221 |
def enext(q: A) : Set[A] = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
222 |
delta.flatMap((d) => Try(d(q, None)).toOption) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
223 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
224 |
def enexts(qs: Set[A]) : Set[A] = |
243 | 225 |
qs | qs.flatMap(enext(_)) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
226 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
227 |
// epsilon closure |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
228 |
def ecl(qs: Set[A]) : Set[A] = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
229 |
fixpT(enexts, qs) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
230 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
231 |
// "normal" transition |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
232 |
def next(q: A, c: C) : Set[A] = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
233 |
delta.flatMap((d) => Try(d(q, Some(c))).toOption) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
234 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
235 |
def nexts(qs: Set[A], c: C) : Set[A] = |
243 | 236 |
ecl(ecl(qs).flatMap(next(_, c))) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
237 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
238 |
def deltas(qs: Set[A], s: List[C]) : Set[A] = s match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
239 |
case Nil => ecl(qs) |
243 | 240 |
case c::cs => deltas(nexts(qs, c), cs) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
241 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
242 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
243 |
def accepts(s: List[C]) : Boolean = |
243 | 244 |
deltas(Set(start), s.toList).exists(fins) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
245 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
246 |
|
243 | 247 |
// test cases for eNFAs |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
248 |
val etrans1 = Set[(State, Option[Char]) :=> State]( |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
249 |
{ case (Q0, Some('a')) => Q1 }, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
250 |
{ case (Q1, None) => Q0 } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
251 |
) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
252 |
|
243 | 253 |
val enfa1 = eNFA(Q0, etrans1, Set[State](Q1)) |
254 |
||
255 |
enfa1.accepts("a".toList) // true |
|
256 |
enfa1.accepts("".toList) // false |
|
257 |
enfa1.accepts("aaaaa".toList) // true |
|
258 |
enfa1.accepts("aaaaab".toList) // false |
|
259 |
enfa1.accepts("aaaaabbb".toList) // false |
|
260 |
enfa1.accepts("aaaaabbbaaa".toList) // false |
|
261 |
enfa1.accepts("ac".toList) // false |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
262 |
|
243 | 263 |
// example from handouts |
264 |
val etrans2 = Set[(State, Option[Char]) :=> State]( |
|
265 |
{ case (Q0, Some('a')) => Q0 }, |
|
266 |
{ case (Q0, None) => Q1 }, |
|
267 |
{ case (Q0, None) => Q2 }, |
|
268 |
{ case (Q1, Some('a')) => Q1 }, |
|
269 |
{ case (Q2, Some('b')) => Q2 }, |
|
270 |
{ case (Q1, None) => Q0 } |
|
271 |
) |
|
272 |
||
273 |
val enfa2 = eNFA(Q0, etrans2, Set[State](Q2)) |
|
274 |
||
275 |
enfa2.accepts("a".toList) // true |
|
276 |
enfa2.accepts("".toList) // true |
|
277 |
enfa2.accepts("aaaaa".toList) // true |
|
278 |
enfa2.accepts("aaaaab".toList) // true |
|
279 |
enfa2.accepts("aaaaabbb".toList) // true |
|
280 |
enfa2.accepts("aaaaabbbaaa".toList) // false |
|
281 |
enfa2.accepts("ac".toList) // false |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
282 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
283 |
|
243 | 284 |
// states for Thompson construction |
285 |
case class TState(i: Int) extends State |
|
286 |
||
287 |
object TState { |
|
288 |
var counter = 0 |
|
289 |
||
290 |
def apply() : TState = { |
|
291 |
counter += 1; |
|
292 |
new TState(counter - 1) |
|
293 |
} |
|
294 |
} |
|
295 |
||
296 |
// eNFA that does not accept any string |
|
297 |
def eNFA_ZERO(): eNFA[TState, Char] = { |
|
298 |
val Q = TState() |
|
299 |
eNFA(Q, Set(), Set()) |
|
300 |
} |
|
301 |
||
302 |
// eNFA that accepts the empty string |
|
303 |
def eNFA_ONE() : eNFA[TState, Char] = { |
|
304 |
val Q = TState() |
|
305 |
eNFA(Q, Set(), Set(Q)) |
|
306 |
} |
|
307 |
||
308 |
// eNFA that accepts the string "c" |
|
309 |
def eNFA_CHAR(c: Char) : eNFA[TState, Char] = { |
|
310 |
val Q1 = TState() |
|
311 |
val Q2 = TState() |
|
312 |
eNFA(Q1, |
|
313 |
Set({ case (Q1, Some(d)) if (c == d) => Q2 }), |
|
314 |
Set(Q2)) |
|
315 |
} |
|
316 |
||
317 |
// alternative of two eNFAs |
|
318 |
def eNFA_ALT(enfa1: eNFA[TState, Char], enfa2: eNFA[TState, Char]) : eNFA[TState, Char] = { |
|
319 |
val Q = TState() |
|
320 |
eNFA(Q, |
|
321 |
enfa1.delta | enfa2.delta | |
|
322 |
Set({ case (Q, None) => enfa1.start }, |
|
323 |
{ case (Q, None) => enfa2.start }), |
|
324 |
q => enfa1.fins(q) || enfa2.fins(q)) |
|
325 |
} |
|
326 |
||
327 |
// sequence of two eNFAs |
|
328 |
def eNFA_SEQ(enfa1: eNFA[TState, Char], enfa2: eNFA[TState, Char]) : eNFA[TState, Char] = { |
|
329 |
eNFA(enfa1.start, |
|
330 |
enfa1.delta | enfa2.delta | |
|
331 |
Set({ case (q, None) if enfa1.fins(q) => enfa2.start }), |
|
332 |
enfa2.fins) |
|
333 |
} |
|
334 |
||
335 |
// star of an eNFA |
|
336 |
def eNFA_STAR(enfa: eNFA[TState, Char]) : eNFA[TState, Char] = { |
|
337 |
val Q = TState() |
|
338 |
eNFA(Q, |
|
339 |
enfa.delta | |
|
340 |
Set({ case (q, None) if enfa.fins(q) => Q }, |
|
341 |
{ case (Q, None) => enfa.start }), |
|
342 |
Set(Q)) |
|
343 |
} |
|
344 |
||
345 |
/* |
|
346 |
def tofunset[A, C](d: (A, Option[C]) :=> Set[A])(q: A, c: C) : Set[(A, C) :=> A] = { |
|
347 |
d((q, Some(c))).map ((qs: A) => { case (qi, ci) if (qi == q && ci == c) => qs } : (A, C) :=> A) |
|
348 |
} |
|
349 |
||
350 |
||
351 |
def eNFA2NFA[A, C](starts: Set[A], // starting state |
|
352 |
delta: Set[(A, Option[C]) :=> A], // transition edges |
|
353 |
fins: A => Boolean) { // final states |
|
354 |
||
355 |
// epsilon transitions |
|
356 |
def enext(q: A) : Set[A] = |
|
357 |
delta.flatMap(d => Try(d(q, None)).toOption) |
|
358 |
||
359 |
def enexts(qs: Set[A]) : Set[A] = |
|
360 |
qs | qs.flatMap(enext(_)) |
|
361 |
||
362 |
// epsilon closure |
|
363 |
def ecl(qs: Set[A]) : Set[A] = |
|
364 |
fixpT(enexts, qs) |
|
365 |
||
366 |
||
367 |
// "normal" transition |
|
368 |
def next(q: A, c: C) : Set[A] = |
|
369 |
delta.flatMap(d => Try(d(q, Some(c))).toOption) |
|
370 |
||
371 |
def nexts(qs: Set[A], c: C) : Set[A] = |
|
372 |
ecl(ecl(qs).flatMap(next(_, c))) |
|
373 |
||
374 |
||
375 |
def nfa_delta : Set[(A, C) :=> A] = delta.flatMap(d => tofunset(d)) |
|
376 |
||
377 |
def nfa_starts = ecl(starts) |
|
378 |
||
379 |
def nfa_fins = (q: A) => ecl(Set[A](q)) exists fins |
|
380 |
||
381 |
NFA(nfa_starts, nfa_delta, nfa_fins) |
|
382 |
} |
|
383 |
||
384 |
*/ |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
385 |
|
283 | 386 |
// Regular expressions for derivative automata |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
387 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
388 |
abstract class Rexp |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
389 |
case object ZERO extends Rexp |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
390 |
case object ONE extends Rexp |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
391 |
case class CHAR(c: Char) extends Rexp |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
392 |
case object ALL extends Rexp |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
393 |
case object ALLPlus extends Rexp |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
394 |
case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
395 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
396 |
case class STAR(r: Rexp) extends Rexp |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
397 |
case class NTIMES(r: Rexp, n: Int) extends Rexp |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
398 |
case class UPNTIMES(r: Rexp, n: Int) extends Rexp |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
399 |
case class NOT(r: Rexp) extends Rexp |
241 | 400 |
case class AND(r1: Rexp, r2: Rexp) extends Rexp |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
401 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
402 |
def get_chars(r: Rexp) : Set[Char] = r match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
403 |
case ZERO => Set() |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
404 |
case ONE => Set() |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
405 |
case CHAR(c) => Set(c) |
243 | 406 |
case ALT(r1, r2) => get_chars(r1) | get_chars(r2) |
407 |
case SEQ(r1, r2) => get_chars(r1) | get_chars(r2) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
408 |
case STAR(r) => get_chars(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
409 |
case NTIMES(r, _) => get_chars(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
410 |
case UPNTIMES(r, _) => get_chars(r) |
243 | 411 |
case ALL => ('a' to 'z').toSet | Set('*','/','\\') |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
412 |
case NOT(r) => get_chars(r) |
243 | 413 |
case AND(r1, r2) => get_chars(r1) | get_chars(r2) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
414 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
415 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
416 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
417 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
418 |
import scala.language.implicitConversions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
419 |
import scala.language.reflectiveCalls |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
420 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
421 |
def charlist2rexp(s: List[Char]): Rexp = s match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
422 |
case Nil => ONE |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
423 |
case c::Nil => CHAR(c) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
424 |
case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
425 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
426 |
implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
427 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
428 |
implicit def RexpOps (r: Rexp) = new { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
429 |
def | (s: Rexp) = ALT(r, s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
430 |
def % = STAR(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
431 |
def ~ (s: Rexp) = SEQ(r, s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
432 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
433 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
434 |
implicit def stringOps (s: String) = new { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
435 |
def | (r: Rexp) = ALT(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
436 |
def | (r: String) = ALT(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
437 |
def % = STAR(s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
438 |
def ~ (r: Rexp) = SEQ(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
439 |
def ~ (r: String) = SEQ(s, r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
440 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
441 |
|
243 | 442 |
// thompson construction only for basic regular expressions |
443 |
def thompson (r: Rexp) : eNFA[TState, Char] = r match { |
|
444 |
case ZERO => eNFA_ZERO() |
|
445 |
case ONE => eNFA_ONE() |
|
446 |
case CHAR(c) => eNFA_CHAR(c) |
|
447 |
case ALT(r1, r2) => eNFA_ALT(thompson(r1), thompson(r2)) |
|
448 |
case SEQ(r1, r2) => eNFA_SEQ(thompson(r1), thompson(r2)) |
|
449 |
case STAR(r1) => eNFA_STAR(thompson(r1)) |
|
450 |
} |
|
451 |
||
452 |
// regular expression matcher using Thompson's |
|
453 |
def tmatcher(r: Rexp, s: String) : Boolean = thompson(r).accepts(s.toList) |
|
454 |
||
455 |
||
456 |
// test cases for thompson construction |
|
457 |
tmatcher(ZERO, "") // false |
|
458 |
tmatcher(ZERO, "a") // false |
|
459 |
||
460 |
tmatcher(ONE, "") // true |
|
461 |
tmatcher(ONE, "a") // false |
|
462 |
||
463 |
tmatcher(CHAR('a'), "") // false |
|
464 |
tmatcher(CHAR('a'), "a") // true |
|
465 |
tmatcher(CHAR('a'), "b") // false |
|
466 |
||
467 |
tmatcher("a" | "b", "") // false |
|
468 |
tmatcher("a" | "b", "a") // true |
|
469 |
tmatcher("a" | "b", "b") // true |
|
470 |
tmatcher("a" | "b", "c") // false |
|
471 |
tmatcher("a" | "b", "ab") // false |
|
472 |
||
473 |
tmatcher("a" ~ "b", "") // false |
|
474 |
tmatcher("a" ~ "b", "a") // false |
|
475 |
tmatcher("a" ~ "b", "b") // false |
|
476 |
tmatcher("a" ~ "b", "c") // false |
|
477 |
tmatcher("a" ~ "b", "ab") // true |
|
478 |
tmatcher("a" ~ "b", "aba") // false |
|
479 |
||
480 |
tmatcher(EVIL2, "aaaaaab") // true |
|
481 |
tmatcher(EVIL2, "aaaaaa") // false |
|
482 |
tmatcher(EVIL2, "a" * 100) // false |
|
483 |
||
484 |
||
485 |
//optional |
|
486 |
def OPT(r: Rexp) = ALT(r, ONE) |
|
487 |
||
488 |
//n-times |
|
489 |
def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
|
490 |
case 0 => ONE |
|
491 |
case 1 => r |
|
492 |
case n => SEQ(r, NTIMES(r, n - 1)) |
|
493 |
} |
|
494 |
||
495 |
// evil regular exproession |
|
496 |
def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) |
|
497 |
||
498 |
||
499 |
val EVIL2 = STAR(STAR("a")) ~ "b" |
|
500 |
||
501 |
// helper function for recording time |
|
502 |
def time_needed[T](i: Int, code: => T) = { |
|
503 |
val start = System.nanoTime() |
|
504 |
for (j <- 1 to i) code |
|
505 |
val end = System.nanoTime() |
|
506 |
(end - start)/(i * 1.0e9) |
|
507 |
} |
|
508 |
||
509 |
||
510 |
||
511 |
// test harness for the matcher |
|
512 |
for (i <- 0 to 35 by 5) { |
|
513 |
println(i + ": " + "%.5f".format(time_needed(1, tmatcher(EVIL(i), "a" * i)))) |
|
514 |
} |
|
515 |
||
516 |
||
517 |
||
518 |
// normalisation of regular expressions |
|
519 |
// for derivative automata |
|
520 |
||
521 |
case class ALTs(rs: Set[Rexp]) extends Rexp |
|
522 |
case class ANDs(rs: Set[Rexp]) extends Rexp |
|
523 |
case class SEQs(rs: List[Rexp]) extends Rexp |
|
524 |
||
525 |
def normalise(r: Rexp) : Rexp = r match { |
|
526 |
case ALT(r1, r2) => (normalise(r1), normalise(r2)) match { |
|
527 |
case (ALTs(rs1), ALTs(rs2)) => ALTs(rs1 | rs2) |
|
528 |
case (r1s, ALTs(rs2)) => ALTs(rs2 + r1s) |
|
529 |
case (ALTs(rs1), r2s) => ALTs(rs1 + r2s) |
|
530 |
case (r1s, r2s) => ALTs(Set(r1s, r2s)) |
|
531 |
} |
|
532 |
case AND(r1, r2) => (normalise(r1), normalise(r2)) match { |
|
533 |
case (ANDs(rs1), ANDs(rs2)) => ANDs(rs1 | rs2) |
|
534 |
case (r1s, ANDs(rs2)) => ANDs(rs2 + r1s) |
|
535 |
case (ANDs(rs1), r2s) => ANDs(rs1 + r2s) |
|
536 |
case (r1s, r2s) => ANDs(Set(r1s, r2s)) |
|
537 |
} |
|
538 |
case SEQ(r1, r2) => (normalise(r1), normalise(r2)) match { |
|
539 |
case (SEQs(rs1), SEQs(rs2)) => SEQs(rs1 ++ rs2) |
|
540 |
case (r1s, SEQs(rs2)) => SEQs(r1s :: rs2) |
|
541 |
case (SEQs(rs1), r2s) => SEQs(rs1 ++ List(r2s)) |
|
542 |
case (r1s, r2s) => SEQs(List(r1s, r2s)) |
|
543 |
} |
|
544 |
case STAR(r) => STAR(normalise(r)) |
|
545 |
case NTIMES(r, n) => NTIMES(normalise(r), n) |
|
546 |
case UPNTIMES(r, n) => UPNTIMES(normalise(r), n) |
|
547 |
case NOT(r) => NOT(normalise(r)) |
|
548 |
case r => r |
|
549 |
} |
|
550 |
||
551 |
||
552 |
// simplification of regular expressions |
|
553 |
||
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
554 |
def simp(r: Rexp) : Rexp = r match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
555 |
case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
556 |
case (ZERO, r2s) => r2s |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
557 |
case (r1s, ZERO) => r1s |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
558 |
case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
559 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
560 |
case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
561 |
case (ZERO, _) => ZERO |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
562 |
case (_, ZERO) => ZERO |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
563 |
case (ONE, r2s) => r2s |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
564 |
case (r1s, ONE) => r1s |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
565 |
case (r1s, r2s) => SEQ(r1s, r2s) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
566 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
567 |
case NTIMES(r, n) => if (n == 0) ONE else NTIMES(simp(r), n) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
568 |
case UPNTIMES(r, n) => if (n == 0) ONE else UPNTIMES(simp(r), n) |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
569 |
case NOT(r) => NOT(simp(r)) |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
570 |
case AND(r1, r2) => (simp(r1), simp(r2)) match { |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
571 |
case (ALL, r2s) => r2s |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
572 |
case (r1s, ALL) => r1s |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
573 |
case (r1s, r2s) => if (r1s == r2s) r1s else AND(r1s, r2s) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
574 |
} |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
575 |
case r => r |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
576 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
577 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
578 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
579 |
// nullable function: tests whether the regular |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
580 |
// expression can recognise the empty string |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
581 |
def nullable(r: Rexp) : Boolean = r match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
582 |
case ZERO => false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
583 |
case ONE => true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
584 |
case CHAR(_) => false |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
585 |
case ALL => true |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
586 |
case ALLPlus => false |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
587 |
case ALT(r1, r2) => nullable(r1) || nullable(r2) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
588 |
case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
589 |
case STAR(_) => true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
590 |
case NTIMES(r, i) => if (i == 0) true else nullable(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
591 |
case UPNTIMES(r: Rexp, n: Int) => true |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
592 |
case NOT(r) => !nullable(r) |
241 | 593 |
case AND(r1, r2) => nullable(r1) && nullable(r2) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
594 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
595 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
596 |
// derivative of a regular expression w.r.t. a character |
243 | 597 |
// they are not finite even for simple regular expressions |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
598 |
def der(c: Char, r: Rexp) : Rexp = r match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
599 |
case ZERO => ZERO |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
600 |
case ONE => ZERO |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
601 |
case CHAR(d) => if (c == d) ONE else ZERO |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
602 |
case ALL => ALL |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
603 |
case ALLPlus => ALL |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
604 |
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
605 |
case SEQ(r1, r2) => |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
606 |
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
607 |
else SEQ(der(c, r1), r2) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
608 |
case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
609 |
case NTIMES(r1, i) => |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
610 |
if (i == 0) ZERO else |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
611 |
if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
612 |
else SEQ(der(c, r1), NTIMES(r1, i - 1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
613 |
case UPNTIMES(r1, i) => |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
614 |
if (i == 0) ZERO |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
615 |
else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
616 |
case NOT(r) => NOT(der(c, r)) |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
617 |
case AND(r1, r2) => AND(der(c, r1), der(c, r2)) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
618 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
619 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
620 |
|
243 | 621 |
// derivative based matcher |
622 |
def matcher(r: Rexp, s: List[Char]): Boolean = s match { |
|
623 |
case Nil => nullable(r) |
|
624 |
case c::cs => matcher(der(c, r), cs) |
|
625 |
} |
|
626 |
||
627 |
||
628 |
||
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
629 |
// partial derivative of a regular expression w.r.t. a character |
241 | 630 |
// does not work for NOT and AND ... see below |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
631 |
def pder(c: Char, r: Rexp) : Set[Rexp] = r match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
632 |
case ZERO => Set() |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
633 |
case ONE => Set() |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
634 |
case CHAR(d) => if (c == d) Set(ONE) else Set() |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
635 |
case ALL => Set(ALL) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
636 |
case ALLPlus => Set(ALL) |
243 | 637 |
case ALT(r1, r2) => pder(c, r1) | pder(c, r2) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
638 |
case SEQ(r1, r2) => |
243 | 639 |
(for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) | |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
640 |
(if (nullable(r1)) pder(c, r2) else Set()) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
641 |
case STAR(r1) => |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
642 |
for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
643 |
case NTIMES(r1, i) => |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
644 |
if (i == 0) Set() else |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
645 |
if (nullable(r1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
646 |
for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
647 |
else |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
648 |
for (pr1 <- pder(c, r1)) yield SEQ(pr1, NTIMES(r1, i - 1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
649 |
case UPNTIMES(r1, i) => |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
650 |
if (i == 0) Set() |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
651 |
else |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
652 |
for (pr1 <- pder(c, r1)) yield SEQ(pr1, UPNTIMES(r1, i - 1)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
653 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
654 |
|
243 | 655 |
// partial derivative matcher (not for NOT and AND) |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
656 |
def pmatcher(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
657 |
case Nil => rs.exists(nullable(_)) |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
658 |
case c::cs => pmatcher(rs.flatMap(pder(c, _)), cs) |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
659 |
} |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
660 |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
661 |
|
243 | 662 |
// quick-and-dirty translation of a pder-automaton |
663 |
// does not work for NOT and AND |
|
664 |
||
665 |
val r = STAR(ALL) ~ "a" ~ NTIMES(ALL, 3) ~ "bc" |
|
666 |
val pder_nfa = NFA[Set[Rexp], Char](Set(Set(r)), |
|
667 |
Set( { case (rs, c) => rs.flatMap(pder(c, _))} ), |
|
668 |
_.exists(nullable)) |
|
669 |
||
670 |
||
671 |
||
672 |
pder_nfa.accepts("axaybzbc".toList) // true |
|
673 |
pder_nfa.accepts("aaaaxaybzbc".toList) // true |
|
674 |
pder_nfa.accepts("axaybzbd".toList) // false |
|
675 |
||
676 |
||
677 |
||
678 |
||
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
679 |
// partial derivatives including NOT and AND according to |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
680 |
// PhD-thesis: Manipulation of Extended Regular Expressions |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
681 |
// with Derivatives; these partial derivatives are also not |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
682 |
// finite...for example NOT((a*)a) |
241 | 683 |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
684 |
def seq_compose(rs: Set[Rexp], r2: Rexp) : Set[Rexp] = |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
685 |
for (r <- rs) yield (SEQ(r, r2) : Rexp) |
241 | 686 |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
687 |
def not_compose(rs: Set[Rexp]) : Set[Rexp] = |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
688 |
Set(rs.fold(ALL)((r1, r2) => AND(r1, NOT(r2)))) |
241 | 689 |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
690 |
def and_compose(rs1: Set[Rexp], rs2: Set[Rexp]) : Set[Rexp] = |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
691 |
for (r1 <- rs1; r2 <- rs2) yield AND(r1, r2) |
241 | 692 |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
693 |
def pder2(c: Char, r: Rexp) : Set[Rexp] = r match { |
241 | 694 |
case ZERO => Set() |
695 |
case ONE => Set() |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
696 |
case ALL => Set(ALL) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
697 |
case ALLPlus => Set(ALL) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
698 |
case CHAR(d) => if (c == d) Set(ONE) else Set() |
243 | 699 |
case ALT(r1, r2) => pder2(c, r1) | pder2(c, r2) |
241 | 700 |
case SEQ(r1, r2) => { |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
701 |
val prs1 = seq_compose(pder2(c, r1), r2) |
243 | 702 |
if (nullable(r1)) (prs1 | pder2(c, r2)) else prs1 |
241 | 703 |
} |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
704 |
case STAR(r1) => seq_compose(pder2(c, r1), STAR(r1)) |
241 | 705 |
case AND(r1, r2) => and_compose(pder2(c, r1), pder2(c, r2)) |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
706 |
case NOT(r1) => nder2(c, r1) |
241 | 707 |
} |
708 |
||
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
709 |
def nder2(c: Char, r: Rexp) : Set[Rexp] = r match { |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
710 |
case ZERO => Set(ALL) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
711 |
case ONE => Set(ALL) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
712 |
case ALL => Set() |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
713 |
case ALLPlus => Set() |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
714 |
case CHAR(d) => if (c == d) Set(ALLPlus) else Set(ALL) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
715 |
case ALT(r1, r2) => and_compose(nder2(c, r1), nder2(c, r2)) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
716 |
case SEQ(r1, r2) => if (nullable(r1)) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
717 |
and_compose(not_compose(seq_compose(pder2(c, r1), r2)), nder2(c, r2)) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
718 |
else not_compose(seq_compose(pder2(c, r1), r2)) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
719 |
case STAR(r1) => not_compose(pder2(c, STAR(r1))) |
243 | 720 |
case AND(r1, r2) => nder2(c, r1) | nder2(c, r2) |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
721 |
case NOT(r1) => pder2(c, r1) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
722 |
} |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
723 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
724 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
725 |
def pmatcher2(rs: Set[Rexp], s: List[Char]): Boolean = s match { |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
726 |
case Nil => rs.exists(nullable(_)) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
727 |
case c::cs => pmatcher2(rs.flatMap(pder2(c, _)), cs) |
241 | 728 |
} |
729 |
||
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
730 |
// pder2/nder2 example |
241 | 731 |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
732 |
val r_ex = AND("aa" | "a", AND(STAR("a"), NOT(STAR("a") ~ "a"))) |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
733 |
nder2('a', r_ex).map(simp(_)).mkString("\n") |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
734 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
735 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
736 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
737 |
|
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
738 |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
739 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
740 |
// Derivative and Partial Derivative Automaton construction |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
741 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
742 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
743 |
type DState = Rexp // state type of the derivative automaton |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
744 |
type DStates = Set[Rexp] |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
745 |
type Trans = (DState, Char) :=> DState // transition functions of the der/pder auto |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
746 |
type MTrans = Map[(DState, Char), DState] // transition Maps |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
747 |
type STrans = Set[MTrans] // set of transition Maps |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
748 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
749 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
750 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
751 |
// Brzozoswki Derivative automaton construction ... simple |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
752 |
// version, might not terminate |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
753 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
754 |
def goto(sigma: Set[Char], delta: MTrans, qs: DStates, q: DState, c: Char) : (DStates, MTrans) = { |
243 | 755 |
val qder = simp(der(c, q)) |
756 |
qs.find(normalise(_) == normalise(qder)) match { |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
757 |
case Some(qexists) => (qs, delta + ((q, c) -> qexists)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
758 |
case None => explore(sigma, delta + ((q, c) -> qder), qs + qder, qder) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
759 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
760 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
761 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
762 |
def explore(sigma: Set[Char], delta: MTrans, qs: DStates, q: DState) : (DStates, MTrans) = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
763 |
sigma.foldLeft((qs, delta)) { case ((qs, delta), c) => goto(sigma, delta, qs, q, c) } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
764 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
765 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
766 |
def mkDFA(r: Rexp) = { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
767 |
val sigma = get_chars(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
768 |
val (qs, delta) = explore(sigma, Map(), Set[Rexp](r), r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
769 |
val fins = qs.filter(nullable(_)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
770 |
val deltaf : (Rexp, Char) :=> Rexp = { case (q, c) => delta(q, c) } |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
771 |
println(s"DFA size: ${qs.size}") |
243 | 772 |
//println(s"""DFA states\n${qs.mkString("\n")}""") |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
773 |
DFA(r, deltaf, fins) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
774 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
775 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
776 |
val re = "ab" | "ac" |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
777 |
val d1 = mkDFA(re) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
778 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
779 |
d1.accepts("ab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
780 |
d1.accepts("ac".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
781 |
d1.accepts("aa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
782 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
783 |
val d2 = mkDFA(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
784 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
785 |
d2.accepts("axaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
786 |
d2.accepts("aaaaxaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
787 |
d2.accepts("axaybzbd".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
788 |
|
243 | 789 |
|
790 |
//for (n <- (1 to 10).toList) |
|
791 |
// mkDFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc") |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
792 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
793 |
|
243 | 794 |
// this is an example where mkDFA without normalisation |
795 |
// does not terminate |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
796 |
val big_aux = STAR("a" | "b") |
243 | 797 |
val big = SEQ(big_aux, SEQ("a", SEQ("b", big_aux))) |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
798 |
|
243 | 799 |
mkDFA(big) // does not terminate without normalisation |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
800 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
801 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
802 |
|
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
803 |
// Antimirov Partial derivative automata construction ... definitely |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
804 |
// terminates but does not work for extensions of NOT and AND |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
805 |
// |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
806 |
// for this we have to use the extended partial derivatives |
243 | 807 |
// pder2/nder2...but termination is also not guaranteed |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
808 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
809 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
810 |
// to transform (concrete) Maps into functions |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
811 |
def toFun(m: MTrans) : Trans = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
812 |
{ case (q, c) => m(q, c) } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
813 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
814 |
def pgoto(sigma: Set[Char], delta: STrans, qs: DStates, q: DState, c: Char) : (DStates, STrans) = { |
243 | 815 |
val qders = pder(c, q).map(simp(_)) // set of simplified partial derivatives |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
816 |
qders.foldLeft((qs, delta)) { case ((qs, delta), qnew) => padd(sigma, delta, qs, q, qnew, c) } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
817 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
818 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
819 |
def padd(sigma: Set[Char], delta: STrans, qs: DStates, |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
820 |
q: DState, qnew: DState, c: Char) : (DStates, STrans) = { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
821 |
qs.find(_ == qnew) match { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
822 |
case Some(qexists) => (qs, delta + Map((q, c) -> qexists)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
823 |
case None => pexplore(sigma, delta + Map((q, c) -> qnew), qs + qnew, qnew) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
824 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
825 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
826 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
827 |
def pexplore(sigma: Set[Char], delta: STrans, qs: DStates, q: DState) : (DStates, STrans) = |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
828 |
sigma.foldLeft((qs, delta)) { case ((qs, delta), c) => pgoto(sigma, delta, qs, q, c) } |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
829 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
830 |
def mkNFA(r: Rexp) : NFA[Rexp, Char]= { |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
831 |
val sigma = get_chars(r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
832 |
val (qs, delta) = pexplore(sigma, Set(), Set(r), r) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
833 |
val fins = qs.filter(nullable(_)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
834 |
val deltaf = delta.map(toFun(_)) |
243 | 835 |
//println(s"NFA size: ${qs.size}") |
836 |
//println(s"""NFA states\n${qs.mkString("\n")}""") |
|
837 |
//println(s"""NFA transitions\n${delta.mkString("\n")} """) |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
838 |
NFA(Set(r), deltaf, fins) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
839 |
} |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
840 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
841 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
842 |
// simple example from Scott's paper |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
843 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
844 |
val n1 = mkNFA(re) // size = 4 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
845 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
846 |
n1.accepts("ab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
847 |
n1.accepts("ac".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
848 |
n1.accepts("aa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
849 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
850 |
// example from: Partial Derivative and Position Bisimilarity |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
851 |
// Automata, Eva Maia, Nelma Moreira, Rogerio Reis |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
852 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
853 |
val r_test = STAR(("a" ~ STAR("b")) | "b") ~ "a" |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
854 |
val t1 = pder('a', r_test).map(simp(_)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
855 |
val t2 = pder('b', r_test).map(simp(_)) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
856 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
857 |
mkNFA(r_test) // size = 3 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
858 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
859 |
|
243 | 860 |
// helper function for recording time |
861 |
def time_needed[T](i: Int, code: => T) = { |
|
862 |
val start = System.nanoTime() |
|
863 |
for (j <- 1 to i) code |
|
864 |
val end = System.nanoTime() |
|
865 |
(end - start)/(i * 1.0e9) |
|
866 |
} |
|
867 |
||
868 |
// simple example involving double star (a*)* b |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
869 |
// with depth-first search causes catastrophic backtracing |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
870 |
|
243 | 871 |
val n2 = mkNFA(EVIL2) // size 3 |
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
872 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
873 |
n2.accepts("aaaaaab".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
874 |
n2.accepts("aaaaaa".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
875 |
n2.accepts(("a" * 100).toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
876 |
|
243 | 877 |
n2.accepts2("aaaaaab".toList) // true |
878 |
n2.accepts2("aaaaaa".toList) // false |
|
879 |
n2.accepts2(("a" * 100).toList) // false |
|
880 |
||
881 |
time_needed(2, n2.accepts("aaaaaab".toList)) |
|
882 |
time_needed(2, n2.accepts("aaaaaa".toList)) |
|
883 |
time_needed(2, n2.accepts(("a" * 2000).toList)) |
|
884 |
||
885 |
time_needed(2, n2.accepts2("aaaaaab".toList)) |
|
886 |
time_needed(2, n2.accepts2("aaaaaa".toList)) |
|
887 |
time_needed(2, n2.accepts2(("a" * 2000).toList)) |
|
888 |
||
889 |
||
890 |
// other evil regular expression |
|
891 |
||
892 |
for (i <- 0 to 100 by 5) { |
|
893 |
println(i + ": " + "%.5f".format(time_needed(1, mkNFA(EVIL(i)).accepts(("a" * i).toList)))) |
|
894 |
} |
|
895 |
||
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
896 |
|
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
897 |
// example involving not |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
898 |
|
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
899 |
val rnot = "/*" ~ (NOT ((ALL.%) ~ "*/" ~ (ALL.%))) ~ "*/" |
243 | 900 |
|
901 |
||
902 |
val dfa_not = mkDFA(rnot) // size 10 |
|
903 |
||
904 |
dfa_not.accepts("/**/".toList) // true |
|
905 |
dfa_not.accepts("/*aaabaa*/".toList) // true |
|
906 |
dfa_not.accepts("/*/**/".toList) // true |
|
907 |
dfa_not.accepts("/*aaa*/aa*/".toList) // false |
|
908 |
dfa_not.accepts("/aaa*/aa*/".toList) // false |
|
909 |
||
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
910 |
|
243 | 911 |
/* nfa construction according to pder does not work for NOT and AND; |
912 |
* nfa construction according to pder2/nder2 does not necesarily terminate |
|
913 |
||
914 |
val nfa_not = mkNFA(rnot) // does not termninate |
|
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
915 |
|
243 | 916 |
nfa_not.accepts("/**/".toList) // true |
917 |
nfa_not.accepts("/*aaabaa*/".toList) // true |
|
918 |
nfa_not.accepts("/*/**/".toList) // true |
|
919 |
nfa_not.accepts("/*aaa*/aa*/".toList) // false ???? |
|
920 |
nfa_not.accepts("/aaa*/aa*/".toList) // false |
|
921 |
*/ |
|
922 |
||
923 |
// derivative matcher |
|
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
924 |
matcher(rnot, "/**/".toList) // true |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
925 |
matcher(rnot, "/*aaabaa*/".toList) // true |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
926 |
matcher(rnot, "/*/**/".toList) // true |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
927 |
matcher(rnot, "/*aaa*/aa*/".toList) // false |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
928 |
matcher(rnot, "/aaa*/aa*/".toList) // false |
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
929 |
|
243 | 930 |
// pder2/nder2 partial derivative matcher |
242
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
931 |
pmatcher2(Set(rnot), "/**/".toList) // true |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
932 |
pmatcher2(Set(rnot), "/*aaabaa*/".toList) // true |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
933 |
pmatcher2(Set(rnot), "/*/**/".toList) // true |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
934 |
pmatcher2(Set(rnot), "/*aaa*/aa*/".toList) // false |
dcfc9b23b263
added more literature about extended partial derivative automata
Christian Urban <urbanc@in.tum.de>
parents:
241
diff
changeset
|
935 |
pmatcher2(Set(rnot), "/aaa*/aa*/".toList) // false |
241 | 936 |
|
240
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
937 |
// example from automata paper with counters and backreferences |
820228ac1d0f
updated for extended partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
239
diff
changeset
|
938 |
|
239
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
939 |
val r1 = STAR(ALL) ~ "a" ~ NTIMES(ALL, 1) ~ "bc" |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
940 |
mkNFA(r1) // size = 5 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
941 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
942 |
val n3 = mkNFA(r) // size = 7 |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
943 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
944 |
n3.accepts("axaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
945 |
n3.accepts("aaaaxaybzbc".toList) // true |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
946 |
n3.accepts("axaybzbd".toList) // false |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
947 |
|
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
948 |
for (n <- (1 to 100).toList) |
e59cec211f4f
added automata implementation
Christian Urban <urbanc@in.tum.de>
parents:
diff
changeset
|
949 |
mkNFA(STAR(ALL) ~ "a" ~ NTIMES(ALL, n) ~ "bc") |