41 fun op -- (r1, r2) = SEQ(r1, r2) |
41 fun op -- (r1, r2) = SEQ(r1, r2) |
42 |
42 |
43 fun op $ (x, r) = RECD(x, r) |
43 fun op $ (x, r) = RECD(x, r) |
44 |
44 |
45 fun alts rs = case rs of |
45 fun alts rs = case rs of |
46 [] => NULL |
46 [] => ZERO |
47 | [r] => r |
47 | [r] => r |
48 | r::rs => List.foldl (op ++) r rs |
48 | r::rs => List.foldl (op ++) r rs |
49 |
49 |
50 |
50 |
51 (* size of a regular expressions - for testing purposes *) |
51 (* size of a regular expressions - for testing purposes *) |
52 fun size r = case r of |
52 fun size r = case r of |
53 NULL => 1 |
53 ZERO => 1 |
54 | EMPTY => 1 |
54 | ONE => 1 |
55 | CHAR(_) => 1 |
55 | CHAR(_) => 1 |
56 | ALT(r1, r2) => 1 + (size r1) + (size r2) |
56 | ALT(r1, r2) => 1 + (size r1) + (size r2) |
57 | SEQ(r1, r2) => 1 + (size r1) + (size r2) |
57 | SEQ(r1, r2) => 1 + (size r1) + (size r2) |
58 | STAR(r) => 1 + (size r) |
58 | STAR(r) => 1 + (size r) |
59 | RECD(_, r) => 1 + (size r) |
59 | RECD(_, r) => 1 + (size r) |
60 |
60 |
61 (* nullable function: tests whether the regular |
61 (* nullable function: tests whether the regular |
62 expression can recognise the empty string *) |
62 expression can recognise the empty string *) |
63 fun nullable r = case r of |
63 fun nullable r = case r of |
64 NULL => false |
64 ZERO => false |
65 | EMPTY => true |
65 | ONE => true |
66 | CHAR(_) => false |
66 | CHAR(_) => false |
67 | ALT(r1, r2) => nullable(r1) orelse nullable(r2) |
67 | ALT(r1, r2) => nullable(r1) orelse nullable(r2) |
68 | SEQ(r1, r2) => nullable(r1) andalso nullable(r2) |
68 | SEQ(r1, r2) => nullable(r1) andalso nullable(r2) |
69 | STAR(_) => true |
69 | STAR(_) => true |
70 | RECD(_, r) => nullable(r) |
70 | RECD(_, r) => nullable(r) |
71 |
71 |
72 (* derivative of a regular expression r w.r.t. a character c *) |
72 (* derivative of a regular expression r w.r.t. a character c *) |
73 fun der c r = case r of |
73 fun der c r = case r of |
74 NULL => NULL |
74 ZERO => ZERO |
75 | EMPTY => NULL |
75 | ONE => ZERO |
76 | CHAR(d) => if c = d then EMPTY else NULL |
76 | CHAR(d) => if c = d then ONE else ZERO |
77 | ALT(r1, r2) => ALT(der c r1, der c r2) |
77 | ALT(r1, r2) => ALT(der c r1, der c r2) |
78 | SEQ(r1, r2) => |
78 | SEQ(r1, r2) => |
79 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
79 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
80 else SEQ(der c r1, r2) |
80 else SEQ(der c r1, r2) |
81 | STAR(r) => SEQ(der c r, STAR(r)) |
81 | STAR(r) => SEQ(der c r, STAR(r)) |
86 [] => r |
86 [] => r |
87 | c::s => ders s (der c r) |
87 | c::s => ders s (der c r) |
88 |
88 |
89 (* extracts a string from value *) |
89 (* extracts a string from value *) |
90 fun flatten v = case v of |
90 fun flatten v = case v of |
91 Void => "" |
91 Empty => "" |
92 | Chr(c) => Char.toString c |
92 | Chr(c) => Char.toString c |
93 | Left(v) => flatten v |
93 | Left(v) => flatten v |
94 | Right(v) => flatten v |
94 | Right(v) => flatten v |
95 | Sequ(v1, v2) => flatten v1 ^ flatten v2 |
95 | Sequ(v1, v2) => flatten v1 ^ flatten v2 |
96 | Stars(vs) => String.concat (List.map flatten vs) |
96 | Stars(vs) => String.concat (List.map flatten vs) |
97 | Rec(_, v) => flatten v |
97 | Rec(_, v) => flatten v |
98 |
98 |
99 |
99 |
100 (* extracts an environment from a value *) |
100 (* extracts an environment from a value *) |
101 fun env v = case v of |
101 fun env v = case v of |
102 Void => [] |
102 Empty => [] |
103 | Chr(c) => [] |
103 | Chr(c) => [] |
104 | Left(v) => env v |
104 | Left(v) => env v |
105 | Right(v) => env v |
105 | Right(v) => env v |
106 | Sequ(v1, v2) => env v1 @ env v2 |
106 | Sequ(v1, v2) => env v1 @ env v2 |
107 | Stars(vs) => List.foldr (op @) [] (List.map env vs) |
107 | Stars(vs) => List.foldr (op @) [] (List.map env vs) |
111 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs) |
111 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs) |
112 |
112 |
113 |
113 |
114 (* the value for a nullable rexp *) |
114 (* the value for a nullable rexp *) |
115 fun mkeps r = case r of |
115 fun mkeps r = case r of |
116 EMPTY => Void |
116 ONE => Empty |
117 | ALT(r1, r2) => |
117 | ALT(r1, r2) => |
118 if nullable r1 then Left(mkeps r1) else Right(mkeps r2) |
118 if nullable r1 then Left(mkeps r1) else Right(mkeps r2) |
119 | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2) |
119 | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2) |
120 | STAR(r) => Stars([]) |
120 | STAR(r) => Stars([]) |
121 | RECD(x, r) => Rec(x, mkeps r) |
121 | RECD(x, r) => Rec(x, mkeps r) |
128 | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2) |
128 | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2) |
129 | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2) |
129 | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2) |
130 | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2) |
130 | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2) |
131 | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1) |
131 | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1) |
132 | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) |
132 | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) |
133 | (CHAR(d), Void) => Chr(d) |
133 | (CHAR(d), Empty) => Chr(d) |
134 | (RECD(x, r1), _) => Rec(x, inj r1 c v) |
134 | (RECD(x, r1), _) => Rec(x, inj r1 c v) |
135 | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); |
135 | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); |
136 print ("v: " ^ PolyML.makestring v ^ "\n"); |
136 print ("v: " ^ PolyML.makestring v ^ "\n"); |
137 raise Error) |
137 raise Error) |
138 |
138 |
143 fun f_alt f1 f2 = fn v => case v of |
143 fun f_alt f1 f2 = fn v => case v of |
144 Right(v) => Right(f2 v) |
144 Right(v) => Right(f2 v) |
145 | Left(v) => Left(f1 v) |
145 | Left(v) => Left(f1 v) |
146 fun f_seq f1 f2 = fn v => case v of |
146 fun f_seq f1 f2 = fn v => case v of |
147 Sequ(v1, v2) => Sequ(f1 v1, f2 v2) |
147 Sequ(v1, v2) => Sequ(f1 v1, f2 v2) |
148 fun f_seq_Void1 f1 f2 = fn v => Sequ(f1 Void, f2 v) |
148 fun f_seq_Empty1 f1 f2 = fn v => Sequ(f1 Empty, f2 v) |
149 fun f_seq_Void2 f1 f2 = fn v => Sequ(f1 v, f2 Void) |
149 fun f_seq_Empty2 f1 f2 = fn v => Sequ(f1 v, f2 Empty) |
150 fun f_rec f = fn v => case v of |
150 fun f_rec f = fn v => case v of |
151 Rec(x, v) => Rec(x, f v) |
151 Rec(x, v) => Rec(x, f v) |
152 |
152 |
153 exception ShouldNotHappen |
153 exception ShouldNotHappen |
154 |
154 |
159 fun simp r = case r of |
159 fun simp r = case r of |
160 ALT(r1, r2) => |
160 ALT(r1, r2) => |
161 let val (r1s, f1s) = simp r1 |
161 let val (r1s, f1s) = simp r1 |
162 val (r2s, f2s) = simp r2 in |
162 val (r2s, f2s) = simp r2 in |
163 (case (r1s, r2s) of |
163 (case (r1s, r2s) of |
164 (NULL, _) => (r2s, f_right f2s) |
164 (ZERO, _) => (r2s, f_right f2s) |
165 | (_, NULL) => (r1s, f_left f1s) |
165 | (_, ZERO) => (r1s, f_left f1s) |
166 | (_, _) => if r1s = r2s then (r1s, f_left f1s) |
166 | (_, _) => if r1s = r2s then (r1s, f_left f1s) |
167 else (ALT (r1s, r2s), f_alt f1s f2s)) |
167 else (ALT (r1s, r2s), f_alt f1s f2s)) |
168 end |
168 end |
169 | SEQ(r1, r2) => |
169 | SEQ(r1, r2) => |
170 let val (r1s, f1s) = simp r1 |
170 let val (r1s, f1s) = simp r1 |
171 val (r2s, f2s) = simp r2 in |
171 val (r2s, f2s) = simp r2 in |
172 (case (r1s, r2s) of |
172 (case (r1s, r2s) of |
173 (NULL, _) => (NULL, f_error) |
173 (ZERO, _) => (ZERO, f_error) |
174 | (_, NULL) => (NULL, f_error) |
174 | (_, ZERO) => (ZERO, f_error) |
175 | (EMPTY, _) => (r2s, f_seq_Void1 f1s f2s) |
175 | (ONE, _) => (r2s, f_seq_Empty1 f1s f2s) |
176 | (_, EMPTY) => (r1s, f_seq_Void2 f1s f2s) |
176 | (_, ONE) => (r1s, f_seq_Empty2 f1s f2s) |
177 | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s)) |
177 | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s)) |
178 end |
178 end |
179 | RECD(x, r1) => |
179 | RECD(x, r1) => |
180 let val (r1s, f1s) = simp r1 in |
180 let val (r1s, f1s) = simp r1 in |
181 (RECD(x, r1s), f_rec f1s) |
181 (RECD(x, r1s), f_rec f1s) |
182 end |
182 end |
183 | r => (r, f_id) |
183 | r => (r, f_id) |
184 |
184 |
185 fun der_simp c r = case r of |
185 fun der_simp c r = case r of |
186 NULL => (NULL, f_id) |
186 ZERO => (ZERO, f_id) |
187 | EMPTY => (NULL, f_id) |
187 | ONE => (ZERO, f_id) |
188 | CHAR(d) => ((if c = d then EMPTY else NULL), f_id) |
188 | CHAR(d) => ((if c = d then ONE else ZERO), f_id) |
189 | ALT(r1, r2) => |
189 | ALT(r1, r2) => |
190 let |
190 let |
191 val (r1d, f1d) = der_simp c r1 |
191 val (r1d, f1d) = der_simp c r1 |
192 val (r2d, f2d) = der_simp c r2 |
192 val (r2d, f2d) = der_simp c r2 |
193 in |
193 in |
194 case (r1d, r2d) of |
194 case (r1d, r2d) of |
195 (NULL, _) => (r2d, f_right f2d) |
195 (ZERO, _) => (r2d, f_right f2d) |
196 | (_, NULL) => (r1d, f_left f1d) |
196 | (_, ZERO) => (r1d, f_left f1d) |
197 | (_, _) => if r1d = r2d then (r1d, f_left f1d) |
197 | (_, _) => if r1d = r2d then (r1d, f_left f1d) |
198 else (ALT (r1d, r2d), f_alt f1d f2d) |
198 else (ALT (r1d, r2d), f_alt f1d f2d) |
199 end |
199 end |
200 | SEQ(r1, r2) => |
200 | SEQ(r1, r2) => |
201 if nullable r1 |
201 if nullable r1 |
204 val (r1d, f1d) = der_simp c r1 |
204 val (r1d, f1d) = der_simp c r1 |
205 val (r2d, f2d) = der_simp c r2 |
205 val (r2d, f2d) = der_simp c r2 |
206 val (r2s, f2s) = simp r2 |
206 val (r2s, f2s) = simp r2 |
207 in |
207 in |
208 case (r1d, r2s, r2d) of |
208 case (r1d, r2s, r2d) of |
209 (NULL, _, _) => (r2d, f_right f2d) |
209 (ZERO, _, _) => (r2d, f_right f2d) |
210 | (_, NULL, _) => (r2d, f_right f2d) |
210 | (_, ZERO, _) => (r2d, f_right f2d) |
211 | (_, _, NULL) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) |
211 | (_, _, ZERO) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) |
212 | (EMPTY, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) |
212 | (ONE, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d) |
213 | (_, EMPTY, _) => (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) |
213 | (_, ONE, _) => (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d) |
214 | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d) |
214 | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d) |
215 end |
215 end |
216 else |
216 else |
217 let |
217 let |
218 val (r1d, f1d) = der_simp c r1 |
218 val (r1d, f1d) = der_simp c r1 |
219 val (r2s, f2s) = simp r2 |
219 val (r2s, f2s) = simp r2 |
220 in |
220 in |
221 case (r1d, r2s) of |
221 case (r1d, r2s) of |
222 (NULL, _) => (NULL, f_error) |
222 (ZERO, _) => (ZERO, f_error) |
223 | (_, NULL) => (NULL, f_error) |
223 | (_, ZERO) => (ZERO, f_error) |
224 | (EMPTY, _) => (r2s, f_seq_Void1 f1d f2s) |
224 | (ONE, _) => (r2s, f_seq_Empty1 f1d f2s) |
225 | (_, EMPTY) => (r1d, f_seq_Void2 f1d f2s) |
225 | (_, ONE) => (r1d, f_seq_Empty2 f1d f2s) |
226 | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s) |
226 | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s) |
227 end |
227 end |
228 | STAR(r1) => |
228 | STAR(r1) => |
229 let |
229 let |
230 val (r1d, f1d) = der_simp c r1 |
230 val (r1d, f1d) = der_simp c r1 |
231 in |
231 in |
232 case r1d of |
232 case r1d of |
233 NULL => (NULL, f_error) |
233 ZERO => (ZERO, f_error) |
234 | EMPTY => (STAR r1, f_seq_Void1 f1d f_id) |
234 | ONE => (STAR r1, f_seq_Empty1 f1d f_id) |
235 | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id) |
235 | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id) |
236 end |
236 end |
237 | RECD(x, r1) => der_simp c r1 |
237 | RECD(x, r1) => der_simp c r1 |
238 |
238 |
239 |
239 |