53 let (--) r1 r2 = SEQ(r1, r2);; |
53 let (--) r1 r2 = SEQ(r1, r2);; |
54 |
54 |
55 let ($) x r = RECD(x, r);; |
55 let ($) x r = RECD(x, r);; |
56 |
56 |
57 let alts rs = match rs with |
57 let alts rs = match rs with |
58 [] -> NULL |
58 [] -> ZERO |
59 | [r] -> r |
59 | [r] -> r |
60 | r::rs -> List.fold_left (++) r rs;; |
60 | r::rs -> List.fold_left (++) r rs;; |
61 |
61 |
62 |
62 |
63 (* size of a regular expressions - for testing purposes *) |
63 (* size of a regular expressions - for testing purposes *) |
64 let rec size r = match r with |
64 let rec size r = match r with |
65 NULL -> 1 |
65 ZERO -> 1 |
66 | EMPTY -> 1 |
66 | ONE -> 1 |
67 | CHAR(_) -> 1 |
67 | CHAR(_) -> 1 |
68 | ALT(r1, r2) -> 1 + (size r1) + (size r2) |
68 | ALT(r1, r2) -> 1 + (size r1) + (size r2) |
69 | SEQ(r1, r2) -> 1 + (size r1) + (size r2) |
69 | SEQ(r1, r2) -> 1 + (size r1) + (size r2) |
70 | STAR(r) -> 1 + (size r) |
70 | STAR(r) -> 1 + (size r) |
71 | RECD(_, r) -> 1 + (size r);; |
71 | RECD(_, r) -> 1 + (size r);; |
72 |
72 |
73 (* nullable function: tests whether the regular |
73 (* nullable function: tests whether the regular |
74 expression can recognise the empty string *) |
74 expression can recognise the empty string *) |
75 let rec nullable r = match r with |
75 let rec nullable r = match r with |
76 NULL -> false |
76 ZERO -> false |
77 | EMPTY -> true |
77 | ONE -> true |
78 | CHAR(_) -> false |
78 | CHAR(_) -> false |
79 | ALT(r1, r2) -> nullable(r1) || nullable(r2) |
79 | ALT(r1, r2) -> nullable(r1) || nullable(r2) |
80 | SEQ(r1, r2) -> nullable(r1) && nullable(r2) |
80 | SEQ(r1, r2) -> nullable(r1) && nullable(r2) |
81 | STAR(_) -> true |
81 | STAR(_) -> true |
82 | RECD(_, r) -> nullable(r);; |
82 | RECD(_, r) -> nullable(r);; |
83 |
83 |
84 (* derivative of a regular expression r w.r.t. a character c *) |
84 (* derivative of a regular expression r w.r.t. a character c *) |
85 let rec der c r = match r with |
85 let rec der c r = match r with |
86 NULL -> NULL |
86 ZERO -> ZERO |
87 | EMPTY -> NULL |
87 | ONE -> ZERO |
88 | CHAR(d) -> if c = d then EMPTY else NULL |
88 | CHAR(d) -> if c = d then ONE else ZERO |
89 | ALT(r1, r2) -> ALT(der c r1, der c r2) |
89 | ALT(r1, r2) -> ALT(der c r1, der c r2) |
90 | SEQ(r1, r2) -> |
90 | SEQ(r1, r2) -> |
91 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
91 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
92 else SEQ(der c r1, r2) |
92 else SEQ(der c r1, r2) |
93 | STAR(r) -> SEQ(der c r, STAR(r)) |
93 | STAR(r) -> SEQ(der c r, STAR(r)) |
139 | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) |
139 | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) |
140 | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) |
140 | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) |
141 | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) |
141 | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) |
142 | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) |
142 | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) |
143 | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) |
143 | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) |
144 | CHAR(d), Void -> Chr(d) |
144 | CHAR(d), Empty -> Chr(d) |
145 | RECD(x, r1), _ -> Rec(x, inj r1 c v);; |
145 | RECD(x, r1), _ -> Rec(x, inj r1 c v);; |
146 |
146 |
147 (* some "rectification" functions for simplification *) |
147 (* some "rectification" functions for simplification *) |
148 let f_id v = v;; |
148 let f_id v = v;; |
149 let f_right f = fun v -> Right(f v);; |
149 let f_right f = fun v -> Right(f v);; |
166 let rec simp r = match r with |
166 let rec simp r = match r with |
167 ALT(r1, r2) -> |
167 ALT(r1, r2) -> |
168 let (r1s, f1s) = simp r1 in |
168 let (r1s, f1s) = simp r1 in |
169 let (r2s, f2s) = simp r2 in |
169 let (r2s, f2s) = simp r2 in |
170 (match r1s, r2s with |
170 (match r1s, r2s with |
171 NULL, _ -> (r2s, f_right f2s) |
171 ZERO, _ -> (r2s, f_right f2s) |
172 | _, NULL -> (r1s, f_left f1s) |
172 | _, ZERO -> (r1s, f_left f1s) |
173 | _, _ -> if r1s = r2s then (r1s, f_left f1s) |
173 | _, _ -> if r1s = r2s then (r1s, f_left f1s) |
174 else (ALT (r1s, r2s), f_alt f1s f2s)) |
174 else (ALT (r1s, r2s), f_alt f1s f2s)) |
175 | SEQ(r1, r2) -> |
175 | SEQ(r1, r2) -> |
176 let (r1s, f1s) = simp r1 in |
176 let (r1s, f1s) = simp r1 in |
177 let (r2s, f2s) = simp r2 in |
177 let (r2s, f2s) = simp r2 in |
178 (match r1s, r2s with |
178 (match r1s, r2s with |
179 NULL, _ -> (NULL, f_error) |
179 ZERO, _ -> (ZERO, f_error) |
180 | _, NULL -> (NULL, f_error) |
180 | _, ZERO -> (ZERO, f_error) |
181 | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s) |
181 | ONE, _ -> (r2s, f_seq_Empty1 f1s f2s) |
182 | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s) |
182 | _, ONE -> (r1s, f_seq_Empty2 f1s f2s) |
183 | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) |
183 | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) |
184 | RECD(x, r1) -> |
184 | RECD(x, r1) -> |
185 let (r1s, f1s) = simp r1 in |
185 let (r1s, f1s) = simp r1 in |
186 (RECD(x, r1s), f_rec f1s) |
186 (RECD(x, r1s), f_rec f1s) |
187 | r -> (r, f_id) |
187 | r -> (r, f_id) |
188 ;; |
188 ;; |
189 |
189 |
190 let rec der_simp c r = match r with |
190 let rec der_simp c r = match r with |
191 NULL -> (NULL, f_id) |
191 ZERO -> (ZERO, f_id) |
192 | EMPTY -> (NULL, f_id) |
192 | ONE -> (ZERO, f_id) |
193 | CHAR(d) -> ((if c = d then EMPTY else NULL), f_id) |
193 | CHAR(d) -> ((if c = d then ONE else ZERO), f_id) |
194 | ALT(r1, r2) -> |
194 | ALT(r1, r2) -> |
195 let (r1d, f1d) = der_simp c r1 in |
195 let (r1d, f1d) = der_simp c r1 in |
196 let (r2d, f2d) = der_simp c r2 in |
196 let (r2d, f2d) = der_simp c r2 in |
197 (match r1d, r2d with |
197 (match r1d, r2d with |
198 NULL, _ -> (r2d, f_right f2d) |
198 ZERO, _ -> (r2d, f_right f2d) |
199 | _, NULL -> (r1d, f_left f1d) |
199 | _, ZERO -> (r1d, f_left f1d) |
200 | _, _ -> if r1d = r2d then (r1d, f_left f1d) |
200 | _, _ -> if r1d = r2d then (r1d, f_left f1d) |
201 else (ALT (r1d, r2d), f_alt f1d f2d)) |
201 else (ALT (r1d, r2d), f_alt f1d f2d)) |
202 | SEQ(r1, r2) -> |
202 | SEQ(r1, r2) -> |
203 if nullable r1 |
203 if nullable r1 |
204 then |
204 then |
205 let (r1d, f1d) = der_simp c r1 in |
205 let (r1d, f1d) = der_simp c r1 in |
206 let (r2d, f2d) = der_simp c r2 in |
206 let (r2d, f2d) = der_simp c r2 in |
207 let (r2s, f2s) = simp r2 in |
207 let (r2s, f2s) = simp r2 in |
208 (match r1d, r2s, r2d with |
208 (match r1d, r2s, r2d with |
209 NULL, _, _ -> (r2d, f_right f2d) |
209 ZERO, _, _ -> (r2d, f_right f2d) |
210 | _, NULL, _ -> (r2d, f_right f2d) |
210 | _, ZERO, _ -> (r2d, f_right f2d) |
211 | _, _, NULL -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) |
211 | _, _, ZERO -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) |
212 | EMPTY, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) |
212 | ONE, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d) |
213 | _, EMPTY, _ -> (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) |
213 | _, ONE, _ -> (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d) |
214 | _, _, _ -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)) |
214 | _, _, _ -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)) |
215 else |
215 else |
216 let (r1d, f1d) = der_simp c r1 in |
216 let (r1d, f1d) = der_simp c r1 in |
217 let (r2s, f2s) = simp r2 in |
217 let (r2s, f2s) = simp r2 in |
218 (match r1d, r2s with |
218 (match r1d, r2s with |
219 NULL, _ -> (NULL, f_error) |
219 ZERO, _ -> (ZERO, f_error) |
220 | _, NULL -> (NULL, f_error) |
220 | _, ZERO -> (ZERO, f_error) |
221 | EMPTY, _ -> (r2s, f_seq_Void1 f1d f2s) |
221 | ONE, _ -> (r2s, f_seq_Empty1 f1d f2s) |
222 | _, EMPTY -> (r1d, f_seq_Void2 f1d f2s) |
222 | _, ONE -> (r1d, f_seq_Empty2 f1d f2s) |
223 | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s)) |
223 | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s)) |
224 | STAR(r1) -> |
224 | STAR(r1) -> |
225 let (r1d, f1d) = der_simp c r1 in |
225 let (r1d, f1d) = der_simp c r1 in |
226 (match r1d with |
226 (match r1d with |
227 NULL -> (NULL, f_error) |
227 ZERO -> (ZERO, f_error) |
228 | EMPTY -> (STAR r1, f_seq_Void1 f1d f_id) |
228 | ONE -> (STAR r1, f_seq_Empty1 f1d f_id) |
229 | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id)) |
229 | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id)) |
230 | RECD(x, r1) -> der_simp c r1 |
230 | RECD(x, r1) -> der_simp c r1 |
231 |
231 |
232 |
232 |
233 (* matcher function *) |
233 (* matcher function *) |