|
1 |
|
2 type rexp = |
|
3 NULL |
|
4 | EMPTY |
|
5 | CHAR of char |
|
6 | ALT of rexp * rexp |
|
7 | SEQ of rexp * rexp |
|
8 | STAR of rexp |
|
9 | RECD of string * rexp;; |
|
10 |
|
11 type value = |
|
12 Void |
|
13 | Chr of char |
|
14 | Sequ of value * value |
|
15 | Left of value |
|
16 | Right of value |
|
17 | Stars of value list |
|
18 | Rec of string * value;; |
|
19 |
|
20 let rec string_of_val v = match v with |
|
21 Void -> "Void" |
|
22 | Chr(c) -> String.make 1 c |
|
23 | Sequ(v1, v2) -> "Seq(" ^ string_of_val v1 ^ "," ^ string_of_val v2 ^ ")" |
|
24 | Left(v1) -> "Left(" ^ string_of_val v1 ^ ")" |
|
25 | Right(v1) -> "Right(" ^ string_of_val v1 ^ ")" |
|
26 | Stars(vs) -> "[" ^ String.concat "," (List.map string_of_val vs) ^ "]" |
|
27 | Rec(x, v1) -> x ^ " $ " ^ string_of_val v1;; |
|
28 |
|
29 |
|
30 (* some helper functions for strings *) |
|
31 let explode s = |
|
32 let rec exp i l = |
|
33 if i < 0 then l else exp (i - 1) (s.[i] :: l) in |
|
34 exp (String.length s - 1) [];; |
|
35 |
|
36 let string_repeat s n = |
|
37 Array.fold_left (^) "" (Array.make n s);; |
|
38 |
|
39 (* some helper functions for rexps *) |
|
40 let rec seq s = match s with |
|
41 [] -> EMPTY |
|
42 | [c] -> CHAR(c) |
|
43 | c::cs -> SEQ(CHAR(c), seq cs);; |
|
44 |
|
45 let chr c = CHAR(c) |
|
46 |
|
47 let str s = seq(explode s);; |
|
48 |
|
49 let plus r = SEQ(r, STAR(r));; |
|
50 |
|
51 let (++) r1 r2 = ALT(r1, r2);; |
|
52 |
|
53 let (--) r1 r2 = SEQ(r1, r2);; |
|
54 |
|
55 let ($) x r = RECD(x, r);; |
|
56 |
|
57 let alts rs = match rs with |
|
58 [] -> NULL |
|
59 | [r] -> r |
|
60 | r::rs -> List.fold_left (++) r rs;; |
|
61 |
|
62 |
|
63 (* size of a regular expressions - for testing purposes *) |
|
64 let rec size r = match r with |
|
65 NULL -> 1 |
|
66 | EMPTY -> 1 |
|
67 | CHAR(_) -> 1 |
|
68 | ALT(r1, r2) -> 1 + (size r1) + (size r2) |
|
69 | SEQ(r1, r2) -> 1 + (size r1) + (size r2) |
|
70 | STAR(r) -> 1 + (size r) |
|
71 | RECD(_, r) -> 1 + (size r);; |
|
72 |
|
73 (* nullable function: tests whether the regular |
|
74 expression can recognise the empty string *) |
|
75 let rec nullable r = match r with |
|
76 NULL -> false |
|
77 | EMPTY -> true |
|
78 | CHAR(_) -> false |
|
79 | ALT(r1, r2) -> nullable(r1) || nullable(r2) |
|
80 | SEQ(r1, r2) -> nullable(r1) && nullable(r2) |
|
81 | STAR(_) -> true |
|
82 | RECD(_, r) -> nullable(r);; |
|
83 |
|
84 (* derivative of a regular expression r w.r.t. a character c *) |
|
85 let rec der c r = match r with |
|
86 NULL -> NULL |
|
87 | EMPTY -> NULL |
|
88 | CHAR(d) -> if c = d then EMPTY else NULL |
|
89 | ALT(r1, r2) -> ALT(der c r1, der c r2) |
|
90 | SEQ(r1, r2) -> |
|
91 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
|
92 else SEQ(der c r1, r2) |
|
93 | STAR(r) -> SEQ(der c r, STAR(r)) |
|
94 | RECD(_, r) -> der c r;; |
|
95 |
|
96 (* derivative w.r.t. a list of chars (iterates der) *) |
|
97 let rec ders s r = match s with |
|
98 [] -> r |
|
99 | c::s -> ders s (der c r);; |
|
100 |
|
101 (* extracts a string from value *) |
|
102 let rec flatten v = match v with |
|
103 Void -> "" |
|
104 | Chr(c) -> String.make 1 c |
|
105 | Left(v) -> flatten v |
|
106 | Right(v) -> flatten v |
|
107 | Sequ(v1, v2) -> flatten v1 ^ flatten v2 |
|
108 | Stars(vs) -> String.concat "" (List.map flatten vs) |
|
109 | Rec(_, v) -> flatten v;; |
|
110 |
|
111 |
|
112 (* extracts an environment from a value *) |
|
113 let rec env v = match v with |
|
114 Void -> [] |
|
115 | Chr(c) -> [] |
|
116 | Left(v) -> env v |
|
117 | Right(v) -> env v |
|
118 | Sequ(v1, v2) -> env v1 @ env v2 |
|
119 | Stars(vs) -> List.flatten (List.map env vs) |
|
120 | Rec(x, v) -> (x, flatten v) :: env v;; |
|
121 |
|
122 let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";; |
|
123 let string_of_env xs = String.concat "," (List.map string_of_pair xs);; |
|
124 |
|
125 |
|
126 (* the value for a nullable rexp *) |
|
127 let rec mkeps r = match r with |
|
128 EMPTY -> Void |
|
129 | ALT(r1, r2) -> |
|
130 if nullable r1 then Left(mkeps r1) else Right(mkeps r2) |
|
131 | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2) |
|
132 | STAR(r) -> Stars([]) |
|
133 | RECD(x, r) -> Rec(x, mkeps r);; |
|
134 |
|
135 |
|
136 (* injection of a char into a value *) |
|
137 let rec inj r c v = match r, v with |
|
138 STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs) |
|
139 | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) |
|
140 | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) |
|
141 | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) |
|
142 | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) |
|
143 | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) |
|
144 | CHAR(d), Void -> Chr(d) |
|
145 | RECD(x, r1), _ -> Rec(x, inj r1 c v);; |
|
146 |
|
147 (* some "rectification" functions for simplification *) |
|
148 let f_id v = v;; |
|
149 let f_right f = fun v -> Right(f v);; |
|
150 let f_left f = fun v -> Left(f v);; |
|
151 let f_alt f1 f2 = fun v -> match v with |
|
152 Right(v) -> Right(f2 v) |
|
153 | Left(v) -> Left(f1 v);; |
|
154 let f_seq f1 f2 = fun v -> match v with |
|
155 Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);; |
|
156 let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);; |
|
157 let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);; |
|
158 let f_rec f = fun v -> match v with |
|
159 Rec(x, v) -> Rec(x, f v);; |
|
160 |
|
161 exception ShouldNotHappen |
|
162 let f_error v = raise ShouldNotHappen |
|
163 |
|
164 (* simplification of regular expressions returning also an |
|
165 rectification function; no simplification under STARs *) |
|
166 let rec simp r = match r with |
|
167 ALT(r1, r2) -> |
|
168 let (r1s, f1s) = simp r1 in |
|
169 let (r2s, f2s) = simp r2 in |
|
170 (match r1s, r2s with |
|
171 NULL, _ -> (r2s, f_right f2s) |
|
172 | _, NULL -> (r1s, f_left f1s) |
|
173 | _, _ -> if r1s = r2s then (r1s, f_left f1s) |
|
174 else (ALT (r1s, r2s), f_alt f1s f2s)) |
|
175 | SEQ(r1, r2) -> |
|
176 let (r1s, f1s) = simp r1 in |
|
177 let (r2s, f2s) = simp r2 in |
|
178 (match r1s, r2s with |
|
179 NULL, _ -> (NULL, f_error) |
|
180 | _, NULL -> (NULL, f_error) |
|
181 | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s) |
|
182 | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s) |
|
183 | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) |
|
184 | RECD(x, r1) -> |
|
185 let (r1s, f1s) = simp r1 in |
|
186 (RECD(x, r1s), f_rec f1s) |
|
187 | r -> (r, f_id) |
|
188 ;; |
|
189 |
|
190 let rec der_simp c r = match r with |
|
191 NULL -> (NULL, f_id) |
|
192 | EMPTY -> (NULL, f_id) |
|
193 | CHAR(d) -> ((if c = d then EMPTY else NULL), f_id) |
|
194 | ALT(r1, r2) -> |
|
195 let (r1d, f1d) = der_simp c r1 in |
|
196 let (r2d, f2d) = der_simp c r2 in |
|
197 (match r1d, r2d with |
|
198 NULL, _ -> (r2d, f_right f2d) |
|
199 | _, NULL -> (r1d, f_left f1d) |
|
200 | _, _ -> if r1d = r2d then (r1d, f_left f1d) |
|
201 else (ALT (r1d, r2d), f_alt f1d f2d)) |
|
202 | SEQ(r1, r2) -> |
|
203 if nullable r1 |
|
204 then |
|
205 let (r1d, f1d) = der_simp c r1 in |
|
206 let (r2d, f2d) = der_simp c r2 in |
|
207 let (r2s, f2s) = simp r2 in |
|
208 (match r1d, r2s, r2d with |
|
209 NULL, _, _ -> (r2d, f_right f2d) |
|
210 | _, NULL, _ -> (r2d, f_right f2d) |
|
211 | _, _, NULL -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) |
|
212 | EMPTY, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) |
|
213 | _, EMPTY, _ -> (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) |
|
214 | _, _, _ -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)) |
|
215 else |
|
216 let (r1d, f1d) = der_simp c r1 in |
|
217 let (r2s, f2s) = simp r2 in |
|
218 (match r1d, r2s with |
|
219 NULL, _ -> (NULL, f_error) |
|
220 | _, NULL -> (NULL, f_error) |
|
221 | EMPTY, _ -> (r2s, f_seq_Void1 f1d f2s) |
|
222 | _, EMPTY -> (r1d, f_seq_Void2 f1d f2s) |
|
223 | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s)) |
|
224 | STAR(r1) -> |
|
225 let (r1d, f1d) = der_simp c r1 in |
|
226 (match r1d with |
|
227 NULL -> (NULL, f_error) |
|
228 | EMPTY -> (STAR r1, f_seq_Void1 f1d f_id) |
|
229 | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id)) |
|
230 | RECD(x, r1) -> der_simp c r1 |
|
231 |
|
232 |
|
233 (* matcher function *) |
|
234 let matcher r s = nullable(ders (explode s) r);; |
|
235 |
|
236 (* lexing function (produces a value) *) |
|
237 exception LexError;; |
|
238 |
|
239 let rec lex r s = match s with |
|
240 [] -> if (nullable r) then mkeps r else raise LexError |
|
241 | c::cs -> inj r c (lex (der c r) cs);; |
|
242 |
|
243 let lexing r s = lex r (explode s);; |
|
244 |
|
245 (* lexing with simplification *) |
|
246 let rec lex_simp r s = match s with |
|
247 [] -> if (nullable r) then mkeps r else raise LexError |
|
248 | c::cs -> |
|
249 let (r_simp, f_simp) = simp (der c r) in |
|
250 inj r c (f_simp (lex_simp r_simp cs));; |
|
251 |
|
252 let lexing_simp r s = lex_simp r (explode s);; |
|
253 |
|
254 let rec lex_simp2 r s = match s with |
|
255 [] -> if (nullable r) then mkeps r else raise LexError |
|
256 | c::cs -> |
|
257 let (r_simp, f_simp) = der_simp c r in |
|
258 inj r c (f_simp (lex_simp2 r_simp cs));; |
|
259 |
|
260 let lexing_simp2 r s = lex_simp2 r (explode s);; |
|
261 |
|
262 |
|
263 (* lexing with accumulation *) |
|
264 let rec lex_acc r s f = match s with |
|
265 [] -> if (nullable r) then f (mkeps r) else raise LexError |
|
266 | c::cs -> |
|
267 let (r_simp, f_simp) = simp (der c r) in |
|
268 lex_acc r_simp cs (fun v -> f (inj r c (f_simp v)));; |
|
269 |
|
270 let lexing_acc r s = lex_acc r (explode s) (f_id);; |
|
271 |
|
272 let rec lex_acc2 r s f = match s with |
|
273 [] -> if (nullable r) then f (mkeps r) else raise LexError |
|
274 | c::cs -> |
|
275 let (r_simp, f_simp) = der_simp c r in |
|
276 lex_acc2 r_simp cs (fun v -> f (inj r c (f_simp v)));; |
|
277 |
|
278 let lexing_acc2 r s = lex_acc2 r (explode s) (f_id);; |
|
279 |
|
280 |
|
281 (* Lexing rules for a small WHILE language *) |
|
282 let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));; |
|
283 let digit = alts (List.map chr (explode "0123456789"));; |
|
284 let idents = sym -- STAR(sym ++ digit);; |
|
285 let nums = plus(digit);; |
|
286 let keywords = alts |
|
287 (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);; |
|
288 let semicolon = str ";" |
|
289 let ops = alts |
|
290 (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);; |
|
291 let whitespace = plus(str " " ++ str "\n" ++ str "\t");; |
|
292 let rparen = str ")";; |
|
293 let lparen = str "(";; |
|
294 let begin_paren = str "{";; |
|
295 let end_paren = str "}";; |
|
296 |
|
297 |
|
298 let while_regs = STAR(("k" $ keywords) ++ |
|
299 ("i" $ idents) ++ |
|
300 ("o" $ ops) ++ |
|
301 ("n" $ nums) ++ |
|
302 ("s" $ semicolon) ++ |
|
303 ("p" $ (lparen ++ rparen)) ++ |
|
304 ("b" $ (begin_paren ++ end_paren)) ++ |
|
305 ("w" $ whitespace));; |
|
306 |
|
307 |
|
308 |
|
309 (* Some Tests |
|
310 ============ *) |
|
311 |
|
312 let time f x = |
|
313 let t = Sys.time() in |
|
314 let f_x = (f x; f x; f x) in |
|
315 (print_float ((Sys.time() -. t) /. 3.0); f_x);; |
|
316 |
|
317 |
|
318 let prog0 = "read n";; |
|
319 |
|
320 let prog1 = "read n; write (n)";; |
|
321 string_of_env (env (lexing_simp while_regs prog1));; |
|
322 |
|
323 |
|
324 let prog2 = " |
|
325 i := 2; |
|
326 max := 100; |
|
327 while i < max do { |
|
328 isprime := 1; |
|
329 j := 2; |
|
330 while (j * j) <= i + 1 do { |
|
331 if i % j == 0 then isprime := 0 else skip; |
|
332 j := j + 1 |
|
333 }; |
|
334 if isprime == 1 then write i else skip; |
|
335 i := i + 1 |
|
336 }";; |
|
337 |
|
338 let tst1 = (lexing_simp while_regs prog2 = lexing_simp2 while_regs prog2) in |
|
339 let tst2 = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) in |
|
340 let tst3 = (lexing_simp while_regs prog2 = lexing_acc2 while_regs prog2) |
|
341 in |
|
342 print_string ("Sanity test simp vs simp2: >>" ^ (string_of_bool tst1) ^ "<<\n") ; |
|
343 print_string ("Sanity test simp vs acc: >>" ^ (string_of_bool tst2) ^ "<<\n") ; |
|
344 print_string ("Sanity test simp vs acc2: >>" ^ (string_of_bool tst3) ^ "<<") ; |
|
345 print_newline ();; |
|
346 |
|
347 |
|
348 |
|
349 type range = |
|
350 To of int * int;; |
|
351 |
|
352 let (---) i j = To(i, j);; |
|
353 |
|
354 let forby n = |
|
355 fun range -> match range with To(lo, up) -> |
|
356 (fun f -> |
|
357 let rec loop lo = |
|
358 if lo > up then () else (f lo; loop (lo + n)) |
|
359 in loop lo);; |
|
360 |
|
361 let step_simp i = |
|
362 (print_string ((string_of_int i) ^ ": ") ; |
|
363 time (lexing_simp while_regs) (string_repeat prog2 i) ; |
|
364 print_newline ());; |
|
365 |
|
366 let step_simp2 i = |
|
367 (print_string ((string_of_int i) ^ ": ") ; |
|
368 time (lexing_simp2 while_regs) (string_repeat prog2 i) ; |
|
369 print_newline ());; |
|
370 |
|
371 let step_acc i = |
|
372 (print_string ((string_of_int i) ^ ": ") ; |
|
373 time (lexing_acc while_regs) (string_repeat prog2 i) ; |
|
374 print_newline ());; |
|
375 |
|
376 let step_acc2 i = |
|
377 (print_string ((string_of_int i) ^ ": ") ; |
|
378 time (lexing_acc2 while_regs) (string_repeat prog2 i) ; |
|
379 print_newline ());; |
|
380 |
|
381 forby 100 (100 --- 700) step_simp;; |
|
382 print_newline ();; |
|
383 forby 100 (100 --- 700) step_simp2;; |
|
384 print_newline ();; |
|
385 forby 100 (100 --- 700) step_acc;; |
|
386 print_newline ();; |
|
387 forby 100 (100 --- 700) step_acc2;; |
|
388 print_newline ();; |
|
389 forby 1000 (1000 --- 5000) step_acc;; |
|
390 print_newline ();; |
|
391 forby 1000 (1000 --- 5000) step_acc2;; |
|
392 (*print_newline ();;*) |
|
393 (* forby 500 (100 --- 5000) step_simp;; *) |
|
394 |