|
1 |
|
2 datatype rexp = |
|
3 NULL |
|
4 | EMPTY |
|
5 | CHAR of char |
|
6 | ALT of rexp * rexp |
|
7 | SEQ of rexp * rexp |
|
8 | STAR of rexp |
|
9 | RECD of string * rexp |
|
10 |
|
11 datatype value = |
|
12 Void |
|
13 | Chr of char |
|
14 | Sequ of value * value |
|
15 | Left of value |
|
16 | Right of value |
|
17 | Stars of value list |
|
18 | Rec of string * value |
|
19 |
|
20 (* some helper functions for strings *) |
|
21 fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s)) |
|
22 |
|
23 (* some helper functions for rexps *) |
|
24 fun seq s = case s of |
|
25 [] => EMPTY |
|
26 | [c] => CHAR(c) |
|
27 | c::cs => SEQ(CHAR(c), seq cs) |
|
28 |
|
29 fun chr c = CHAR(c) |
|
30 |
|
31 fun str s = seq(explode s) |
|
32 |
|
33 fun plus r = SEQ(r, STAR(r)) |
|
34 |
|
35 infix 9 ++ |
|
36 infix 9 -- |
|
37 infix 9 $ |
|
38 |
|
39 fun op ++ (r1, r2) = ALT(r1, r2) |
|
40 |
|
41 fun op -- (r1, r2) = SEQ(r1, r2) |
|
42 |
|
43 fun op $ (x, r) = RECD(x, r) |
|
44 |
|
45 fun alts rs = case rs of |
|
46 [] => NULL |
|
47 | [r] => r |
|
48 | r::rs => List.foldl (op ++) r rs |
|
49 |
|
50 |
|
51 (* size of a regular expressions - for testing purposes *) |
|
52 fun size r = case r of |
|
53 NULL => 1 |
|
54 | EMPTY => 1 |
|
55 | CHAR(_) => 1 |
|
56 | ALT(r1, r2) => 1 + (size r1) + (size r2) |
|
57 | SEQ(r1, r2) => 1 + (size r1) + (size r2) |
|
58 | STAR(r) => 1 + (size r) |
|
59 | RECD(_, r) => 1 + (size r) |
|
60 |
|
61 (* nullable function: tests whether the regular |
|
62 expression can recognise the empty string *) |
|
63 fun nullable r = case r of |
|
64 NULL => false |
|
65 | EMPTY => true |
|
66 | CHAR(_) => false |
|
67 | ALT(r1, r2) => nullable(r1) orelse nullable(r2) |
|
68 | SEQ(r1, r2) => nullable(r1) andalso nullable(r2) |
|
69 | STAR(_) => true |
|
70 | RECD(_, r) => nullable(r) |
|
71 |
|
72 (* derivative of a regular expression r w.r.t. a character c *) |
|
73 fun der c r = case r of |
|
74 NULL => NULL |
|
75 | EMPTY => NULL |
|
76 | CHAR(d) => if c = d then EMPTY else NULL |
|
77 | ALT(r1, r2) => ALT(der c r1, der c r2) |
|
78 | SEQ(r1, r2) => |
|
79 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
|
80 else SEQ(der c r1, r2) |
|
81 | STAR(r) => SEQ(der c r, STAR(r)) |
|
82 | RECD(_, r) => der c r |
|
83 |
|
84 (* derivative w.r.t. a list of chars (iterates der) *) |
|
85 fun ders s r = case s of |
|
86 [] => r |
|
87 | c::s => ders s (der c r) |
|
88 |
|
89 (* extracts a string from value *) |
|
90 fun flatten v = case v of |
|
91 Void => "" |
|
92 | Chr(c) => Char.toString c |
|
93 | Left(v) => flatten v |
|
94 | Right(v) => flatten v |
|
95 | Sequ(v1, v2) => flatten v1 ^ flatten v2 |
|
96 | Stars(vs) => String.concat (List.map flatten vs) |
|
97 | Rec(_, v) => flatten v |
|
98 |
|
99 |
|
100 (* extracts an environment from a value *) |
|
101 fun env v = case v of |
|
102 Void => [] |
|
103 | Chr(c) => [] |
|
104 | Left(v) => env v |
|
105 | Right(v) => env v |
|
106 | Sequ(v1, v2) => env v1 @ env v2 |
|
107 | Stars(vs) => List.foldr (op @) [] (List.map env vs) |
|
108 | Rec(x, v) => (x, flatten v) :: env v |
|
109 |
|
110 fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")" |
|
111 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs) |
|
112 |
|
113 |
|
114 (* the value for a nullable rexp *) |
|
115 fun mkeps r = case r of |
|
116 EMPTY => Void |
|
117 | ALT(r1, r2) => |
|
118 if nullable r1 then Left(mkeps r1) else Right(mkeps r2) |
|
119 | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2) |
|
120 | STAR(r) => Stars([]) |
|
121 | RECD(x, r) => Rec(x, mkeps r) |
|
122 |
|
123 exception Error |
|
124 |
|
125 (* injection of a char into a value *) |
|
126 fun inj r c v = case (r, v) of |
|
127 (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs) |
|
128 | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2) |
|
129 | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2) |
|
130 | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2) |
|
131 | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1) |
|
132 | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) |
|
133 | (CHAR(d), Void) => Chr(d) |
|
134 | (RECD(x, r1), _) => Rec(x, inj r1 c v) |
|
135 | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); |
|
136 print ("v: " ^ PolyML.makestring v ^ "\n"); |
|
137 raise Error) |
|
138 |
|
139 (* some "rectification" functions for simplification *) |
|
140 fun f_id v = v |
|
141 fun f_right f = fn v => Right(f v) |
|
142 fun f_left f = fn v => Left(f v) |
|
143 fun f_alt f1 f2 = fn v => case v of |
|
144 Right(v) => Right(f2 v) |
|
145 | Left(v) => Left(f1 v) |
|
146 fun f_seq f1 f2 = fn v => case v of |
|
147 Sequ(v1, v2) => Sequ(f1 v1, f2 v2) |
|
148 fun f_seq_Void1 f1 f2 = fn v => Sequ(f1 Void, f2 v) |
|
149 fun f_seq_Void2 f1 f2 = fn v => Sequ(f1 v, f2 Void) |
|
150 fun f_rec f = fn v => case v of |
|
151 Rec(x, v) => Rec(x, f v) |
|
152 |
|
153 exception ShouldNotHappen |
|
154 |
|
155 fun f_error v = raise ShouldNotHappen |
|
156 |
|
157 (* simplification of regular expressions returning also an |
|
158 rectification function; no simplification under STARs *) |
|
159 fun simp r = case r of |
|
160 ALT(r1, r2) => |
|
161 let val (r1s, f1s) = simp r1 |
|
162 val (r2s, f2s) = simp r2 in |
|
163 (case (r1s, r2s) of |
|
164 (NULL, _) => (r2s, f_right f2s) |
|
165 | (_, NULL) => (r1s, f_left f1s) |
|
166 | (_, _) => if r1s = r2s then (r1s, f_left f1s) |
|
167 else (ALT (r1s, r2s), f_alt f1s f2s)) |
|
168 end |
|
169 | SEQ(r1, r2) => |
|
170 let val (r1s, f1s) = simp r1 |
|
171 val (r2s, f2s) = simp r2 in |
|
172 (case (r1s, r2s) of |
|
173 (NULL, _) => (NULL, f_error) |
|
174 | (_, NULL) => (NULL, f_error) |
|
175 | (EMPTY, _) => (r2s, f_seq_Void1 f1s f2s) |
|
176 | (_, EMPTY) => (r1s, f_seq_Void2 f1s f2s) |
|
177 | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s)) |
|
178 end |
|
179 | RECD(x, r1) => |
|
180 let val (r1s, f1s) = simp r1 in |
|
181 (RECD(x, r1s), f_rec f1s) |
|
182 end |
|
183 | r => (r, f_id) |
|
184 |
|
185 fun der_simp c r = case r of |
|
186 NULL => (NULL, f_id) |
|
187 | EMPTY => (NULL, f_id) |
|
188 | CHAR(d) => ((if c = d then EMPTY else NULL), f_id) |
|
189 | ALT(r1, r2) => |
|
190 let |
|
191 val (r1d, f1d) = der_simp c r1 |
|
192 val (r2d, f2d) = der_simp c r2 |
|
193 in |
|
194 case (r1d, r2d) of |
|
195 (NULL, _) => (r2d, f_right f2d) |
|
196 | (_, NULL) => (r1d, f_left f1d) |
|
197 | (_, _) => if r1d = r2d then (r1d, f_left f1d) |
|
198 else (ALT (r1d, r2d), f_alt f1d f2d) |
|
199 end |
|
200 | SEQ(r1, r2) => |
|
201 if nullable r1 |
|
202 then |
|
203 let |
|
204 val (r1d, f1d) = der_simp c r1 |
|
205 val (r2d, f2d) = der_simp c r2 |
|
206 val (r2s, f2s) = simp r2 |
|
207 in |
|
208 case (r1d, r2s, r2d) of |
|
209 (NULL, _, _) => (r2d, f_right f2d) |
|
210 | (_, NULL, _) => (r2d, f_right f2d) |
|
211 | (_, _, NULL) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) |
|
212 | (EMPTY, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) |
|
213 | (_, EMPTY, _) => (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) |
|
214 | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d) |
|
215 end |
|
216 else |
|
217 let |
|
218 val (r1d, f1d) = der_simp c r1 |
|
219 val (r2s, f2s) = simp r2 |
|
220 in |
|
221 case (r1d, r2s) of |
|
222 (NULL, _) => (NULL, f_error) |
|
223 | (_, NULL) => (NULL, f_error) |
|
224 | (EMPTY, _) => (r2s, f_seq_Void1 f1d f2s) |
|
225 | (_, EMPTY) => (r1d, f_seq_Void2 f1d f2s) |
|
226 | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s) |
|
227 end |
|
228 | STAR(r1) => |
|
229 let |
|
230 val (r1d, f1d) = der_simp c r1 |
|
231 in |
|
232 case r1d of |
|
233 NULL => (NULL, f_error) |
|
234 | EMPTY => (STAR r1, f_seq_Void1 f1d f_id) |
|
235 | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id) |
|
236 end |
|
237 | RECD(x, r1) => der_simp c r1 |
|
238 |
|
239 |
|
240 |
|
241 (* matcher function *) |
|
242 fun matcher r s = nullable(ders (explode s) r) |
|
243 |
|
244 (* lexing function (produces a value) *) |
|
245 exception LexError |
|
246 |
|
247 fun lex r s = case s of |
|
248 [] => if (nullable r) then mkeps r else raise LexError |
|
249 | c::cs => inj r c (lex (der c r) cs) |
|
250 |
|
251 fun lexing r s = lex r (explode s) |
|
252 |
|
253 (* lexing with simplification *) |
|
254 fun lex_simp r s = case s of |
|
255 [] => if (nullable r) then mkeps r else raise LexError |
|
256 | c::cs => |
|
257 let val (r_simp, f_simp) = simp (der c r) in |
|
258 inj r c (f_simp (lex_simp r_simp cs)) |
|
259 end |
|
260 |
|
261 fun lexing_simp r s = lex_simp r (explode s) |
|
262 |
|
263 fun lex_simp2 r s = case s of |
|
264 [] => if (nullable r) then mkeps r else raise LexError |
|
265 | c::cs => |
|
266 let val (r_simp, f_simp) = der_simp c r in |
|
267 inj r c (f_simp (lex_simp2 r_simp cs)) |
|
268 end |
|
269 |
|
270 fun lexing_simp2 r s = lex_simp2 r (explode s) |
|
271 |
|
272 fun lex_acc r s f = case s of |
|
273 [] => if (nullable r) then f (mkeps r) else raise LexError |
|
274 | c::cs => |
|
275 let val (r_simp, f_simp) = simp (der c r) in |
|
276 lex_acc r_simp cs (fn v => f (inj r c (f_simp v))) |
|
277 end |
|
278 |
|
279 fun lexing_acc r s = lex_acc r (explode s) (f_id) |
|
280 |
|
281 fun lex_acc2 r s f = case s of |
|
282 [] => if (nullable r) then f (mkeps r) else raise LexError |
|
283 | c::cs => |
|
284 let val (r_simp, f_simp) = der_simp c r in |
|
285 lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v))) |
|
286 end |
|
287 |
|
288 fun lexing_acc2 r s = lex_acc2 r (explode s) (f_id) |
|
289 |
|
290 |
|
291 (* Lexing rules for a small WHILE language *) |
|
292 val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz")) |
|
293 val digit = alts (List.map chr (explode "0123456789")) |
|
294 val idents = sym -- STAR(sym ++ digit) |
|
295 val nums = plus(digit) |
|
296 val keywords = alts (List.map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"]) |
|
297 val semicolon = str ";" |
|
298 val ops = alts (List.map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"]) |
|
299 val whitespace = plus(str " " ++ str "\n" ++ str "\t") |
|
300 val rparen = str ")" |
|
301 val lparen = str "(" |
|
302 val begin_paren = str "{" |
|
303 val end_paren = str "}" |
|
304 |
|
305 |
|
306 val while_regs = STAR(("k" $ keywords) ++ |
|
307 ("i" $ idents) ++ |
|
308 ("o" $ ops) ++ |
|
309 ("n" $ nums) ++ |
|
310 ("s" $ semicolon) ++ |
|
311 ("p" $ (lparen ++ rparen)) ++ |
|
312 ("b" $ (begin_paren ++ end_paren)) ++ |
|
313 ("w" $ whitespace)) |
|
314 |
|
315 |
|
316 |
|
317 (* Some Tests |
|
318 ============ *) |
|
319 |
|
320 fun time f x = |
|
321 let |
|
322 val t_start = Timer.startCPUTimer() |
|
323 val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x) |
|
324 val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0 |
|
325 in |
|
326 (print ((Real.toString t_end) ^ "\n"); f_x) |
|
327 end |
|
328 |
|
329 val prog = "ab"; |
|
330 val reg = ("x" $ ((str "a") -- (str "b"))); |
|
331 print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n"); |
|
332 print("Acc: " ^ PolyML.makestring (lexing_acc reg prog) ^ "\n"); |
|
333 print("Env " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n"); |
|
334 |
|
335 fun fst (x, y) = x; |
|
336 fun snd (x, y) = y; |
|
337 |
|
338 val derS = [reg, |
|
339 der #"a" reg, |
|
340 fst (simp (der #"a" reg)), |
|
341 fst (der_simp #"a" reg)]; |
|
342 |
|
343 val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")), |
|
344 (snd (der_simp #"a" reg)) (Chr(#"b")) |
|
345 ]; |
|
346 |
|
347 print("Ders: \n" ^ |
|
348 String.concatWith "\n" (List.map PolyML.makestring derS) |
|
349 ^ "\n\n"); |
|
350 print("Vs: \n" ^ |
|
351 String.concatWith "\n" (List.map PolyML.makestring vS) |
|
352 ^ "\n\n"); |
|
353 |
|
354 |
|
355 val prog0 = "read n"; |
|
356 print("Env0 is: \n" ^ string_of_env (env (lexing_acc while_regs prog0)) ^ "\n"); |
|
357 |
|
358 val prog1 = "read n; write (n)"; |
|
359 print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n"); |
|
360 |
|
361 |
|
362 val prog2 = String.concatWith "\n" |
|
363 ["i := 2;", |
|
364 "max := 100;", |
|
365 "while i < max do {", |
|
366 " isprime := 1;", |
|
367 " j := 2;", |
|
368 " while (j * j) <= i + 1 do {", |
|
369 " if i % j == 0 then isprime := 0 else skip;", |
|
370 " j := j + 1", |
|
371 " };", |
|
372 " if isprime == 1 then write i else skip;", |
|
373 " i := i + 1", |
|
374 "}"]; |
|
375 |
|
376 |
|
377 let |
|
378 val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) |
|
379 in |
|
380 print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n") |
|
381 end; |
|
382 |
|
383 (* loops in ML *) |
|
384 datatype for = to of int * int |
|
385 infix to |
|
386 |
|
387 val for = |
|
388 fn lo to up => |
|
389 (fn f => |
|
390 let fun loop lo = |
|
391 if lo > up then () else (f lo; loop (lo + 1)) |
|
392 in loop lo end) |
|
393 |
|
394 fun forby n = |
|
395 fn lo to up => |
|
396 (fn f => |
|
397 let fun loop lo = |
|
398 if lo > up then () else (f lo; loop (lo + n)) |
|
399 in loop lo end) |
|
400 |
|
401 |
|
402 fun step_simp i = |
|
403 (print ((Int.toString i) ^ ": ") ; |
|
404 time (lexing_simp while_regs) (string_repeat prog2 i)) |
|
405 |
|
406 fun step_simp2 i = |
|
407 (print ((Int.toString i) ^ ": ") ; |
|
408 time (lexing_simp2 while_regs) (string_repeat prog2 i)) |
|
409 |
|
410 fun step_acc i = |
|
411 (print ((Int.toString i) ^ ": ") ; |
|
412 time (lexing_acc while_regs) (string_repeat prog2 i)) |
|
413 |
|
414 fun step_acc2 i = |
|
415 (print ((Int.toString i) ^ ": ") ; |
|
416 time (lexing_acc2 while_regs) (string_repeat prog2 i)) |
|
417 |
|
418 val main1 = forby 1000 (1000 to 5000) step_simp; |
|
419 print "\n"; |
|
420 val main2 = forby 1000 (1000 to 5000) step_simp2; |
|
421 print "\n"; |
|
422 val main3 = forby 1000 (1000 to 5000) step_acc; |
|
423 print "\n"; |
|
424 val main4 = forby 1000 (1000 to 5000) step_acc2; |
|
425 |
|
426 |