|
1 |
|
2 type rexp = |
|
3 NULL |
|
4 | EMPTY |
|
5 | CHAR of char |
|
6 | ALT of rexp * rexp |
|
7 | SEQ of rexp * rexp |
|
8 | STAR of rexp |
|
9 | RECD of string * rexp;; |
|
10 |
|
11 type value = |
|
12 Void |
|
13 | Chr of char |
|
14 | Sequ of value * value |
|
15 | Left of value |
|
16 | Right of value |
|
17 | Stars of value list |
|
18 | Rec of string * value;; |
|
19 |
|
20 (* some helper functions for strings *) |
|
21 let explode s = [for c in s -> c];; |
|
22 |
|
23 let string_repeat s n = String.replicate n s;; |
|
24 |
|
25 (* some helper functions for rexps *) |
|
26 let rec seq s = match s with |
|
27 | [] -> EMPTY |
|
28 | [c] -> CHAR(c) |
|
29 | c::cs -> SEQ(CHAR(c), seq cs);; |
|
30 |
|
31 let chr c = CHAR(c) |
|
32 |
|
33 let str s = seq(explode s);; |
|
34 |
|
35 let plus r = SEQ(r, STAR(r));; |
|
36 |
|
37 let (++) r1 r2 = ALT(r1, r2);; |
|
38 |
|
39 let (--) r1 r2 = SEQ(r1, r2);; |
|
40 |
|
41 let ($) x r = RECD(x, r);; |
|
42 |
|
43 let alts rs = match rs with |
|
44 | [] -> NULL |
|
45 | [r] -> r |
|
46 | r::rs -> List.fold (++) r rs;; |
|
47 |
|
48 |
|
49 (* size of a regular expressions - for testing purposes *) |
|
50 let rec size r = match r with |
|
51 | NULL -> 1 |
|
52 | EMPTY -> 1 |
|
53 | CHAR(_) -> 1 |
|
54 | ALT(r1, r2) -> 1 + (size r1) + (size r2) |
|
55 | SEQ(r1, r2) -> 1 + (size r1) + (size r2) |
|
56 | STAR(r) -> 1 + (size r) |
|
57 | RECD(_, r) -> 1 + (size r);; |
|
58 |
|
59 (* nullable function: tests whether the regular |
|
60 expression can recognise the empty string *) |
|
61 let rec nullable r = match r with |
|
62 | NULL -> false |
|
63 | EMPTY -> true |
|
64 | CHAR(_) -> false |
|
65 | ALT(r1, r2) -> nullable(r1) || nullable(r2) |
|
66 | SEQ(r1, r2) -> nullable(r1) && nullable(r2) |
|
67 | STAR(_) -> true |
|
68 | RECD(_, r) -> nullable(r);; |
|
69 |
|
70 (* derivative of a regular expression r w.r.t. a character c *) |
|
71 let rec der c r = match r with |
|
72 | NULL -> NULL |
|
73 | EMPTY -> NULL |
|
74 | CHAR(d) -> if c = d then EMPTY else NULL |
|
75 | ALT(r1, r2) -> ALT(der c r1, der c r2) |
|
76 | SEQ(r1, r2) -> |
|
77 if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) |
|
78 else SEQ(der c r1, r2) |
|
79 | STAR(r) -> SEQ(der c r, STAR(r)) |
|
80 | RECD(_, r) -> der c r;; |
|
81 |
|
82 (* derivative w.r.t. a list of chars (iterates der) *) |
|
83 let rec ders s r = match s with |
|
84 | [] -> r |
|
85 | c::s -> ders s (der c r);; |
|
86 |
|
87 (* extracts a string from value *) |
|
88 let rec flatten v = match v with |
|
89 | Void -> "" |
|
90 | Chr(c) -> System.Convert.ToString(c) |
|
91 | Left(v) -> flatten v |
|
92 | Right(v) -> flatten v |
|
93 | Sequ(v1, v2) -> flatten v1 ^ flatten v2 |
|
94 | Stars(vs) -> String.concat "" (List.map flatten vs) |
|
95 | Rec(_, v) -> flatten v;; |
|
96 |
|
97 |
|
98 (* extracts an environment from a value *) |
|
99 let rec env v = match v with |
|
100 | Void -> [] |
|
101 | Chr(c) -> [] |
|
102 | Left(v) -> env v |
|
103 | Right(v) -> env v |
|
104 | Sequ(v1, v2) -> env v1 @ env v2 |
|
105 | Stars(vs) -> List.fold (@) [] (List.map env vs) |
|
106 | Rec(x, v) -> (x, flatten v) :: env v;; |
|
107 |
|
108 let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";; |
|
109 let string_of_env xs = String.concat "," (List.map string_of_pair xs);; |
|
110 |
|
111 |
|
112 (* the value for a nullable rexp *) |
|
113 let rec mkeps r = match r with |
|
114 | EMPTY -> Void |
|
115 | ALT(r1, r2) -> |
|
116 if nullable r1 then Left(mkeps r1) else Right(mkeps r2) |
|
117 | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2) |
|
118 | STAR(r) -> Stars([]) |
|
119 | RECD(x, r) -> Rec(x, mkeps r);; |
|
120 |
|
121 |
|
122 (* injection of a char into a value *) |
|
123 let rec inj r c v = match r, v with |
|
124 | STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs) |
|
125 | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) |
|
126 | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) |
|
127 | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) |
|
128 | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) |
|
129 | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) |
|
130 | CHAR(d), Void -> Chr(d) |
|
131 | RECD(x, r1), _ -> Rec(x, inj r1 c v);; |
|
132 |
|
133 (* some "rectification" functions for simplification *) |
|
134 let f_id v = v;; |
|
135 let f_right f = fun v -> Right(f v);; |
|
136 let f_left f = fun v -> Left(f v);; |
|
137 let f_alt f1 f2 = fun v -> match v with |
|
138 Right(v) -> Right(f2 v) |
|
139 | Left(v) -> Left(f1 v);; |
|
140 let f_seq f1 f2 = fun v -> match v with |
|
141 Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);; |
|
142 let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);; |
|
143 let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);; |
|
144 let f_rec f = fun v -> match v with |
|
145 Rec(x, v) -> Rec(x, f v);; |
|
146 |
|
147 (* simplification of regular expressions returning also an |
|
148 rectification function; no simplification under STARs *) |
|
149 let rec simp r = match r with |
|
150 ALT(r1, r2) -> |
|
151 let (r1s, f1s) = simp r1 in |
|
152 let (r2s, f2s) = simp r2 in |
|
153 (match r1s, r2s with |
|
154 NULL, _ -> (r2s, f_right f2s) |
|
155 | _, NULL -> (r1s, f_left f1s) |
|
156 | _, _ -> if r1s = r2s then (r1s, f_left f1s) |
|
157 else (ALT (r1s, r2s), f_alt f1s f2s)) |
|
158 | SEQ(r1, r2) -> |
|
159 let (r1s, f1s) = simp r1 in |
|
160 let (r2s, f2s) = simp r2 in |
|
161 (match r1s, r2s with |
|
162 NULL, _ -> (NULL, f_right f2s) |
|
163 | _, NULL -> (NULL, f_left f1s) |
|
164 | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s) |
|
165 | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s) |
|
166 | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) |
|
167 | RECD(x, r1) -> |
|
168 let (r1s, f1s) = simp r1 in |
|
169 (RECD(x, r1s), f_rec f1s) |
|
170 | r -> (r, f_id) |
|
171 ;; |
|
172 |
|
173 (* matcher function *) |
|
174 let matcher r s = nullable(ders (explode s) r);; |
|
175 |
|
176 (* lexing function (produces a value) *) |
|
177 exception LexError;; |
|
178 |
|
179 let rec lex r s = match s with |
|
180 [] -> if (nullable r) then mkeps r else raise LexError |
|
181 | c::cs -> inj r c (lex (der c r) cs);; |
|
182 |
|
183 let lexing r s = lex r (explode s);; |
|
184 |
|
185 (* lexing with simplification *) |
|
186 let rec lex_simp r s = match s with |
|
187 [] -> if (nullable r) then mkeps r else raise LexError |
|
188 | c::cs -> |
|
189 let (r_simp, f_simp) = simp (der c r) in |
|
190 inj r c (f_simp (lex_simp r_simp cs));; |
|
191 |
|
192 let lexing_simp r s = lex_simp r (explode s);; |
|
193 |
|
194 |
|
195 |
|
196 |
|
197 (* Lexing rules for a small WHILE language *) |
|
198 let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));; |
|
199 let digit = alts (List.map chr (explode "0123456789"));; |
|
200 let idents = sym -- STAR(sym ++ digit);; |
|
201 let nums = plus(digit);; |
|
202 let keywords = alts (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);; |
|
203 let semicolon = str ";" |
|
204 let ops = alts (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);; |
|
205 let whitespace = plus(str " " ++ str "\n" ++ str "\t");; |
|
206 let rparen = str ")";; |
|
207 let lparen = str "(";; |
|
208 let begin_paren = str "{";; |
|
209 let end_paren = str "}";; |
|
210 |
|
211 |
|
212 let while_regs = STAR(("k" $ keywords) ++ |
|
213 ("i" $ idents) ++ |
|
214 ("o" $ ops) ++ |
|
215 ("n" $ nums) ++ |
|
216 ("s" $ semicolon) ++ |
|
217 ("p" $ (lparen ++ rparen)) ++ |
|
218 ("b" $ (begin_paren ++ end_paren)) ++ |
|
219 ("w" $ whitespace));; |
|
220 |
|
221 |
|
222 |
|
223 (* Some Tests |
|
224 ============ *) |
|
225 |
|
226 let time f x = |
|
227 let t = System.DateTime.Now in |
|
228 let f_x = f x in |
|
229 (printfn "%O" (System.DateTime.Now - t); f_x);; |
|
230 |
|
231 let prog0 = "read n";; |
|
232 string_of_env (env (lexing while_regs prog0));; |
|
233 |
|
234 let prog1 = "read n; write (n)";; |
|
235 string_of_env (env (lexing_simp while_regs prog1));; |
|
236 |
|
237 |
|
238 let prog2 = " |
|
239 i := 2; |
|
240 max := 100; |
|
241 while i < max do { |
|
242 isprime := 1; |
|
243 j := 2; |
|
244 while (j * j) <= i + 1 do { |
|
245 if i % j == 0 then isprime := 0 else skip; |
|
246 j := j + 1 |
|
247 }; |
|
248 if isprime == 1 then write i else skip; |
|
249 i := i + 1 |
|
250 }";; |
|
251 |
|
252 for i = 1 to 100 do |
|
253 printf "%i: " i ; |
|
254 time (lexing_simp while_regs) (string_repeat prog2 i); |
|
255 done;; |
|
256 |