|
1 |
|
2 theory Blexer |
|
3 imports "Lexer" "PDerivs" |
|
4 begin |
|
5 |
|
6 section \<open>Bit-Encodings\<close> |
|
7 |
|
8 datatype bit = Z | S |
|
9 |
|
10 fun code :: "val \<Rightarrow> bit list" |
|
11 where |
|
12 "code Void = []" |
|
13 | "code (Char c) = []" |
|
14 | "code (Left v) = Z # (code v)" |
|
15 | "code (Right v) = S # (code v)" |
|
16 | "code (Seq v1 v2) = (code v1) @ (code v2)" |
|
17 | "code (Stars []) = [S]" |
|
18 | "code (Stars (v # vs)) = (Z # code v) @ code (Stars vs)" |
|
19 |
|
20 |
|
21 fun |
|
22 Stars_add :: "val \<Rightarrow> val \<Rightarrow> val" |
|
23 where |
|
24 "Stars_add v (Stars vs) = Stars (v # vs)" |
|
25 |
|
26 function |
|
27 decode' :: "bit list \<Rightarrow> rexp \<Rightarrow> (val * bit list)" |
|
28 where |
|
29 "decode' bs ZERO = (undefined, bs)" |
|
30 | "decode' bs ONE = (Void, bs)" |
|
31 | "decode' bs (CH d) = (Char d, bs)" |
|
32 | "decode' [] (ALT r1 r2) = (Void, [])" |
|
33 | "decode' (Z # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r1 in (Left v, bs'))" |
|
34 | "decode' (S # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r2 in (Right v, bs'))" |
|
35 | "decode' bs (SEQ r1 r2) = (let (v1, bs') = decode' bs r1 in |
|
36 let (v2, bs'') = decode' bs' r2 in (Seq v1 v2, bs''))" |
|
37 | "decode' [] (STAR r) = (Void, [])" |
|
38 | "decode' (S # bs) (STAR r) = (Stars [], bs)" |
|
39 | "decode' (Z # bs) (STAR r) = (let (v, bs') = decode' bs r in |
|
40 let (vs, bs'') = decode' bs' (STAR r) |
|
41 in (Stars_add v vs, bs''))" |
|
42 by pat_completeness auto |
|
43 |
|
44 lemma decode'_smaller: |
|
45 assumes "decode'_dom (bs, r)" |
|
46 shows "length (snd (decode' bs r)) \<le> length bs" |
|
47 using assms |
|
48 apply(induct bs r) |
|
49 apply(auto simp add: decode'.psimps split: prod.split) |
|
50 using dual_order.trans apply blast |
|
51 by (meson dual_order.trans le_SucI) |
|
52 |
|
53 termination "decode'" |
|
54 apply(relation "inv_image (measure(%cs. size cs) <*lex*> measure(%s. size s)) (%(ds,r). (r,ds))") |
|
55 apply(auto dest!: decode'_smaller) |
|
56 by (metis less_Suc_eq_le snd_conv) |
|
57 |
|
58 definition |
|
59 decode :: "bit list \<Rightarrow> rexp \<Rightarrow> val option" |
|
60 where |
|
61 "decode ds r \<equiv> (let (v, ds') = decode' ds r |
|
62 in (if ds' = [] then Some v else None))" |
|
63 |
|
64 lemma decode'_code_Stars: |
|
65 assumes "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> (\<forall>x. decode' (code v @ x) r = (v, x)) \<and> flat v \<noteq> []" |
|
66 shows "decode' (code (Stars vs) @ ds) (STAR r) = (Stars vs, ds)" |
|
67 using assms |
|
68 apply(induct vs) |
|
69 apply(auto) |
|
70 done |
|
71 |
|
72 lemma decode'_code: |
|
73 assumes "\<Turnstile> v : r" |
|
74 shows "decode' ((code v) @ ds) r = (v, ds)" |
|
75 using assms |
|
76 apply(induct v r arbitrary: ds) |
|
77 apply(auto) |
|
78 using decode'_code_Stars by blast |
|
79 |
|
80 lemma decode_code: |
|
81 assumes "\<Turnstile> v : r" |
|
82 shows "decode (code v) r = Some v" |
|
83 using assms unfolding decode_def |
|
84 by (smt append_Nil2 decode'_code old.prod.case) |
|
85 |
|
86 |
|
87 section {* Annotated Regular Expressions *} |
|
88 |
|
89 datatype arexp = |
|
90 AZERO |
|
91 | AONE "bit list" |
|
92 | ACHAR "bit list" char |
|
93 | ASEQ "bit list" arexp arexp |
|
94 | AALTs "bit list" "arexp list" |
|
95 | ASTAR "bit list" arexp |
|
96 |
|
97 abbreviation |
|
98 "AALT bs r1 r2 \<equiv> AALTs bs [r1, r2]" |
|
99 |
|
100 fun asize :: "arexp \<Rightarrow> nat" where |
|
101 "asize AZERO = 1" |
|
102 | "asize (AONE cs) = 1" |
|
103 | "asize (ACHAR cs c) = 1" |
|
104 | "asize (AALTs cs rs) = Suc (sum_list (map asize rs))" |
|
105 | "asize (ASEQ cs r1 r2) = Suc (asize r1 + asize r2)" |
|
106 | "asize (ASTAR cs r) = Suc (asize r)" |
|
107 |
|
108 fun |
|
109 erase :: "arexp \<Rightarrow> rexp" |
|
110 where |
|
111 "erase AZERO = ZERO" |
|
112 | "erase (AONE _) = ONE" |
|
113 | "erase (ACHAR _ c) = CH c" |
|
114 | "erase (AALTs _ []) = ZERO" |
|
115 | "erase (AALTs _ [r]) = (erase r)" |
|
116 | "erase (AALTs bs (r#rs)) = ALT (erase r) (erase (AALTs bs rs))" |
|
117 | "erase (ASEQ _ r1 r2) = SEQ (erase r1) (erase r2)" |
|
118 | "erase (ASTAR _ r) = STAR (erase r)" |
|
119 |
|
120 |
|
121 fun fuse :: "bit list \<Rightarrow> arexp \<Rightarrow> arexp" where |
|
122 "fuse bs AZERO = AZERO" |
|
123 | "fuse bs (AONE cs) = AONE (bs @ cs)" |
|
124 | "fuse bs (ACHAR cs c) = ACHAR (bs @ cs) c" |
|
125 | "fuse bs (AALTs cs rs) = AALTs (bs @ cs) rs" |
|
126 | "fuse bs (ASEQ cs r1 r2) = ASEQ (bs @ cs) r1 r2" |
|
127 | "fuse bs (ASTAR cs r) = ASTAR (bs @ cs) r" |
|
128 |
|
129 lemma fuse_append: |
|
130 shows "fuse (bs1 @ bs2) r = fuse bs1 (fuse bs2 r)" |
|
131 apply(induct r) |
|
132 apply(auto) |
|
133 done |
|
134 |
|
135 |
|
136 fun intern :: "rexp \<Rightarrow> arexp" where |
|
137 "intern ZERO = AZERO" |
|
138 | "intern ONE = AONE []" |
|
139 | "intern (CH c) = ACHAR [] c" |
|
140 | "intern (ALT r1 r2) = AALT [] (fuse [Z] (intern r1)) |
|
141 (fuse [S] (intern r2))" |
|
142 | "intern (SEQ r1 r2) = ASEQ [] (intern r1) (intern r2)" |
|
143 | "intern (STAR r) = ASTAR [] (intern r)" |
|
144 |
|
145 |
|
146 fun retrieve :: "arexp \<Rightarrow> val \<Rightarrow> bit list" where |
|
147 "retrieve (AONE bs) Void = bs" |
|
148 | "retrieve (ACHAR bs c) (Char d) = bs" |
|
149 | "retrieve (AALTs bs [r]) v = bs @ retrieve r v" |
|
150 | "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" |
|
151 | "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" |
|
152 | "retrieve (ASEQ bs r1 r2) (Seq v1 v2) = bs @ retrieve r1 v1 @ retrieve r2 v2" |
|
153 | "retrieve (ASTAR bs r) (Stars []) = bs @ [S]" |
|
154 | "retrieve (ASTAR bs r) (Stars (v#vs)) = |
|
155 bs @ [Z] @ retrieve r v @ retrieve (ASTAR [] r) (Stars vs)" |
|
156 |
|
157 |
|
158 |
|
159 fun |
|
160 bnullable :: "arexp \<Rightarrow> bool" |
|
161 where |
|
162 "bnullable (AZERO) = False" |
|
163 | "bnullable (AONE bs) = True" |
|
164 | "bnullable (ACHAR bs c) = False" |
|
165 | "bnullable (AALTs bs rs) = (\<exists>r \<in> set rs. bnullable r)" |
|
166 | "bnullable (ASEQ bs r1 r2) = (bnullable r1 \<and> bnullable r2)" |
|
167 | "bnullable (ASTAR bs r) = True" |
|
168 |
|
169 abbreviation |
|
170 bnullables :: "arexp list \<Rightarrow> bool" |
|
171 where |
|
172 "bnullables rs \<equiv> (\<exists>r \<in> set rs. bnullable r)" |
|
173 |
|
174 fun |
|
175 bmkeps :: "arexp \<Rightarrow> bit list" and |
|
176 bmkepss :: "arexp list \<Rightarrow> bit list" |
|
177 where |
|
178 "bmkeps(AONE bs) = bs" |
|
179 | "bmkeps(ASEQ bs r1 r2) = bs @ (bmkeps r1) @ (bmkeps r2)" |
|
180 | "bmkeps(AALTs bs rs) = bs @ (bmkepss rs)" |
|
181 | "bmkeps(ASTAR bs r) = bs @ [S]" |
|
182 | "bmkepss (r # rs) = (if bnullable(r) then (bmkeps r) else (bmkepss rs))" |
|
183 |
|
184 lemma bmkepss1: |
|
185 assumes "\<not> bnullables rs1" |
|
186 shows "bmkepss (rs1 @ rs2) = bmkepss rs2" |
|
187 using assms |
|
188 by (induct rs1) (auto) |
|
189 |
|
190 lemma bmkepss2: |
|
191 assumes "bnullables rs1" |
|
192 shows "bmkepss (rs1 @ rs2) = bmkepss rs1" |
|
193 using assms |
|
194 by (induct rs1) (auto) |
|
195 |
|
196 |
|
197 fun |
|
198 bder :: "char \<Rightarrow> arexp \<Rightarrow> arexp" |
|
199 where |
|
200 "bder c (AZERO) = AZERO" |
|
201 | "bder c (AONE bs) = AZERO" |
|
202 | "bder c (ACHAR bs d) = (if c = d then AONE bs else AZERO)" |
|
203 | "bder c (AALTs bs rs) = AALTs bs (map (bder c) rs)" |
|
204 | "bder c (ASEQ bs r1 r2) = |
|
205 (if bnullable r1 |
|
206 then AALT bs (ASEQ [] (bder c r1) r2) (fuse (bmkeps r1) (bder c r2)) |
|
207 else ASEQ bs (bder c r1) r2)" |
|
208 | "bder c (ASTAR bs r) = ASEQ (bs @ [Z]) (bder c r) (ASTAR [] r)" |
|
209 |
|
210 |
|
211 fun |
|
212 bders :: "arexp \<Rightarrow> string \<Rightarrow> arexp" |
|
213 where |
|
214 "bders r [] = r" |
|
215 | "bders r (c#s) = bders (bder c r) s" |
|
216 |
|
217 lemma bders_append: |
|
218 "bders c (s1 @ s2) = bders (bders c s1) s2" |
|
219 apply(induct s1 arbitrary: c s2) |
|
220 apply(simp_all) |
|
221 done |
|
222 |
|
223 lemma bnullable_correctness: |
|
224 shows "nullable (erase r) = bnullable r" |
|
225 apply(induct r rule: erase.induct) |
|
226 apply(simp_all) |
|
227 done |
|
228 |
|
229 lemma erase_fuse: |
|
230 shows "erase (fuse bs r) = erase r" |
|
231 apply(induct r rule: erase.induct) |
|
232 apply(simp_all) |
|
233 done |
|
234 |
|
235 lemma erase_intern [simp]: |
|
236 shows "erase (intern r) = r" |
|
237 apply(induct r) |
|
238 apply(simp_all add: erase_fuse) |
|
239 done |
|
240 |
|
241 lemma erase_bder [simp]: |
|
242 shows "erase (bder a r) = der a (erase r)" |
|
243 apply(induct r rule: erase.induct) |
|
244 apply(simp_all add: erase_fuse bnullable_correctness) |
|
245 done |
|
246 |
|
247 lemma erase_bders [simp]: |
|
248 shows "erase (bders r s) = ders s (erase r)" |
|
249 apply(induct s arbitrary: r ) |
|
250 apply(simp_all) |
|
251 done |
|
252 |
|
253 lemma bnullable_fuse: |
|
254 shows "bnullable (fuse bs r) = bnullable r" |
|
255 apply(induct r arbitrary: bs) |
|
256 apply(auto) |
|
257 done |
|
258 |
|
259 lemma retrieve_encode_STARS: |
|
260 assumes "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> code v = retrieve (intern r) v" |
|
261 shows "code (Stars vs) = retrieve (ASTAR [] (intern r)) (Stars vs)" |
|
262 using assms |
|
263 apply(induct vs) |
|
264 apply(simp_all) |
|
265 done |
|
266 |
|
267 lemma retrieve_fuse2: |
|
268 assumes "\<Turnstile> v : (erase r)" |
|
269 shows "retrieve (fuse bs r) v = bs @ retrieve r v" |
|
270 using assms |
|
271 apply(induct r arbitrary: v bs) |
|
272 apply(auto elim: Prf_elims)[4] |
|
273 apply(case_tac x2a) |
|
274 apply(simp) |
|
275 using Prf_elims(1) apply blast |
|
276 apply(case_tac x2a) |
|
277 apply(simp) |
|
278 apply(simp) |
|
279 apply(case_tac list) |
|
280 apply(simp) |
|
281 apply(simp) |
|
282 apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5)) |
|
283 apply(simp) |
|
284 using retrieve_encode_STARS |
|
285 apply(auto elim!: Prf_elims)[1] |
|
286 apply(case_tac vs) |
|
287 apply(simp) |
|
288 apply(simp) |
|
289 done |
|
290 |
|
291 lemma retrieve_fuse: |
|
292 assumes "\<Turnstile> v : r" |
|
293 shows "retrieve (fuse bs (intern r)) v = bs @ retrieve (intern r) v" |
|
294 using assms |
|
295 by (simp_all add: retrieve_fuse2) |
|
296 |
|
297 |
|
298 lemma retrieve_code: |
|
299 assumes "\<Turnstile> v : r" |
|
300 shows "code v = retrieve (intern r) v" |
|
301 using assms |
|
302 apply(induct v r ) |
|
303 apply(simp_all add: retrieve_fuse retrieve_encode_STARS) |
|
304 done |
|
305 |
|
306 |
|
307 lemma retrieve_AALTs_bnullable1: |
|
308 assumes "bnullable r" |
|
309 shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs)))) |
|
310 = bs @ retrieve r (mkeps (erase r))" |
|
311 using assms |
|
312 apply(case_tac rs) |
|
313 apply(auto simp add: bnullable_correctness) |
|
314 done |
|
315 |
|
316 lemma retrieve_AALTs_bnullable2: |
|
317 assumes "\<not>bnullable r" "bnullables rs" |
|
318 shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs)))) |
|
319 = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))" |
|
320 using assms |
|
321 apply(induct rs arbitrary: r bs) |
|
322 apply(auto) |
|
323 using bnullable_correctness apply blast |
|
324 apply(case_tac rs) |
|
325 apply(auto) |
|
326 using bnullable_correctness apply blast |
|
327 apply(case_tac rs) |
|
328 apply(auto) |
|
329 done |
|
330 |
|
331 lemma bmkeps_retrieve_AALTs: |
|
332 assumes "\<forall>r \<in> set rs. bnullable r \<longrightarrow> bmkeps r = retrieve r (mkeps (erase r))" |
|
333 "bnullables rs" |
|
334 shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))" |
|
335 using assms |
|
336 apply(induct rs arbitrary: bs) |
|
337 apply(auto) |
|
338 using retrieve_AALTs_bnullable1 apply presburger |
|
339 apply (metis retrieve_AALTs_bnullable2) |
|
340 apply (simp add: retrieve_AALTs_bnullable1) |
|
341 by (metis retrieve_AALTs_bnullable2) |
|
342 |
|
343 |
|
344 lemma bmkeps_retrieve: |
|
345 assumes "bnullable r" |
|
346 shows "bmkeps r = retrieve r (mkeps (erase r))" |
|
347 using assms |
|
348 apply(induct r) |
|
349 apply(auto) |
|
350 using bmkeps_retrieve_AALTs by auto |
|
351 |
|
352 lemma bder_retrieve: |
|
353 assumes "\<Turnstile> v : der c (erase r)" |
|
354 shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)" |
|
355 using assms |
|
356 apply(induct r arbitrary: v rule: erase.induct) |
|
357 using Prf_elims(1) apply auto[1] |
|
358 using Prf_elims(1) apply auto[1] |
|
359 apply(auto)[1] |
|
360 apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2)) |
|
361 using Prf_elims(1) apply blast |
|
362 (* AALTs case *) |
|
363 apply(simp) |
|
364 apply(erule Prf_elims) |
|
365 apply(simp) |
|
366 apply(simp) |
|
367 apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v) |
|
368 apply(erule Prf_elims) |
|
369 apply(simp) |
|
370 apply(simp) |
|
371 apply(case_tac rs) |
|
372 apply(simp) |
|
373 apply(simp) |
|
374 using Prf_elims(3) apply fastforce |
|
375 (* ASEQ case *) |
|
376 apply(simp) |
|
377 apply(case_tac "nullable (erase r1)") |
|
378 apply(simp) |
|
379 apply(erule Prf_elims) |
|
380 using Prf_elims(2) bnullable_correctness apply force |
|
381 apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2) |
|
382 apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2) |
|
383 using Prf_elims(2) apply force |
|
384 (* ASTAR case *) |
|
385 apply(rename_tac bs r v) |
|
386 apply(simp) |
|
387 apply(erule Prf_elims) |
|
388 apply(clarify) |
|
389 apply(erule Prf_elims) |
|
390 apply(clarify) |
|
391 by (simp add: retrieve_fuse2) |
|
392 |
|
393 |
|
394 lemma MAIN_decode: |
|
395 assumes "\<Turnstile> v : ders s r" |
|
396 shows "Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" |
|
397 using assms |
|
398 proof (induct s arbitrary: v rule: rev_induct) |
|
399 case Nil |
|
400 have "\<Turnstile> v : ders [] r" by fact |
|
401 then have "\<Turnstile> v : r" by simp |
|
402 then have "Some v = decode (retrieve (intern r) v) r" |
|
403 using decode_code retrieve_code by auto |
|
404 then show "Some (flex r id [] v) = decode (retrieve (bders (intern r) []) v) r" |
|
405 by simp |
|
406 next |
|
407 case (snoc c s v) |
|
408 have IH: "\<And>v. \<Turnstile> v : ders s r \<Longrightarrow> |
|
409 Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" by fact |
|
410 have asm: "\<Turnstile> v : ders (s @ [c]) r" by fact |
|
411 then have asm2: "\<Turnstile> injval (ders s r) c v : ders s r" |
|
412 by (simp add: Prf_injval ders_append) |
|
413 have "Some (flex r id (s @ [c]) v) = Some (flex r id s (injval (ders s r) c v))" |
|
414 by (simp add: flex_append) |
|
415 also have "... = decode (retrieve (bders (intern r) s) (injval (ders s r) c v)) r" |
|
416 using asm2 IH by simp |
|
417 also have "... = decode (retrieve (bder c (bders (intern r) s)) v) r" |
|
418 using asm by (simp_all add: bder_retrieve ders_append) |
|
419 finally show "Some (flex r id (s @ [c]) v) = |
|
420 decode (retrieve (bders (intern r) (s @ [c])) v) r" by (simp add: bders_append) |
|
421 qed |
|
422 |
|
423 definition blexer where |
|
424 "blexer r s \<equiv> if bnullable (bders (intern r) s) then |
|
425 decode (bmkeps (bders (intern r) s)) r else None" |
|
426 |
|
427 lemma blexer_correctness: |
|
428 shows "blexer r s = lexer r s" |
|
429 proof - |
|
430 { define bds where "bds \<equiv> bders (intern r) s" |
|
431 define ds where "ds \<equiv> ders s r" |
|
432 assume asm: "nullable ds" |
|
433 have era: "erase bds = ds" |
|
434 unfolding ds_def bds_def by simp |
|
435 have mke: "\<Turnstile> mkeps ds : ds" |
|
436 using asm by (simp add: mkeps_nullable) |
|
437 have "decode (bmkeps bds) r = decode (retrieve bds (mkeps ds)) r" |
|
438 using bmkeps_retrieve |
|
439 using asm era |
|
440 using bnullable_correctness by force |
|
441 also have "... = Some (flex r id s (mkeps ds))" |
|
442 using mke by (simp_all add: MAIN_decode ds_def bds_def) |
|
443 finally have "decode (bmkeps bds) r = Some (flex r id s (mkeps ds))" |
|
444 unfolding bds_def ds_def . |
|
445 } |
|
446 then show "blexer r s = lexer r s" |
|
447 unfolding blexer_def lexer_flex |
|
448 by (auto simp add: bnullable_correctness[symmetric]) |
|
449 qed |
|
450 |
|
451 |
|
452 unused_thms |
|
453 |
|
454 end |