diff -r f493a20feeb3 -r 04b5e904a220 thys3/Blexer.thy --- a/thys3/Blexer.thy Sat Apr 30 00:50:08 2022 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,454 +0,0 @@ - -theory Blexer - imports "Lexer" "PDerivs" -begin - -section \Bit-Encodings\ - -datatype bit = Z | S - -fun code :: "val \ bit list" -where - "code Void = []" -| "code (Char c) = []" -| "code (Left v) = Z # (code v)" -| "code (Right v) = S # (code v)" -| "code (Seq v1 v2) = (code v1) @ (code v2)" -| "code (Stars []) = [S]" -| "code (Stars (v # vs)) = (Z # code v) @ code (Stars vs)" - - -fun - Stars_add :: "val \ val \ val" -where - "Stars_add v (Stars vs) = Stars (v # vs)" - -function - decode' :: "bit list \ rexp \ (val * bit list)" -where - "decode' bs ZERO = (undefined, bs)" -| "decode' bs ONE = (Void, bs)" -| "decode' bs (CH d) = (Char d, bs)" -| "decode' [] (ALT r1 r2) = (Void, [])" -| "decode' (Z # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r1 in (Left v, bs'))" -| "decode' (S # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r2 in (Right v, bs'))" -| "decode' bs (SEQ r1 r2) = (let (v1, bs') = decode' bs r1 in - let (v2, bs'') = decode' bs' r2 in (Seq v1 v2, bs''))" -| "decode' [] (STAR r) = (Void, [])" -| "decode' (S # bs) (STAR r) = (Stars [], bs)" -| "decode' (Z # bs) (STAR r) = (let (v, bs') = decode' bs r in - let (vs, bs'') = decode' bs' (STAR r) - in (Stars_add v vs, bs''))" -by pat_completeness auto - -lemma decode'_smaller: - assumes "decode'_dom (bs, r)" - shows "length (snd (decode' bs r)) \ length bs" -using assms -apply(induct bs r) -apply(auto simp add: decode'.psimps split: prod.split) -using dual_order.trans apply blast -by (meson dual_order.trans le_SucI) - -termination "decode'" -apply(relation "inv_image (measure(%cs. size cs) <*lex*> measure(%s. size s)) (%(ds,r). (r,ds))") -apply(auto dest!: decode'_smaller) -by (metis less_Suc_eq_le snd_conv) - -definition - decode :: "bit list \ rexp \ val option" -where - "decode ds r \ (let (v, ds') = decode' ds r - in (if ds' = [] then Some v else None))" - -lemma decode'_code_Stars: - assumes "\v\set vs. \ v : r \ (\x. decode' (code v @ x) r = (v, x)) \ flat v \ []" - shows "decode' (code (Stars vs) @ ds) (STAR r) = (Stars vs, ds)" - using assms - apply(induct vs) - apply(auto) - done - -lemma decode'_code: - assumes "\ v : r" - shows "decode' ((code v) @ ds) r = (v, ds)" -using assms - apply(induct v r arbitrary: ds) - apply(auto) - using decode'_code_Stars by blast - -lemma decode_code: - assumes "\ v : r" - shows "decode (code v) r = Some v" - using assms unfolding decode_def - by (smt append_Nil2 decode'_code old.prod.case) - - -section {* Annotated Regular Expressions *} - -datatype arexp = - AZERO -| AONE "bit list" -| ACHAR "bit list" char -| ASEQ "bit list" arexp arexp -| AALTs "bit list" "arexp list" -| ASTAR "bit list" arexp - -abbreviation - "AALT bs r1 r2 \ AALTs bs [r1, r2]" - -fun asize :: "arexp \ nat" where - "asize AZERO = 1" -| "asize (AONE cs) = 1" -| "asize (ACHAR cs c) = 1" -| "asize (AALTs cs rs) = Suc (sum_list (map asize rs))" -| "asize (ASEQ cs r1 r2) = Suc (asize r1 + asize r2)" -| "asize (ASTAR cs r) = Suc (asize r)" - -fun - erase :: "arexp \ rexp" -where - "erase AZERO = ZERO" -| "erase (AONE _) = ONE" -| "erase (ACHAR _ c) = CH c" -| "erase (AALTs _ []) = ZERO" -| "erase (AALTs _ [r]) = (erase r)" -| "erase (AALTs bs (r#rs)) = ALT (erase r) (erase (AALTs bs rs))" -| "erase (ASEQ _ r1 r2) = SEQ (erase r1) (erase r2)" -| "erase (ASTAR _ r) = STAR (erase r)" - - -fun fuse :: "bit list \ arexp \ arexp" where - "fuse bs AZERO = AZERO" -| "fuse bs (AONE cs) = AONE (bs @ cs)" -| "fuse bs (ACHAR cs c) = ACHAR (bs @ cs) c" -| "fuse bs (AALTs cs rs) = AALTs (bs @ cs) rs" -| "fuse bs (ASEQ cs r1 r2) = ASEQ (bs @ cs) r1 r2" -| "fuse bs (ASTAR cs r) = ASTAR (bs @ cs) r" - -lemma fuse_append: - shows "fuse (bs1 @ bs2) r = fuse bs1 (fuse bs2 r)" - apply(induct r) - apply(auto) - done - - -fun intern :: "rexp \ arexp" where - "intern ZERO = AZERO" -| "intern ONE = AONE []" -| "intern (CH c) = ACHAR [] c" -| "intern (ALT r1 r2) = AALT [] (fuse [Z] (intern r1)) - (fuse [S] (intern r2))" -| "intern (SEQ r1 r2) = ASEQ [] (intern r1) (intern r2)" -| "intern (STAR r) = ASTAR [] (intern r)" - - -fun retrieve :: "arexp \ val \ bit list" where - "retrieve (AONE bs) Void = bs" -| "retrieve (ACHAR bs c) (Char d) = bs" -| "retrieve (AALTs bs [r]) v = bs @ retrieve r v" -| "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" -| "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" -| "retrieve (ASEQ bs r1 r2) (Seq v1 v2) = bs @ retrieve r1 v1 @ retrieve r2 v2" -| "retrieve (ASTAR bs r) (Stars []) = bs @ [S]" -| "retrieve (ASTAR bs r) (Stars (v#vs)) = - bs @ [Z] @ retrieve r v @ retrieve (ASTAR [] r) (Stars vs)" - - - -fun - bnullable :: "arexp \ bool" -where - "bnullable (AZERO) = False" -| "bnullable (AONE bs) = True" -| "bnullable (ACHAR bs c) = False" -| "bnullable (AALTs bs rs) = (\r \ set rs. bnullable r)" -| "bnullable (ASEQ bs r1 r2) = (bnullable r1 \ bnullable r2)" -| "bnullable (ASTAR bs r) = True" - -abbreviation - bnullables :: "arexp list \ bool" -where - "bnullables rs \ (\r \ set rs. bnullable r)" - -fun - bmkeps :: "arexp \ bit list" and - bmkepss :: "arexp list \ bit list" -where - "bmkeps(AONE bs) = bs" -| "bmkeps(ASEQ bs r1 r2) = bs @ (bmkeps r1) @ (bmkeps r2)" -| "bmkeps(AALTs bs rs) = bs @ (bmkepss rs)" -| "bmkeps(ASTAR bs r) = bs @ [S]" -| "bmkepss (r # rs) = (if bnullable(r) then (bmkeps r) else (bmkepss rs))" - -lemma bmkepss1: - assumes "\ bnullables rs1" - shows "bmkepss (rs1 @ rs2) = bmkepss rs2" - using assms - by (induct rs1) (auto) - -lemma bmkepss2: - assumes "bnullables rs1" - shows "bmkepss (rs1 @ rs2) = bmkepss rs1" - using assms - by (induct rs1) (auto) - - -fun - bder :: "char \ arexp \ arexp" -where - "bder c (AZERO) = AZERO" -| "bder c (AONE bs) = AZERO" -| "bder c (ACHAR bs d) = (if c = d then AONE bs else AZERO)" -| "bder c (AALTs bs rs) = AALTs bs (map (bder c) rs)" -| "bder c (ASEQ bs r1 r2) = - (if bnullable r1 - then AALT bs (ASEQ [] (bder c r1) r2) (fuse (bmkeps r1) (bder c r2)) - else ASEQ bs (bder c r1) r2)" -| "bder c (ASTAR bs r) = ASEQ (bs @ [Z]) (bder c r) (ASTAR [] r)" - - -fun - bders :: "arexp \ string \ arexp" -where - "bders r [] = r" -| "bders r (c#s) = bders (bder c r) s" - -lemma bders_append: - "bders c (s1 @ s2) = bders (bders c s1) s2" - apply(induct s1 arbitrary: c s2) - apply(simp_all) - done - -lemma bnullable_correctness: - shows "nullable (erase r) = bnullable r" - apply(induct r rule: erase.induct) - apply(simp_all) - done - -lemma erase_fuse: - shows "erase (fuse bs r) = erase r" - apply(induct r rule: erase.induct) - apply(simp_all) - done - -lemma erase_intern [simp]: - shows "erase (intern r) = r" - apply(induct r) - apply(simp_all add: erase_fuse) - done - -lemma erase_bder [simp]: - shows "erase (bder a r) = der a (erase r)" - apply(induct r rule: erase.induct) - apply(simp_all add: erase_fuse bnullable_correctness) - done - -lemma erase_bders [simp]: - shows "erase (bders r s) = ders s (erase r)" - apply(induct s arbitrary: r ) - apply(simp_all) - done - -lemma bnullable_fuse: - shows "bnullable (fuse bs r) = bnullable r" - apply(induct r arbitrary: bs) - apply(auto) - done - -lemma retrieve_encode_STARS: - assumes "\v\set vs. \ v : r \ code v = retrieve (intern r) v" - shows "code (Stars vs) = retrieve (ASTAR [] (intern r)) (Stars vs)" - using assms - apply(induct vs) - apply(simp_all) - done - -lemma retrieve_fuse2: - assumes "\ v : (erase r)" - shows "retrieve (fuse bs r) v = bs @ retrieve r v" - using assms - apply(induct r arbitrary: v bs) - apply(auto elim: Prf_elims)[4] - apply(case_tac x2a) - apply(simp) - using Prf_elims(1) apply blast - apply(case_tac x2a) - apply(simp) - apply(simp) - apply(case_tac list) - apply(simp) - apply(simp) - apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5)) - apply(simp) - using retrieve_encode_STARS - apply(auto elim!: Prf_elims)[1] - apply(case_tac vs) - apply(simp) - apply(simp) - done - -lemma retrieve_fuse: - assumes "\ v : r" - shows "retrieve (fuse bs (intern r)) v = bs @ retrieve (intern r) v" - using assms - by (simp_all add: retrieve_fuse2) - - -lemma retrieve_code: - assumes "\ v : r" - shows "code v = retrieve (intern r) v" - using assms - apply(induct v r ) - apply(simp_all add: retrieve_fuse retrieve_encode_STARS) - done - - -lemma retrieve_AALTs_bnullable1: - assumes "bnullable r" - shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs)))) - = bs @ retrieve r (mkeps (erase r))" - using assms - apply(case_tac rs) - apply(auto simp add: bnullable_correctness) - done - -lemma retrieve_AALTs_bnullable2: - assumes "\bnullable r" "bnullables rs" - shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs)))) - = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))" - using assms - apply(induct rs arbitrary: r bs) - apply(auto) - using bnullable_correctness apply blast - apply(case_tac rs) - apply(auto) - using bnullable_correctness apply blast - apply(case_tac rs) - apply(auto) - done - -lemma bmkeps_retrieve_AALTs: - assumes "\r \ set rs. bnullable r \ bmkeps r = retrieve r (mkeps (erase r))" - "bnullables rs" - shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))" - using assms - apply(induct rs arbitrary: bs) - apply(auto) - using retrieve_AALTs_bnullable1 apply presburger - apply (metis retrieve_AALTs_bnullable2) - apply (simp add: retrieve_AALTs_bnullable1) - by (metis retrieve_AALTs_bnullable2) - - -lemma bmkeps_retrieve: - assumes "bnullable r" - shows "bmkeps r = retrieve r (mkeps (erase r))" - using assms - apply(induct r) - apply(auto) - using bmkeps_retrieve_AALTs by auto - -lemma bder_retrieve: - assumes "\ v : der c (erase r)" - shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)" - using assms - apply(induct r arbitrary: v rule: erase.induct) - using Prf_elims(1) apply auto[1] - using Prf_elims(1) apply auto[1] - apply(auto)[1] - apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2)) - using Prf_elims(1) apply blast - (* AALTs case *) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(case_tac rs) - apply(simp) - apply(simp) - using Prf_elims(3) apply fastforce - (* ASEQ case *) - apply(simp) - apply(case_tac "nullable (erase r1)") - apply(simp) - apply(erule Prf_elims) - using Prf_elims(2) bnullable_correctness apply force - apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2) - apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2) - using Prf_elims(2) apply force - (* ASTAR case *) - apply(rename_tac bs r v) - apply(simp) - apply(erule Prf_elims) - apply(clarify) - apply(erule Prf_elims) - apply(clarify) - by (simp add: retrieve_fuse2) - - -lemma MAIN_decode: - assumes "\ v : ders s r" - shows "Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" - using assms -proof (induct s arbitrary: v rule: rev_induct) - case Nil - have "\ v : ders [] r" by fact - then have "\ v : r" by simp - then have "Some v = decode (retrieve (intern r) v) r" - using decode_code retrieve_code by auto - then show "Some (flex r id [] v) = decode (retrieve (bders (intern r) []) v) r" - by simp -next - case (snoc c s v) - have IH: "\v. \ v : ders s r \ - Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" by fact - have asm: "\ v : ders (s @ [c]) r" by fact - then have asm2: "\ injval (ders s r) c v : ders s r" - by (simp add: Prf_injval ders_append) - have "Some (flex r id (s @ [c]) v) = Some (flex r id s (injval (ders s r) c v))" - by (simp add: flex_append) - also have "... = decode (retrieve (bders (intern r) s) (injval (ders s r) c v)) r" - using asm2 IH by simp - also have "... = decode (retrieve (bder c (bders (intern r) s)) v) r" - using asm by (simp_all add: bder_retrieve ders_append) - finally show "Some (flex r id (s @ [c]) v) = - decode (retrieve (bders (intern r) (s @ [c])) v) r" by (simp add: bders_append) -qed - -definition blexer where - "blexer r s \ if bnullable (bders (intern r) s) then - decode (bmkeps (bders (intern r) s)) r else None" - -lemma blexer_correctness: - shows "blexer r s = lexer r s" -proof - - { define bds where "bds \ bders (intern r) s" - define ds where "ds \ ders s r" - assume asm: "nullable ds" - have era: "erase bds = ds" - unfolding ds_def bds_def by simp - have mke: "\ mkeps ds : ds" - using asm by (simp add: mkeps_nullable) - have "decode (bmkeps bds) r = decode (retrieve bds (mkeps ds)) r" - using bmkeps_retrieve - using asm era - using bnullable_correctness by force - also have "... = Some (flex r id s (mkeps ds))" - using mke by (simp_all add: MAIN_decode ds_def bds_def) - finally have "decode (bmkeps bds) r = Some (flex r id s (mkeps ds))" - unfolding bds_def ds_def . - } - then show "blexer r s = lexer r s" - unfolding blexer_def lexer_flex - by (auto simp add: bnullable_correctness[symmetric]) -qed - - -unused_thms - -end