thys3/src/Blexer2.thy
changeset 504 79fdaad6f36b
child 509 a42524ca6fc4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/thys3/src/Blexer2.thy	Sun May 08 09:58:50 2022 +0100
@@ -0,0 +1,564 @@
+
+theory Blexer2
+  imports "Lexer" "PDerivs"
+begin
+
+section \<open>Bit-Encodings\<close>
+
+datatype bit = Z | S
+
+fun code :: "val \<Rightarrow> bit list"
+where
+  "code Void = []"
+| "code (Char c) = []"
+| "code (Left v) = Z # (code v)"
+| "code (Right v) = S # (code v)"
+| "code (Seq v1 v2) = (code v1) @ (code v2)"
+| "code (Stars []) = [S]"
+| "code (Stars (v # vs)) =  (Z # code v) @ code (Stars vs)"
+
+
+fun 
+  Stars_add :: "val \<Rightarrow> val \<Rightarrow> val"
+where
+  "Stars_add v (Stars vs) = Stars (v # vs)"
+
+function
+  decode' :: "bit list \<Rightarrow> rexp \<Rightarrow> (val * bit list)"
+where
+  "decode' bs ZERO = (undefined, bs)"
+| "decode' bs ONE = (Void, bs)"
+| "decode' bs (CH d) = (Char d, bs)"
+| "decode' [] (ALT r1 r2) = (Void, [])"
+| "decode' (Z # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r1 in (Left v, bs'))"
+| "decode' (S # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r2 in (Right v, bs'))"
+| "decode' bs (SEQ r1 r2) = (let (v1, bs') = decode' bs r1 in
+                             let (v2, bs'') = decode' bs' r2 in (Seq v1 v2, bs''))"
+| "decode' [] (STAR r) = (Void, [])"
+| "decode' (S # bs) (STAR r) = (Stars [], bs)"
+| "decode' (Z # bs) (STAR r) = (let (v, bs') = decode' bs r in
+                                    let (vs, bs'') = decode' bs' (STAR r) 
+                                    in (Stars_add v vs, bs''))"
+by pat_completeness auto
+
+lemma decode'_smaller:
+  assumes "decode'_dom (bs, r)"
+  shows "length (snd (decode' bs r)) \<le> length bs"
+using assms
+apply(induct bs r)
+apply(auto simp add: decode'.psimps split: prod.split)
+using dual_order.trans apply blast
+by (meson dual_order.trans le_SucI)
+
+termination "decode'"  
+apply(relation "inv_image (measure(%cs. size cs) <*lex*> measure(%s. size s)) (%(ds,r). (r,ds))") 
+apply(auto dest!: decode'_smaller)
+by (metis less_Suc_eq_le snd_conv)
+
+definition
+  decode :: "bit list \<Rightarrow> rexp \<Rightarrow> val option"
+where
+  "decode ds r \<equiv> (let (v, ds') = decode' ds r 
+                  in (if ds' = [] then Some v else None))"
+
+lemma decode'_code_Stars:
+  assumes "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> (\<forall>x. decode' (code v @ x) r = (v, x)) \<and> flat v \<noteq> []" 
+  shows "decode' (code (Stars vs) @ ds) (STAR r) = (Stars vs, ds)"
+  using assms
+  apply(induct vs)
+  apply(auto)
+  done
+
+lemma decode'_code:
+  assumes "\<Turnstile> v : r"
+  shows "decode' ((code v) @ ds) r = (v, ds)"
+using assms
+  apply(induct v r arbitrary: ds) 
+  apply(auto)
+  using decode'_code_Stars by blast
+
+lemma decode_code:
+  assumes "\<Turnstile> v : r"
+  shows "decode (code v) r = Some v"
+  using assms unfolding decode_def
+  by (smt append_Nil2 decode'_code old.prod.case)
+
+
+section {* Annotated Regular Expressions *}
+
+datatype arexp = 
+  AZERO
+| AONE "bit list"
+| ACHAR "bit list" char
+| ASEQs "bit list" "arexp list"
+| AALTs "bit list" "arexp list"
+| ASTAR "bit list" arexp
+
+abbreviation
+  "AALT bs r1 r2 \<equiv> AALTs bs [r1, r2]"
+
+abbreviation
+  "ASEQ bs r1 r2 \<equiv> ASEQs bs [r1, r2]"
+
+fun asize :: "arexp \<Rightarrow> nat" where
+  "asize AZERO = 1"
+| "asize (AONE cs) = 1" 
+| "asize (ACHAR cs c) = 1"
+| "asize (AALTs cs rs) = Suc (sum_list (map asize rs))"
+| "asize (ASEQs cs rs) = Suc (sum_list (map asize rs))"
+| "asize (ASTAR cs r) = Suc (asize r)"
+
+fun 
+  erase :: "arexp \<Rightarrow> rexp"
+where
+  "erase AZERO = ZERO"
+| "erase (AONE _) = ONE"
+| "erase (ACHAR _ c) = CH c"
+| "erase (AALTs _ []) = ZERO"
+| "erase (AALTs _ [r]) = (erase r)"
+| "erase (AALTs bs (r#rs)) = ALT (erase r) (erase (AALTs bs rs))"
+| "erase (ASEQs _ []) = ONE"
+| "erase (ASEQs _ [r]) = (erase r)"
+| "erase (ASEQs bs (r#rs)) = SEQ (erase r) (erase (ASEQs bs rs))"
+| "erase (ASTAR _ r) = STAR (erase r)"
+
+
+fun fuse :: "bit list \<Rightarrow> arexp \<Rightarrow> arexp" where
+  "fuse bs AZERO = AZERO"
+| "fuse bs (AONE cs) = AONE (bs @ cs)" 
+| "fuse bs (ACHAR cs c) = ACHAR (bs @ cs) c"
+| "fuse bs (AALTs cs rs) = AALTs (bs @ cs) rs"
+| "fuse bs (ASEQs cs rs) = ASEQs (bs @ cs) rs"
+| "fuse bs (ASTAR cs r) = ASTAR (bs @ cs) r"
+
+lemma fuse_append:
+  shows "fuse (bs1 @ bs2) r = fuse bs1 (fuse bs2 r)"
+  apply(induct r)
+  apply(auto)
+  done
+
+
+fun intern :: "rexp \<Rightarrow> arexp" where
+  "intern ZERO = AZERO"
+| "intern ONE = AONE []"
+| "intern (CH c) = ACHAR [] c"
+| "intern (ALT r1 r2) = AALT [] (fuse [Z] (intern r1)) 
+                                (fuse [S]  (intern r2))"
+| "intern (SEQ r1 r2) = ASEQ [] (intern r1) (intern r2)"
+| "intern (STAR r) = ASTAR [] (intern r)"
+
+
+fun retrieve :: "arexp \<Rightarrow> val \<Rightarrow> bit list" where
+  "retrieve (AONE bs) Void = bs"
+| "retrieve (ACHAR bs c) (Char d) = bs"
+| "retrieve (AALTs bs [r]) v = bs @ retrieve r v"
+| "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
+| "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
+| "retrieve (ASEQs bs []) v = bs"
+| "retrieve (ASEQs bs [r]) v = bs @ retrieve r v"
+| "retrieve (ASEQs bs (r#rs)) (Seq v1 v2) = bs @ retrieve r v1 @ retrieve (ASEQs [] rs) v2"
+| "retrieve (ASTAR bs r) (Stars []) = bs @ [S]"
+| "retrieve (ASTAR bs r) (Stars (v#vs)) = 
+     bs @ [Z] @ retrieve r v @ retrieve (ASTAR [] r) (Stars vs)"
+
+
+
+fun
+ bnullable :: "arexp \<Rightarrow> bool"
+where
+  "bnullable (AZERO) = False"
+| "bnullable (AONE bs) = True"
+| "bnullable (ACHAR bs c) = False"
+| "bnullable (AALTs bs rs) = (\<exists>r \<in> set rs. bnullable r)"
+| "bnullable (ASEQs bs rs) = (\<forall>r \<in> set rs. bnullable r)"
+| "bnullable (ASTAR bs r) = True"
+
+abbreviation
+  bnullables :: "arexp list \<Rightarrow> bool"
+where
+  "bnullables rs \<equiv> (\<exists>r \<in> set rs. bnullable r)"
+
+fun 
+  bmkeps :: "arexp \<Rightarrow> bit list" and
+  bmkepss :: "arexp list \<Rightarrow> bit list"
+where
+  "bmkeps(AONE bs) = bs"
+| "bmkeps(ASEQs bs rs) = bs @ concat (map bmkeps rs)"
+| "bmkeps(AALTs bs rs) = bs @ (bmkepss rs)"
+| "bmkeps(ASTAR bs r) = bs @ [S]"
+| "bmkepss (r # rs) = (if bnullable(r) then (bmkeps r) else (bmkepss rs))"
+
+lemma bmkepss1:
+  assumes "\<not> bnullables rs1"
+  shows "bmkepss (rs1 @ rs2) = bmkepss rs2"
+  using assms
+  by (induct rs1) (auto)
+
+lemma bmkepss2:
+  assumes "bnullables rs1"
+  shows "bmkepss (rs1 @ rs2) = bmkepss rs1"
+  using assms
+  by (induct rs1) (auto)
+
+
+fun
+ bder :: "char \<Rightarrow> arexp \<Rightarrow> arexp"
+where
+  "bder c (AZERO) = AZERO"
+| "bder c (AONE bs) = AZERO"
+| "bder c (ACHAR bs d) = (if c = d then AONE bs else AZERO)"
+| "bder c (AALTs bs rs) = AALTs bs (map (bder c) rs)"
+| "bder c (ASEQs bs []) = AZERO" 
+| "bder c (ASEQs bs [r1]) = fuse bs (bder c r1)" 
+| "bder c (ASEQs bs (r1#rs)) =
+     (if bnullable r1
+      then AALT bs (ASEQs [] ((bder c r1) # rs)) (fuse (bmkeps r1) (bder c (ASEQs [] rs)))
+      else ASEQs bs ((bder c r1) # rs))"
+| "bder c (ASTAR bs r) = ASEQ (bs @ [Z]) (bder c r) (ASTAR [] r)"
+
+
+fun 
+  bders :: "arexp \<Rightarrow> string \<Rightarrow> arexp"
+where
+  "bders r [] = r"
+| "bders r (c#s) = bders (bder c r) s"
+
+lemma bders_append:
+  "bders c (s1 @ s2) = bders (bders c s1) s2"
+  apply(induct s1 arbitrary: c s2)
+  apply(simp_all)
+  done
+
+lemma bnullable_correctness:
+  shows "nullable (erase r) = bnullable r"
+  apply(induct r rule: erase.induct)
+  apply(simp_all)
+  done
+
+lemma erase_fuse:
+  shows "erase (fuse bs r) = erase r"
+  apply(induct r rule: erase.induct)
+  apply(simp_all)
+  done
+
+lemma erase_intern [simp]:
+  shows "erase (intern r) = r"
+  apply(induct r)
+  apply(simp_all add: erase_fuse)
+  done
+
+lemma erase_ASEQs:
+  shows "erase (ASEQs [] rs) = erase (ASEQs bs rs)"
+  apply(induct rs arbitrary: bs)
+  apply(auto)
+  apply(case_tac rs)
+   apply(auto)
+  done
+
+lemma erase_bder [simp]:
+  shows "erase (bder a r) = der a (erase r)"
+  apply(induct r rule: erase.induct)
+           apply(simp_all add: erase_fuse bnullable_correctness)      
+  apply(case_tac va)
+   apply(simp_all add: erase_fuse bnullable_correctness)      
+  apply(auto)
+    apply(simp_all add: erase_fuse bnullable_correctness erase_ASEQs)   
+  done
+
+lemma erase_bders [simp]:
+  shows "erase (bders r s) = ders s (erase r)"
+  apply(induct s arbitrary: r )
+  apply(simp_all)
+  done
+
+lemma bnullable_fuse:
+  shows "bnullable (fuse bs r) = bnullable r"
+  apply(induct r arbitrary: bs)
+  apply(auto)
+  done
+
+lemma retrieve_encode_STARS:
+  assumes "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> code v = retrieve (intern r) v"
+  shows "code (Stars vs) = retrieve (ASTAR [] (intern r)) (Stars vs)"
+  using assms
+  apply(induct vs)
+  apply(simp_all)
+  done
+
+
+
+lemma retrieve_fuse2:
+  assumes "\<Turnstile> v : (erase r)"
+  shows "retrieve (fuse bs r) v = bs @ retrieve r v"
+  using assms
+  apply(induct r arbitrary: v bs)
+  apply(auto elim: Prf_elims)[4]
+    defer
+    apply(case_tac x2a)
+     apply(simp)
+  using Prf_elims(1) apply blast
+    apply(simp)
+  apply(case_tac list)
+     apply(simp)
+    apply(simp)
+  apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5))
+  using retrieve_encode_STARS
+  apply(auto elim!: Prf_elims)[1]
+  apply(case_tac vs)
+  apply(simp)
+   apply(simp)
+  apply(case_tac x2a)
+     apply(simp)
+    apply(simp)
+  apply(case_tac list)
+   apply(simp)
+  apply(simp)
+  by (smt (verit, best) Prf_elims(2) append_assoc retrieve.simps(8))
+
+lemma retrieve_fuse:
+  assumes "\<Turnstile> v : r"
+  shows "retrieve (fuse bs (intern r)) v = bs @ retrieve (intern r) v"
+  using assms 
+  by (simp_all add: retrieve_fuse2)
+
+
+lemma retrieve_code:
+  assumes "\<Turnstile> v : r"
+  shows "code v = retrieve (intern r) v"
+  using assms
+  apply(induct v r )
+  apply(simp_all add: retrieve_fuse retrieve_encode_STARS)
+  done
+
+
+lemma retrieve_AALTs_bnullable1:
+  assumes "bnullable r"
+  shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
+         = bs @ retrieve r (mkeps (erase r))"
+  using assms
+  apply(case_tac rs)
+  apply(auto simp add: bnullable_correctness)
+  done
+
+lemma retrieve_AALTs_bnullable2:
+  assumes "\<not>bnullable r" "bnullables rs"
+  shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
+         = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
+  using assms
+  apply(induct rs arbitrary: r bs)
+  apply(auto)
+  using bnullable_correctness apply blast
+  apply(case_tac rs)
+  apply(auto)
+  using bnullable_correctness apply blast
+  apply(case_tac rs)
+  apply(auto)
+  done
+
+lemma bmkeps_retrieve_AALTs: 
+  assumes "\<forall>r \<in> set rs. bnullable r \<longrightarrow> bmkeps r = retrieve r (mkeps (erase r))" 
+          "bnullables rs"
+  shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
+ using assms
+  apply(induct rs arbitrary: bs)
+  apply(auto)
+  using retrieve_AALTs_bnullable1 apply presburger
+  apply (metis retrieve_AALTs_bnullable2)
+  apply (simp add: retrieve_AALTs_bnullable1)
+  by (metis retrieve_AALTs_bnullable2)
+
+lemma bmkeps_retrieve_ASEQs: 
+  assumes "\<forall>r \<in> set rs. bmkeps r = retrieve r (mkeps (erase r))" 
+          "\<forall>r \<in> set rs. bnullable r"
+  shows "x1 @ concat (map bmkeps rs) = retrieve (ASEQs x1 rs) (mkeps (erase (ASEQs x1 rs)))"
+  using assms
+  apply(induct rs arbitrary: x1)
+  apply(auto)
+    apply(case_tac rs)
+   apply(auto)
+  by (metis erase_ASEQs self_append_conv2)
+  
+
+lemma bmkeps_retrieve:
+  assumes "bnullable r"
+  shows "bmkeps r = retrieve r (mkeps (erase r))"
+  using assms
+  apply(induct r)
+         apply(auto)    
+   defer
+  using bmkeps_retrieve_AALTs apply auto
+  by (simp add: bmkeps_retrieve_ASEQs)
+  
+lemma bder_retrieve:
+  assumes "\<Turnstile> v : der c (erase r)"
+  shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)"
+  using assms  
+  apply(induct r arbitrary: v rule: erase.induct)
+  using Prf_elims(1) apply auto[1]
+  using Prf_elims(1) apply auto[1]
+  apply(auto)[1]
+  apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2))
+  using Prf_elims(1) apply blast
+  (* AALTs case *)
+  apply(simp)
+  apply(erule Prf_elims)
+  apply(simp)
+  apply(simp)
+  apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v)
+  apply(erule Prf_elims)
+  apply(simp)
+  apply(simp)
+  apply(case_tac rs)
+  apply(simp)
+  apply(simp)
+  using Prf_elims(3) apply fastforce
+  (* ASTAR case *)  
+  prefer 4
+  apply(rename_tac bs r v)
+  apply(simp)  
+  apply(erule Prf_elims)
+  apply(clarify)
+  apply(erule Prf_elims)
+  apply(clarify)
+  apply (simp add: retrieve_fuse2)
+  (* ASEQ case *) 
+    prefer 2
+     apply(simp)
+     apply (simp add: erase_fuse retrieve_fuse2)
+   apply(auto)[1]
+  using Prf_elims(1) apply auto[1]
+  apply(simp)
+  apply(auto)
+   apply(subgoal_tac "nullable (erase r)")
+    prefer 2
+  using bnullable_correctness apply blast  
+  apply(simp)
+  apply(erule Prf_elims)
+    apply(simp)
+  using Prf_elims(2) apply force
+   apply(simp)
+  prefer 2
+  apply(subgoal_tac "\<not>nullable (erase r)")
+    apply(simp)
+    prefer 2
+  using bnullable_correctness apply presburger
+  using Prf_elims(2) apply force
+  apply (simp add: bmkeps_retrieve erase_fuse retrieve_fuse2)
+  apply(case_tac va)
+   apply(simp)
+  apply (simp add: erase_fuse retrieve_fuse2)
+  apply(simp)
+  apply(auto)
+   apply(subgoal_tac "nullable (erase v)")
+    prefer 2
+  using bnullable_correctness apply blast  
+   apply(simp)
+  apply(erule Prf_elims)
+    apply(simp)
+  apply(erule Prf_elims)
+    apply(simp)
+    apply(case_tac list)
+     apply(simp)
+  apply(rotate_tac 1)
+  apply(drule_tac x="Left v1" in meta_spec)
+     apply(drule meta_mp)
+      apply(rule Prf.intros)
+      apply(simp)
+  apply(rule Prf.intros)
+      apply(simp)
+      apply(simp)
+  apply simp
+    apply(simp)
+    apply(case_tac "bnullable a") 
+     apply(simp)
+    apply(subgoal_tac "nullable (erase a)")
+    prefer 2
+  using bnullable_correctness apply blast
+     apply(simp)
+  apply(rotate_tac 1)
+  apply(drule_tac x="Left v1" in meta_spec)
+     apply(drule meta_mp)
+      apply(rule Prf.intros)
+      apply(simp)
+  apply(rule Prf.intros)
+       apply(simp)
+  apply force
+  apply simp
+     apply(subgoal_tac "\<not>nullable (erase a)")
+     prefer 2
+  using bnullable_correctness apply presburger
+    apply(simp)
+   apply(rotate_tac 1)
+  apply(drule_tac x="Left v1" in meta_spec)
+     apply(drule meta_mp)
+     apply(rule Prf.intros)
+  using Prf.intros(1) apply blast
+    apply simp
+  using Prf.intros(3) apply fastforce
+   apply(subgoal_tac "\<not>nullable (erase v)")
+     prefer 2
+  using bnullable_correctness apply presburger
+  apply(simp)
+  using Prf_elims(2) by force
+
+lemma MAIN_decode:
+  assumes "\<Turnstile> v : ders s r"
+  shows "Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r"
+  using assms
+proof (induct s arbitrary: v rule: rev_induct)
+  case Nil
+  have "\<Turnstile> v : ders [] r" by fact
+  then have "\<Turnstile> v : r" by simp
+  then have "Some v = decode (retrieve (intern r) v) r"
+    using decode_code retrieve_code by auto
+  then show "Some (flex r id [] v) = decode (retrieve (bders (intern r) []) v) r"
+    by simp
+next
+  case (snoc c s v)
+  have IH: "\<And>v. \<Turnstile> v : ders s r \<Longrightarrow> 
+     Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" by fact
+  have asm: "\<Turnstile> v : ders (s @ [c]) r" by fact
+  then have asm2: "\<Turnstile> injval (ders s r) c v : ders s r" 
+    by (simp add: Prf_injval ders_append)
+  have "Some (flex r id (s @ [c]) v) = Some (flex r id s (injval (ders s r) c v))"
+    by (simp add: flex_append)
+  also have "... = decode (retrieve (bders (intern r) s) (injval (ders s r) c v)) r"
+    using asm2 IH by simp
+  also have "... = decode (retrieve (bder c (bders (intern r) s)) v) r"
+    using asm by (simp_all add: bder_retrieve ders_append)
+  finally show "Some (flex r id (s @ [c]) v) = 
+                 decode (retrieve (bders (intern r) (s @ [c])) v) r" by (simp add: bders_append)
+qed
+
+definition blexer where
+ "blexer r s \<equiv> if bnullable (bders (intern r) s) then 
+                decode (bmkeps (bders (intern r) s)) r else None"
+
+lemma blexer_correctness:
+  shows "blexer r s = lexer r s"
+proof -
+  { define bds where "bds \<equiv> bders (intern r) s"
+    define ds  where "ds \<equiv> ders s r"
+    assume asm: "nullable ds"
+    have era: "erase bds = ds" 
+      unfolding ds_def bds_def by simp
+    have mke: "\<Turnstile> mkeps ds : ds"
+      using asm by (simp add: mkeps_nullable)
+    have "decode (bmkeps bds) r = decode (retrieve bds (mkeps ds)) r"
+      using bmkeps_retrieve
+      using asm era
+      using bnullable_correctness by force 
+    also have "... =  Some (flex r id s (mkeps ds))"
+      using mke by (simp_all add: MAIN_decode ds_def bds_def)
+    finally have "decode (bmkeps bds) r = Some (flex r id s (mkeps ds))" 
+      unfolding bds_def ds_def .
+  }
+  then show "blexer r s = lexer r s"
+    unfolding blexer_def lexer_flex
+    by (auto simp add: bnullable_correctness[symmetric])
+qed
+
+
+unused_thms
+
+end