thys3/src/Blexer.thy
changeset 496 f493a20feeb3
parent 495 f9cdc295ccf7
child 563 c92a41d9c4da
equal deleted inserted replaced
495:f9cdc295ccf7 496:f493a20feeb3
       
     1 
       
     2 theory Blexer
       
     3   imports "Lexer" "PDerivs"
       
     4 begin
       
     5 
       
     6 section \<open>Bit-Encodings\<close>
       
     7 
       
     8 datatype bit = Z | S
       
     9 
       
    10 fun code :: "val \<Rightarrow> bit list"
       
    11 where
       
    12   "code Void = []"
       
    13 | "code (Char c) = []"
       
    14 | "code (Left v) = Z # (code v)"
       
    15 | "code (Right v) = S # (code v)"
       
    16 | "code (Seq v1 v2) = (code v1) @ (code v2)"
       
    17 | "code (Stars []) = [S]"
       
    18 | "code (Stars (v # vs)) =  (Z # code v) @ code (Stars vs)"
       
    19 
       
    20 
       
    21 fun 
       
    22   Stars_add :: "val \<Rightarrow> val \<Rightarrow> val"
       
    23 where
       
    24   "Stars_add v (Stars vs) = Stars (v # vs)"
       
    25 
       
    26 function
       
    27   decode' :: "bit list \<Rightarrow> rexp \<Rightarrow> (val * bit list)"
       
    28 where
       
    29   "decode' bs ZERO = (undefined, bs)"
       
    30 | "decode' bs ONE = (Void, bs)"
       
    31 | "decode' bs (CH d) = (Char d, bs)"
       
    32 | "decode' [] (ALT r1 r2) = (Void, [])"
       
    33 | "decode' (Z # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r1 in (Left v, bs'))"
       
    34 | "decode' (S # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r2 in (Right v, bs'))"
       
    35 | "decode' bs (SEQ r1 r2) = (let (v1, bs') = decode' bs r1 in
       
    36                              let (v2, bs'') = decode' bs' r2 in (Seq v1 v2, bs''))"
       
    37 | "decode' [] (STAR r) = (Void, [])"
       
    38 | "decode' (S # bs) (STAR r) = (Stars [], bs)"
       
    39 | "decode' (Z # bs) (STAR r) = (let (v, bs') = decode' bs r in
       
    40                                     let (vs, bs'') = decode' bs' (STAR r) 
       
    41                                     in (Stars_add v vs, bs''))"
       
    42 by pat_completeness auto
       
    43 
       
    44 lemma decode'_smaller:
       
    45   assumes "decode'_dom (bs, r)"
       
    46   shows "length (snd (decode' bs r)) \<le> length bs"
       
    47 using assms
       
    48 apply(induct bs r)
       
    49 apply(auto simp add: decode'.psimps split: prod.split)
       
    50 using dual_order.trans apply blast
       
    51 by (meson dual_order.trans le_SucI)
       
    52 
       
    53 termination "decode'"  
       
    54 apply(relation "inv_image (measure(%cs. size cs) <*lex*> measure(%s. size s)) (%(ds,r). (r,ds))") 
       
    55 apply(auto dest!: decode'_smaller)
       
    56 by (metis less_Suc_eq_le snd_conv)
       
    57 
       
    58 definition
       
    59   decode :: "bit list \<Rightarrow> rexp \<Rightarrow> val option"
       
    60 where
       
    61   "decode ds r \<equiv> (let (v, ds') = decode' ds r 
       
    62                   in (if ds' = [] then Some v else None))"
       
    63 
       
    64 lemma decode'_code_Stars:
       
    65   assumes "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> (\<forall>x. decode' (code v @ x) r = (v, x)) \<and> flat v \<noteq> []" 
       
    66   shows "decode' (code (Stars vs) @ ds) (STAR r) = (Stars vs, ds)"
       
    67   using assms
       
    68   apply(induct vs)
       
    69   apply(auto)
       
    70   done
       
    71 
       
    72 lemma decode'_code:
       
    73   assumes "\<Turnstile> v : r"
       
    74   shows "decode' ((code v) @ ds) r = (v, ds)"
       
    75 using assms
       
    76   apply(induct v r arbitrary: ds) 
       
    77   apply(auto)
       
    78   using decode'_code_Stars by blast
       
    79 
       
    80 lemma decode_code:
       
    81   assumes "\<Turnstile> v : r"
       
    82   shows "decode (code v) r = Some v"
       
    83   using assms unfolding decode_def
       
    84   by (smt append_Nil2 decode'_code old.prod.case)
       
    85 
       
    86 
       
    87 section {* Annotated Regular Expressions *}
       
    88 
       
    89 datatype arexp = 
       
    90   AZERO
       
    91 | AONE "bit list"
       
    92 | ACHAR "bit list" char
       
    93 | ASEQ "bit list" arexp arexp
       
    94 | AALTs "bit list" "arexp list"
       
    95 | ASTAR "bit list" arexp
       
    96 
       
    97 abbreviation
       
    98   "AALT bs r1 r2 \<equiv> AALTs bs [r1, r2]"
       
    99 
       
   100 fun asize :: "arexp \<Rightarrow> nat" where
       
   101   "asize AZERO = 1"
       
   102 | "asize (AONE cs) = 1" 
       
   103 | "asize (ACHAR cs c) = 1"
       
   104 | "asize (AALTs cs rs) = Suc (sum_list (map asize rs))"
       
   105 | "asize (ASEQ cs r1 r2) = Suc (asize r1 + asize r2)"
       
   106 | "asize (ASTAR cs r) = Suc (asize r)"
       
   107 
       
   108 fun 
       
   109   erase :: "arexp \<Rightarrow> rexp"
       
   110 where
       
   111   "erase AZERO = ZERO"
       
   112 | "erase (AONE _) = ONE"
       
   113 | "erase (ACHAR _ c) = CH c"
       
   114 | "erase (AALTs _ []) = ZERO"
       
   115 | "erase (AALTs _ [r]) = (erase r)"
       
   116 | "erase (AALTs bs (r#rs)) = ALT (erase r) (erase (AALTs bs rs))"
       
   117 | "erase (ASEQ _ r1 r2) = SEQ (erase r1) (erase r2)"
       
   118 | "erase (ASTAR _ r) = STAR (erase r)"
       
   119 
       
   120 
       
   121 fun fuse :: "bit list \<Rightarrow> arexp \<Rightarrow> arexp" where
       
   122   "fuse bs AZERO = AZERO"
       
   123 | "fuse bs (AONE cs) = AONE (bs @ cs)" 
       
   124 | "fuse bs (ACHAR cs c) = ACHAR (bs @ cs) c"
       
   125 | "fuse bs (AALTs cs rs) = AALTs (bs @ cs) rs"
       
   126 | "fuse bs (ASEQ cs r1 r2) = ASEQ (bs @ cs) r1 r2"
       
   127 | "fuse bs (ASTAR cs r) = ASTAR (bs @ cs) r"
       
   128 
       
   129 lemma fuse_append:
       
   130   shows "fuse (bs1 @ bs2) r = fuse bs1 (fuse bs2 r)"
       
   131   apply(induct r)
       
   132   apply(auto)
       
   133   done
       
   134 
       
   135 
       
   136 fun intern :: "rexp \<Rightarrow> arexp" where
       
   137   "intern ZERO = AZERO"
       
   138 | "intern ONE = AONE []"
       
   139 | "intern (CH c) = ACHAR [] c"
       
   140 | "intern (ALT r1 r2) = AALT [] (fuse [Z] (intern r1)) 
       
   141                                 (fuse [S]  (intern r2))"
       
   142 | "intern (SEQ r1 r2) = ASEQ [] (intern r1) (intern r2)"
       
   143 | "intern (STAR r) = ASTAR [] (intern r)"
       
   144 
       
   145 
       
   146 fun retrieve :: "arexp \<Rightarrow> val \<Rightarrow> bit list" where
       
   147   "retrieve (AONE bs) Void = bs"
       
   148 | "retrieve (ACHAR bs c) (Char d) = bs"
       
   149 | "retrieve (AALTs bs [r]) v = bs @ retrieve r v"
       
   150 | "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
       
   151 | "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
       
   152 | "retrieve (ASEQ bs r1 r2) (Seq v1 v2) = bs @ retrieve r1 v1 @ retrieve r2 v2"
       
   153 | "retrieve (ASTAR bs r) (Stars []) = bs @ [S]"
       
   154 | "retrieve (ASTAR bs r) (Stars (v#vs)) = 
       
   155      bs @ [Z] @ retrieve r v @ retrieve (ASTAR [] r) (Stars vs)"
       
   156 
       
   157 
       
   158 
       
   159 fun
       
   160  bnullable :: "arexp \<Rightarrow> bool"
       
   161 where
       
   162   "bnullable (AZERO) = False"
       
   163 | "bnullable (AONE bs) = True"
       
   164 | "bnullable (ACHAR bs c) = False"
       
   165 | "bnullable (AALTs bs rs) = (\<exists>r \<in> set rs. bnullable r)"
       
   166 | "bnullable (ASEQ bs r1 r2) = (bnullable r1 \<and> bnullable r2)"
       
   167 | "bnullable (ASTAR bs r) = True"
       
   168 
       
   169 abbreviation
       
   170   bnullables :: "arexp list \<Rightarrow> bool"
       
   171 where
       
   172   "bnullables rs \<equiv> (\<exists>r \<in> set rs. bnullable r)"
       
   173 
       
   174 fun 
       
   175   bmkeps :: "arexp \<Rightarrow> bit list" and
       
   176   bmkepss :: "arexp list \<Rightarrow> bit list"
       
   177 where
       
   178   "bmkeps(AONE bs) = bs"
       
   179 | "bmkeps(ASEQ bs r1 r2) = bs @ (bmkeps r1) @ (bmkeps r2)"
       
   180 | "bmkeps(AALTs bs rs) = bs @ (bmkepss rs)"
       
   181 | "bmkeps(ASTAR bs r) = bs @ [S]"
       
   182 | "bmkepss (r # rs) = (if bnullable(r) then (bmkeps r) else (bmkepss rs))"
       
   183 
       
   184 lemma bmkepss1:
       
   185   assumes "\<not> bnullables rs1"
       
   186   shows "bmkepss (rs1 @ rs2) = bmkepss rs2"
       
   187   using assms
       
   188   by (induct rs1) (auto)
       
   189 
       
   190 lemma bmkepss2:
       
   191   assumes "bnullables rs1"
       
   192   shows "bmkepss (rs1 @ rs2) = bmkepss rs1"
       
   193   using assms
       
   194   by (induct rs1) (auto)
       
   195 
       
   196 
       
   197 fun
       
   198  bder :: "char \<Rightarrow> arexp \<Rightarrow> arexp"
       
   199 where
       
   200   "bder c (AZERO) = AZERO"
       
   201 | "bder c (AONE bs) = AZERO"
       
   202 | "bder c (ACHAR bs d) = (if c = d then AONE bs else AZERO)"
       
   203 | "bder c (AALTs bs rs) = AALTs bs (map (bder c) rs)"
       
   204 | "bder c (ASEQ bs r1 r2) = 
       
   205      (if bnullable r1
       
   206       then AALT bs (ASEQ [] (bder c r1) r2) (fuse (bmkeps r1) (bder c r2))
       
   207       else ASEQ bs (bder c r1) r2)"
       
   208 | "bder c (ASTAR bs r) = ASEQ (bs @ [Z]) (bder c r) (ASTAR [] r)"
       
   209 
       
   210 
       
   211 fun 
       
   212   bders :: "arexp \<Rightarrow> string \<Rightarrow> arexp"
       
   213 where
       
   214   "bders r [] = r"
       
   215 | "bders r (c#s) = bders (bder c r) s"
       
   216 
       
   217 lemma bders_append:
       
   218   "bders c (s1 @ s2) = bders (bders c s1) s2"
       
   219   apply(induct s1 arbitrary: c s2)
       
   220   apply(simp_all)
       
   221   done
       
   222 
       
   223 lemma bnullable_correctness:
       
   224   shows "nullable (erase r) = bnullable r"
       
   225   apply(induct r rule: erase.induct)
       
   226   apply(simp_all)
       
   227   done
       
   228 
       
   229 lemma erase_fuse:
       
   230   shows "erase (fuse bs r) = erase r"
       
   231   apply(induct r rule: erase.induct)
       
   232   apply(simp_all)
       
   233   done
       
   234 
       
   235 lemma erase_intern [simp]:
       
   236   shows "erase (intern r) = r"
       
   237   apply(induct r)
       
   238   apply(simp_all add: erase_fuse)
       
   239   done
       
   240 
       
   241 lemma erase_bder [simp]:
       
   242   shows "erase (bder a r) = der a (erase r)"
       
   243   apply(induct r rule: erase.induct)
       
   244   apply(simp_all add: erase_fuse bnullable_correctness)
       
   245   done
       
   246 
       
   247 lemma erase_bders [simp]:
       
   248   shows "erase (bders r s) = ders s (erase r)"
       
   249   apply(induct s arbitrary: r )
       
   250   apply(simp_all)
       
   251   done
       
   252 
       
   253 lemma bnullable_fuse:
       
   254   shows "bnullable (fuse bs r) = bnullable r"
       
   255   apply(induct r arbitrary: bs)
       
   256   apply(auto)
       
   257   done
       
   258 
       
   259 lemma retrieve_encode_STARS:
       
   260   assumes "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> code v = retrieve (intern r) v"
       
   261   shows "code (Stars vs) = retrieve (ASTAR [] (intern r)) (Stars vs)"
       
   262   using assms
       
   263   apply(induct vs)
       
   264   apply(simp_all)
       
   265   done
       
   266 
       
   267 lemma retrieve_fuse2:
       
   268   assumes "\<Turnstile> v : (erase r)"
       
   269   shows "retrieve (fuse bs r) v = bs @ retrieve r v"
       
   270   using assms
       
   271   apply(induct r arbitrary: v bs)
       
   272   apply(auto elim: Prf_elims)[4]
       
   273   apply(case_tac x2a)
       
   274   apply(simp)
       
   275   using Prf_elims(1) apply blast
       
   276   apply(case_tac x2a)
       
   277   apply(simp)
       
   278   apply(simp)
       
   279   apply(case_tac list)
       
   280   apply(simp)
       
   281   apply(simp)
       
   282   apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5))
       
   283   apply(simp)
       
   284   using retrieve_encode_STARS
       
   285   apply(auto elim!: Prf_elims)[1]
       
   286   apply(case_tac vs)
       
   287   apply(simp)
       
   288   apply(simp)
       
   289   done
       
   290 
       
   291 lemma retrieve_fuse:
       
   292   assumes "\<Turnstile> v : r"
       
   293   shows "retrieve (fuse bs (intern r)) v = bs @ retrieve (intern r) v"
       
   294   using assms 
       
   295   by (simp_all add: retrieve_fuse2)
       
   296 
       
   297 
       
   298 lemma retrieve_code:
       
   299   assumes "\<Turnstile> v : r"
       
   300   shows "code v = retrieve (intern r) v"
       
   301   using assms
       
   302   apply(induct v r )
       
   303   apply(simp_all add: retrieve_fuse retrieve_encode_STARS)
       
   304   done
       
   305 
       
   306 
       
   307 lemma retrieve_AALTs_bnullable1:
       
   308   assumes "bnullable r"
       
   309   shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
       
   310          = bs @ retrieve r (mkeps (erase r))"
       
   311   using assms
       
   312   apply(case_tac rs)
       
   313   apply(auto simp add: bnullable_correctness)
       
   314   done
       
   315 
       
   316 lemma retrieve_AALTs_bnullable2:
       
   317   assumes "\<not>bnullable r" "bnullables rs"
       
   318   shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
       
   319          = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
       
   320   using assms
       
   321   apply(induct rs arbitrary: r bs)
       
   322   apply(auto)
       
   323   using bnullable_correctness apply blast
       
   324   apply(case_tac rs)
       
   325   apply(auto)
       
   326   using bnullable_correctness apply blast
       
   327   apply(case_tac rs)
       
   328   apply(auto)
       
   329   done
       
   330 
       
   331 lemma bmkeps_retrieve_AALTs: 
       
   332   assumes "\<forall>r \<in> set rs. bnullable r \<longrightarrow> bmkeps r = retrieve r (mkeps (erase r))" 
       
   333           "bnullables rs"
       
   334   shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
       
   335  using assms
       
   336   apply(induct rs arbitrary: bs)
       
   337   apply(auto)
       
   338   using retrieve_AALTs_bnullable1 apply presburger
       
   339   apply (metis retrieve_AALTs_bnullable2)
       
   340   apply (simp add: retrieve_AALTs_bnullable1)
       
   341   by (metis retrieve_AALTs_bnullable2)
       
   342 
       
   343     
       
   344 lemma bmkeps_retrieve:
       
   345   assumes "bnullable r"
       
   346   shows "bmkeps r = retrieve r (mkeps (erase r))"
       
   347   using assms
       
   348   apply(induct r)
       
   349   apply(auto)  
       
   350   using bmkeps_retrieve_AALTs by auto
       
   351 
       
   352 lemma bder_retrieve:
       
   353   assumes "\<Turnstile> v : der c (erase r)"
       
   354   shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)"
       
   355   using assms  
       
   356   apply(induct r arbitrary: v rule: erase.induct)
       
   357   using Prf_elims(1) apply auto[1]
       
   358   using Prf_elims(1) apply auto[1]
       
   359   apply(auto)[1]
       
   360   apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2))
       
   361   using Prf_elims(1) apply blast
       
   362   (* AALTs case *)
       
   363   apply(simp)
       
   364   apply(erule Prf_elims)
       
   365   apply(simp)
       
   366   apply(simp)
       
   367   apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v)
       
   368   apply(erule Prf_elims)
       
   369   apply(simp)
       
   370   apply(simp)
       
   371   apply(case_tac rs)
       
   372   apply(simp)
       
   373   apply(simp)
       
   374   using Prf_elims(3) apply fastforce
       
   375   (* ASEQ case *) 
       
   376   apply(simp)
       
   377   apply(case_tac "nullable (erase r1)")
       
   378   apply(simp)
       
   379   apply(erule Prf_elims)
       
   380   using Prf_elims(2) bnullable_correctness apply force
       
   381   apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2)
       
   382   apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2)
       
   383   using Prf_elims(2) apply force
       
   384   (* ASTAR case *)  
       
   385   apply(rename_tac bs r v)
       
   386   apply(simp)  
       
   387   apply(erule Prf_elims)
       
   388   apply(clarify)
       
   389   apply(erule Prf_elims)
       
   390   apply(clarify)
       
   391   by (simp add: retrieve_fuse2)
       
   392 
       
   393 
       
   394 lemma MAIN_decode:
       
   395   assumes "\<Turnstile> v : ders s r"
       
   396   shows "Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r"
       
   397   using assms
       
   398 proof (induct s arbitrary: v rule: rev_induct)
       
   399   case Nil
       
   400   have "\<Turnstile> v : ders [] r" by fact
       
   401   then have "\<Turnstile> v : r" by simp
       
   402   then have "Some v = decode (retrieve (intern r) v) r"
       
   403     using decode_code retrieve_code by auto
       
   404   then show "Some (flex r id [] v) = decode (retrieve (bders (intern r) []) v) r"
       
   405     by simp
       
   406 next
       
   407   case (snoc c s v)
       
   408   have IH: "\<And>v. \<Turnstile> v : ders s r \<Longrightarrow> 
       
   409      Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" by fact
       
   410   have asm: "\<Turnstile> v : ders (s @ [c]) r" by fact
       
   411   then have asm2: "\<Turnstile> injval (ders s r) c v : ders s r" 
       
   412     by (simp add: Prf_injval ders_append)
       
   413   have "Some (flex r id (s @ [c]) v) = Some (flex r id s (injval (ders s r) c v))"
       
   414     by (simp add: flex_append)
       
   415   also have "... = decode (retrieve (bders (intern r) s) (injval (ders s r) c v)) r"
       
   416     using asm2 IH by simp
       
   417   also have "... = decode (retrieve (bder c (bders (intern r) s)) v) r"
       
   418     using asm by (simp_all add: bder_retrieve ders_append)
       
   419   finally show "Some (flex r id (s @ [c]) v) = 
       
   420                  decode (retrieve (bders (intern r) (s @ [c])) v) r" by (simp add: bders_append)
       
   421 qed
       
   422 
       
   423 definition blexer where
       
   424  "blexer r s \<equiv> if bnullable (bders (intern r) s) then 
       
   425                 decode (bmkeps (bders (intern r) s)) r else None"
       
   426 
       
   427 lemma blexer_correctness:
       
   428   shows "blexer r s = lexer r s"
       
   429 proof -
       
   430   { define bds where "bds \<equiv> bders (intern r) s"
       
   431     define ds  where "ds \<equiv> ders s r"
       
   432     assume asm: "nullable ds"
       
   433     have era: "erase bds = ds" 
       
   434       unfolding ds_def bds_def by simp
       
   435     have mke: "\<Turnstile> mkeps ds : ds"
       
   436       using asm by (simp add: mkeps_nullable)
       
   437     have "decode (bmkeps bds) r = decode (retrieve bds (mkeps ds)) r"
       
   438       using bmkeps_retrieve
       
   439       using asm era
       
   440       using bnullable_correctness by force 
       
   441     also have "... =  Some (flex r id s (mkeps ds))"
       
   442       using mke by (simp_all add: MAIN_decode ds_def bds_def)
       
   443     finally have "decode (bmkeps bds) r = Some (flex r id s (mkeps ds))" 
       
   444       unfolding bds_def ds_def .
       
   445   }
       
   446   then show "blexer r s = lexer r s"
       
   447     unfolding blexer_def lexer_flex
       
   448     by (auto simp add: bnullable_correctness[symmetric])
       
   449 qed
       
   450 
       
   451 
       
   452 unused_thms
       
   453 
       
   454 end