Derivatives.thy
changeset 379 8c4b6fb43ebe
parent 246 161128ccb65a
equal deleted inserted replaced
378:a0bcf886b8ef 379:8c4b6fb43ebe
     6 imports Regular_Exp
     6 imports Regular_Exp
     7 begin
     7 begin
     8 
     8 
     9 text{* This theory is based on work by Brzozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}. *}
     9 text{* This theory is based on work by Brzozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}. *}
    10 
    10 
    11 subsection {* Left-Quotients of languages *}
       
    12 
       
    13 definition Deriv :: "'a \<Rightarrow> 'a lang \<Rightarrow> 'a lang"
       
    14 where "Deriv x A = { xs. x#xs \<in> A }"
       
    15 
       
    16 definition Derivs :: "'a list \<Rightarrow> 'a lang \<Rightarrow> 'a lang"
       
    17 where "Derivs xs A = { ys. xs @ ys \<in> A }"
       
    18 
       
    19 abbreviation 
       
    20   Derivss :: "'a list \<Rightarrow> 'a lang set \<Rightarrow> 'a lang"
       
    21 where
       
    22   "Derivss s As \<equiv> \<Union> (Derivs s) ` As"
       
    23 
       
    24 
       
    25 lemma Deriv_empty[simp]:   "Deriv a {} = {}"
       
    26   and Deriv_epsilon[simp]: "Deriv a {[]} = {}"
       
    27   and Deriv_char[simp]:    "Deriv a {[b]} = (if a = b then {[]} else {})"
       
    28   and Deriv_union[simp]:   "Deriv a (A \<union> B) = Deriv a A \<union> Deriv a B"
       
    29 by (auto simp: Deriv_def)
       
    30 
       
    31 lemma Deriv_conc_subset:
       
    32 "Deriv a A @@ B \<subseteq> Deriv a (A @@ B)" (is "?L \<subseteq> ?R")
       
    33 proof 
       
    34   fix w assume "w \<in> ?L"
       
    35   then obtain u v where "w = u @ v" "a # u \<in> A" "v \<in> B"
       
    36     by (auto simp: Deriv_def)
       
    37   then have "a # w \<in> A @@ B"
       
    38     by (auto intro: concI[of "a # u", simplified])
       
    39   thus "w \<in> ?R" by (auto simp: Deriv_def)
       
    40 qed
       
    41 
       
    42 lemma Der_conc [simp]:
       
    43   shows "Deriv c (A @@ B) = (Deriv c A) @@ B \<union> (if [] \<in> A then Deriv c B else {})"
       
    44 unfolding Deriv_def conc_def
       
    45 by (auto simp add: Cons_eq_append_conv)
       
    46 
       
    47 lemma Deriv_star [simp]:
       
    48   shows "Deriv c (star A) = (Deriv c A) @@ star A"
       
    49 proof -
       
    50   have incl: "[] \<in> A \<Longrightarrow> Deriv c (star A) \<subseteq> (Deriv c A) @@ star A"
       
    51     unfolding Deriv_def conc_def 
       
    52     apply(auto simp add: Cons_eq_append_conv)
       
    53     apply(drule star_decom)
       
    54     apply(auto simp add: Cons_eq_append_conv)
       
    55     done
       
    56 
       
    57   have "Deriv c (star A) = Deriv c (A @@ star A \<union> {[]})"
       
    58     by (simp only: star_unfold_left[symmetric])
       
    59   also have "... = Deriv c (A @@ star A)"
       
    60     by (simp only: Deriv_union) (simp)
       
    61   also have "... =  (Deriv c A) @@ (star A) \<union> (if [] \<in> A then Deriv c (star A) else {})"
       
    62     by simp
       
    63    also have "... =  (Deriv c A) @@ star A"
       
    64     using incl by auto
       
    65   finally show "Deriv c (star A) = (Deriv c A) @@ star A" . 
       
    66 qed
       
    67 
       
    68 lemma Derivs_simps [simp]:
       
    69   shows "Derivs [] A = A"
       
    70   and   "Derivs (c # s) A = Derivs s (Deriv c A)"
       
    71   and   "Derivs (s1 @ s2) A = Derivs s2 (Derivs s1 A)"
       
    72 unfolding Derivs_def Deriv_def by auto
       
    73 
       
    74 
       
    75 subsection {* Brzozowski's derivatives of regular expressions *}
    11 subsection {* Brzozowski's derivatives of regular expressions *}
    76 
       
    77 fun
       
    78   nullable :: "'a rexp \<Rightarrow> bool"
       
    79 where
       
    80   "nullable (Zero) = False"
       
    81 | "nullable (One) = True"
       
    82 | "nullable (Atom c) = False"
       
    83 | "nullable (Plus r1 r2) = (nullable r1 \<or> nullable r2)"
       
    84 | "nullable (Times r1 r2) = (nullable r1 \<and> nullable r2)"
       
    85 | "nullable (Star r) = True"
       
    86 
    12 
    87 fun
    13 fun
    88   deriv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp"
    14   deriv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp"
    89 where
    15 where
    90   "deriv c (Zero) = Zero"
    16   "deriv c (Zero) = Zero"
   100 where
    26 where
   101   "derivs [] r = r"
    27   "derivs [] r = r"
   102 | "derivs (c # s) r = derivs s (deriv c r)"
    28 | "derivs (c # s) r = derivs s (deriv c r)"
   103 
    29 
   104 
    30 
   105 lemma nullable_iff:
    31 lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)"
   106   shows "nullable r \<longleftrightarrow> [] \<in> lang r"
       
   107 by (induct r) (auto simp add: conc_def split: if_splits)
       
   108 
       
   109 lemma Deriv_deriv:
       
   110   shows "Deriv c (lang r) = lang (deriv c r)"
       
   111 by (induct r) (simp_all add: nullable_iff)
    32 by (induct r) (simp_all add: nullable_iff)
   112 
    33 
   113 lemma Derivs_derivs:
    34 lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)"
   114   shows "Derivs s (lang r) = lang (derivs s r)"
    35 by (induct s arbitrary: r) (simp_all add: lang_deriv)
   115 by (induct s arbitrary: r) (simp_all add: Deriv_deriv)
    36 
       
    37 text {* A regular expression matcher: *}
       
    38 
       
    39 definition matcher :: "'a rexp \<Rightarrow> 'a list \<Rightarrow> bool" where
       
    40 "matcher r s = nullable (derivs s r)"
       
    41 
       
    42 lemma matcher_correctness: "matcher r s \<longleftrightarrow> s \<in> lang r"
       
    43 by (induct s arbitrary: r)
       
    44    (simp_all add: nullable_iff lang_deriv matcher_def Deriv_def)
   116 
    45 
   117 
    46 
   118 subsection {* Antimirov's partial derivatives *}
    47 subsection {* Antimirov's partial derivatives *}
   119 
    48 
   120 abbreviation
    49 abbreviation
   121   "Timess rs r \<equiv> {Times r' r | r'. r' \<in> rs}"
    50   "Timess rs r \<equiv> (\<Union>r' \<in> rs. {Times r' r})"
   122 
    51 
   123 fun
    52 fun
   124   pderiv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set"
    53   pderiv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set"
   125 where
    54 where
   126   "pderiv c Zero = {}"
    55   "pderiv c Zero = {}"
   133 
    62 
   134 fun
    63 fun
   135   pderivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> ('a rexp) set"
    64   pderivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> ('a rexp) set"
   136 where
    65 where
   137   "pderivs [] r = {r}"
    66   "pderivs [] r = {r}"
   138 | "pderivs (c # s) r = \<Union> (pderivs s) ` (pderiv c r)"
    67 | "pderivs (c # s) r = \<Union> (pderivs s ` pderiv c r)"
   139 
    68 
   140 abbreviation
    69 abbreviation
   141  pderiv_set :: "'a \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
    70  pderiv_set :: "'a \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
   142 where
    71 where
   143   "pderiv_set c rs \<equiv> \<Union> pderiv c ` rs"
    72   "pderiv_set c rs \<equiv> \<Union> (pderiv c ` rs)"
   144 
    73 
   145 abbreviation
    74 abbreviation
   146   pderivs_set :: "'a list \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
    75   pderivs_set :: "'a list \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
   147 where
    76 where
   148   "pderivs_set s rs \<equiv> \<Union> (pderivs s) ` rs"
    77   "pderivs_set s rs \<equiv> \<Union> (pderivs s ` rs)"
   149 
    78 
   150 lemma pderivs_append:
    79 lemma pderivs_append:
   151   "pderivs (s1 @ s2) r = \<Union> (pderivs s2) ` (pderivs s1 r)"
    80   "pderivs (s1 @ s2) r = \<Union> (pderivs s2 ` pderivs s1 r)"
   152 by (induct s1 arbitrary: r) (simp_all)
    81 by (induct s1 arbitrary: r) (simp_all)
   153 
    82 
   154 lemma pderivs_snoc:
    83 lemma pderivs_snoc:
   155   shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)"
    84   shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)"
   156 by (simp add: pderivs_append)
    85 by (simp add: pderivs_append)
   166 by (induct s) (simp_all)
    95 by (induct s) (simp_all)
   167 
    96 
   168 subsection {* Relating left-quotients and partial derivatives *}
    97 subsection {* Relating left-quotients and partial derivatives *}
   169 
    98 
   170 lemma Deriv_pderiv:
    99 lemma Deriv_pderiv:
   171   shows "Deriv c (lang r) = \<Union> lang ` (pderiv c r)"
   100   shows "Deriv c (lang r) = \<Union> (lang ` pderiv c r)"
   172 by (induct r) (auto simp add: nullable_iff conc_UNION_distrib)
   101 by (induct r) (auto simp add: nullable_iff conc_UNION_distrib)
   173 
   102 
   174 lemma Derivs_pderivs:
   103 lemma Derivs_pderivs:
   175   shows "Derivs s (lang r) = \<Union> lang ` (pderivs s r)"
   104   shows "Derivs s (lang r) = \<Union> (lang ` pderivs s r)"
   176 proof (induct s arbitrary: r)
   105 proof (induct s arbitrary: r)
   177   case (Cons c s)
   106   case (Cons c s)
   178   have ih: "\<And>r. Derivs s (lang r) = \<Union> lang ` (pderivs s r)" by fact
   107   have ih: "\<And>r. Derivs s (lang r) = \<Union> (lang ` pderivs s r)" by fact
   179   have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp
   108   have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp
   180   also have "\<dots> = Derivs s (\<Union> lang ` (pderiv c r))" by (simp add: Deriv_pderiv)
   109   also have "\<dots> = Derivs s (\<Union> (lang ` pderiv c r))" by (simp add: Deriv_pderiv)
   181   also have "\<dots> = Derivss s (lang ` (pderiv c r))"
   110   also have "\<dots> = Derivss s (lang ` (pderiv c r))"
   182     by (auto simp add:  Derivs_def)
   111     by (auto simp add:  Derivs_def)
   183   also have "\<dots> = \<Union> lang ` (pderivs_set s (pderiv c r))"
   112   also have "\<dots> = \<Union> (lang ` (pderivs_set s (pderiv c r)))"
   184     using ih by auto
   113     using ih by auto
   185   also have "\<dots> = \<Union> lang ` (pderivs (c # s) r)" by simp
   114   also have "\<dots> = \<Union> (lang ` (pderivs (c # s) r))" by simp
   186   finally show "Derivs (c # s) (lang r) = \<Union> lang ` pderivs (c # s) r" .
   115   finally show "Derivs (c # s) (lang r) = \<Union> (lang ` pderivs (c # s) r)" .
   187 qed (simp add: Derivs_def)
   116 qed (simp add: Derivs_def)
   188 
   117 
   189 subsection {* Relating derivatives and partial derivatives *}
   118 subsection {* Relating derivatives and partial derivatives *}
   190 
   119 
   191 lemma deriv_pderiv:
   120 lemma deriv_pderiv:
   192   shows "(\<Union> lang ` (pderiv c r)) = lang (deriv c r)"
   121   shows "\<Union> (lang ` (pderiv c r)) = lang (deriv c r)"
   193 unfolding Deriv_deriv[symmetric] Deriv_pderiv by simp
   122 unfolding lang_deriv Deriv_pderiv by simp
   194 
   123 
   195 lemma derivs_pderivs:
   124 lemma derivs_pderivs:
   196   shows "(\<Union> lang ` (pderivs s r)) = lang (derivs s r)"
   125   shows "\<Union> (lang ` (pderivs s r)) = lang (derivs s r)"
   197 unfolding Derivs_derivs[symmetric] Derivs_pderivs by simp
   126 unfolding lang_derivs Derivs_pderivs by simp
   198 
   127 
   199 
   128 
   200 subsection {* Finiteness property of partial derivatives *}
   129 subsection {* Finiteness property of partial derivatives *}
   201 
   130 
   202 definition
   131 definition
   270   have ih: "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)" 
   199   have ih: "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)" 
   271     by fact
   200     by fact
   272   have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" 
   201   have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" 
   273     by (simp add: pderivs_snoc)
   202     by (simp add: pderivs_snoc)
   274   also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2))"
   203   also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2))"
   275     using ih by (auto) (blast)
   204     using ih by fast
   276   also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv_set c (pderivs_lang (PSuf s) r2)"
   205   also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv_set c (pderivs_lang (PSuf s) r2)"
   277     by (simp)
   206     by (simp)
   278   also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   207   also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   279     by (simp add: pderivs_lang_snoc)
   208     by (simp add: pderivs_lang_snoc)
   280   also 
   209   also 
   281   have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   210   have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   282     by auto
   211     by auto
   283   also 
   212   also 
   284   have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs s r1)) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   213   have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs s r1)) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   285     by (auto simp add: if_splits) (blast)
   214     by (auto simp add: if_splits)
   286   also have "\<dots> = Timess (pderivs (s @ [c]) r1) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   215   also have "\<dots> = Timess (pderivs (s @ [c]) r1) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
   287     by (simp add: pderivs_snoc)
   216     by (simp add: pderivs_snoc)
   288   also have "\<dots> \<subseteq> Timess (pderivs (s @ [c]) r1) r2 \<union> pderivs_lang (PSuf (s @ [c])) r2"
   217   also have "\<dots> \<subseteq> Timess (pderivs (s @ [c]) r1) r2 \<union> pderivs_lang (PSuf (s @ [c])) r2"
   289     unfolding pderivs_lang_def by (auto simp add: PSuf_snoc)  
   218     unfolding pderivs_lang_def by (auto simp add: PSuf_snoc)  
   290   finally show ?case .
   219   finally show ?case .
   317   case (snoc c s)
   246   case (snoc c s)
   318   have ih: "s \<noteq> [] \<Longrightarrow> pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)" by fact
   247   have ih: "s \<noteq> [] \<Longrightarrow> pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)" by fact
   319   { assume asm: "s \<noteq> []"
   248   { assume asm: "s \<noteq> []"
   320     have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc)
   249     have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc)
   321     also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))"
   250     also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))"
   322       using ih[OF asm] by (auto) (blast)
   251       using ih[OF asm] by fast
   323     also have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \<union> pderiv c (Star r)"
   252     also have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \<union> pderiv c (Star r)"
   324       by (auto split: if_splits) (blast)+
   253       by (auto split: if_splits)
   325     also have "\<dots> \<subseteq> Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \<union> (Timess (pderiv c r) (Star r))"
   254     also have "\<dots> \<subseteq> Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \<union> (Timess (pderiv c r) (Star r))"
   326       by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union)
   255       by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union)
   327          (auto simp add: pderivs_lang_def)
   256          (auto simp add: pderivs_lang_def)
   328     also have "\<dots> = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)"
   257     also have "\<dots> = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)"
   329       by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def)
   258       by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def)
   330     finally have ?case .
   259     finally have ?case .
   331   }
   260   }
   332   moreover
   261   moreover
   333   { assume asm: "s = []"
   262   { assume asm: "s = []"
   334     then have ?case
   263     then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def)
   335       apply (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def)
       
   336       apply(rule_tac x = "[c]" in exI)
       
   337       apply(auto)
       
   338       done
       
   339   }
   264   }
   340   ultimately show ?case by blast
   265   ultimately show ?case by blast
   341 qed (simp)
   266 qed (simp)
   342 
   267 
   343 lemma pderivs_lang_Star:
   268 lemma pderivs_lang_Star:
   375 
   300 
   376 lemma finite_pderivs_lang:
   301 lemma finite_pderivs_lang:
   377   shows "finite (pderivs_lang A r)"
   302   shows "finite (pderivs_lang A r)"
   378 by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV)
   303 by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV)
   379 
   304 
   380 
       
   381 subsection {* A regular expression matcher based on Brzozowski's derivatives *}
       
   382 
       
   383 fun
       
   384   matcher :: "'a rexp \<Rightarrow> 'a list \<Rightarrow> bool"
       
   385 where
       
   386   "matcher r s = nullable (derivs s r)"
       
   387 
       
   388 lemma matcher_correctness:
       
   389   shows "matcher r s \<longleftrightarrow> s \<in> lang r"
       
   390 by (induct s arbitrary: r)
       
   391    (simp_all add: nullable_iff Deriv_deriv[symmetric] Deriv_def)
       
   392 
       
   393 
       
   394 end
   305 end