header "Derivatives of regular expressions"

(* Author: Christian Urban *)

theory Derivatives
imports Regular_Exp
begin

text{*
  This theory is based on work by Brzozowski \cite{Brzozowski64} and
  Antimirov \cite{Antimirov95}.
*}

subsection {* Brzozowski's derivatives of regular expressions *}

(* deriv c r: the derivative of the regular expression r with respect to the
   single character c.  Lemma lang_deriv below relates it to the
   left-quotient Deriv on languages. *)
fun
  deriv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp"
where
  "deriv c (Zero) = Zero"
| "deriv c (One) = Zero"
| "deriv c (Atom c') = (if c = c' then One else Zero)"
| "deriv c (Plus r1 r2) = Plus (deriv c r1) (deriv c r2)"
| "deriv c (Times r1 r2) =
    (if nullable r1 then Plus (Times (deriv c r1) r2) (deriv c r2) else Times (deriv c r1) r2)"
| "deriv c (Star r) = Times (deriv c r) (Star r)"

(* derivs s r: applies deriv for each character of the string s, taking the
   head of the list first. *)
fun
  derivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp"
where
  "derivs [] r = r"
| "derivs (c # s) r = derivs s (deriv c r)"

(* The language of a derivative is the left-quotient of the language. *)
lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)"
by (induct r) (simp_all add: nullable_iff)

(* The same statement lifted from single characters to strings. *)
lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)"
by (induct s arbitrary: r) (simp_all add: lang_deriv)

text {* A regular expression matcher: *}

(* matcher r s: derive r by the whole string s and test the result for
   nullability. *)
definition matcher :: "'a rexp \<Rightarrow> 'a list \<Rightarrow> bool" where
"matcher r s = nullable (derivs s r)"

(* The matcher accepts exactly the strings in the language of r. *)
lemma matcher_correctness: "matcher r s \<longleftrightarrow> s \<in> lang r"
by (induct s arbitrary: r)
   (simp_all add: nullable_iff lang_deriv matcher_def Deriv_def)

subsection {* Antimirov's partial derivatives *}

(* Timess rs r: the set obtained by appending r (via Times) to every
   expression in rs. *)
abbreviation
  "Timess rs r \<equiv> (\<Union>r' \<in> rs. {Times r' r})"

(* pderiv c r: the partial derivative of r with respect to the character c.
   In contrast to deriv, the result is a set of regular expressions. *)
fun
  pderiv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set"
where
  "pderiv c Zero = {}"
| "pderiv c One = {}"
| "pderiv c (Atom c') = (if c = c' then {One} else {})"
| "pderiv c (Plus r1 r2) = (pderiv c r1) \<union> (pderiv c r2)"
| "pderiv c (Times r1 r2) =
    (if nullable r1 then Timess (pderiv c r1) r2 \<union> pderiv c r2 else Timess (pderiv c r1) r2)"
| "pderiv c (Star r) = Timess (pderiv c r) (Star r)"

(* pderivs s r: partial derivatives with respect to a string; the empty
   string yields the singleton containing r itself. *)
fun
  pderivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> ('a rexp) set"
where
  "pderivs [] r = {r}"
| "pderivs (c # s) r = \<Union> (pderivs s ` pderiv c r)"

(* pderiv lifted to a set of regular expressions. *)
abbreviation
  pderiv_set :: "'a \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
where
  "pderiv_set c rs \<equiv> \<Union> (pderiv c ` rs)"

(* pderivs lifted to a set of regular expressions. *)
abbreviation
  pderivs_set :: "'a list \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
where
  "pderivs_set s rs \<equiv> \<Union> (pderivs s ` rs)"

(* Partial derivatives by a concatenated string can be taken in two stages. *)
lemma pderivs_append:
  "pderivs (s1 @ s2) r = \<Union> (pderivs s2 ` pderivs s1 r)"
by (induct s1 arbitrary: r) (simp_all)

(* Special case of pderivs_append for a single trailing character; this is
   the form used by the rev_induct proofs further down. *)
lemma pderivs_snoc:
  shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)"
by (simp add: pderivs_append)

(* Closed forms of pderivs for Zero, One and Plus, registered as simp rules. *)
lemma pderivs_simps [simp]:
  shows "pderivs s Zero = (if s = [] then {Zero} else {})"
  and   "pderivs s One = (if s = [] then {One} else {})"
  and   "pderivs s (Plus r1 r2) = (if s = [] then {Plus r1 r2} else (pderivs s r1) \<union> (pderivs s r2))"
by (induct s) (simp_all)

(* Only an upper bound is stated for Atom. *)
lemma pderivs_Atom:
  shows "pderivs s (Atom c) \<subseteq> {Atom c, One}"
by (induct s) (simp_all)

subsection {* Relating left-quotients and partial derivatives *}

(* The left-quotient by c is the union of the languages of the partial
   derivatives by c. *)
lemma Deriv_pderiv:
  shows "Deriv c (lang r) = \<Union> (lang ` pderiv c r)"
by (induct r) (auto simp add: nullable_iff conc_UNION_distrib)

(* The same statement for strings, proved by induction on the string. *)
lemma Derivs_pderivs:
  shows "Derivs s (lang r) = \<Union> (lang ` pderivs s r)"
proof (induct s arbitrary: r)
  case (Cons c s)
  have ih: "\<And>r. Derivs s (lang r) = \<Union> (lang ` pderivs s r)" by fact
  have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp
  also have "\<dots> = Derivs s (\<Union> (lang ` pderiv c r))" by (simp add: Deriv_pderiv)
  also have "\<dots> = Derivss s (lang ` (pderiv c r))"
    by (auto simp add: Derivs_def)
  also have "\<dots> = \<Union> (lang ` (pderivs_set s (pderiv c r)))"
    using ih by auto
  also have "\<dots> = \<Union> (lang ` (pderivs (c # s) r))" by simp
  finally show "Derivs (c # s) (lang r) = \<Union> (lang ` pderivs (c # s) r)" .
qed (simp add: Derivs_def)

subsection {* Relating derivatives and partial derivatives *}

(* Both corollaries follow by rewriting with the two characterisations of
   the left-quotient established above. *)
lemma deriv_pderiv:
  shows "\<Union> (lang ` (pderiv c r)) = lang (deriv c r)"
unfolding lang_deriv Deriv_pderiv by simp

lemma derivs_pderivs:
  shows "\<Union> (lang ` (pderivs s r)) = lang (derivs s r)"
unfolding lang_derivs Derivs_pderivs by simp

subsection {* Finiteness property of partial derivatives *}

(* pderivs_lang A r: all partial derivatives of r with respect to some
   string in the language A. *)
definition
  pderivs_lang :: "'a lang \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set"
where
  "pderivs_lang A r \<equiv> \<Union>x \<in> A. pderivs x r"

(* Introduction rule for bounding pderivs_lang from above. *)
lemma pderivs_lang_subsetI:
  assumes "\<And>s. s \<in> A \<Longrightarrow> pderivs s r \<subseteq> C"
  shows "pderivs_lang A r \<subseteq> C"
using assms unfolding pderivs_lang_def by (rule UN_least)

(* pderivs_lang distributes over union in its language argument. *)
lemma pderivs_lang_union:
  shows "pderivs_lang (A \<union> B) r = (pderivs_lang A r \<union> pderivs_lang B r)"
by (simp add: pderivs_lang_def)

(* pderivs_lang is monotone in its language argument. *)
lemma pderivs_lang_subset:
  shows "A \<subseteq> B \<Longrightarrow> pderivs_lang A r \<subseteq> pderivs_lang B r"
by (auto simp add: pderivs_lang_def)

(* UNIV1: every string except the empty one. *)
definition
  "UNIV1 \<equiv> UNIV - {[]}"

(* Partial derivatives over all non-empty strings, one lemma per
   constructor.  Zero, One, Atom and Plus are equalities and simp rules;
   Times and Star further down are only inclusions. *)
lemma pderivs_lang_Zero [simp]:
  shows "pderivs_lang UNIV1 Zero = {}"
unfolding UNIV1_def pderivs_lang_def by auto

lemma pderivs_lang_One [simp]:
  shows "pderivs_lang UNIV1 One = {}"
unfolding UNIV1_def pderivs_lang_def by (auto split: if_splits)

lemma pderivs_lang_Atom [simp]:
  shows "pderivs_lang UNIV1 (Atom c) = {One}"
unfolding UNIV1_def pderivs_lang_def
apply(auto)
apply(frule rev_subsetD)
apply(rule pderivs_Atom)
apply(simp)
apply(case_tac xa)
apply(auto split: if_splits)
done

lemma pderivs_lang_Plus [simp]:
  shows "pderivs_lang UNIV1 (Plus r1 r2) = pderivs_lang UNIV1 r1 \<union> pderivs_lang UNIV1 r2"
unfolding UNIV1_def pderivs_lang_def by auto

text {* Non-empty suffixes of a string (needed for the cases of @{const Times} and @{const Star} below) *}

(* PSuf s: the non-empty suffixes v of s, i.e. s = u @ v for some u. *)
definition
  "PSuf s \<equiv> {v. v \<noteq> [] \<and> (\<exists>u. u @ v = s)}"

(* Appending c extends every old suffix by c and adds the new suffix [c]. *)
lemma PSuf_snoc:
  shows "PSuf (s @ [c]) = (PSuf s) @@ {[c]} \<union> {[c]}"
unfolding PSuf_def conc_def
by (auto simp add: append_eq_append_conv2 append_eq_Cons_conv)

(* Re-indexes a union over PSuf s @@ {[c]} as a union over PSuf s. *)
lemma PSuf_Union:
  shows "(\<Union>v \<in> PSuf s @@ {[c]}. f v) = (\<Union>v \<in> PSuf s. f (v @ [c]))"
by (auto simp add: conc_def)

lemma pderivs_lang_snoc:
  shows "pderivs_lang (PSuf s @@ {[c]}) r = (pderiv_set c (pderivs_lang (PSuf s) r))"
unfolding pderivs_lang_def
by (simp add: PSuf_Union pderivs_snoc)

(* Upper bound for the partial derivatives of a Times expression, proved by
   induction from the right end of the string (rev_induct). *)
lemma pderivs_Times:
  shows "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)"
proof (induct s rule: rev_induct)
  case (snoc c s)
  have ih: "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)"
    by fact
  have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))"
    by (simp add: pderivs_snoc)
  also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2))"
    using ih by fast
  also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv_set c (pderivs_lang (PSuf s) r2)"
    by (simp)
  also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
    by (simp add: pderivs_lang_snoc)
  also
  have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
    by auto
  also
  have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs s r1)) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
    by (auto simp add: if_splits)
  also have "\<dots> = Timess (pderivs (s @ [c]) r1) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
    by (simp add: pderivs_snoc)
  also have "\<dots> \<subseteq> Timess (pderivs (s @ [c]) r1) r2 \<union> pderivs_lang (PSuf (s @ [c])) r2"
    unfolding pderivs_lang_def by (auto simp add: PSuf_snoc)
  finally show ?case .
qed (simp)

(* Two auxiliary inclusions that lift the bound of pderivs_Times from a
   single non-empty string s to all of UNIV1. *)
lemma pderivs_lang_Times_aux1:
  assumes a: "s \<in> UNIV1"
  shows "pderivs_lang (PSuf s) r \<subseteq> pderivs_lang UNIV1 r"
using a unfolding UNIV1_def PSuf_def pderivs_lang_def by auto

lemma pderivs_lang_Times_aux2:
  assumes a: "s \<in> UNIV1"
  shows "Timess (pderivs s r1) r2 \<subseteq> Timess (pderivs_lang UNIV1 r1) r2"
using a unfolding pderivs_lang_def by auto

lemma pderivs_lang_Times:
  shows "pderivs_lang UNIV1 (Times r1 r2) \<subseteq> Timess (pderivs_lang UNIV1 r1) r2 \<union> pderivs_lang UNIV1 r2"
apply(rule pderivs_lang_subsetI)
apply(rule subset_trans)
apply(rule pderivs_Times)
using pderivs_lang_Times_aux1 pderivs_lang_Times_aux2
apply(blast)
done

(* Upper bound for the partial derivatives of a Star expression by a
   non-empty string.  The snoc case distinguishes whether the prefix s is
   empty, because the induction hypothesis only applies to non-empty s. *)
lemma pderivs_Star:
  assumes a: "s \<noteq> []"
  shows "pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)"
using a
proof (induct s rule: rev_induct)
  case (snoc c s)
  have ih: "s \<noteq> [] \<Longrightarrow> pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)" by fact
  { assume asm: "s \<noteq> []"
    have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc)
    also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))"
      using ih[OF asm] by fast
    also have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \<union> pderiv c (Star r)"
      by (auto split: if_splits)
    also have "\<dots> \<subseteq> Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \<union> (Timess (pderiv c r) (Star r))"
      by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union)
         (auto simp add: pderivs_lang_def)
    also have "\<dots> = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)"
      by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def)
    finally have ?case .
  }
  moreover
  { assume asm: "s = []"
    then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def)
  }
  ultimately show ?case by blast
qed (simp)

lemma pderivs_lang_Star:
  shows "pderivs_lang UNIV1 (Star r) \<subseteq> Timess (pderivs_lang UNIV1 r) (Star r)"
apply(rule pderivs_lang_subsetI)
apply(rule subset_trans)
apply(rule pderivs_Star)
apply(simp add: UNIV1_def)
apply(simp add: UNIV1_def PSuf_def)
apply(auto simp add: pderivs_lang_def)
done

(* Timess preserves finiteness; registered as a simp rule so that the
   induction below can discharge the Times and Star cases. *)
lemma finite_Timess [simp]:
  assumes a: "finite A"
  shows "finite (Timess A r)"
using a by auto

(* Finiteness over all non-empty strings, by structural induction on r.
   The Times and Star cases go through the inclusions proved above. *)
lemma finite_pderivs_lang_UNIV1:
  shows "finite (pderivs_lang UNIV1 r)"
apply(induct r)
apply(simp_all add:
  finite_subset[OF pderivs_lang_Times]
  finite_subset[OF pderivs_lang_Star])
done

(* Splits UNIV into the empty string and UNIV1. *)
lemma pderivs_lang_UNIV:
  shows "pderivs_lang UNIV r = pderivs [] r \<union> pderivs_lang UNIV1 r"
unfolding UNIV1_def pderivs_lang_def
by blast

lemma finite_pderivs_lang_UNIV:
  shows "finite (pderivs_lang UNIV r)"
unfolding pderivs_lang_UNIV
by (simp add: finite_pderivs_lang_UNIV1)

(* Main result: the set of partial derivatives of r with respect to any
   language A is finite, by monotonicity from the UNIV case. *)
lemma finite_pderivs_lang:
  shows "finite (pderivs_lang A r)"
by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV)

end