AFP-Submission/Derivatives.thy
changeset 191 6bb15b8e6301
equal deleted inserted replaced
190:2a07222e2a8b 191:6bb15b8e6301
       
     1 section "Derivatives of regular expressions"
       
     2 
       
     3 (* Author: Christian Urban *)
       
     4 
       
     5 theory Derivatives
       
     6 imports Regular_Exp
       
     7 begin
       
     8 
       
     9 text{* This theory is based on work by Brozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}. *}
       
    10 
       
    11 subsection {* Brzozowski's derivatives of regular expressions *}
       
    12 
       
    13 primrec
       
    14   deriv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp"
       
    15 where
       
    16   "deriv c (Zero) = Zero"
       
    17 | "deriv c (One) = Zero"
       
    18 | "deriv c (Atom c') = (if c = c' then One else Zero)"
       
    19 | "deriv c (Plus r1 r2) = Plus (deriv c r1) (deriv c r2)"
       
    20 | "deriv c (Times r1 r2) = 
       
    21     (if nullable r1 then Plus (Times (deriv c r1) r2) (deriv c r2) else Times (deriv c r1) r2)"
       
    22 | "deriv c (Star r) = Times (deriv c r) (Star r)"
       
    23 
       
    24 primrec 
       
    25   derivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp"
       
    26 where
       
    27   "derivs [] r = r"
       
    28 | "derivs (c # s) r = derivs s (deriv c r)"
       
    29 
       
    30 
       
    31 lemma atoms_deriv_subset: "atoms (deriv x r) \<subseteq> atoms r"
       
    32 by (induction r) (auto)
       
    33 
       
    34 lemma atoms_derivs_subset: "atoms (derivs w r) \<subseteq> atoms r"
       
    35 by (induction w arbitrary: r) (auto dest: atoms_deriv_subset[THEN subsetD])
       
    36 
       
    37 lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)"
       
    38 by (induct r) (simp_all add: nullable_iff)
       
    39 
       
    40 lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)"
       
    41 by (induct s arbitrary: r) (simp_all add: lang_deriv)
       
    42 
       
    43 text {* A regular expression matcher: *}
       
    44 
       
    45 definition matcher :: "'a rexp \<Rightarrow> 'a list \<Rightarrow> bool" where
       
    46 "matcher r s = nullable (derivs s r)"
       
    47 
       
    48 lemma matcher_correctness: "matcher r s \<longleftrightarrow> s \<in> lang r"
       
    49 by (induct s arbitrary: r)
       
    50    (simp_all add: nullable_iff lang_deriv matcher_def Deriv_def)
       
    51 
       
    52 
       
    53 subsection {* Antimirov's partial derivatives *}
       
    54 
       
    55 abbreviation
       
    56   "Timess rs r \<equiv> (\<Union>r' \<in> rs. {Times r' r})"
       
    57 
       
    58 primrec
       
    59   pderiv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set"
       
    60 where
       
    61   "pderiv c Zero = {}"
       
    62 | "pderiv c One = {}"
       
    63 | "pderiv c (Atom c') = (if c = c' then {One} else {})"
       
    64 | "pderiv c (Plus r1 r2) = (pderiv c r1) \<union> (pderiv c r2)"
       
    65 | "pderiv c (Times r1 r2) = 
       
    66     (if nullable r1 then Timess (pderiv c r1) r2 \<union> pderiv c r2 else Timess (pderiv c r1) r2)"
       
    67 | "pderiv c (Star r) = Timess (pderiv c r) (Star r)"
       
    68 
       
    69 primrec
       
    70   pderivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> ('a rexp) set"
       
    71 where
       
    72   "pderivs [] r = {r}"
       
    73 | "pderivs (c # s) r = \<Union> (pderivs s ` pderiv c r)"
       
    74 
       
    75 abbreviation
       
    76  pderiv_set :: "'a \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
       
    77 where
       
    78   "pderiv_set c rs \<equiv> \<Union> (pderiv c ` rs)"
       
    79 
       
    80 abbreviation
       
    81   pderivs_set :: "'a list \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set"
       
    82 where
       
    83   "pderivs_set s rs \<equiv> \<Union> (pderivs s ` rs)"
       
    84 
       
    85 lemma pderivs_append:
       
    86   "pderivs (s1 @ s2) r = \<Union> (pderivs s2 ` pderivs s1 r)"
       
    87 by (induct s1 arbitrary: r) (simp_all)
       
    88 
       
    89 lemma pderivs_snoc:
       
    90   shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)"
       
    91 by (simp add: pderivs_append)
       
    92 
       
    93 lemma pderivs_simps [simp]:
       
    94   shows "pderivs s Zero = (if s = [] then {Zero} else {})"
       
    95   and   "pderivs s One = (if s = [] then {One} else {})"
       
    96   and   "pderivs s (Plus r1 r2) = (if s = [] then {Plus r1 r2} else (pderivs s r1) \<union> (pderivs s r2))"
       
    97 by (induct s) (simp_all)
       
    98 
       
    99 lemma pderivs_Atom:
       
   100   shows "pderivs s (Atom c) \<subseteq> {Atom c, One}"
       
   101 by (induct s) (simp_all)
       
   102 
       
   103 subsection {* Relating left-quotients and partial derivatives *}
       
   104 
       
   105 lemma Deriv_pderiv:
       
   106   shows "Deriv c (lang r) = \<Union> (lang ` pderiv c r)"
       
   107 by (induct r) (auto simp add: nullable_iff conc_UNION_distrib)
       
   108 
       
   109 lemma Derivs_pderivs:
       
   110   shows "Derivs s (lang r) = \<Union> (lang ` pderivs s r)"
       
   111 proof (induct s arbitrary: r)
       
   112   case (Cons c s)
       
   113   have ih: "\<And>r. Derivs s (lang r) = \<Union> (lang ` pderivs s r)" by fact
       
   114   have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp
       
   115   also have "\<dots> = Derivs s (\<Union> (lang ` pderiv c r))" by (simp add: Deriv_pderiv)
       
   116   also have "\<dots> = Derivss s (lang ` (pderiv c r))"
       
   117     by (auto simp add:  Derivs_def)
       
   118   also have "\<dots> = \<Union> (lang ` (pderivs_set s (pderiv c r)))"
       
   119     using ih by auto
       
   120   also have "\<dots> = \<Union> (lang ` (pderivs (c # s) r))" by simp
       
   121   finally show "Derivs (c # s) (lang r) = \<Union> (lang ` pderivs (c # s) r)" .
       
   122 qed (simp add: Derivs_def)
       
   123 
       
   124 subsection {* Relating derivatives and partial derivatives *}
       
   125 
       
   126 lemma deriv_pderiv:
       
   127   shows "\<Union> (lang ` (pderiv c r)) = lang (deriv c r)"
       
   128 unfolding lang_deriv Deriv_pderiv by simp
       
   129 
       
   130 lemma derivs_pderivs:
       
   131   shows "\<Union> (lang ` (pderivs s r)) = lang (derivs s r)"
       
   132 unfolding lang_derivs Derivs_pderivs by simp
       
   133 
       
   134 
       
   135 subsection {* Finiteness property of partial derivatives *}
       
   136 
       
   137 definition
       
   138   pderivs_lang :: "'a lang \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set"
       
   139 where
       
   140   "pderivs_lang A r \<equiv> \<Union>x \<in> A. pderivs x r"
       
   141 
       
   142 lemma pderivs_lang_subsetI:
       
   143   assumes "\<And>s. s \<in> A \<Longrightarrow> pderivs s r \<subseteq> C"
       
   144   shows "pderivs_lang A r \<subseteq> C"
       
   145 using assms unfolding pderivs_lang_def by (rule UN_least)
       
   146 
       
   147 lemma pderivs_lang_union:
       
   148   shows "pderivs_lang (A \<union> B) r = (pderivs_lang A r \<union> pderivs_lang B r)"
       
   149 by (simp add: pderivs_lang_def)
       
   150 
       
   151 lemma pderivs_lang_subset:
       
   152   shows "A \<subseteq> B \<Longrightarrow> pderivs_lang A r \<subseteq> pderivs_lang B r"
       
   153 by (auto simp add: pderivs_lang_def)
       
   154 
       
   155 definition
       
   156   "UNIV1 \<equiv> UNIV - {[]}"
       
   157 
       
   158 lemma pderivs_lang_Zero [simp]:
       
   159   shows "pderivs_lang UNIV1 Zero = {}"
       
   160 unfolding UNIV1_def pderivs_lang_def by auto
       
   161 
       
   162 lemma pderivs_lang_One [simp]:
       
   163   shows "pderivs_lang UNIV1 One = {}"
       
   164 unfolding UNIV1_def pderivs_lang_def by (auto split: if_splits)
       
   165 
       
   166 lemma pderivs_lang_Atom [simp]:
       
   167   shows "pderivs_lang UNIV1 (Atom c) = {One}"
       
   168 unfolding UNIV1_def pderivs_lang_def 
       
   169 apply(auto)
       
   170 apply(frule rev_subsetD)
       
   171 apply(rule pderivs_Atom)
       
   172 apply(simp)
       
   173 apply(case_tac xa)
       
   174 apply(auto split: if_splits)
       
   175 done
       
   176 
       
   177 lemma pderivs_lang_Plus [simp]:
       
   178   shows "pderivs_lang UNIV1 (Plus r1 r2) = pderivs_lang UNIV1 r1 \<union> pderivs_lang UNIV1 r2"
       
   179 unfolding UNIV1_def pderivs_lang_def by auto
       
   180 
       
   181 
       
   182 text {* Non-empty suffixes of a string (needed for the cases of @{const Times} and @{const Star} below) *}
       
   183 
       
   184 definition
       
   185   "PSuf s \<equiv> {v. v \<noteq> [] \<and> (\<exists>u. u @ v = s)}"
       
   186 
       
   187 lemma PSuf_snoc:
       
   188   shows "PSuf (s @ [c]) = (PSuf s) @@ {[c]} \<union> {[c]}"
       
   189 unfolding PSuf_def conc_def
       
   190 by (auto simp add: append_eq_append_conv2 append_eq_Cons_conv)
       
   191 
       
   192 lemma PSuf_Union:
       
   193   shows "(\<Union>v \<in> PSuf s @@ {[c]}. f v) = (\<Union>v \<in> PSuf s. f (v @ [c]))"
       
   194 by (auto simp add: conc_def)
       
   195 
       
   196 lemma pderivs_lang_snoc:
       
   197   shows "pderivs_lang (PSuf s @@ {[c]}) r = (pderiv_set c (pderivs_lang (PSuf s) r))"
       
   198 unfolding pderivs_lang_def
       
   199 by (simp add: PSuf_Union pderivs_snoc)
       
   200 
       
   201 lemma pderivs_Times:
       
   202   shows "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)"
       
   203 proof (induct s rule: rev_induct)
       
   204   case (snoc c s)
       
   205   have ih: "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)" 
       
   206     by fact
       
   207   have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" 
       
   208     by (simp add: pderivs_snoc)
       
   209   also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2))"
       
   210     using ih by fast
       
   211   also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv_set c (pderivs_lang (PSuf s) r2)"
       
   212     by (simp)
       
   213   also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
       
   214     by (simp add: pderivs_lang_snoc)
       
   215   also 
       
   216   have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
       
   217     by auto
       
   218   also 
       
   219   have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs s r1)) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
       
   220     by (auto simp add: if_splits)
       
   221   also have "\<dots> = Timess (pderivs (s @ [c]) r1) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2"
       
   222     by (simp add: pderivs_snoc)
       
   223   also have "\<dots> \<subseteq> Timess (pderivs (s @ [c]) r1) r2 \<union> pderivs_lang (PSuf (s @ [c])) r2"
       
   224     unfolding pderivs_lang_def by (auto simp add: PSuf_snoc)  
       
   225   finally show ?case .
       
   226 qed (simp) 
       
   227 
       
   228 lemma pderivs_lang_Times_aux1:
       
   229   assumes a: "s \<in> UNIV1"
       
   230   shows "pderivs_lang (PSuf s) r \<subseteq> pderivs_lang UNIV1 r"
       
   231 using a unfolding UNIV1_def PSuf_def pderivs_lang_def by auto
       
   232 
       
   233 lemma pderivs_lang_Times_aux2:
       
   234   assumes a: "s \<in> UNIV1"
       
   235   shows "Timess (pderivs s r1) r2 \<subseteq> Timess (pderivs_lang UNIV1 r1) r2"
       
   236 using a unfolding pderivs_lang_def by auto
       
   237 
       
   238 lemma pderivs_lang_Times:
       
   239   shows "pderivs_lang UNIV1 (Times r1 r2) \<subseteq> Timess (pderivs_lang UNIV1 r1) r2 \<union> pderivs_lang UNIV1 r2"
       
   240 apply(rule pderivs_lang_subsetI)
       
   241 apply(rule subset_trans)
       
   242 apply(rule pderivs_Times)
       
   243 using pderivs_lang_Times_aux1 pderivs_lang_Times_aux2
       
   244 apply(blast)
       
   245 done
       
   246 
       
   247 lemma pderivs_Star:
       
   248   assumes a: "s \<noteq> []"
       
   249   shows "pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)"
       
   250 using a
       
   251 proof (induct s rule: rev_induct)
       
   252   case (snoc c s)
       
   253   have ih: "s \<noteq> [] \<Longrightarrow> pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)" by fact
       
   254   { assume asm: "s \<noteq> []"
       
   255     have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc)
       
   256     also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))"
       
   257       using ih[OF asm] by fast
       
   258     also have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \<union> pderiv c (Star r)"
       
   259       by (auto split: if_splits)
       
   260     also have "\<dots> \<subseteq> Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \<union> (Timess (pderiv c r) (Star r))"
       
   261       by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union)
       
   262          (auto simp add: pderivs_lang_def)
       
   263     also have "\<dots> = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)"
       
   264       by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def)
       
   265     finally have ?case .
       
   266   }
       
   267   moreover
       
   268   { assume asm: "s = []"
       
   269     then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def)
       
   270   }
       
   271   ultimately show ?case by blast
       
   272 qed (simp)
       
   273 
       
   274 lemma pderivs_lang_Star:
       
   275   shows "pderivs_lang UNIV1 (Star r) \<subseteq> Timess (pderivs_lang UNIV1 r) (Star r)"
       
   276 apply(rule pderivs_lang_subsetI)
       
   277 apply(rule subset_trans)
       
   278 apply(rule pderivs_Star)
       
   279 apply(simp add: UNIV1_def)
       
   280 apply(simp add: UNIV1_def PSuf_def)
       
   281 apply(auto simp add: pderivs_lang_def)
       
   282 done
       
   283 
       
   284 lemma finite_Timess [simp]:
       
   285   assumes a: "finite A"
       
   286   shows "finite (Timess A r)"
       
   287 using a by auto
       
   288 
       
   289 lemma finite_pderivs_lang_UNIV1:
       
   290   shows "finite (pderivs_lang UNIV1 r)"
       
   291 apply(induct r)
       
   292 apply(simp_all add: 
       
   293   finite_subset[OF pderivs_lang_Times]
       
   294   finite_subset[OF pderivs_lang_Star])
       
   295 done
       
   296     
       
   297 lemma pderivs_lang_UNIV:
       
   298   shows "pderivs_lang UNIV r = pderivs [] r \<union> pderivs_lang UNIV1 r"
       
   299 unfolding UNIV1_def pderivs_lang_def
       
   300 by blast
       
   301 
       
   302 lemma finite_pderivs_lang_UNIV:
       
   303   shows "finite (pderivs_lang UNIV r)"
       
   304 unfolding pderivs_lang_UNIV
       
   305 by (simp add: finite_pderivs_lang_UNIV1)
       
   306 
       
   307 lemma finite_pderivs_lang:
       
   308   shows "finite (pderivs_lang A r)"
       
   309 by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV)
       
   310 
       
   311 
       
   312 text{* The following relationship between the alphabetic width of regular expressions
       
   313 (called @{text awidth} below) and the number of partial derivatives was proved
       
   314 by Antimirov~\cite{Antimirov95} and formalized by Max Haslbeck. *}
       
   315 
       
   316 fun awidth :: "'a rexp \<Rightarrow> nat" where
       
   317 "awidth Zero = 0" |
       
   318 "awidth One = 0" |
       
   319 "awidth (Atom a) = 1" |
       
   320 "awidth (Plus r1 r2) = awidth r1 + awidth r2" |
       
   321 "awidth (Times r1 r2) = awidth r1 + awidth r2" |
       
   322 "awidth (Star r1) = awidth r1"
       
   323 
       
   324 lemma card_Timess_pderivs_lang_le:
       
   325   "card (Timess (pderivs_lang A r) s) \<le> card (pderivs_lang A r)"
       
   326 by (metis card_image_le finite_pderivs_lang image_eq_UN)
       
   327 
       
   328 lemma card_pderivs_lang_UNIV1_le_awidth: "card (pderivs_lang UNIV1 r) \<le> awidth r"
       
   329 proof (induction r)
       
   330   case (Plus r1 r2)
       
   331   have "card (pderivs_lang UNIV1 (Plus r1 r2)) = card (pderivs_lang UNIV1 r1 \<union> pderivs_lang UNIV1 r2)" by simp
       
   332   also have "\<dots> \<le> card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)"
       
   333     by(simp add: card_Un_le)
       
   334   also have "\<dots> \<le> awidth (Plus r1 r2)" using Plus.IH by simp
       
   335   finally show ?case .
       
   336 next
       
   337   case (Times r1 r2)
       
   338   have "card (pderivs_lang UNIV1 (Times r1 r2)) \<le> card (Timess (pderivs_lang UNIV1 r1) r2 \<union> pderivs_lang UNIV1 r2)"
       
   339     by (simp add: card_mono finite_pderivs_lang pderivs_lang_Times)
       
   340   also have "\<dots> \<le> card (Timess (pderivs_lang UNIV1 r1) r2) + card (pderivs_lang UNIV1 r2)"
       
   341     by (simp add: card_Un_le)
       
   342   also have "\<dots> \<le> card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)"
       
   343     by (simp add: card_Timess_pderivs_lang_le)
       
   344   also have "\<dots> \<le> awidth (Times r1 r2)" using Times.IH by simp
       
   345   finally show ?case .
       
   346 next
       
   347   case (Star r)
       
   348   have "card (pderivs_lang UNIV1 (Star r)) \<le> card (Timess (pderivs_lang UNIV1 r) (Star r))"
       
   349     by (simp add: card_mono finite_pderivs_lang pderivs_lang_Star)
       
   350   also have "\<dots> \<le> card (pderivs_lang UNIV1 r)" by (rule card_Timess_pderivs_lang_le)
       
   351   also have "\<dots> \<le> awidth (Star r)" by (simp add: Star.IH)
       
   352   finally show ?case .
       
   353 qed (auto)
       
   354 
       
   355 text{* Antimirov's Theorem 3.4: *}
       
   356 theorem card_pderivs_lang_UNIV_le_awidth: "card (pderivs_lang UNIV r) \<le> awidth r + 1"
       
   357 proof -
       
   358   have "card (insert r (pderivs_lang UNIV1 r)) \<le> Suc (card (pderivs_lang UNIV1 r))"
       
   359     by(auto simp: card_insert_if[OF finite_pderivs_lang_UNIV1])
       
   360   also have "\<dots> \<le> Suc (awidth r)" by(simp add: card_pderivs_lang_UNIV1_le_awidth)
       
   361   finally show ?thesis by(simp add: pderivs_lang_UNIV)
       
   362 qed 
       
   363 
       
   364 text{* Antimirov's Corollary 3.5: *}
       
   365 corollary card_pderivs_lang_le_awidth: "card (pderivs_lang A r) \<le> awidth r + 1"
       
   366 by(rule order_trans[OF
       
   367   card_mono[OF finite_pderivs_lang_UNIV pderivs_lang_subset[OF subset_UNIV]]
       
   368   card_pderivs_lang_UNIV_le_awidth])
       
   369 
       
   370 end