theory Myhill_2 imports Myhill_1 Prefix_subtract "~~/src/HOL/Library/List_Prefix"beginsection {* Direction @{text "regular language \<Rightarrow>finite partition"} *}subsection {* The scheme*}text {* The following convenient notation @{text "x \<approx>A y"} means: string @{text "x"} and @{text "y"} are equivalent with respect to language @{text "A"}. *}definition str_eq :: "string \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> bool" ("_ \<approx>_ _")where "x \<approx>A y \<equiv> (x, y) \<in> (\<approx>A)"text {* The main lemma (@{text "rexp_imp_finite"}) is proved by a structural induction over regular expressions. where base cases (cases for @{const "NULL"}, @{const "EMPTY"}, @{const "CHAR"}) are quite straightforward to proof. Real difficulty lies in inductive cases. By inductive hypothesis, languages defined by sub-expressions induce finite partitiions. Under such hypothsis, we need to prove that the language defined by the composite regular expression gives rise to finite partion. The basic idea is to attach a tag @{text "tag(x)"} to every string @{text "x"}. The tagging fuction @{text "tag"} is carefully devised, which returns tags made of equivalent classes of the partitions induced by subexpressoins, and therefore has a finite range. Let @{text "Lang"} be the composite language, it is proved that: \begin{quote} If strings with the same tag are equivalent with respect to @{text "Lang"}, expressed as: \[ @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"} \] then the partition induced by @{text "Lang"} must be finite. \end{quote} There are two arguments for this. The first goes as the following: \begin{enumerate} \item First, the tagging function @{text "tag"} induces an equivalent relation @{text "(=tag=)"} (defiintion of @{text "f_eq_rel"} and lemma @{text "equiv_f_eq_rel"}). \item It is shown that: if the range of @{text "tag"} (denoted @{text "range(tag)"}) is finite, the partition given rise by @{text "(=tag=)"} is finite (lemma @{text "finite_eq_f_rel"}). Since tags are made from equivalent classes from component partitions, and the inductive hypothesis ensures the finiteness of these partitions, it is not difficult to prove the finiteness of @{text "range(tag)"}. \item It is proved that if equivalent relation @{text "R1"} is more refined than @{text "R2"} (expressed as @{text "R1 \<subseteq> R2"}), and the partition induced by @{text "R1"} is finite, then the partition induced by @{text "R2"} is finite as well (lemma @{text "refined_partition_finite"}). \item The injectivity assumption @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"} implies that @{text "(=tag=)"} is more refined than @{text "(\<approx>Lang)"}. \item Combining the points above, we have: the partition induced by language @{text "Lang"} is finite (lemma @{text "tag_finite_imageD"}). \end{enumerate}*}definition tag_eq_rel :: "(string \<Rightarrow> 'b) \<Rightarrow> (string \<times> string) set" ("=_=")where "=tag= \<equiv> {(x, y) | x y. tag x = tag y}"lemma finite_eq_tag_rel: assumes rng_fnt: "finite (range tag)" shows "finite (UNIV // =tag=)"proof - let "?f" = "\<lambda>X. tag ` X" and ?A = "(UNIV // =tag=)" -- {* The finiteness of @{text "f"}-image is a consequence of @{text "rng_fnt"} *} have "finite (?f ` ?A)" proof - have "range ?f \<subseteq> (Pow (range tag))" unfolding Pow_def by auto moreover have "finite (Pow (range tag))" using rng_fnt by simp ultimately have "finite (range ?f)" unfolding image_def by (blast intro: finite_subset) moreover have "?f ` ?A \<subseteq> range ?f" by auto ultimately show "finite (?f ` ?A)" by (rule rev_finite_subset) qed moreover -- {* The injectivity of @{text "f"}-image follows from the definition of @{text "(=tag=)"} *} have "inj_on ?f ?A" proof - { fix X Y assume X_in: "X \<in> ?A" and Y_in: "Y \<in> ?A" and tag_eq: "?f X = ?f Y" then obtain x y where "x \<in> X" "y \<in> Y" "tag x = tag y" unfolding quotient_def Image_def image_def tag_eq_rel_def by (simp) (blast) with X_in Y_in have "X = Y" unfolding quotient_def tag_eq_rel_def by auto } then show "inj_on ?f ?A" unfolding inj_on_def by auto qed ultimately show "finite (UNIV // =tag=)" by (rule finite_imageD)qedlemma refined_partition_finite: assumes fnt: "finite (UNIV // R1)" and refined: "R1 \<subseteq> R2" and eq1: "equiv UNIV R1" and eq2: "equiv UNIV R2" shows "finite (UNIV // R2)"proof - let ?f = "\<lambda>X. {R1 `` {x} | x. x \<in> X}" and ?A = "UNIV // R2" and ?B = "UNIV // R1" have "?f ` ?A \<subseteq> Pow ?B" unfolding image_def Pow_def quotient_def by auto moreover have "finite (Pow ?B)" using fnt by simp ultimately have "finite (?f ` ?A)" by (rule finite_subset) moreover have "inj_on ?f ?A" proof - { fix X Y assume X_in: "X \<in> ?A" and Y_in: "Y \<in> ?A" and eq_f: "?f X = ?f Y" from quotientE [OF X_in] obtain x where "X = R2 `` {x}" by blast with equiv_class_self[OF eq2] have x_in: "x \<in> X" by simp then have "R1 ``{x} \<in> ?f X" by auto with eq_f have "R1 `` {x} \<in> ?f Y" by simp then obtain y where y_in: "y \<in> Y" and eq_r1_xy: "R1 `` {x} = R1 `` {y}" by auto with eq_equiv_class[OF _ eq1] have "(x, y) \<in> R1" by blast with refined have "(x, y) \<in> R2" by auto with quotient_eqI [OF eq2 X_in Y_in x_in y_in] have "X = Y" . } then show "inj_on ?f ?A" unfolding inj_on_def by blast qed ultimately show "finite (UNIV // R2)" by (rule finite_imageD)qedlemma tag_finite_imageD: assumes rng_fnt: "finite (range tag)" and same_tag_eqvt: "\<And> m n. tag m = tag (n::string) \<Longrightarrow> m \<approx>A n" shows "finite (UNIV // \<approx>A)"proof (rule_tac refined_partition_finite [of "=tag="]) show "finite (UNIV // =tag=)" by (rule finite_eq_tag_rel[OF rng_fnt])next from same_tag_eqvt show "=tag= \<subseteq> \<approx>A" unfolding tag_eq_rel_def str_eq_def by autonext show "equiv UNIV =tag=" unfolding equiv_def tag_eq_rel_def refl_on_def sym_def trans_def by autonext show "equiv UNIV (\<approx>A)" unfolding equiv_def str_eq_rel_def sym_def refl_on_def trans_def by blastqedsubsection {* The proof*}text {* Each case is given in a separate section, as well as the final main lemma. Detailed explainations accompanied by illustrations are given for non-trivial cases. For ever inductive case, there are two tasks, the easier one is to show the range finiteness of of the tagging function based on the finiteness of component partitions, the difficult one is to show that strings with the same tag are equivalent with respect to the composite language. Suppose the composite language be @{text "Lang"}, tagging function be @{text "tag"}, it amounts to show: \[ @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"} \] expanding the definition of @{text "\<approx>Lang"}, it amounts to show: \[ @{text "tag(x) = tag(y) \<Longrightarrow> (\<forall> z. x@z \<in> Lang \<longleftrightarrow> y@z \<in> Lang)"} \] Because the assumed tag equlity @{text "tag(x) = tag(y)"} is symmetric, it is suffcient to show just one direction: \[ @{text "\<And> x y z. \<lbrakk>tag(x) = tag(y); x@z \<in> Lang\<rbrakk> \<Longrightarrow> y@z \<in> Lang"} \] This is the pattern followed by every inductive case. *}subsubsection {* The base case for @{const "NULL"} *}lemma quot_null_eq: shows "(UNIV // \<approx>{}) = ({UNIV}::lang set)" unfolding quotient_def Image_def str_eq_rel_def by autolemma quot_null_finiteI [intro]: shows "finite ((UNIV // \<approx>{})::lang set)"unfolding quot_null_eq by simpsubsubsection {* The base case for @{const "EMPTY"} *}lemma quot_empty_subset: "UNIV // (\<approx>{[]}) \<subseteq> {{[]}, UNIV - {[]}}"proof fix x assume "x \<in> UNIV // \<approx>{[]}" then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[]}}" unfolding quotient_def Image_def by blast show "x \<in> {{[]}, UNIV - {[]}}" proof (cases "y = []") case True with h have "x = {[]}" by (auto simp: str_eq_rel_def) thus ?thesis by simp next case False with h have "x = UNIV - {[]}" by (auto simp: str_eq_rel_def) thus ?thesis by simp qedqedlemma quot_empty_finiteI [intro]: shows "finite (UNIV // (\<approx>{[]}))"by (rule finite_subset[OF quot_empty_subset]) (simp)subsubsection {* The base case for @{const "CHAR"} *}lemma quot_char_subset: "UNIV // (\<approx>{[c]}) \<subseteq> {{[]},{[c]}, UNIV - {[], [c]}}"proof fix x assume "x \<in> UNIV // \<approx>{[c]}" then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[c]}}" unfolding quotient_def Image_def by blast show "x \<in> {{[]},{[c]}, UNIV - {[], [c]}}" proof - { assume "y = []" hence "x = {[]}" using h by (auto simp:str_eq_rel_def) } moreover { assume "y = [c]" hence "x = {[c]}" using h by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def) } moreover { assume "y \<noteq> []" and "y \<noteq> [c]" hence "\<forall> z. (y @ z) \<noteq> [c]" by (case_tac y, auto) moreover have "\<And> p. (p \<noteq> [] \<and> p \<noteq> [c]) = (\<forall> q. p @ q \<noteq> [c])" by (case_tac p, auto) ultimately have "x = UNIV - {[],[c]}" using h by (auto simp add:str_eq_rel_def) } ultimately show ?thesis by blast qedqedlemma quot_char_finiteI [intro]: shows "finite (UNIV // (\<approx>{[c]}))"by (rule finite_subset[OF quot_char_subset]) (simp)subsubsection {* The inductive case for @{const ALT} *}definition tag_str_ALT :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang)"where "tag_str_ALT A B \<equiv> (\<lambda>x. (\<approx>A `` {x}, \<approx>B `` {x}))"lemma quot_union_finiteI [intro]: fixes L1 L2::"lang" assumes finite1: "finite (UNIV // \<approx>A)" and finite2: "finite (UNIV // \<approx>B)" shows "finite (UNIV // \<approx>(A \<union> B))"proof (rule_tac tag = "tag_str_ALT A B" in tag_finite_imageD) have "finite ((UNIV // \<approx>A) \<times> (UNIV // \<approx>B))" using finite1 finite2 by auto then show "finite (range (tag_str_ALT A B))" unfolding tag_str_ALT_def quotient_def by (rule rev_finite_subset) (auto)next show "\<And>x y. tag_str_ALT A B x = tag_str_ALT A B y \<Longrightarrow> x \<approx>(A \<union> B) y" unfolding tag_str_ALT_def unfolding str_eq_def unfolding str_eq_rel_def by autoqedsubsubsection {* The inductive case for @{text "SEQ"}*}text {* For case @{const "SEQ"}, the language @{text "L"} is @{text "L\<^isub>1 ;; L\<^isub>2"}. Given @{text "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"}, according to the defintion of @{text " L\<^isub>1 ;; L\<^isub>2"}, string @{text "x @ z"} can be splitted with the prefix in @{text "L\<^isub>1"} and suffix in @{text "L\<^isub>2"}. The split point can either be in @{text "x"} (as shown in Fig. \ref{seq_first_split}), or in @{text "z"} (as shown in Fig. \ref{seq_snd_split}). Whichever way it goes, the structure on @{text "x @ z"} cn be transfered faithfully onto @{text "y @ z"} (as shown in Fig. \ref{seq_trans_first_split} and \ref{seq_trans_snd_split}) with the the help of the assumed tag equality. The following tag function @{text "tag_str_SEQ"} is such designed to facilitate such transfers and lemma @{text "tag_str_SEQ_injI"} formalizes the informal argument above. The details of structure transfer will be given their.\input{fig_seq} *}definition tag_str_SEQ :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang set)"where "tag_str_SEQ L1 L2 \<equiv> (\<lambda>x. (\<approx>L1 `` {x}, {(\<approx>L2 `` {x - x'}) | x'. x' \<le> x \<and> x' \<in> L1}))"text {* The following is a techical lemma which helps to split the @{text "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"} mentioned above.*}lemma append_seq_elim: assumes "x @ y \<in> L\<^isub>1 ;; L\<^isub>2" shows "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2) \<or> (\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2)"proof- from assms obtain s\<^isub>1 s\<^isub>2 where eq_xys: "x @ y = s\<^isub>1 @ s\<^isub>2" and in_seq: "s\<^isub>1 \<in> L\<^isub>1 \<and> s\<^isub>2 \<in> L\<^isub>2" by (auto simp:Seq_def) from app_eq_dest [OF eq_xys] have "(x \<le> s\<^isub>1 \<and> (s\<^isub>1 - x) @ s\<^isub>2 = y) \<or> (s\<^isub>1 \<le> x \<and> (x - s\<^isub>1) @ y = s\<^isub>2)" (is "?Split1 \<or> ?Split2") . moreover have "?Split1 \<Longrightarrow> \<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2" using in_seq by (rule_tac x = "s\<^isub>1 - x" in exI, auto elim:prefixE) moreover have "?Split2 \<Longrightarrow> \<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2" using in_seq by (rule_tac x = s\<^isub>1 in exI, auto) ultimately show ?thesis by blastqedlemma tag_str_SEQ_injI: fixes v w assumes eq_tag: "tag_str_SEQ L\<^isub>1 L\<^isub>2 v = tag_str_SEQ L\<^isub>1 L\<^isub>2 w" shows "v \<approx>(L\<^isub>1 ;; L\<^isub>2) w"proof- -- {* As explained before, a pattern for just one direction needs to be dealt with:*} { fix x y z assume xz_in_seq: "x @ z \<in> L\<^isub>1 ;; L\<^isub>2" and tag_xy: "tag_str_SEQ L\<^isub>1 L\<^isub>2 x = tag_str_SEQ L\<^isub>1 L\<^isub>2 y" have"y @ z \<in> L\<^isub>1 ;; L\<^isub>2" proof- -- {* There are two ways to split @{text "x@z"}: *} from append_seq_elim [OF xz_in_seq] have "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ z \<in> L\<^isub>2) \<or> (\<exists> za \<le> z. (x @ za) \<in> L\<^isub>1 \<and> (z - za) \<in> L\<^isub>2)" . -- {* It can be shown that @{text "?thesis"} holds in either case: *} moreover { -- {* The case for the first split:*} fix xa assume h1: "xa \<le> x" and h2: "xa \<in> L\<^isub>1" and h3: "(x - xa) @ z \<in> L\<^isub>2" -- {* The following subgoal implements the structure transfer:*} obtain ya where "ya \<le> y" and "ya \<in> L\<^isub>1" and "(y - ya) @ z \<in> L\<^isub>2" proof - -- {* \begin{minipage}{0.8\textwidth} By expanding the definition of @{thm [display] "tag_xy"} and extracting the second compoent, we get: \end{minipage} *} have "{\<approx>L\<^isub>2 `` {x - xa} |xa. xa \<le> x \<and> xa \<in> L\<^isub>1} = {\<approx>L\<^isub>2 `` {y - ya} |ya. ya \<le> y \<and> ya \<in> L\<^isub>1}" (is "?Left = ?Right") using tag_xy unfolding tag_str_SEQ_def by simp -- {* Since @{thm "h1"} and @{thm "h2"} hold, it is not difficult to show: *} moreover have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Left" using h1 h2 by auto -- {* \begin{minipage}{0.7\textwidth} Through tag equality, equivalent class @{term "\<approx>L\<^isub>2 `` {x - xa}"} also belongs to the @{text "?Right"}: \end{minipage} *} ultimately have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Right" by simp -- {* From this, the counterpart of @{text "xa"} in @{text "y"} is obtained:*} then obtain ya where eq_xya: "\<approx>L\<^isub>2 `` {x - xa} = \<approx>L\<^isub>2 `` {y - ya}" and pref_ya: "ya \<le> y" and ya_in: "ya \<in> L\<^isub>1" by simp blast -- {* It can be proved that @{text "ya"} has the desired property:*} have "(y - ya)@z \<in> L\<^isub>2" proof - from eq_xya have "(x - xa) \<approx>L\<^isub>2 (y - ya)" unfolding Image_def str_eq_rel_def str_eq_def by auto with h3 show ?thesis unfolding str_eq_rel_def str_eq_def by simp qed -- {* Now, @{text "ya"} has all properties to be a qualified candidate:*} with pref_ya ya_in show ?thesis using that by blast qed -- {* From the properties of @{text "ya"}, @{text "y @ z \<in> L\<^isub>1 ;; L\<^isub>2"} is derived easily.*} hence "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" by (erule_tac prefixE, auto simp:Seq_def) } moreover { -- {* The other case is even more simpler: *} fix za assume h1: "za \<le> z" and h2: "(x @ za) \<in> L\<^isub>1" and h3: "z - za \<in> L\<^isub>2" have "y @ za \<in> L\<^isub>1" proof- have "\<approx>L\<^isub>1 `` {x} = \<approx>L\<^isub>1 `` {y}" using tag_xy unfolding tag_str_SEQ_def by simp with h2 show ?thesis unfolding Image_def str_eq_rel_def str_eq_def by auto qed with h1 h3 have "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" by (drule_tac A = L\<^isub>1 in seq_intro, auto elim:prefixE) } ultimately show ?thesis by blast qed } -- {* \begin{minipage}{0.8\textwidth} @{text "?thesis"} is proved by exploiting the symmetry of @{thm [source] "eq_tag"}: \end{minipage} *} from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]] show ?thesis unfolding str_eq_def str_eq_rel_def by blastqed lemma quot_seq_finiteI [intro]: fixes L1 L2::"lang" assumes fin1: "finite (UNIV // \<approx>L1)" and fin2: "finite (UNIV // \<approx>L2)" shows "finite (UNIV // \<approx>(L1 ;; L2))"proof (rule_tac tag = "tag_str_SEQ L1 L2" in tag_finite_imageD) show "\<And>x y. tag_str_SEQ L1 L2 x = tag_str_SEQ L1 L2 y \<Longrightarrow> x \<approx>(L1 ;; L2) y" by (rule tag_str_SEQ_injI)next have *: "finite ((UNIV // \<approx>L1) \<times> (Pow (UNIV // \<approx>L2)))" using fin1 fin2 by auto show "finite (range (tag_str_SEQ L1 L2))" unfolding tag_str_SEQ_def apply(rule finite_subset[OF _ *]) unfolding quotient_def by autoqedsubsubsection {* The inductive case for @{const "STAR"} *}text {* This turned out to be the trickiest case. The essential goal is to proved @{text "y @ z \<in> L\<^isub>1*"} under the assumptions that @{text "x @ z \<in> L\<^isub>1*"} and that @{text "x"} and @{text "y"} have the same tag. The reasoning goes as the following: \begin{enumerate} \item Since @{text "x @ z \<in> L\<^isub>1*"} holds, a prefix @{text "xa"} of @{text "x"} can be found such that @{text "xa \<in> L\<^isub>1*"} and @{text "(x - xa)@z \<in> L\<^isub>1*"}, as shown in Fig. \ref{first_split}. Such a prefix always exists, @{text "xa = []"}, for example, is one. \item There could be many but fintie many of such @{text "xa"}, from which we can find the longest and name it @{text "xa_max"}, as shown in Fig. \ref{max_split}. \item The next step is to split @{text "z"} into @{text "za"} and @{text "zb"} such that @{text "(x - xa_max) @ za \<in> L\<^isub>1"} and @{text "zb \<in> L\<^isub>1*"} as shown in Fig. \ref{last_split}. Such a split always exists because: \begin{enumerate} \item Because @{text "(x - x_max) @ z \<in> L\<^isub>1*"}, it can always be splitted into prefix @{text "a"} and suffix @{text "b"}, such that @{text "a \<in> L\<^isub>1"} and @{text "b \<in> L\<^isub>1*"}, as shown in Fig. \ref{ab_split}. \item But the prefix @{text "a"} CANNOT be shorter than @{text "x - xa_max"} (as shown in Fig. \ref{ab_split_wrong}), becasue otherwise, @{text "ma_max@a"} would be in the same kind as @{text "xa_max"} but with a larger size, conflicting with the fact that @{text "xa_max"} is the longest. \end{enumerate} \item \label{tansfer_step} By the assumption that @{text "x"} and @{text "y"} have the same tag, the structure on @{text "x @ z"} can be transferred to @{text "y @ z"} as shown in Fig. \ref{trans_split}. The detailed steps are: \begin{enumerate} \item A @{text "y"}-prefix @{text "ya"} corresponding to @{text "xa"} can be found, which satisfies conditions: @{text "ya \<in> L\<^isub>1*"} and @{text "(y - ya)@za \<in> L\<^isub>1"}. \item Since we already know @{text "zb \<in> L\<^isub>1*"}, we get @{text "(y - ya)@za@zb \<in> L\<^isub>1*"}, and this is just @{text "(y - ya)@z \<in> L\<^isub>1*"}. \item With fact @{text "ya \<in> L\<^isub>1*"}, we finally get @{text "y@z \<in> L\<^isub>1*"}. \end{enumerate} \end{enumerate} The formal proof of lemma @{text "tag_str_STAR_injI"} faithfully follows this informal argument while the tagging function @{text "tag_str_STAR"} is defined to make the transfer in step \ref{ansfer_step} feasible. \input{fig_star}*} definition tag_str_STAR :: "lang \<Rightarrow> string \<Rightarrow> lang set"where "tag_str_STAR L1 \<equiv> (\<lambda>x. {\<approx>L1 `` {x - x'} | x'. x' < x \<and> x' \<in> L1\<star>})"text {* A technical lemma. *}lemma finite_set_has_max: "\<lbrakk>finite A; A \<noteq> {}\<rbrakk> \<Longrightarrow> (\<exists> max \<in> A. \<forall> a \<in> A. f a <= (f max :: nat))"proof (induct rule:finite.induct) case emptyI thus ?case by simpnext case (insertI A a) show ?case proof (cases "A = {}") case True thus ?thesis by (rule_tac x = a in bexI, auto) next case False with insertI.hyps and False obtain max where h1: "max \<in> A" and h2: "\<forall>a\<in>A. f a \<le> f max" by blast show ?thesis proof (cases "f a \<le> f max") assume "f a \<le> f max" with h1 h2 show ?thesis by (rule_tac x = max in bexI, auto) next assume "\<not> (f a \<le> f max)" thus ?thesis using h2 by (rule_tac x = a in bexI, auto) qed qedqedtext {* The following is a technical lemma.which helps to show the range finiteness of tag function. *}lemma finite_strict_prefix_set: "finite {xa. xa < (x::string)}"apply (induct x rule:rev_induct, simp)apply (subgoal_tac "{xa. xa < xs @ [x]} = {xa. xa < xs} \<union> {xs}")by (auto simp:strict_prefix_def)lemma tag_str_STAR_injI: fixes v w assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w" shows "(v::string) \<approx>(L\<^isub>1\<star>) w"proof- -- {* As explained before, a pattern for just one direction needs to be dealt with:*} { fix x y z assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>" and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y" have "y @ z \<in> L\<^isub>1\<star>" proof(cases "x = []") -- {* The degenerated case when @{text "x"} is a null string is easy to prove: *} case True with tag_xy have "y = []" by (auto simp add: tag_str_STAR_def strict_prefix_def) thus ?thesis using xz_in_star True by simp next -- {* The nontrival case: *} case False -- {* \begin{minipage}{0.8\textwidth} Since @{text "x @ z \<in> L\<^isub>1\<star>"}, @{text "x"} can always be splitted by a prefix @{text "xa"} together with its suffix @{text "x - xa"}, such that both @{text "xa"} and @{text "(x - xa) @ z"} are in @{text "L\<^isub>1\<star>"}, and there could be many such splittings.Therefore, the following set @{text "?S"} is nonempty, and finite as well: \end{minipage} *} let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}" have "finite ?S" by (rule_tac B = "{xa. xa < x}" in finite_subset, auto simp:finite_strict_prefix_set) moreover have "?S \<noteq> {}" using False xz_in_star by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def) -- {* \begin{minipage}{0.7\textwidth} Since @{text "?S"} is finite, we can always single out the longest and name it @{text "xa_max"}: \end{minipage} *} ultimately have "\<exists> xa_max \<in> ?S. \<forall> xa \<in> ?S. length xa \<le> length xa_max" using finite_set_has_max by blast then obtain xa_max where h1: "xa_max < x" and h2: "xa_max \<in> L\<^isub>1\<star>" and h3: "(x - xa_max) @ z \<in> L\<^isub>1\<star>" and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star> \<longrightarrow> length xa \<le> length xa_max" by blast -- {* \begin{minipage}{0.8\textwidth} By the equality of tags, the counterpart of @{text "xa_max"} among @{text "y"}-prefixes, named @{text "ya"}, can be found: \end{minipage} *} obtain ya where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" and eq_xya: "(x - xa_max) \<approx>L\<^isub>1 (y - ya)" proof- from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} = {\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right") by (auto simp:tag_str_STAR_def) moreover have "\<approx>L\<^isub>1 `` {x - xa_max} \<in> ?left" using h1 h2 by auto ultimately have "\<approx>L\<^isub>1 `` {x - xa_max} \<in> ?right" by simp thus ?thesis using that apply (simp add:Image_def str_eq_rel_def str_eq_def) by blast qed -- {* \begin{minipage}{0.8\textwidth} The @{text "?thesis"}, @{prop "y @ z \<in> L\<^isub>1\<star>"}, is a simple consequence of the following proposition: \end{minipage} *} have "(y - ya) @ z \<in> L\<^isub>1\<star>" proof- -- {* The idea is to split the suffix @{text "z"} into @{text "za"} and @{text "zb"}, such that: *} obtain za zb where eq_zab: "z = za @ zb" and l_za: "(y - ya)@za \<in> L\<^isub>1" and ls_zb: "zb \<in> L\<^isub>1\<star>" proof - -- {* \begin{minipage}{0.8\textwidth} Since @{thm "h1"}, @{text "x"} can be splitted into @{text "a"} and @{text "b"} such that: \end{minipage} *} from h1 have "(x - xa_max) @ z \<noteq> []" by (auto simp:strict_prefix_def elim:prefixE) from star_decom [OF h3 this] obtain a b where a_in: "a \<in> L\<^isub>1" and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>" and ab_max: "(x - xa_max) @ z = a @ b" by blast -- {* Now the candiates for @{text "za"} and @{text "zb"} are found:*} let ?za = "a - (x - xa_max)" and ?zb = "b" have pfx: "(x - xa_max) \<le> a" (is "?P1") and eq_z: "z = ?za @ ?zb" (is "?P2") proof - -- {* \begin{minipage}{0.8\textwidth} Since @{text "(x - xa_max) @ z = a @ b"}, string @{text "(x - xa_max) @ z"} can be splitted in two ways: \end{minipage} *} have "((x - xa_max) \<le> a \<and> (a - (x - xa_max)) @ b = z) \<or> (a < (x - xa_max) \<and> ((x - xa_max) - a) @ z = b)" using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def) moreover { -- {* However, the undsired way can be refuted by absurdity: *} assume np: "a < (x - xa_max)" and b_eqs: "((x - xa_max) - a) @ z = b" have "False" proof - let ?xa_max' = "xa_max @ a" have "?xa_max' < x" using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) moreover have "?xa_max' \<in> L\<^isub>1\<star>" using a_in h2 by (simp add:star_intro3) moreover have "(x - ?xa_max') @ z \<in> L\<^isub>1\<star>" using b_eqs b_in np h1 by (simp add:diff_diff_appd) moreover have "\<not> (length ?xa_max' \<le> length xa_max)" using a_neq by simp ultimately show ?thesis using h4 by blast qed } -- {* Now it can be shown that the splitting goes the way we desired. *} ultimately show ?P1 and ?P2 by auto qed hence "(x - xa_max)@?za \<in> L\<^isub>1" using a_in by (auto elim:prefixE) -- {* Now candidates @{text "?za"} and @{text "?zb"} have all the requred properteis. *} with eq_xya have "(y - ya) @ ?za \<in> L\<^isub>1" by (auto simp:str_eq_def str_eq_rel_def) with eq_z and b_in show ?thesis using that by blast qed -- {* @{text "?thesis"} can easily be shown using properties of @{text "za"} and @{text "zb"}: *} have "((y - ya) @ za) @ zb \<in> L\<^isub>1\<star>" using l_za ls_zb by blast with eq_zab show ?thesis by simp qed with h5 h6 show ?thesis by (drule_tac star_intro1) (auto simp:strict_prefix_def elim:prefixE) qed } -- {* By instantiating the reasoning pattern just derived for both directions:*} from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]] -- {* The thesis is proved as a trival consequence: *} show ?thesis unfolding str_eq_def str_eq_rel_def by blastqedlemma -- {* The oringal version with less explicit details. *} fixes v w assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w" shows "(v::string) \<approx>(L\<^isub>1\<star>) w"proof- -- {* \begin{minipage}{0.8\textwidth} According to the definition of @{text "\<approx>Lang"}, proving @{text "v \<approx>(L\<^isub>1\<star>) w"} amounts to showing: for any string @{text "u"}, if @{text "v @ u \<in> (L\<^isub>1\<star>)"} then @{text "w @ u \<in> (L\<^isub>1\<star>)"} and vice versa. The reasoning pattern for both directions are the same, as derived in the following: \end{minipage} *} { fix x y z assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>" and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y" have "y @ z \<in> L\<^isub>1\<star>" proof(cases "x = []") -- {* The degenerated case when @{text "x"} is a null string is easy to prove: *} case True with tag_xy have "y = []" by (auto simp:tag_str_STAR_def strict_prefix_def) thus ?thesis using xz_in_star True by simp next -- {* \begin{minipage}{0.8\textwidth} The case when @{text "x"} is not null, and @{text "x @ z"} is in @{text "L\<^isub>1\<star>"}, \end{minipage} *} case False obtain x_max where h1: "x_max < x" and h2: "x_max \<in> L\<^isub>1\<star>" and h3: "(x - x_max) @ z \<in> L\<^isub>1\<star>" and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star> \<longrightarrow> length xa \<le> length x_max" proof- let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}" have "finite ?S" by (rule_tac B = "{xa. xa < x}" in finite_subset, auto simp:finite_strict_prefix_set) moreover have "?S \<noteq> {}" using False xz_in_star by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def) ultimately have "\<exists> max \<in> ?S. \<forall> a \<in> ?S. length a \<le> length max" using finite_set_has_max by blast thus ?thesis using that by blast qed obtain ya where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" and h7: "(x - x_max) \<approx>L\<^isub>1 (y - ya)" proof- from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} = {\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right") by (auto simp:tag_str_STAR_def) moreover have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?left" using h1 h2 by auto ultimately have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?right" by simp with that show ?thesis apply (simp add:Image_def str_eq_rel_def str_eq_def) by blast qed have "(y - ya) @ z \<in> L\<^isub>1\<star>" proof- from h3 h1 obtain a b where a_in: "a \<in> L\<^isub>1" and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>" and ab_max: "(x - x_max) @ z = a @ b" by (drule_tac star_decom, auto simp:strict_prefix_def elim:prefixE) have "(x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z" proof - have "((x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z) \<or> (a < (x - x_max) \<and> ((x - x_max) - a) @ z = b)" using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def) moreover { assume np: "a < (x - x_max)" and b_eqs: " ((x - x_max) - a) @ z = b" have "False" proof - let ?x_max' = "x_max @ a" have "?x_max' < x" using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) moreover have "?x_max' \<in> L\<^isub>1\<star>" using a_in h2 by (simp add:star_intro3) moreover have "(x - ?x_max') @ z \<in> L\<^isub>1\<star>" using b_eqs b_in np h1 by (simp add:diff_diff_appd) moreover have "\<not> (length ?x_max' \<le> length x_max)" using a_neq by simp ultimately show ?thesis using h4 by blast qed } ultimately show ?thesis by blast qed then obtain za where z_decom: "z = za @ b" and x_za: "(x - x_max) @ za \<in> L\<^isub>1" using a_in by (auto elim:prefixE) from x_za h7 have "(y - ya) @ za \<in> L\<^isub>1" by (auto simp:str_eq_def str_eq_rel_def) with b_in have "((y - ya) @ za) @ b \<in> L\<^isub>1\<star>" by blast with z_decom show ?thesis by auto qed with h5 h6 show ?thesis by (drule_tac star_intro1) (auto simp:strict_prefix_def elim:prefixE) qed } -- {* By instantiating the reasoning pattern just derived for both directions:*} from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]] -- {* The thesis is proved as a trival consequence: *} show ?thesis unfolding str_eq_def str_eq_rel_def by blastqedlemma quot_star_finiteI [intro]: fixes L1::"lang" assumes finite1: "finite (UNIV // \<approx>L1)" shows "finite (UNIV // \<approx>(L1\<star>))"proof (rule_tac tag = "tag_str_STAR L1" in tag_finite_imageD) show "\<And>x y. tag_str_STAR L1 x = tag_str_STAR L1 y \<Longrightarrow> x \<approx>(L1\<star>) y" by (rule tag_str_STAR_injI)next have *: "finite (Pow (UNIV // \<approx>L1))" using finite1 by auto show "finite (range (tag_str_STAR L1))" unfolding tag_str_STAR_def apply(rule finite_subset[OF _ *]) unfolding quotient_def by autoqedsubsubsection{* The conclusion *}lemma Myhill_Nerode2: fixes r::"rexp" shows "finite (UNIV // \<approx>(L r))"by (induct r) (auto)theorem Myhill_Nerode: shows "(\<exists>r::rexp. A = L r) \<longleftrightarrow> finite (UNIV // \<approx>A)"using Myhill_Nerode1 Myhill_Nerode2 by metis(*section {* Closure properties *}abbreviation reg :: "lang \<Rightarrow> bool"where "reg A \<equiv> \<exists>r::rexp. A = L r"lemma closure_union[intro]: assumes "reg A" "reg B" shows "reg (A \<union> B)"using assmsapply(auto)apply(rule_tac x="ALT r ra" in exI)apply(auto)donelemma closure_seq[intro]: assumes "reg A" "reg B" shows "reg (A ;; B)"using assmsapply(auto)apply(rule_tac x="SEQ r ra" in exI)apply(auto)donelemma closure_star[intro]: assumes "reg A" shows "reg (A\<star>)"using assmsapply(auto)apply(rule_tac x="STAR r" in exI)apply(auto)donelemma closure_complement[intro]: assumes "reg A" shows "reg (- A)"using assmsunfolding Myhill_Nerodeunfolding str_eq_rel_defby autolemma closure_difference[intro]: assumes "reg A" "reg B" shows "reg (A - B)"proof - have "A - B = - ((- A) \<union> B)" by blast moreover have "reg (- ((- A) \<union> B))" using assms by blast ultimately show "reg (A - B)" by simpqedlemma closure_intersection[intro]: assumes "reg A" "reg B" shows "reg (A \<inter> B)"proof - have "A \<inter> B = - ((- A) \<union> (- B))" by blast moreover have "reg (- ((- A) \<union> (- B)))" using assms by blast ultimately show "reg (A \<inter> B)" by simpqed*)end