theory Myhill
imports Myhill_1
begin
section {* Direction: @{text "regular language \<Rightarrow>finite partition"} *}
subsection {* The scheme for this direction *}
text {*
The following convenient notation @{text "x \<approx>Lang y"} means:
string @{text "x"} and @{text "y"} are equivalent with respect to
language @{text "Lang"}.
*}
definition
str_eq :: "string \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> bool" ("_ \<approx>_ _")
where
"x \<approx>Lang y \<equiv> (x, y) \<in> (\<approx>Lang)"
text {*
The very basic scheme to show the finiteness of the partion generated by a language @{text "Lang"}
is by attaching a tag to every string. The set of tags are carfully choosen to be finite so that
the range of tagging function is finite. If it can be proved that strings with the same tag
are equivlent with respect @{text "Lang"}, then the partition given rise by @{text "Lang"} must be finite.
The detailed argjument for this is formalized by the following lemma @{text "tag_finite_imageD"}.
The basic idea is using lemma @{thm [source] "finite_imageD"}
from standard library:
\[
@{thm "finite_imageD" [no_vars]}
\]
which says: if the image of injective function @{text "f"} over set @{text "A"} is
finite, then @{text "A"} must be finte.
*}
(* I am trying to reduce the following proof to even simpler principles. But not yet succeed. *)
definition
f_eq_rel ("\<cong>_")
where
"\<cong>(f::'a \<Rightarrow> 'b) = {(x, y) | x y. f x = f y}"
thm finite.induct
lemma finite_range_image: "finite (range f) \<Longrightarrow> finite (f ` A)"
by (rule_tac B = "{y. \<exists>x. y = f x}" in finite_subset, auto simp:image_def)
lemma "equiv UNIV (\<cong>f)"
by (auto simp:equiv_def f_eq_rel_def refl_on_def sym_def trans_def)
lemma
assumes rng_fnt: "finite (range tag)"
shows "finite (UNIV // (\<cong>tag))"
proof -
let "?f" = "op ` tag" and ?A = "(UNIV // (\<cong>tag))"
show ?thesis
proof (rule_tac f = "?f" and A = ?A in finite_imageD)
-- {*
The finiteness of @{text "f"}-image is a simple consequence of assumption @{text "rng_fnt"}:
*}
show "finite (?f ` ?A)"
proof -
have "\<forall> X. ?f X \<in> (Pow (range tag))" by (auto simp:image_def Pow_def)
moreover from rng_fnt have "finite (Pow (range tag))" by simp
ultimately have "finite (range ?f)"
by (auto simp only:image_def intro:finite_subset)
from finite_range_image [OF this] show ?thesis .
qed
next
-- {*
The injectivity of @{text "f"}-image is a consequence of the definition of @{text "\<cong>tag"}
*}
show "inj_on ?f ?A"
proof-
{ fix X Y
assume X_in: "X \<in> ?A"
and Y_in: "Y \<in> ?A"
and tag_eq: "?f X = ?f Y"
have "X = Y"
proof -
from X_in Y_in tag_eq
obtain x y where x_in: "x \<in> X" and y_in: "y \<in> Y" and eq_tg: "tag x = tag y"
unfolding quotient_def Image_def str_eq_rel_def str_eq_def image_def f_eq_rel_def
apply simp by blast
with X_in Y_in show ?thesis
by (auto simp:quotient_def str_eq_rel_def str_eq_def f_eq_rel_def)
qed
} thus ?thesis unfolding inj_on_def by auto
qed
qed
qed
(*
lemma finite_range_image: "finite (range f) \<Longrightarrow> finite (f ` A)"
by (rule_tac B = "{y. \<exists>x. y = f x}" in finite_subset, auto simp:image_def)
*)
lemma tag_finite_imageD:
fixes L :: "lang"
assumes rng_fnt: "finite (range tag)"
-- {* Suppose the range of tagging fucntion @{text "tag"} is finite. *}
and same_tag_eqvt: "\<And> m n. tag m = tag n \<Longrightarrow> m \<approx>L n"
-- {* And strings with same tag are equivalent *}
shows "finite (UNIV // \<approx>L)"
-- {* Then the partition generated by @{text "\<approx>L"} is finite. *}
proof -
-- {* The particular @{text "f"} and @{text "A"} used in @{thm [source] "finite_imageD"} are:*}
let "?f" = "op ` tag" and ?A = "(UNIV // \<approx>L)"
show ?thesis
proof (rule_tac f = "?f" and A = ?A in finite_imageD)
-- {*
The finiteness of @{text "f"}-image is a simple consequence of assumption @{text "rng_fnt"}:
*}
show "finite (?f ` ?A)"
proof -
have "\<forall> X. ?f X \<in> (Pow (range tag))" by (auto simp:image_def Pow_def)
moreover from rng_fnt have "finite (Pow (range tag))" by simp
ultimately have "finite (range ?f)"
by (auto simp only:image_def intro:finite_subset)
from finite_range_image [OF this] show ?thesis .
qed
next
-- {*
The injectivity of @{text "f"} is the consequence of assumption @{text "same_tag_eqvt"}:
*}
show "inj_on ?f ?A"
proof-
{ fix X Y
assume X_in: "X \<in> ?A"
and Y_in: "Y \<in> ?A"
and tag_eq: "?f X = ?f Y"
have "X = Y"
proof -
from X_in Y_in tag_eq
obtain x y where x_in: "x \<in> X" and y_in: "y \<in> Y" and eq_tg: "tag x = tag y"
unfolding quotient_def Image_def str_eq_rel_def str_eq_def image_def
apply simp by blast
from same_tag_eqvt [OF eq_tg] have "x \<approx>L y" .
with X_in Y_in x_in y_in
show ?thesis by (auto simp:quotient_def str_eq_rel_def str_eq_def)
qed
} thus ?thesis unfolding inj_on_def by auto
qed
qed
qed
subsection {* Lemmas for basic cases *}
text {*
The the final result of this direction is in @{text "rexp_imp_finite"},
which is an induction on the structure of regular expressions. There is one
case for each regular expression operator. For basic operators such as
@{const NULL}, @{const EMPTY}, @{const CHAR}, the finiteness of their
language partition can be established directly with no need of tagging.
This section contains several technical lemma for these base cases.
The inductive cases involve operators @{const ALT}, @{const SEQ} and @{const
STAR}. Tagging functions need to be defined individually for each of
them. There will be one dedicated section for each of these cases, and each
section goes virtually the same way: gives definition of the tagging
function and prove that strings with the same tag are equivalent.
*}
subsection {* The case for @{const "NULL"} *}
lemma quot_null_eq:
shows "(UNIV // \<approx>{}) = ({UNIV}::lang set)"
unfolding quotient_def Image_def str_eq_rel_def by auto
lemma quot_null_finiteI [intro]:
shows "finite ((UNIV // \<approx>{})::lang set)"
unfolding quot_null_eq by simp
subsection {* The case for @{const "EMPTY"} *}
lemma quot_empty_subset:
"UNIV // (\<approx>{[]}) \<subseteq> {{[]}, UNIV - {[]}}"
proof
fix x
assume "x \<in> UNIV // \<approx>{[]}"
then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[]}}"
unfolding quotient_def Image_def by blast
show "x \<in> {{[]}, UNIV - {[]}}"
proof (cases "y = []")
case True with h
have "x = {[]}" by (auto simp: str_eq_rel_def)
thus ?thesis by simp
next
case False with h
have "x = UNIV - {[]}" by (auto simp: str_eq_rel_def)
thus ?thesis by simp
qed
qed
lemma quot_empty_finiteI [intro]:
shows "finite (UNIV // (\<approx>{[]}))"
by (rule finite_subset[OF quot_empty_subset]) (simp)
subsection {* The case for @{const "CHAR"} *}
lemma quot_char_subset:
"UNIV // (\<approx>{[c]}) \<subseteq> {{[]},{[c]}, UNIV - {[], [c]}}"
proof
fix x
assume "x \<in> UNIV // \<approx>{[c]}"
then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[c]}}"
unfolding quotient_def Image_def by blast
show "x \<in> {{[]},{[c]}, UNIV - {[], [c]}}"
proof -
{ assume "y = []" hence "x = {[]}" using h
by (auto simp:str_eq_rel_def)
} moreover {
assume "y = [c]" hence "x = {[c]}" using h
by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def)
} moreover {
assume "y \<noteq> []" and "y \<noteq> [c]"
hence "\<forall> z. (y @ z) \<noteq> [c]" by (case_tac y, auto)
moreover have "\<And> p. (p \<noteq> [] \<and> p \<noteq> [c]) = (\<forall> q. p @ q \<noteq> [c])"
by (case_tac p, auto)
ultimately have "x = UNIV - {[],[c]}" using h
by (auto simp add:str_eq_rel_def)
} ultimately show ?thesis by blast
qed
qed
lemma quot_char_finiteI [intro]:
shows "finite (UNIV // (\<approx>{[c]}))"
by (rule finite_subset[OF quot_char_subset]) (simp)
subsection {* The case for @{const SEQ}*}
definition
tag_str_SEQ :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang set)"
where
"tag_str_SEQ L1 L2 = (\<lambda>x. (\<approx>L1 `` {x}, {(\<approx>L2 `` {x - xa}) | xa. xa \<le> x \<and> xa \<in> L1}))"
lemma append_seq_elim:
assumes "x @ y \<in> L\<^isub>1 ;; L\<^isub>2"
shows "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2) \<or>
(\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2)"
proof-
from assms obtain s\<^isub>1 s\<^isub>2
where "x @ y = s\<^isub>1 @ s\<^isub>2"
and in_seq: "s\<^isub>1 \<in> L\<^isub>1 \<and> s\<^isub>2 \<in> L\<^isub>2"
by (auto simp:Seq_def)
hence "(x \<le> s\<^isub>1 \<and> (s\<^isub>1 - x) @ s\<^isub>2 = y) \<or> (s\<^isub>1 \<le> x \<and> (x - s\<^isub>1) @ y = s\<^isub>2)"
using app_eq_dest by auto
moreover have "\<lbrakk>x \<le> s\<^isub>1; (s\<^isub>1 - x) @ s\<^isub>2 = y\<rbrakk> \<Longrightarrow>
\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2"
using in_seq by (rule_tac x = "s\<^isub>1 - x" in exI, auto elim:prefixE)
moreover have "\<lbrakk>s\<^isub>1 \<le> x; (x - s\<^isub>1) @ y = s\<^isub>2\<rbrakk> \<Longrightarrow>
\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2"
using in_seq by (rule_tac x = s\<^isub>1 in exI, auto)
ultimately show ?thesis by blast
qed
lemma tag_str_SEQ_injI:
"tag_str_SEQ L\<^isub>1 L\<^isub>2 m = tag_str_SEQ L\<^isub>1 L\<^isub>2 n \<Longrightarrow> m \<approx>(L\<^isub>1 ;; L\<^isub>2) n"
proof-
{ fix x y z
assume xz_in_seq: "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"
and tag_xy: "tag_str_SEQ L\<^isub>1 L\<^isub>2 x = tag_str_SEQ L\<^isub>1 L\<^isub>2 y"
have"y @ z \<in> L\<^isub>1 ;; L\<^isub>2"
proof-
have "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ z \<in> L\<^isub>2) \<or>
(\<exists> za \<le> z. (x @ za) \<in> L\<^isub>1 \<and> (z - za) \<in> L\<^isub>2)"
using xz_in_seq append_seq_elim by simp
moreover {
fix xa
assume h1: "xa \<le> x" and h2: "xa \<in> L\<^isub>1" and h3: "(x - xa) @ z \<in> L\<^isub>2"
obtain ya where "ya \<le> y" and "ya \<in> L\<^isub>1" and "(y - ya) @ z \<in> L\<^isub>2"
proof -
have "\<exists> ya. ya \<le> y \<and> ya \<in> L\<^isub>1 \<and> (x - xa) \<approx>L\<^isub>2 (y - ya)"
proof -
have "{\<approx>L\<^isub>2 `` {x - xa} |xa. xa \<le> x \<and> xa \<in> L\<^isub>1} =
{\<approx>L\<^isub>2 `` {y - xa} |xa. xa \<le> y \<and> xa \<in> L\<^isub>1}"
(is "?Left = ?Right")
using h1 tag_xy by (auto simp:tag_str_SEQ_def)
moreover have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Left" using h1 h2 by auto
ultimately have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Right" by simp
thus ?thesis by (auto simp:Image_def str_eq_rel_def str_eq_def)
qed
with prems show ?thesis by (auto simp:str_eq_rel_def str_eq_def)
qed
hence "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" by (erule_tac prefixE, auto simp:Seq_def)
} moreover {
fix za
assume h1: "za \<le> z" and h2: "(x @ za) \<in> L\<^isub>1" and h3: "z - za \<in> L\<^isub>2"
hence "y @ za \<in> L\<^isub>1"
proof-
have "\<approx>L\<^isub>1 `` {x} = \<approx>L\<^isub>1 `` {y}"
using h1 tag_xy by (auto simp:tag_str_SEQ_def)
with h2 show ?thesis
by (auto simp:Image_def str_eq_rel_def str_eq_def)
qed
with h1 h3 have "y @ z \<in> L\<^isub>1 ;; L\<^isub>2"
by (drule_tac A = L\<^isub>1 in seq_intro, auto elim:prefixE)
}
ultimately show ?thesis by blast
qed
} thus "tag_str_SEQ L\<^isub>1 L\<^isub>2 m = tag_str_SEQ L\<^isub>1 L\<^isub>2 n \<Longrightarrow> m \<approx>(L\<^isub>1 ;; L\<^isub>2) n"
by (auto simp add: str_eq_def str_eq_rel_def)
qed
lemma quot_seq_finiteI [intro]:
fixes L1 L2::"lang"
assumes fin1: "finite (UNIV // \<approx>L1)"
and fin2: "finite (UNIV // \<approx>L2)"
shows "finite (UNIV // \<approx>(L1 ;; L2))"
proof (rule_tac tag = "tag_str_SEQ L1 L2" in tag_finite_imageD)
show "\<And>x y. tag_str_SEQ L1 L2 x = tag_str_SEQ L1 L2 y \<Longrightarrow> x \<approx>(L1 ;; L2) y"
by (rule tag_str_SEQ_injI)
next
have *: "finite ((UNIV // \<approx>L1) \<times> (Pow (UNIV // \<approx>L2)))"
using fin1 fin2 by auto
show "finite (range (tag_str_SEQ L1 L2))"
unfolding tag_str_SEQ_def
apply(rule finite_subset[OF _ *])
unfolding quotient_def
by auto
qed
subsection {* The case for @{const ALT} *}
definition
tag_str_ALT :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang)"
where
"tag_str_ALT L1 L2 = (\<lambda>x. (\<approx>L1 `` {x}, \<approx>L2 `` {x}))"
lemma quot_union_finiteI [intro]:
fixes L1 L2::"lang"
assumes finite1: "finite (UNIV // \<approx>L1)"
and finite2: "finite (UNIV // \<approx>L2)"
shows "finite (UNIV // \<approx>(L1 \<union> L2))"
proof (rule_tac tag = "tag_str_ALT L1 L2" in tag_finite_imageD)
show "\<And>x y. tag_str_ALT L1 L2 x = tag_str_ALT L1 L2 y \<Longrightarrow> x \<approx>(L1 \<union> L2) y"
unfolding tag_str_ALT_def
unfolding str_eq_def
unfolding Image_def
unfolding str_eq_rel_def
by auto
next
have *: "finite ((UNIV // \<approx>L1) \<times> (UNIV // \<approx>L2))"
using finite1 finite2 by auto
show "finite (range (tag_str_ALT L1 L2))"
unfolding tag_str_ALT_def
apply(rule finite_subset[OF _ *])
unfolding quotient_def
by auto
qed
subsection {* The case for @{const "STAR"} *}
text {*
This turned out to be the trickiest case.
*} (* I will make some illustrations for it. *)
definition
tag_str_STAR :: "lang \<Rightarrow> string \<Rightarrow> lang set"
where
"tag_str_STAR L1 = (\<lambda>x. {\<approx>L1 `` {x - xa} | xa. xa < x \<and> xa \<in> L1\<star>})"
lemma finite_set_has_max:
"\<lbrakk>finite A; A \<noteq> {}\<rbrakk> \<Longrightarrow> (\<exists> max \<in> A. \<forall> a \<in> A. f a <= (f max :: nat))"
proof (induct rule:finite.induct)
case emptyI thus ?case by simp
next
case (insertI A a)
show ?case
proof (cases "A = {}")
case True thus ?thesis by (rule_tac x = a in bexI, auto)
next
case False
with prems obtain max
where h1: "max \<in> A"
and h2: "\<forall>a\<in>A. f a \<le> f max" by blast
show ?thesis
proof (cases "f a \<le> f max")
assume "f a \<le> f max"
with h1 h2 show ?thesis by (rule_tac x = max in bexI, auto)
next
assume "\<not> (f a \<le> f max)"
thus ?thesis using h2 by (rule_tac x = a in bexI, auto)
qed
qed
qed
lemma finite_strict_prefix_set: "finite {xa. xa < (x::string)}"
apply (induct x rule:rev_induct, simp)
apply (subgoal_tac "{xa. xa < xs @ [x]} = {xa. xa < xs} \<union> {xs}")
by (auto simp:strict_prefix_def)
lemma tag_str_STAR_injI:
"tag_str_STAR L\<^isub>1 m = tag_str_STAR L\<^isub>1 n \<Longrightarrow> m \<approx>(L\<^isub>1\<star>) n"
proof-
{ fix x y z
assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>"
and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y"
have "y @ z \<in> L\<^isub>1\<star>"
proof(cases "x = []")
case True
with tag_xy have "y = []"
by (auto simp:tag_str_STAR_def strict_prefix_def)
thus ?thesis using xz_in_star True by simp
next
case False
obtain x_max
where h1: "x_max < x"
and h2: "x_max \<in> L\<^isub>1\<star>"
and h3: "(x - x_max) @ z \<in> L\<^isub>1\<star>"
and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>
\<longrightarrow> length xa \<le> length x_max"
proof-
let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}"
have "finite ?S"
by (rule_tac B = "{xa. xa < x}" in finite_subset,
auto simp:finite_strict_prefix_set)
moreover have "?S \<noteq> {}" using False xz_in_star
by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def)
ultimately have "\<exists> max \<in> ?S. \<forall> a \<in> ?S. length a \<le> length max"
using finite_set_has_max by blast
with prems show ?thesis by blast
qed
obtain ya
where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" and h7: "(x - x_max) \<approx>L\<^isub>1 (y - ya)"
proof-
from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} =
{\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right")
by (auto simp:tag_str_STAR_def)
moreover have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?left" using h1 h2 by auto
ultimately have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?right" by simp
with prems show ?thesis apply
(simp add:Image_def str_eq_rel_def str_eq_def) by blast
qed
have "(y - ya) @ z \<in> L\<^isub>1\<star>"
proof-
from h3 h1 obtain a b where a_in: "a \<in> L\<^isub>1"
and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>"
and ab_max: "(x - x_max) @ z = a @ b"
by (drule_tac star_decom, auto simp:strict_prefix_def elim:prefixE)
have "(x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z"
proof -
have "((x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z) \<or>
(a < (x - x_max) \<and> ((x - x_max) - a) @ z = b)"
using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def)
moreover {
assume np: "a < (x - x_max)" and b_eqs: " ((x - x_max) - a) @ z = b"
have "False"
proof -
let ?x_max' = "x_max @ a"
have "?x_max' < x"
using np h1 by (clarsimp simp:strict_prefix_def diff_prefix)
moreover have "?x_max' \<in> L\<^isub>1\<star>"
using a_in h2 by (simp add:star_intro3)
moreover have "(x - ?x_max') @ z \<in> L\<^isub>1\<star>"
using b_eqs b_in np h1 by (simp add:diff_diff_appd)
moreover have "\<not> (length ?x_max' \<le> length x_max)"
using a_neq by simp
ultimately show ?thesis using h4 by blast
qed
} ultimately show ?thesis by blast
qed
then obtain za where z_decom: "z = za @ b"
and x_za: "(x - x_max) @ za \<in> L\<^isub>1"
using a_in by (auto elim:prefixE)
from x_za h7 have "(y - ya) @ za \<in> L\<^isub>1"
by (auto simp:str_eq_def str_eq_rel_def)
with z_decom b_in show ?thesis by (auto dest!:step[of "(y - ya) @ za"])
qed
with h5 h6 show ?thesis
by (drule_tac star_intro1, auto simp:strict_prefix_def elim:prefixE)
qed
} thus "tag_str_STAR L\<^isub>1 m = tag_str_STAR L\<^isub>1 n \<Longrightarrow> m \<approx>(L\<^isub>1\<star>) n"
by (auto simp add:str_eq_def str_eq_rel_def)
qed
lemma quot_star_finiteI [intro]:
fixes L1::"lang"
assumes finite1: "finite (UNIV // \<approx>L1)"
shows "finite (UNIV // \<approx>(L1\<star>))"
proof (rule_tac tag = "tag_str_STAR L1" in tag_finite_imageD)
show "\<And>x y. tag_str_STAR L1 x = tag_str_STAR L1 y \<Longrightarrow> x \<approx>(L1\<star>) y"
by (rule tag_str_STAR_injI)
next
have *: "finite (Pow (UNIV // \<approx>L1))"
using finite1 by auto
show "finite (range (tag_str_STAR L1))"
unfolding tag_str_STAR_def
apply(rule finite_subset[OF _ *])
unfolding quotient_def
by auto
qed
subsection {* The main lemma *}
lemma rexp_imp_finite:
fixes r::"rexp"
shows "finite (UNIV // \<approx>(L r))"
by (induct r) (auto)
end