regexp: Myhill_2.thy@79b37ef9505f


theory Myhill_2
  imports Myhill_1 List_Prefix Prefix_subtract
begin

section {* Direction @{text "regular language \<Rightarrow>finite partition"} *}

subsection {* The scheme*}

text {* 
  The following convenient notation @{text "x \<approx>A y"} means:
  string @{text "x"} and @{text "y"} are equivalent with respect to 
  language @{text "A"}.
  *}

definition
  str_eq :: "string \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> bool" ("_ \<approx>_ _")
where
  "x \<approx>A y \<equiv> (x, y) \<in> (\<approx>A)"

text {*
  The main lemma (@{text "rexp_imp_finite"}) is proved by a structural
  induction over regular expressions.  where base cases (cases for @{const
  "NULL"}, @{const "EMPTY"}, @{const "CHAR"}) are quite straightforward to
  proof. Real difficulty lies in inductive cases.  By inductive hypothesis,
  languages defined by sub-expressions induce finite partitiions. Under such
  hypothsis, we need to prove that the language defined by the composite
  regular expression gives rise to finite partion.  The basic idea is to
  attach a tag @{text "tag(x)"} to every string @{text "x"}. The tagging
  fuction @{text "tag"} is carefully devised, which returns tags made of
  equivalent classes of the partitions induced by subexpressoins, and
  therefore has a finite range. Let @{text "Lang"} be the composite language,
  it is proved that:
  \begin{quote}
  If strings with the same tag are equivalent with respect to @{text "Lang"}, expressed as:
  \[
  @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"}
  \]
  then the partition induced by @{text "Lang"} must be finite.
  \end{quote}
  There are two arguments for this. The first goes as the following:
  \begin{enumerate}
    \item First, the tagging function @{text "tag"} induces an equivalent relation @{text "(=tag=)"} 
          (defiintion of @{text "f_eq_rel"} and lemma @{text "equiv_f_eq_rel"}).
    \item It is shown that: if the range of @{text "tag"} (denoted @{text "range(tag)"}) is finite, 
           the partition given rise by @{text "(=tag=)"} is finite (lemma @{text "finite_eq_f_rel"}).
           Since tags are made from equivalent classes from component partitions, and the inductive
           hypothesis ensures the finiteness of these partitions, it is not difficult to prove
           the finiteness of @{text "range(tag)"}.
    \item It is proved that if equivalent relation @{text "R1"} is more refined than @{text "R2"}
           (expressed as @{text "R1 \<subseteq> R2"}),
           and the partition induced by @{text "R1"} is finite, then the partition induced by @{text "R2"}
           is finite as well (lemma @{text "refined_partition_finite"}).
    \item The injectivity assumption @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"} implies that
            @{text "(=tag=)"} is more refined than @{text "(\<approx>Lang)"}.
    \item Combining the points above, we have: the partition induced by language @{text "Lang"}
          is finite (lemma @{text "tag_finite_imageD"}).
  \end{enumerate}
*}

definition 
   f_eq_rel ("=_=")
where
   "=f= \<equiv> {(x, y) | x y. f x = f y}"

lemma equiv_f_eq_rel:"equiv UNIV (=f=)"
  by (auto simp:equiv_def f_eq_rel_def refl_on_def sym_def trans_def)

lemma finite_range_image: 
  assumes "finite (range f)"
  shows "finite (f ` A)"
  using assms unfolding image_def
  by (rule_tac finite_subset) (auto)

lemma finite_eq_f_rel:
  assumes rng_fnt: "finite (range tag)"
  shows "finite (UNIV // =tag=)"
proof -
  let "?f" =  "op ` tag" and ?A = "(UNIV // =tag=)"
  show ?thesis
  proof (rule_tac f = "?f" and A = ?A in finite_imageD) 
    -- {* 
      The finiteness of @{text "f"}-image is a simple consequence of assumption @{text "rng_fnt"}:
      *}
    show "finite (?f ` ?A)" 
    proof -
      have "\<forall> X. ?f X \<in> (Pow (range tag))" by (auto simp:image_def Pow_def)
      moreover from rng_fnt have "finite (Pow (range tag))" by simp
      ultimately have "finite (range ?f)"
        by (auto simp only:image_def intro:finite_subset)
      from finite_range_image [OF this] show ?thesis .
    qed
  next
    -- {* 
      The injectivity of @{text "f"}-image is a consequence of the definition of @{text "(=tag=)"}:
      *}
    show "inj_on ?f ?A" 
    proof-
      { fix X Y
        assume X_in: "X \<in> ?A"
          and  Y_in: "Y \<in> ?A"
          and  tag_eq: "?f X = ?f Y"
        have "X = Y"
        proof -
          from X_in Y_in tag_eq 
          obtain x y 
            where x_in: "x \<in> X" and y_in: "y \<in> Y" and eq_tg: "tag x = tag y"
            unfolding quotient_def Image_def str_eq_rel_def 
                                   str_eq_def image_def f_eq_rel_def
            apply simp by blast
          with X_in Y_in show ?thesis 
            by (auto simp:quotient_def str_eq_rel_def str_eq_def f_eq_rel_def) 
        qed
      } thus ?thesis unfolding inj_on_def by auto
    qed
  qed
qed

lemma finite_image_finite: 
  "\<lbrakk>\<forall> x \<in> A. f x \<in> B; finite B\<rbrakk> \<Longrightarrow> finite (f ` A)"
  by (rule finite_subset [of _ B], auto)

lemma refined_partition_finite:
  fixes R1 R2 A
  assumes fnt: "finite (A // R1)"
  and refined: "R1 \<subseteq> R2"
  and eq1: "equiv A R1" and eq2: "equiv A R2"
  shows "finite (A // R2)"
proof -
  let ?f = "\<lambda> X. {R1 `` {x} | x. x \<in> X}" 
    and ?A = "(A // R2)" and ?B = "(A // R1)"
  show ?thesis
  proof(rule_tac f = ?f and A = ?A in finite_imageD)
    show "finite (?f ` ?A)"
    proof(rule finite_subset [of _ "Pow ?B"])
      from fnt show "finite (Pow (A // R1))" by simp
    next
      from eq2
      show " ?f ` A // R2 \<subseteq> Pow ?B"
        unfolding image_def Pow_def quotient_def
        apply auto
        by (rule_tac x = xb in bexI, simp, 
                 unfold equiv_def sym_def refl_on_def, blast)
    qed
  next
    show "inj_on ?f ?A"
    proof -
      { fix X Y
        assume X_in: "X \<in> ?A" and Y_in: "Y \<in> ?A" 
          and eq_f: "?f X = ?f Y" (is "?L = ?R")
        have "X = Y" using X_in
        proof(rule quotientE)
          fix x
          assume "X = R2 `` {x}" and "x \<in> A" with eq2
          have x_in: "x \<in> X" 
            unfolding equiv_def quotient_def refl_on_def by auto
          with eq_f have "R1 `` {x} \<in> ?R" by auto
          then obtain y where 
            y_in: "y \<in> Y" and eq_r: "R1 `` {x} = R1 ``{y}" by auto
          have "(x, y) \<in> R1"
          proof -
            from x_in X_in y_in Y_in eq2
            have "x \<in> A" and "y \<in> A" 
              unfolding equiv_def quotient_def refl_on_def by auto
            from eq_equiv_class_iff [OF eq1 this] and eq_r
            show ?thesis by simp
          qed
          with refined have xy_r2: "(x, y) \<in> R2" by auto
          from quotient_eqI [OF eq2 X_in Y_in x_in y_in this]
          show ?thesis .
        qed
      } thus ?thesis by (auto simp:inj_on_def)
    qed
  qed
qed

lemma equiv_lang_eq: "equiv UNIV (\<approx>Lang)"
  unfolding equiv_def str_eq_rel_def sym_def refl_on_def trans_def
  by blast

lemma tag_finite_imageD:
  fixes tag
  assumes rng_fnt: "finite (range tag)" 
  -- {* Suppose the rang of tagging fucntion @{text "tag"} is finite. *}
  and same_tag_eqvt: "\<And> m n. tag m = tag (n::string) \<Longrightarrow> m \<approx>Lang n"
  -- {* And strings with same tag are equivalent *}
  shows "finite (UNIV // (\<approx>Lang))"
proof -
  let ?R1 = "(=tag=)"
  show ?thesis
  proof(rule_tac refined_partition_finite [of _ ?R1])
    from finite_eq_f_rel [OF rng_fnt]
     show "finite (UNIV // =tag=)" . 
   next
     from same_tag_eqvt
     show "(=tag=) \<subseteq> (\<approx>Lang)"
       by (auto simp:f_eq_rel_def str_eq_def)
   next
     from equiv_f_eq_rel
     show "equiv UNIV (=tag=)" by blast
   next
     from equiv_lang_eq
     show "equiv UNIV (\<approx>Lang)" by blast
  qed
qed

text {*
  A more concise, but less intelligible argument for @{text "tag_finite_imageD"} 
  is given as the following. The basic idea is still using standard library 
  lemma @{thm [source] "finite_imageD"}:
  \[
  @{thm "finite_imageD" [no_vars]}
  \]
  which says: if the image of injective function @{text "f"} over set @{text "A"} is 
  finite, then @{text "A"} must be finte, as we did in the lemmas above.
  *}

lemma 
  fixes tag
  assumes rng_fnt: "finite (range tag)" 
  -- {* Suppose the rang of tagging fucntion @{text "tag"} is finite. *}
  and same_tag_eqvt: "\<And> m n. tag m = tag (n::string) \<Longrightarrow> m \<approx>Lang n"
  -- {* And strings with same tag are equivalent *}
  shows "finite (UNIV // (\<approx>Lang))"
  -- {* Then the partition generated by @{text "(\<approx>Lang)"} is finite. *}
proof -
  -- {* The particular @{text "f"} and @{text "A"} used in @{thm [source] "finite_imageD"} are:*}
  let "?f" =  "op ` tag" and ?A = "(UNIV // \<approx>Lang)"
  show ?thesis
  proof (rule_tac f = "?f" and A = ?A in finite_imageD) 
    -- {* 
      The finiteness of @{text "f"}-image is a simple consequence of assumption @{text "rng_fnt"}:
      *}
    show "finite (?f ` ?A)" 
    proof -
      have "\<forall> X. ?f X \<in> (Pow (range tag))" by (auto simp:image_def Pow_def)
      moreover from rng_fnt have "finite (Pow (range tag))" by simp
      ultimately have "finite (range ?f)"
        by (auto simp only:image_def intro:finite_subset)
      from finite_range_image [OF this] show ?thesis .
    qed
  next
    -- {* 
      The injectivity of @{text "f"} is the consequence of assumption @{text "same_tag_eqvt"}:
      *}
    show "inj_on ?f ?A" 
    proof-
      { fix X Y
        assume X_in: "X \<in> ?A"
          and  Y_in: "Y \<in> ?A"
          and  tag_eq: "?f X = ?f Y"
        have "X = Y"
        proof -
          from X_in Y_in tag_eq 
          obtain x y where x_in: "x \<in> X" and y_in: "y \<in> Y" and eq_tg: "tag x = tag y"
            unfolding quotient_def Image_def str_eq_rel_def str_eq_def image_def
            apply simp by blast 
          from same_tag_eqvt [OF eq_tg] have "x \<approx>Lang y" .
          with X_in Y_in x_in y_in
          show ?thesis by (auto simp:quotient_def str_eq_rel_def str_eq_def) 
        qed
      } thus ?thesis unfolding inj_on_def by auto
    qed
  qed
qed

subsection {* The proof*}

text {*
  Each case is given in a separate section, as well as the final main lemma. Detailed explainations accompanied by
  illustrations are given for non-trivial cases.

  For ever inductive case, there are two tasks, the easier one is to show the range finiteness of
  of the tagging function based on the finiteness of component partitions, the
  difficult one is to show that strings with the same tag are equivalent with respect to the 
  composite language. Suppose the composite language be @{text "Lang"}, tagging function be 
  @{text "tag"}, it amounts to show:
  \[
  @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"}
  \]
  expanding the definition of @{text "\<approx>Lang"}, it amounts to show:
  \[
  @{text "tag(x) = tag(y) \<Longrightarrow> (\<forall> z. x@z \<in> Lang \<longleftrightarrow> y@z \<in> Lang)"}
  \]
  Because the assumed tag equlity @{text "tag(x) = tag(y)"} is symmetric,
  it is suffcient to show just one direction:
  \[
  @{text "\<And> x y z. \<lbrakk>tag(x) = tag(y); x@z \<in> Lang\<rbrakk> \<Longrightarrow> y@z \<in> Lang"}
  \]
  This is the pattern followed by every inductive case.
  *}

subsubsection {* The base case for @{const "NULL"} *}

lemma quot_null_eq:
  shows "(UNIV // \<approx>{}) = ({UNIV}::lang set)"
  unfolding quotient_def Image_def str_eq_rel_def by auto

lemma quot_null_finiteI [intro]:
  shows "finite ((UNIV // \<approx>{})::lang set)"
unfolding quot_null_eq by simp


subsubsection {* The base case for @{const "EMPTY"} *}


lemma quot_empty_subset:
  "UNIV // (\<approx>{[]}) \<subseteq> {{[]}, UNIV - {[]}}"
proof
  fix x
  assume "x \<in> UNIV // \<approx>{[]}"
  then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[]}}" 
    unfolding quotient_def Image_def by blast
  show "x \<in> {{[]}, UNIV - {[]}}"
  proof (cases "y = []")
    case True with h
    have "x = {[]}" by (auto simp: str_eq_rel_def)
    thus ?thesis by simp
  next
    case False with h
    have "x = UNIV - {[]}" by (auto simp: str_eq_rel_def)
    thus ?thesis by simp
  qed
qed

lemma quot_empty_finiteI [intro]:
  shows "finite (UNIV // (\<approx>{[]}))"
by (rule finite_subset[OF quot_empty_subset]) (simp)


subsubsection {* The base case for @{const "CHAR"} *}

lemma quot_char_subset:
  "UNIV // (\<approx>{[c]}) \<subseteq> {{[]},{[c]}, UNIV - {[], [c]}}"
proof 
  fix x 
  assume "x \<in> UNIV // \<approx>{[c]}"
  then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[c]}}" 
    unfolding quotient_def Image_def by blast
  show "x \<in> {{[]},{[c]}, UNIV - {[], [c]}}"
  proof -
    { assume "y = []" hence "x = {[]}" using h 
        by (auto simp:str_eq_rel_def)
    } moreover {
      assume "y = [c]" hence "x = {[c]}" using h 
        by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def)
    } moreover {
      assume "y \<noteq> []" and "y \<noteq> [c]"
      hence "\<forall> z. (y @ z) \<noteq> [c]" by (case_tac y, auto)
      moreover have "\<And> p. (p \<noteq> [] \<and> p \<noteq> [c]) = (\<forall> q. p @ q \<noteq> [c])" 
        by (case_tac p, auto)
      ultimately have "x = UNIV - {[],[c]}" using h
        by (auto simp add:str_eq_rel_def)
    } ultimately show ?thesis by blast
  qed
qed

lemma quot_char_finiteI [intro]:
  shows "finite (UNIV // (\<approx>{[c]}))"
by (rule finite_subset[OF quot_char_subset]) (simp)


subsubsection {* The inductive case for @{const ALT} *}

definition 
  tag_str_ALT :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang)"
where
  "tag_str_ALT L1 L2 \<equiv> (\<lambda>x. (\<approx>L1 `` {x}, \<approx>L2 `` {x}))"

lemma quot_union_finiteI [intro]:
  fixes L1 L2::"lang"
  assumes finite1: "finite (UNIV // \<approx>L1)"
  and     finite2: "finite (UNIV // \<approx>L2)"
  shows "finite (UNIV // \<approx>(L1 \<union> L2))"
proof (rule_tac tag = "tag_str_ALT L1 L2" in tag_finite_imageD)
  show "\<And>x y. tag_str_ALT L1 L2 x = tag_str_ALT L1 L2 y \<Longrightarrow> x \<approx>(L1 \<union> L2) y"
    unfolding tag_str_ALT_def 
    unfolding str_eq_def
    unfolding Image_def 
    unfolding str_eq_rel_def
    by auto
next
  have *: "finite ((UNIV // \<approx>L1) \<times> (UNIV // \<approx>L2))" 
    using finite1 finite2 by auto
  show "finite (range (tag_str_ALT L1 L2))"
    unfolding tag_str_ALT_def
    apply(rule finite_subset[OF _ *])
    unfolding quotient_def
    by auto
qed

subsubsection {* The inductive case for @{text "SEQ"}*}

text {*
  For case @{const "SEQ"}, the language @{text "L"} is @{text "L\<^isub>1 ;; L\<^isub>2"}.
  Given @{text "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"}, according to the defintion of @{text " L\<^isub>1 ;; L\<^isub>2"},
  string @{text "x @ z"} can be splitted with the prefix in @{text "L\<^isub>1"} and suffix in @{text "L\<^isub>2"}.
  The split point can either be in @{text "x"} (as shown in Fig. \ref{seq_first_split}),
  or in @{text "z"} (as shown in Fig. \ref{seq_snd_split}). Whichever way it goes, the structure
  on @{text "x @ z"} cn be transfered faithfully onto @{text "y @ z"} 
  (as shown in Fig. \ref{seq_trans_first_split} and \ref{seq_trans_snd_split}) with the the help of the assumed 
  tag equality. The following tag function @{text "tag_str_SEQ"} is such designed to facilitate
  such transfers and lemma @{text "tag_str_SEQ_injI"} formalizes the informal argument above. The details 
  of structure transfer will be given their.
\input{fig_seq}

  *}

definition 
  tag_str_SEQ :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang set)"
where
  "tag_str_SEQ L1 L2 \<equiv>
     (\<lambda>x. (\<approx>L1 `` {x}, {(\<approx>L2 `` {x - xa}) | xa.  xa \<le> x \<and> xa \<in> L1}))"

text {* The following is a techical lemma which helps to split the @{text "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"} mentioned above.*}

lemma append_seq_elim:
  assumes "x @ y \<in> L\<^isub>1 ;; L\<^isub>2"
  shows "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2) \<or> 
          (\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2)"
proof-
  from assms obtain s\<^isub>1 s\<^isub>2 
    where eq_xys: "x @ y = s\<^isub>1 @ s\<^isub>2" 
    and in_seq: "s\<^isub>1 \<in> L\<^isub>1 \<and> s\<^isub>2 \<in> L\<^isub>2" 
    by (auto simp:Seq_def)
  from app_eq_dest [OF eq_xys]
  have
    "(x \<le> s\<^isub>1 \<and> (s\<^isub>1 - x) @ s\<^isub>2 = y) \<or> (s\<^isub>1 \<le> x \<and> (x - s\<^isub>1) @ y = s\<^isub>2)" 
               (is "?Split1 \<or> ?Split2") .
  moreover have "?Split1 \<Longrightarrow> \<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2" 
    using in_seq by (rule_tac x = "s\<^isub>1 - x" in exI, auto elim:prefixE)
  moreover have "?Split2 \<Longrightarrow> \<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2" 
    using in_seq by (rule_tac x = s\<^isub>1 in exI, auto)
  ultimately show ?thesis by blast
qed


lemma tag_str_SEQ_injI:
  fixes v w 
  assumes eq_tag: "tag_str_SEQ L\<^isub>1 L\<^isub>2 v = tag_str_SEQ L\<^isub>1 L\<^isub>2 w" 
  shows "v \<approx>(L\<^isub>1 ;; L\<^isub>2) w"
proof-
    -- {* As explained before, a pattern for just one direction needs to be dealt with:*}
  { fix x y z
    assume xz_in_seq: "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"
    and tag_xy: "tag_str_SEQ L\<^isub>1 L\<^isub>2 x = tag_str_SEQ L\<^isub>1 L\<^isub>2 y"
    have"y @ z \<in> L\<^isub>1 ;; L\<^isub>2" 
    proof-
      -- {* There are two ways to split @{text "x@z"}: *}
      from append_seq_elim [OF xz_in_seq]
      have "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ z \<in> L\<^isub>2) \<or> 
               (\<exists> za \<le> z. (x @ za) \<in> L\<^isub>1 \<and> (z - za) \<in> L\<^isub>2)" .
      -- {* It can be shown that @{text "?thesis"} holds in either case: *}
      moreover {
        -- {* The case for the first split:*}
        fix xa
        assume h1: "xa \<le> x" and h2: "xa \<in> L\<^isub>1" and h3: "(x - xa) @ z \<in> L\<^isub>2"
        -- {* The following subgoal implements the structure transfer:*}
        obtain ya 
          where "ya \<le> y" 
          and "ya \<in> L\<^isub>1" 
          and "(y - ya) @ z \<in> L\<^isub>2"
        proof -
        -- {*
            \begin{minipage}{0.8\textwidth}
            By expanding the definition of 
            @{thm [display] "tag_xy"}
            and extracting the second compoent, we get:
            \end{minipage}
            *}
          have "{\<approx>L\<^isub>2 `` {x - xa} |xa. xa \<le> x \<and> xa \<in> L\<^isub>1} = 
                   {\<approx>L\<^isub>2 `` {y - ya} |ya. ya \<le> y \<and> ya \<in> L\<^isub>1}" (is "?Left = ?Right")
            using tag_xy unfolding tag_str_SEQ_def by simp
            -- {* Since @{thm "h1"} and @{thm "h2"} hold, it is not difficult to show: *}
          moreover have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Left" using h1 h2 by auto
            -- {* 
            \begin{minipage}{0.7\textwidth}
            Through tag equality, equivalent class @{term "\<approx>L\<^isub>2 `` {x - xa}"} also 
                  belongs to the @{text "?Right"}:
            \end{minipage}
            *}
          ultimately have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Right" by simp
            -- {* From this, the counterpart of @{text "xa"} in @{text "y"} is obtained:*}
          then obtain ya 
            where eq_xya: "\<approx>L\<^isub>2 `` {x - xa} = \<approx>L\<^isub>2 `` {y - ya}" 
            and pref_ya: "ya \<le> y" and ya_in: "ya \<in> L\<^isub>1"
            by simp blast
          -- {* It can be proved that @{text "ya"} has the desired property:*}
          have "(y - ya)@z \<in> L\<^isub>2" 
          proof -
            from eq_xya have "(x - xa)  \<approx>L\<^isub>2 (y - ya)" 
              unfolding Image_def str_eq_rel_def str_eq_def by auto
            with h3 show ?thesis unfolding str_eq_rel_def str_eq_def by simp
          qed
          -- {* Now, @{text "ya"} has all properties to be a qualified candidate:*}
          with pref_ya ya_in 
          show ?thesis using that by blast
        qed
          -- {* From the properties of @{text "ya"}, @{text "y @ z \<in> L\<^isub>1 ;; L\<^isub>2"} is derived easily.*}
        hence "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" by (erule_tac prefixE, auto simp:Seq_def)
      } moreover {
        -- {* The other case is even more simpler: *}
        fix za
        assume h1: "za \<le> z" and h2: "(x @ za) \<in> L\<^isub>1" and h3: "z - za \<in> L\<^isub>2"
        have "y @ za \<in> L\<^isub>1"
        proof-
          have "\<approx>L\<^isub>1 `` {x} = \<approx>L\<^isub>1 `` {y}" 
            using tag_xy unfolding tag_str_SEQ_def by simp
          with h2 show ?thesis
            unfolding Image_def str_eq_rel_def str_eq_def by auto
        qed
        with h1 h3 have "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" 
          by (drule_tac A = L\<^isub>1 in seq_intro, auto elim:prefixE)
      }
      ultimately show ?thesis by blast
    qed
  } 
  -- {* 
      \begin{minipage}{0.8\textwidth}
      @{text "?thesis"} is proved by exploiting the symmetry of 
      @{thm [source] "eq_tag"}:
      \end{minipage}
      *}
  from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]]
    show ?thesis unfolding str_eq_def str_eq_rel_def by blast
qed 

lemma quot_seq_finiteI [intro]:
  fixes L1 L2::"lang"
  assumes fin1: "finite (UNIV // \<approx>L1)" 
  and     fin2: "finite (UNIV // \<approx>L2)" 
  shows "finite (UNIV // \<approx>(L1 ;; L2))"
proof (rule_tac tag = "tag_str_SEQ L1 L2" in tag_finite_imageD)
  show "\<And>x y. tag_str_SEQ L1 L2 x = tag_str_SEQ L1 L2 y \<Longrightarrow> x \<approx>(L1 ;; L2) y"
    by (rule tag_str_SEQ_injI)
next
  have *: "finite ((UNIV // \<approx>L1) \<times> (Pow (UNIV // \<approx>L2)))" 
    using fin1 fin2 by auto
  show "finite (range (tag_str_SEQ L1 L2))" 
    unfolding tag_str_SEQ_def
    apply(rule finite_subset[OF _ *])
    unfolding quotient_def
    by auto
qed

subsubsection {* The inductive case for @{const "STAR"} *}

text {* 
  This turned out to be the trickiest case. The essential goal is 
  to proved @{text "y @ z \<in>  L\<^isub>1*"} under the assumptions that @{text "x @ z \<in>  L\<^isub>1*"}
  and that @{text "x"} and @{text "y"} have the same tag. The reasoning goes as the following:
  \begin{enumerate}
    \item Since @{text "x @ z \<in>  L\<^isub>1*"} holds, a prefix @{text "xa"} of @{text "x"} can be found
          such that @{text "xa \<in> L\<^isub>1*"} and @{text "(x - xa)@z \<in> L\<^isub>1*"}, as shown in Fig. \ref{first_split}.
          Such a prefix always exists, @{text "xa = []"}, for example, is one. 
    \item There could be many but fintie many of such @{text "xa"}, from which we can find the longest
          and name it @{text "xa_max"}, as shown in Fig. \ref{max_split}.
    \item The next step is to split @{text "z"} into @{text "za"} and @{text "zb"} such that
           @{text "(x - xa_max) @ za \<in> L\<^isub>1"} and @{text "zb \<in> L\<^isub>1*"}  as shown in Fig. \ref{last_split}.
          Such a split always exists because:
          \begin{enumerate}
            \item Because @{text "(x - x_max) @ z \<in> L\<^isub>1*"}, it can always be splitted into prefix @{text "a"}
              and suffix @{text "b"}, such that @{text "a \<in> L\<^isub>1"} and @{text "b \<in> L\<^isub>1*"},
              as shown in Fig. \ref{ab_split}.
            \item But the prefix @{text "a"} CANNOT be shorter than @{text "x - xa_max"} 
              (as shown in Fig. \ref{ab_split_wrong}), becasue otherwise,
                   @{text "ma_max@a"} would be in the same kind as @{text "xa_max"} but with 
                   a larger size, conflicting with the fact that @{text "xa_max"} is the longest.
          \end{enumerate}
    \item  \label{tansfer_step} 
         By the assumption that @{text "x"} and @{text "y"} have the same tag, the structure on @{text "x @ z"}
          can be transferred to @{text "y @ z"} as shown in Fig. \ref{trans_split}. The detailed steps are:
          \begin{enumerate}
            \item A @{text "y"}-prefix @{text "ya"} corresponding to @{text "xa"} can be found, 
                  which satisfies conditions: @{text "ya \<in> L\<^isub>1*"} and @{text "(y - ya)@za \<in> L\<^isub>1"}.
            \item Since we already know @{text "zb \<in> L\<^isub>1*"}, we get @{text "(y - ya)@za@zb \<in> L\<^isub>1*"},
                  and this is just @{text "(y - ya)@z \<in> L\<^isub>1*"}.
            \item With fact @{text "ya \<in> L\<^isub>1*"}, we finally get @{text "y@z \<in> L\<^isub>1*"}.
          \end{enumerate}
  \end{enumerate}

  The formal proof of lemma @{text "tag_str_STAR_injI"} faithfully follows this informal argument 
  while the tagging function @{text "tag_str_STAR"} is defined to make the transfer in step
  \ref{ansfer_step} feasible.

  \input{fig_star}
*} 

definition 
  tag_str_STAR :: "lang \<Rightarrow> string \<Rightarrow> lang set"
where
  "tag_str_STAR L1 \<equiv> (\<lambda>x. {\<approx>L1 `` {x - xa} | xa. xa < x \<and> xa \<in> L1\<star>})"

text {* A technical lemma. *}
lemma finite_set_has_max: "\<lbrakk>finite A; A \<noteq> {}\<rbrakk> \<Longrightarrow> 
           (\<exists> max \<in> A. \<forall> a \<in> A. f a <= (f max :: nat))"
proof (induct rule:finite.induct)
  case emptyI thus ?case by simp
next
  case (insertI A a)
  show ?case
  proof (cases "A = {}")
    case True thus ?thesis by (rule_tac x = a in bexI, auto)
  next
    case False
    with insertI.hyps and False  
    obtain max 
      where h1: "max \<in> A" 
      and h2: "\<forall>a\<in>A. f a \<le> f max" by blast
    show ?thesis
    proof (cases "f a \<le> f max")
      assume "f a \<le> f max"
      with h1 h2 show ?thesis by (rule_tac x = max in bexI, auto)
    next
      assume "\<not> (f a \<le> f max)"
      thus ?thesis using h2 by (rule_tac x = a in bexI, auto)
    qed
  qed
qed


text {* The following is a technical lemma.which helps to show the range finiteness of tag function. *}
lemma finite_strict_prefix_set: "finite {xa. xa < (x::string)}"
apply (induct x rule:rev_induct, simp)
apply (subgoal_tac "{xa. xa < xs @ [x]} = {xa. xa < xs} \<union> {xs}")
by (auto simp:strict_prefix_def)


lemma tag_str_STAR_injI:
  fixes v w
  assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w"
  shows "(v::string) \<approx>(L\<^isub>1\<star>) w"
proof-
    -- {* As explained before, a pattern for just one direction needs to be dealt with:*}
  { fix x y z
    assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>" 
      and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y"
    have "y @ z \<in> L\<^isub>1\<star>"
    proof(cases "x = []")
      -- {* 
        The degenerated case when @{text "x"} is a null string is easy to prove:
        *}
      case True
      with tag_xy have "y = []" 
        by (auto simp add: tag_str_STAR_def strict_prefix_def)
      thus ?thesis using xz_in_star True by simp
    next
        -- {* The nontrival case:
        *}
      case False
      -- {* 
        \begin{minipage}{0.8\textwidth}
        Since @{text "x @ z \<in> L\<^isub>1\<star>"}, @{text "x"} can always be splitted
        by a prefix @{text "xa"} together with its suffix @{text "x - xa"}, such
        that both @{text "xa"} and @{text "(x - xa) @ z"} are in @{text "L\<^isub>1\<star>"},
        and there could be many such splittings.Therefore, the following set @{text "?S"} 
        is nonempty, and finite as well:
        \end{minipage}
        *}
      let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}"
      have "finite ?S"
        by (rule_tac B = "{xa. xa < x}" in finite_subset, 
          auto simp:finite_strict_prefix_set)
      moreover have "?S \<noteq> {}" using False xz_in_star
        by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def)
      -- {* \begin{minipage}{0.7\textwidth} 
            Since @{text "?S"} is finite, we can always single out the longest and name it @{text "xa_max"}: 
            \end{minipage}
          *}
      ultimately have "\<exists> xa_max \<in> ?S. \<forall> xa \<in> ?S. length xa \<le> length xa_max" 
        using finite_set_has_max by blast
      then obtain xa_max 
        where h1: "xa_max < x" 
        and h2: "xa_max \<in> L\<^isub>1\<star>" 
        and h3: "(x - xa_max) @ z \<in> L\<^isub>1\<star>" 
        and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>  
                                     \<longrightarrow> length xa \<le> length xa_max"
        by blast
      -- {*
          \begin{minipage}{0.8\textwidth}
          By the equality of tags, the counterpart of @{text "xa_max"} among 
          @{text "y"}-prefixes, named @{text "ya"}, can be found:
          \end{minipage}
          *}
      obtain ya 
        where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" 
        and eq_xya: "(x - xa_max) \<approx>L\<^isub>1 (y - ya)"
      proof-
        from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} = 
          {\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right")
          by (auto simp:tag_str_STAR_def)
        moreover have "\<approx>L\<^isub>1 `` {x - xa_max} \<in> ?left" using h1 h2 by auto
        ultimately have "\<approx>L\<^isub>1 `` {x - xa_max} \<in> ?right" by simp
        thus ?thesis using that 
          apply (simp add:Image_def str_eq_rel_def str_eq_def) by blast
      qed 
      -- {*
          \begin{minipage}{0.8\textwidth}
          The @{text "?thesis"}, @{prop "y @ z \<in> L\<^isub>1\<star>"}, is a simple consequence
          of the following proposition:
          \end{minipage}
          *}
      have "(y - ya) @ z \<in> L\<^isub>1\<star>" 
      proof-
        -- {* The idea is to split the suffix @{text "z"} into @{text "za"} and @{text "zb"}, 
          such that: *}
        obtain za zb where eq_zab: "z = za @ zb" 
          and l_za: "(y - ya)@za \<in> L\<^isub>1" and ls_zb: "zb \<in> L\<^isub>1\<star>"
        proof -
          -- {* 
            \begin{minipage}{0.8\textwidth}
            Since @{thm "h1"}, @{text "x"} can be splitted into
            @{text "a"} and @{text "b"} such that:
            \end{minipage}
            *}
          from h1 have "(x - xa_max) @ z \<noteq> []" 
            by (auto simp:strict_prefix_def elim:prefixE)
          from star_decom [OF h3 this]
          obtain a b where a_in: "a \<in> L\<^isub>1" 
            and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>" 
            and ab_max: "(x - xa_max) @ z = a @ b" by blast
          -- {* Now the candiates for @{text "za"} and @{text "zb"} are found:*}
          let ?za = "a - (x - xa_max)" and ?zb = "b"
          have pfx: "(x - xa_max) \<le> a" (is "?P1") 
            and eq_z: "z = ?za @ ?zb" (is "?P2")
          proof -
            -- {* 
              \begin{minipage}{0.8\textwidth}
              Since @{text "(x - xa_max) @ z = a @ b"}, string @{text "(x - xa_max) @ z"}
              can be splitted in two ways:
              \end{minipage}
              *}
            have "((x - xa_max) \<le> a \<and> (a - (x - xa_max)) @ b = z) \<or> 
              (a < (x - xa_max) \<and> ((x - xa_max) - a) @ z = b)" 
              using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def)
            moreover {
              -- {* However, the undsired way can be refuted by absurdity: *}
              assume np: "a < (x - xa_max)" 
                and b_eqs: "((x - xa_max) - a) @ z = b"
              have "False"
              proof -
                let ?xa_max' = "xa_max @ a"
                have "?xa_max' < x" 
                  using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) 
                moreover have "?xa_max' \<in> L\<^isub>1\<star>" 
                  using a_in h2 by (simp add:star_intro3) 
                moreover have "(x - ?xa_max') @ z \<in> L\<^isub>1\<star>" 
                  using b_eqs b_in np h1 by (simp add:diff_diff_appd)
                moreover have "\<not> (length ?xa_max' \<le> length xa_max)" 
                  using a_neq by simp
                ultimately show ?thesis using h4 by blast
              qed }
            -- {* Now it can be shown that the splitting goes the way we desired. *}
            ultimately show ?P1 and ?P2 by auto
          qed
          hence "(x - xa_max)@?za \<in> L\<^isub>1" using a_in by (auto elim:prefixE)
          -- {* Now candidates @{text "?za"} and @{text "?zb"} have all the requred properteis. *}
          with eq_xya have "(y - ya) @ ?za \<in> L\<^isub>1" 
            by (auto simp:str_eq_def str_eq_rel_def)
           with eq_z and b_in 
          show ?thesis using that by blast
        qed
        -- {* 
           @{text "?thesis"} can easily be shown using properties of @{text "za"} and @{text "zb"}:
            *}
        have "((y - ya) @ za) @ zb \<in> L\<^isub>1\<star>" using  l_za ls_zb by blast
        with eq_zab show ?thesis by simp
      qed
      with h5 h6 show ?thesis 
        by (drule_tac star_intro1, auto simp:strict_prefix_def elim:prefixE)
    qed
  } 
  -- {* By instantiating the reasoning pattern just derived for both directions:*} 
  from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]]
  -- {* The thesis is proved as a trival consequence: *} 
    show  ?thesis unfolding str_eq_def str_eq_rel_def by blast
qed

lemma -- {* The oringal version with less explicit details. *}
  fixes v w
  assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w"
  shows "(v::string) \<approx>(L\<^isub>1\<star>) w"
proof-
    -- {* 
    \begin{minipage}{0.8\textwidth}
    According to the definition of @{text "\<approx>Lang"}, 
    proving @{text "v \<approx>(L\<^isub>1\<star>) w"} amounts to
    showing: for any string @{text "u"},
    if @{text "v @ u \<in> (L\<^isub>1\<star>)"} then @{text "w @ u \<in> (L\<^isub>1\<star>)"} and vice versa.
    The reasoning pattern for both directions are the same, as derived
    in the following:
    \end{minipage}
    *}
  { fix x y z
    assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>" 
      and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y"
    have "y @ z \<in> L\<^isub>1\<star>"
    proof(cases "x = []")
      -- {* 
        The degenerated case when @{text "x"} is a null string is easy to prove:
        *}
      case True
      with tag_xy have "y = []" 
        by (auto simp:tag_str_STAR_def strict_prefix_def)
      thus ?thesis using xz_in_star True by simp
    next
        -- {*
        \begin{minipage}{0.8\textwidth}
        The case when @{text "x"} is not null, and
        @{text "x @ z"} is in @{text "L\<^isub>1\<star>"}, 
        \end{minipage}
        *}
      case False
      obtain x_max 
        where h1: "x_max < x" 
        and h2: "x_max \<in> L\<^isub>1\<star>" 
        and h3: "(x - x_max) @ z \<in> L\<^isub>1\<star>" 
        and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star> 
                                     \<longrightarrow> length xa \<le> length x_max"
      proof-
        let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}"
        have "finite ?S"
          by (rule_tac B = "{xa. xa < x}" in finite_subset, 
                                auto simp:finite_strict_prefix_set)
        moreover have "?S \<noteq> {}" using False xz_in_star
          by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def)
        ultimately have "\<exists> max \<in> ?S. \<forall> a \<in> ?S. length a \<le> length max" 
          using finite_set_has_max by blast
        thus ?thesis using that by blast
      qed
      obtain ya 
        where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" and h7: "(x - x_max) \<approx>L\<^isub>1 (y - ya)"
      proof-
        from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} = 
          {\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right")
          by (auto simp:tag_str_STAR_def)
        moreover have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?left" using h1 h2 by auto
        ultimately have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?right" by simp
        with that show ?thesis apply 
          (simp add:Image_def str_eq_rel_def str_eq_def) by blast
      qed      
      have "(y - ya) @ z \<in> L\<^isub>1\<star>" 
      proof-
        from h3 h1 obtain a b where a_in: "a \<in> L\<^isub>1" 
          and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>" 
          and ab_max: "(x - x_max) @ z = a @ b" 
          by (drule_tac star_decom, auto simp:strict_prefix_def elim:prefixE)
        have "(x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z" 
        proof -
          have "((x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z) \<or> 
                            (a < (x - x_max) \<and> ((x - x_max) - a) @ z = b)" 
            using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def)
          moreover { 
            assume np: "a < (x - x_max)" and b_eqs: " ((x - x_max) - a) @ z = b"
            have "False"
            proof -
              let ?x_max' = "x_max @ a"
              have "?x_max' < x" 
                using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) 
              moreover have "?x_max' \<in> L\<^isub>1\<star>" 
                using a_in h2 by (simp add:star_intro3) 
              moreover have "(x - ?x_max') @ z \<in> L\<^isub>1\<star>" 
                using b_eqs b_in np h1 by (simp add:diff_diff_appd)
              moreover have "\<not> (length ?x_max' \<le> length x_max)" 
                using a_neq by simp
              ultimately show ?thesis using h4 by blast
            qed 
          } ultimately show ?thesis by blast
        qed
        then obtain za where z_decom: "z = za @ b" 
          and x_za: "(x - x_max) @ za \<in> L\<^isub>1" 
          using a_in by (auto elim:prefixE)        
        from x_za h7 have "(y - ya) @ za \<in> L\<^isub>1" 
          by (auto simp:str_eq_def str_eq_rel_def)
	with b_in have "((y - ya) @ za) @ b \<in> L\<^isub>1\<star>" by blast
        with z_decom show ?thesis by auto 
      qed
      with h5 h6 show ?thesis 
        by (drule_tac star_intro1, auto simp:strict_prefix_def elim:prefixE)
    qed
  } 
  -- {* By instantiating the reasoning pattern just derived for both directions:*} 
  from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]]
  -- {* The thesis is proved as a trival consequence: *} 
    show  ?thesis unfolding str_eq_def str_eq_rel_def by blast
qed

lemma quot_star_finiteI [intro]:
  fixes L1::"lang"
  assumes finite1: "finite (UNIV // \<approx>L1)"
  shows "finite (UNIV // \<approx>(L1\<star>))"
proof (rule_tac tag = "tag_str_STAR L1" in tag_finite_imageD)
  show "\<And>x y. tag_str_STAR L1 x = tag_str_STAR L1 y \<Longrightarrow> x \<approx>(L1\<star>) y"
    by (rule tag_str_STAR_injI)
next
  have *: "finite (Pow (UNIV // \<approx>L1))" 
    using finite1 by auto
  show "finite (range (tag_str_STAR L1))"
    unfolding tag_str_STAR_def
    apply(rule finite_subset[OF _ *])
    unfolding quotient_def
    by auto
qed

subsubsection{* The conclusion *}

lemma rexp_imp_finite:
  fixes r::"rexp"
  shows "finite (UNIV // \<approx>(L r))"
by (induct r) (auto)

end
author	urbanc
	Wed, 16 Feb 2011 12:25:53 +0000
changeset 109	79b37ef9505f
parent 99	54aa3b6dd71c
child 112	62fdb4bf7239
permissions	-rw-r--r--