regexp: Myhill.thy@a59473f0229d


theory Myhill
  imports Main List_Prefix Prefix_subtract Prelude
begin

(*
text {*
     \begin{figure}
    \centering
    \scalebox{0.95}{
    \begin{tikzpicture}[->,>=latex,shorten >=1pt,auto,node distance=1.2cm, semithick]
        \node[state,initial] (n1)                   {$1$};
        \node[state,accepting] (n2) [right = 10em of n1]   {$2$};

        \path (n1) edge [bend left] node {$0$} (n2)
            (n1) edge [loop above] node{$1$} (n1)
            (n2) edge [loop above] node{$0$} (n2)
            (n2) edge [bend left]  node {$1$} (n1)
            ;
    \end{tikzpicture}}
    \caption{An example automaton (or partition)}\label{fig:example_automata}
    \end{figure}
*}

*)

section {* Preliminary definitions *}

types lang = "string set"

text {* 
  Sequential composition of two languages @{text "L1"} and @{text "L2"} 
*}


definition Seq :: "lang \<Rightarrow> lang \<Rightarrow> lang" ("_ ;; _" [100,100] 100)
where 
  "L1 ;; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"

text {* Transitive closure of language @{text "L"}. *}
inductive_set
  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
  for L :: "string set"
where
  start[intro]: "[] \<in> L\<star>"
| step[intro]:  "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1@s2 \<in> L\<star>" 

text {* Some properties of operator @{text ";;"}.*}

lemma seq_union_distrib:
  "(A \<union> B) ;; C = (A ;; C) \<union> (B ;; C)"
by (auto simp:Seq_def)

lemma seq_intro:
  "\<lbrakk>x \<in> A; y \<in> B\<rbrakk> \<Longrightarrow> x @ y \<in> A ;; B "
by (auto simp:Seq_def)

lemma seq_assoc:
  "(A ;; B) ;; C = A ;; (B ;; C)"
apply(auto simp:Seq_def)
apply blast
by (metis append_assoc)

lemma star_intro1[rule_format]: "x \<in> lang\<star> \<Longrightarrow> \<forall> y. y \<in> lang\<star> \<longrightarrow> x @ y \<in> lang\<star>"
by (erule Star.induct, auto)

lemma star_intro2: "y \<in> lang \<Longrightarrow> y \<in> lang\<star>"
by (drule step[of y lang "[]"], auto simp:start)

lemma star_intro3[rule_format]: 
  "x \<in> lang\<star> \<Longrightarrow> \<forall>y . y \<in> lang \<longrightarrow> x @ y \<in> lang\<star>"
by (erule Star.induct, auto intro:star_intro2)

lemma star_decom: 
  "\<lbrakk>x \<in> lang\<star>; x \<noteq> []\<rbrakk> \<Longrightarrow>(\<exists> a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> lang \<and> b \<in> lang\<star>)"
by (induct x rule: Star.induct, simp, blast)

lemma star_decom': 
  "\<lbrakk>x \<in> lang\<star>; x \<noteq> []\<rbrakk> \<Longrightarrow> \<exists>a b. x = a @ b \<and> a \<in> lang\<star> \<and> b \<in> lang"
apply (induct x rule:Star.induct, simp)
apply (case_tac "s2 = []")
apply (rule_tac x = "[]" in exI, rule_tac x = s1 in exI, simp add:start)
apply (simp, (erule exE| erule conjE)+)
by (rule_tac x = "s1 @ a" in exI, rule_tac x = b in exI, simp add:step)

text {* Ardens lemma expressed at the level of language, rather than the level of regular expression. *}

theorem ardens_revised:
  assumes nemp: "[] \<notin> A"
  shows "(X = X ;; A \<union> B) \<longleftrightarrow> (X = B ;; A\<star>)"
proof
  assume eq: "X = B ;; A\<star>"
  have "A\<star> =  {[]} \<union> A\<star> ;; A" 
    by (auto simp:Seq_def star_intro3 star_decom')  
  then have "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)" 
    unfolding Seq_def by simp
  also have "\<dots> = B \<union> B ;; (A\<star> ;; A)"  
    unfolding Seq_def by auto
  also have "\<dots> = B \<union> (B ;; A\<star>) ;; A" 
    by (simp only:seq_assoc)
  finally show "X = X ;; A \<union> B" 
    using eq by blast 
next
  assume eq': "X = X ;; A \<union> B"
  hence c1': "\<And> x. x \<in> B \<Longrightarrow> x \<in> X" 
    and c2': "\<And> x y. \<lbrakk>x \<in> X; y \<in> A\<rbrakk> \<Longrightarrow> x @ y \<in> X" 
    using Seq_def by auto
  show "X = B ;; A\<star>" 
  proof
    show "B ;; A\<star> \<subseteq> X"
    proof-
      { fix x y
        have "\<lbrakk>y \<in> A\<star>; x \<in> X\<rbrakk> \<Longrightarrow> x @ y \<in> X "
          apply (induct arbitrary:x rule:Star.induct, simp)
          by (auto simp only:append_assoc[THEN sym] dest:c2')
      } thus ?thesis using c1' by (auto simp:Seq_def) 
    qed
  next
    show "X \<subseteq> B ;; A\<star>"
    proof-
      { fix x 
        have "x \<in> X \<Longrightarrow> x \<in> B ;; A\<star>"
        proof (induct x taking:length rule:measure_induct)
          fix z
          assume hyps: 
            "\<forall>y. length y < length z \<longrightarrow> y \<in> X \<longrightarrow> y \<in> B ;; A\<star>" 
            and z_in: "z \<in> X"
          show "z \<in> B ;; A\<star>"
          proof (cases "z \<in> B")
            case True thus ?thesis by (auto simp:Seq_def start)
          next
            case False hence "z \<in> X ;; A" using eq' z_in by auto
            then obtain za zb where za_in: "za \<in> X" 
              and zab: "z = za @ zb \<and> zb \<in> A" and zbne: "zb \<noteq> []" 
              using nemp unfolding Seq_def by blast
            from zbne zab have "length za < length z" by auto
            with za_in hyps have "za \<in> B ;; A\<star>" by blast
            hence "za @ zb \<in> B ;; A\<star>" using zab 
              by (clarsimp simp:Seq_def, blast dest:star_intro3)
            thus ?thesis using zab by simp       
          qed
        qed 
      } thus ?thesis by blast
    qed
  qed
qed


text {* The syntax of regular expressions is defined by the datatype @{text "rexp"}. *}
datatype rexp =
  NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp


text {* 
  The following @{text "L"} is an overloaded operator, where @{text "L(x)"} evaluates to 
  the language represented by the syntactic object @{text "x"}.
*}
consts L:: "'a \<Rightarrow> string set"


text {* 
  The @{text "L(rexp)"} for regular expression @{text "rexp"} is defined by the 
  following overloading function @{text "L_rexp"}.
*}
overloading L_rexp \<equiv> "L::  rexp \<Rightarrow> string set"
begin
fun
  L_rexp :: "rexp \<Rightarrow> string set"
where
    "L_rexp (NULL) = {}"
  | "L_rexp (EMPTY) = {[]}"
  | "L_rexp (CHAR c) = {[c]}"
  | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)"
  | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
  | "L_rexp (STAR r) = (L_rexp r)\<star>"
end

text {*
  To obtain equational system out of finite set of equivalent classes, a fold operation
  on finite set @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "fold"}
  more robust than the @{text "fold"} in Isabelle library. The expression @{text "folds f"}
  makes sense when @{text "f"} is not @{text "associative"} and @{text "commutitive"},
  while @{text "fold f"} does not.  
*}

definition 
  folds :: "('a \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> 'b \<Rightarrow> 'a set \<Rightarrow> 'b"
where
  "folds f z S \<equiv> SOME x. fold_graph f z S x"

text {* 
  The following lemma assures that the arbitrary choice made by the @{text "SOME"} in @{text "folds"}
  does not affect the @{text "L"}-value of the resultant regular expression. 
  *}
lemma folds_alt_simp [simp]:
  "finite rs \<Longrightarrow> L (folds ALT NULL rs) = \<Union> (L ` rs)"
apply (rule set_eq_intro, simp add:folds_def)
apply (rule someI2_ex, erule finite_imp_fold_graph)
by (erule fold_graph.induct, auto)

(* Just a technical lemma. *)
lemma [simp]:
  shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y"
by simp

text {*
  @{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}.
*}

definition
  str_eq_rel ("\<approx>_" [100] 100)
where
  "\<approx>Lang \<equiv> {(x, y).  (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}"

text {* 
  Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out 
  those which contains strings from @{text "Lang"}.
*}

definition 
   "finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}"

text {* 
  The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}.
*}
lemma lang_is_union_of_finals: 
  "Lang = \<Union> finals(Lang)"
proof 
  show "Lang \<subseteq> \<Union> (finals Lang)"
  proof
    fix x
    assume "x \<in> Lang"   
    thus "x \<in> \<Union> (finals Lang)"
      apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI)
      by (auto simp:Image_def str_eq_rel_def)    
  qed
next
  show "\<Union> (finals Lang) \<subseteq> Lang"
    apply (clarsimp simp:finals_def str_eq_rel_def)
    by (drule_tac x = "[]" in spec, auto)
qed




section {* Direction @{text "finite partition \<Rightarrow> regular language"}*}

text {* 
  The relationship between equivalent classes can be described by an
  equational system.
  For example, in equational system \eqref{example_eqns},  $X_0, X_1$ are equivalent 
  classes. The first equation says every string in $X_0$ is obtained either by
  appending one $b$ to a string in $X_0$ or by appending one $a$ to a string in
  $X_1$ or just be an empty string (represented by the regular expression $\lambda$). Similary,
  the second equation tells how the strings inside $X_1$ are composed.
  \begin{equation}\label{example_eqns}
    \begin{aligned}
      X_0 & = X_0 b + X_1 a + \lambda \\
      X_1 & = X_0 a + X_1 b
    \end{aligned}
  \end{equation}
  The summands on the right hand side is represented by the following data type
  @{text "rhs_item"}, mnemonic for 'right hand side item'.
  Generally, there are two kinds of right hand side items, one kind corresponds to
  pure regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other kind corresponds to
  transitions from one one equivalent class to another, like the $X_0 b, X_1 a$ etc.
  *}

datatype rhs_item = 
   Lam "rexp"                           (* Lambda *)
 | Trn "(string set)" "rexp"              (* Transition *)

text {*
  In this formalization, pure regular expressions like $\lambda$ is 
  repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is represented by $Trn~X_0~(CHAR~a)$.
  *}

text {*
  The functions @{text "the_r"} and @{text "the_Trn"} are used to extract
  subcomponents from right hand side items.
  *}

fun the_r :: "rhs_item \<Rightarrow> rexp"
where "the_r (Lam r) = r"

fun the_Trn:: "rhs_item \<Rightarrow> (string set \<times> rexp)"
where "the_Trn (Trn Y r) = (Y, r)"

text {*
  Every right hand side item @{text "itm"} defines a string set given 
  @{text "L(itm)"}, defined as:
*}
overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> string set"
begin
  fun L_rhs_e:: "rhs_item \<Rightarrow> string set"
  where
     "L_rhs_e (Lam r) = L r" |
     "L_rhs_e (Trn X r) = X ;; L r"
end

text {*
  The right hand side of every equation is represented by a set of
  items. The string set defined by such a set @{text "itms"} is given
  by @{text "L(itms)"}, defined as:
*}

overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> string set"
begin
   fun L_rhs:: "rhs_item set \<Rightarrow> string set"
   where "L_rhs rhs = \<Union> (L ` rhs)"
end

text {* 
  Given a set of equivalent classses @{text "CS"} and one equivalent class @{text "X"} among
  @{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of
  the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"}
  is:
  *}

definition
  "init_rhs CS X \<equiv>  
      if ([] \<in> X) then 
          {Lam(EMPTY)} \<union> {Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}
      else 
          {Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"

text {*
  In the definition of @{text "init_rhs"}, the term 
  @{text "{Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"} appearing on both branches
  describes the formation of strings in @{text "X"} out of transitions, while 
  the term @{text "{Lam(EMPTY)}"} describes the empty string which is intrinsically contained in
  @{text "X"} rather than by transition. This @{text "{Lam(EMPTY)}"} corresponds to 
  the $\lambda$ in \eqref{example_eqns}.

  With the help of @{text "init_rhs"}, the equitional system descrbing the formation of every
  equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}.
  *}

definition "eqs CS \<equiv> {(X, init_rhs CS X) | X.  X \<in> CS}"
(************ arden's lemma variation ********************)

text {* 
  The following @{text "items_of rhs X"} returns all @{text "X"}-items in @{text "rhs"}.
  *}
definition
  "items_of rhs X \<equiv> {Trn X r | r. (Trn X r) \<in> rhs}"

text {* 
  The following @{text "rexp_of rhs X"} combines all regular expressions in @{text "X"}-items
  using @{text "ALT"} to form a single regular expression. 
  It will be used later to implement @{text "arden_variate"} and @{text "rhs_subst"}.
  *}

definition 
  "rexp_of rhs X \<equiv> folds ALT NULL ((snd o the_Trn) ` items_of rhs X)"

text {* 
  The following @{text "lam_of rhs"} returns all pure regular expression items in @{text "rhs"}.
  *}

definition
  "lam_of rhs \<equiv> {Lam r | r. Lam r \<in> rhs}"

text {*
  The following @{text "rexp_of_lam rhs"} combines pure regular expression items in @{text "rhs"}
  using @{text "ALT"} to form a single regular expression. 
  When all variables inside @{text "rhs"} are eliminated, @{text "rexp_of_lam rhs"}
  is used to compute compute the regular expression corresponds to @{text "rhs"}.
  *}

definition
  "rexp_of_lam rhs \<equiv> folds ALT NULL (the_r ` lam_of rhs)"

text {*
  The following @{text "attach_rexp rexp' itm"} attach 
  the regular expression @{text "rexp'"} to
  the right of right hand side item @{text "itm"}.
  *}

fun attach_rexp :: "rexp \<Rightarrow> rhs_item \<Rightarrow> rhs_item"
where
  "attach_rexp rexp' (Lam rexp)   = Lam (SEQ rexp rexp')"
| "attach_rexp rexp' (Trn X rexp) = Trn X (SEQ rexp rexp')"

text {* 
  The following @{text "append_rhs_rexp rhs rexp"} attaches 
  @{text "rexp"} to every item in @{text "rhs"}.
  *}

definition
  "append_rhs_rexp rhs rexp \<equiv> (attach_rexp rexp) ` rhs"

text {*
  With the help of the two functions immediately above, Ardens'
  transformation on right hand side @{text "rhs"} is implemented
  by the following function @{text "arden_variate X rhs"}.
  After this transformation, the recursive occurent of @{text "X"}
  in @{text "rhs"} will be eliminated, while the 
  string set defined by @{text "rhs"} is kept unchanged.
  *}
definition 
  "arden_variate X rhs \<equiv> 
        append_rhs_rexp (rhs - items_of rhs X) (STAR (rexp_of rhs X))"


(*********** substitution of ES *************)

text {* 
  Suppose the equation defining @{text "X"} is $X = xrhs$,
  the purpose of @{text "rhs_subst"} is to substitute all occurences of @{text "X"} in
  @{text "rhs"} by @{text "xrhs"}.
  A litte thought may reveal that the final result
  should be: first append $(a_1 | a_2 | \ldots | a_n)$ to every item of @{text "xrhs"} and then
  union the result with all non-@{text "X"}-items of @{text "rhs"}.
 *}
definition 
  "rhs_subst rhs X xrhs \<equiv> 
        (rhs - (items_of rhs X)) \<union> (append_rhs_rexp xrhs (rexp_of rhs X))"

text {*
  Suppose the equation defining @{text "X"} is $X = xrhs$, the follwing
  @{text "eqs_subst ES X xrhs"} substitute @{text "xrhs"} into every equation
  of the equational system @{text "ES"}.
  *}

definition
  "eqs_subst ES X xrhs \<equiv> {(Y, rhs_subst yrhs X xrhs) | Y yrhs. (Y, yrhs) \<in> ES}"

text {*
  The computation of regular expressions for equivalent classes is accomplished
  using a iteration principle given by the following lemma.
  *}

lemma wf_iter [rule_format]: 
  fixes f
  assumes step: "\<And> e. \<lbrakk>P e; \<not> Q e\<rbrakk> \<Longrightarrow> (\<exists> e'. P e' \<and>  (f(e'), f(e)) \<in> less_than)"
  shows pe:     "P e \<longrightarrow> (\<exists> e'. P e' \<and>  Q e')"
proof(induct e rule: wf_induct 
           [OF wf_inv_image[OF wf_less_than, where f = "f"]], clarify)
  fix x 
  assume h [rule_format]: 
    "\<forall>y. (y, x) \<in> inv_image less_than f \<longrightarrow> P y \<longrightarrow> (\<exists>e'. P e' \<and> Q e')"
    and px: "P x"
  show "\<exists>e'. P e' \<and> Q e'"
  proof(cases "Q x")
    assume "Q x" with px show ?thesis by blast
  next
    assume nq: "\<not> Q x"
    from step [OF px nq]
    obtain e' where pe': "P e'" and ltf: "(f e', f x) \<in> less_than" by auto
    show ?thesis
    proof(rule h)
      from ltf show "(e', x) \<in> inv_image less_than f" 
	by (simp add:inv_image_def)
    next
      from pe' show "P e'" .
    qed
  qed
qed

text {*
  The @{text "P"} in lemma @{text "wf_iter"} is an invaiant kept throughout the iteration procedure.
  The particular invariant used to solve our problem is defined by function @{text "Inv(ES)"},
  an invariant over equal system @{text "ES"}.
  Every definition starting next till @{text "Inv"} stipulates a property to be satisfied by @{text "ES"}.
*}

text {* 
  Every variable is defined at most onece in @{text "ES"}.
  *}
definition 
  "distinct_equas ES \<equiv> 
            \<forall> X rhs rhs'. (X, rhs) \<in> ES \<and> (X, rhs') \<in> ES \<longrightarrow> rhs = rhs'"
text {* 
  Every equation in @{text "ES"} (represented by @{text "(X, rhs)"}) is valid, i.e. @{text "(X = L rhs)"}.
  *}
definition 
  "valid_eqns ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> (X = L rhs)"

text {*
  The following @{text "rhs_nonempty rhs"} requires regular expressions occuring in transitional 
  items of @{text "rhs"} does not contain empty string. This is necessary for
  the application of Arden's transformation to @{text "rhs"}.
  *}
definition 
  "rhs_nonempty rhs \<equiv> (\<forall> Y r. Trn Y r \<in> rhs \<longrightarrow> [] \<notin> L r)"

text {*
  The following @{text "ardenable ES"} requires that Arden's transformation is applicable
  to every equation of equational system @{text "ES"}.
  *}
definition 
  "ardenable ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> rhs_nonempty rhs"

(* The following non_empty seems useless. *)
definition 
  "non_empty ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> X \<noteq> {}"

text {*
  The following @{text "finite_rhs ES"} requires every equation in @{text "rhs"} be finite.
  *}
definition
  "finite_rhs ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> finite rhs"

text {*
  The following @{text "classes_of rhs"} returns all variables (or equivalent classes)
  occuring in @{text "rhs"}.
  *}
definition 
  "classes_of rhs \<equiv> {X. \<exists> r. Trn X r \<in> rhs}"

text {*
  The following @{text "lefts_of ES"} returns all variables 
  defined by equational system @{text "ES"}.
  *}
definition
  "lefts_of ES \<equiv> {Y | Y yrhs. (Y, yrhs) \<in> ES}"

text {*
  The following @{text "self_contained ES"} requires that every
  variable occuring on the right hand side of equations is already defined by some
  equation in @{text "ES"}.
  *}
definition 
  "self_contained ES \<equiv> \<forall> (X, xrhs) \<in> ES. classes_of xrhs \<subseteq> lefts_of ES"


text {*
  The invariant @{text "Inv(ES)"} is a conjunction of all the previously defined constaints.
  *}
definition 
  "Inv ES \<equiv> valid_eqns ES \<and> finite ES \<and> distinct_equas ES \<and> ardenable ES \<and> 
                non_empty ES \<and> finite_rhs ES \<and> self_contained ES"

subsection {* The proof of this direction *}

subsubsection {* Basic properties *}

text {*
  The following are some basic properties of the above definitions.
*}

lemma L_rhs_union_distrib:
  " L (A::rhs_item set) \<union> L B = L (A \<union> B)"
by simp

lemma finite_snd_Trn:
  assumes finite:"finite rhs"
  shows "finite {r\<^isub>2. Trn Y r\<^isub>2 \<in> rhs}" (is "finite ?B")
proof-
  def rhs' \<equiv> "{e \<in> rhs. \<exists> r. e = Trn Y r}"
  have "?B = (snd o the_Trn) ` rhs'" using rhs'_def by (auto simp:image_def)
  moreover have "finite rhs'" using finite rhs'_def by auto
  ultimately show ?thesis by simp
qed

lemma rexp_of_empty:
  assumes finite:"finite rhs"
  and nonempty:"rhs_nonempty rhs"
  shows "[] \<notin> L (rexp_of rhs X)"
using finite nonempty rhs_nonempty_def
by (drule_tac finite_snd_Trn[where Y = X], auto simp:rexp_of_def items_of_def)

lemma [intro!]:
  "P (Trn X r) \<Longrightarrow> (\<exists>a. (\<exists>r. a = Trn X r \<and> P a))" by auto

lemma finite_items_of:
  "finite rhs \<Longrightarrow> finite (items_of rhs X)"
by (auto simp:items_of_def intro:finite_subset)

lemma lang_of_rexp_of:
  assumes finite:"finite rhs"
  shows "L (items_of rhs X) = X ;; (L (rexp_of rhs X))"
proof -
  have "finite ((snd \<circ> the_Trn) ` items_of rhs X)" using finite_items_of[OF finite] by auto
  thus ?thesis
    apply (auto simp:rexp_of_def Seq_def items_of_def)
    apply (rule_tac x = s1 in exI, rule_tac x = s2 in exI, auto)
    by (rule_tac x= "Trn X r" in exI, auto simp:Seq_def)
qed

lemma rexp_of_lam_eq_lam_set:
  assumes finite: "finite rhs"
  shows "L (rexp_of_lam rhs) = L (lam_of rhs)"
proof -
  have "finite (the_r ` {Lam r |r. Lam r \<in> rhs})" using finite
    by (rule_tac finite_imageI, auto intro:finite_subset)
  thus ?thesis by (auto simp:rexp_of_lam_def lam_of_def)
qed

lemma [simp]:
  " L (attach_rexp r xb) = L xb ;; L r"
apply (cases xb, auto simp:Seq_def)
by (rule_tac x = "s1 @ s1a" in exI, rule_tac x = s2a in exI,auto simp:Seq_def)

lemma lang_of_append_rhs:
  "L (append_rhs_rexp rhs r) = L rhs ;; L r"
apply (auto simp:append_rhs_rexp_def image_def)
apply (auto simp:Seq_def)
apply (rule_tac x = "L xb ;; L r" in exI, auto simp add:Seq_def)
by (rule_tac x = "attach_rexp r xb" in exI, auto simp:Seq_def)

lemma classes_of_union_distrib:
  "classes_of A \<union> classes_of B = classes_of (A \<union> B)"
by (auto simp add:classes_of_def)

lemma lefts_of_union_distrib:
  "lefts_of A \<union> lefts_of B = lefts_of (A \<union> B)"
by (auto simp:lefts_of_def)


subsubsection {* Intialization *}

text {*
  The following several lemmas until @{text "init_ES_satisfy_Inv"} shows that
  the initial equational system satisfies invariant @{text "Inv"}.
  *}

lemma defined_by_str:
  "\<lbrakk>s \<in> X; X \<in> UNIV // (\<approx>Lang)\<rbrakk> \<Longrightarrow> X = (\<approx>Lang) `` {s}"
by (auto simp:quotient_def Image_def str_eq_rel_def)

lemma every_eqclass_has_transition:
  assumes has_str: "s @ [c] \<in> X"
  and     in_CS:   "X \<in> UNIV // (\<approx>Lang)"
  obtains Y where "Y \<in> UNIV // (\<approx>Lang)" and "Y ;; {[c]} \<subseteq> X" and "s \<in> Y"
proof -
  def Y \<equiv> "(\<approx>Lang) `` {s}"
  have "Y \<in> UNIV // (\<approx>Lang)" 
    unfolding Y_def quotient_def by auto
  moreover
  have "X = (\<approx>Lang) `` {s @ [c]}" 
    using has_str in_CS defined_by_str by blast
  then have "Y ;; {[c]} \<subseteq> X" 
    unfolding Y_def Image_def Seq_def
    unfolding str_eq_rel_def
    by clarsimp
  moreover
  have "s \<in> Y" unfolding Y_def 
    unfolding Image_def str_eq_rel_def by simp
  ultimately show thesis by (blast intro: that)
qed

lemma l_eq_r_in_eqs:
  assumes X_in_eqs: "(X, xrhs) \<in> (eqs (UNIV // (\<approx>Lang)))"
  shows "X = L xrhs"
proof 
  show "X \<subseteq> L xrhs"
  proof
    fix x
    assume "(1)": "x \<in> X"
    show "x \<in> L xrhs"          
    proof (cases "x = []")
      assume empty: "x = []"
      thus ?thesis using X_in_eqs "(1)"
        by (auto simp:eqs_def init_rhs_def)
    next
      assume not_empty: "x \<noteq> []"
      then obtain clist c where decom: "x = clist @ [c]"
        by (case_tac x rule:rev_cases, auto)
      have "X \<in> UNIV // (\<approx>Lang)" using X_in_eqs by (auto simp:eqs_def)
      then obtain Y 
        where "Y \<in> UNIV // (\<approx>Lang)" 
        and "Y ;; {[c]} \<subseteq> X"
        and "clist \<in> Y"
        using decom "(1)" every_eqclass_has_transition by blast
      hence 
        "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y ;; {[c]} \<subseteq> X}"
        using "(1)" decom
        by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def)
      thus ?thesis using X_in_eqs "(1)"
        by (simp add:eqs_def init_rhs_def)
    qed
  qed
next
  show "L xrhs \<subseteq> X" using X_in_eqs
    by (auto simp:eqs_def init_rhs_def) 
qed

lemma finite_init_rhs: 
  assumes finite: "finite CS"
  shows "finite (init_rhs CS X)"
proof-
  have "finite {Trn Y (CHAR c) |Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}" (is "finite ?A")
  proof -
    def S \<equiv> "{(Y, c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}" 
    def h \<equiv> "\<lambda> (Y, c). Trn Y (CHAR c)"
    have "finite (CS \<times> (UNIV::char set))" using finite by auto
    hence "finite S" using S_def 
      by (rule_tac B = "CS \<times> UNIV" in finite_subset, auto)
    moreover have "?A = h ` S" by (auto simp: S_def h_def image_def)
    ultimately show ?thesis 
      by auto
  qed
  thus ?thesis by (simp add:init_rhs_def)
qed

lemma init_ES_satisfy_Inv:
  assumes finite_CS: "finite (UNIV // (\<approx>Lang))"
  shows "Inv (eqs (UNIV // (\<approx>Lang)))"
proof -
  have "finite (eqs (UNIV // (\<approx>Lang)))" using finite_CS
    by (simp add:eqs_def)
  moreover have "distinct_equas (eqs (UNIV // (\<approx>Lang)))"     
    by (simp add:distinct_equas_def eqs_def)
  moreover have "ardenable (eqs (UNIV // (\<approx>Lang)))"
    by (auto simp add:ardenable_def eqs_def init_rhs_def rhs_nonempty_def del:L_rhs.simps)
  moreover have "valid_eqns (eqs (UNIV // (\<approx>Lang)))"
    using l_eq_r_in_eqs by (simp add:valid_eqns_def)
  moreover have "non_empty (eqs (UNIV // (\<approx>Lang)))"
    by (auto simp:non_empty_def eqs_def quotient_def Image_def str_eq_rel_def)
  moreover have "finite_rhs (eqs (UNIV // (\<approx>Lang)))"
    using finite_init_rhs[OF finite_CS] 
    by (auto simp:finite_rhs_def eqs_def)
  moreover have "self_contained (eqs (UNIV // (\<approx>Lang)))"
    by (auto simp:self_contained_def eqs_def init_rhs_def classes_of_def lefts_of_def)
  ultimately show ?thesis by (simp add:Inv_def)
qed

subsubsection {* 
  Interation step
  *}

text {*
  From this point until @{text "iteration_step"}, it is proved
  that there exists iteration steps which keep @{text "Inv(ES)"} while
  decreasing the size of @{text "ES"}.
  *}
lemma arden_variate_keeps_eq:
  assumes l_eq_r: "X = L rhs"
  and not_empty: "[] \<notin> L (rexp_of rhs X)"
  and finite: "finite rhs"
  shows "X = L (arden_variate X rhs)"
proof -
  def A \<equiv> "L (rexp_of rhs X)"
  def b \<equiv> "rhs - items_of rhs X"
  def B \<equiv> "L b" 
  have "X = B ;; A\<star>"
  proof-
    have "rhs = items_of rhs X \<union> b" by (auto simp:b_def items_of_def)
    hence "L rhs = L(items_of rhs X \<union> b)" by simp
    hence "L rhs = L(items_of rhs X) \<union> B" by (simp only:L_rhs_union_distrib B_def)
    with lang_of_rexp_of
    have "L rhs = X ;; A \<union> B " using finite by (simp only:B_def b_def A_def)
    thus ?thesis
      using l_eq_r not_empty
      apply (drule_tac B = B and X = X in ardens_revised)
      by (auto simp:A_def simp del:L_rhs.simps)
  qed
  moreover have "L (arden_variate X rhs) = (B ;; A\<star>)" (is "?L = ?R")
    by (simp only:arden_variate_def L_rhs_union_distrib lang_of_append_rhs 
                  B_def A_def b_def L_rexp.simps seq_union_distrib)
   ultimately show ?thesis by simp
qed 

lemma append_keeps_finite:
  "finite rhs \<Longrightarrow> finite (append_rhs_rexp rhs r)"
by (auto simp:append_rhs_rexp_def)

lemma arden_variate_keeps_finite:
  "finite rhs \<Longrightarrow> finite (arden_variate X rhs)"
by (auto simp:arden_variate_def append_keeps_finite)

lemma append_keeps_nonempty:
  "rhs_nonempty rhs \<Longrightarrow> rhs_nonempty (append_rhs_rexp rhs r)"
apply (auto simp:rhs_nonempty_def append_rhs_rexp_def)
by (case_tac x, auto simp:Seq_def)

lemma nonempty_set_sub:
  "rhs_nonempty rhs \<Longrightarrow> rhs_nonempty (rhs - A)"
by (auto simp:rhs_nonempty_def)

lemma nonempty_set_union:
  "\<lbrakk>rhs_nonempty rhs; rhs_nonempty rhs'\<rbrakk> \<Longrightarrow> rhs_nonempty (rhs \<union> rhs')"
by (auto simp:rhs_nonempty_def)

lemma arden_variate_keeps_nonempty:
  "rhs_nonempty rhs \<Longrightarrow> rhs_nonempty (arden_variate X rhs)"
by (simp only:arden_variate_def append_keeps_nonempty nonempty_set_sub)


lemma rhs_subst_keeps_nonempty:
  "\<lbrakk>rhs_nonempty rhs; rhs_nonempty xrhs\<rbrakk> \<Longrightarrow> rhs_nonempty (rhs_subst rhs X xrhs)"
by (simp only:rhs_subst_def append_keeps_nonempty  nonempty_set_union nonempty_set_sub)

lemma rhs_subst_keeps_eq:
  assumes substor: "X = L xrhs"
  and finite: "finite rhs"
  shows "L (rhs_subst rhs X xrhs) = L rhs" (is "?Left = ?Right")
proof-
  def A \<equiv> "L (rhs - items_of rhs X)"
  have "?Left = A \<union> L (append_rhs_rexp xrhs (rexp_of rhs X))"
    by (simp only:rhs_subst_def L_rhs_union_distrib A_def)
  moreover have "?Right = A \<union> L (items_of rhs X)"
  proof-
    have "rhs = (rhs - items_of rhs X) \<union> (items_of rhs X)" by (auto simp:items_of_def)
    thus ?thesis by (simp only:L_rhs_union_distrib A_def)
  qed
  moreover have "L (append_rhs_rexp xrhs (rexp_of rhs X)) = L (items_of rhs X)" 
    using finite substor  by (simp only:lang_of_append_rhs lang_of_rexp_of)
  ultimately show ?thesis by simp
qed

lemma rhs_subst_keeps_finite_rhs:
  "\<lbrakk>finite rhs; finite yrhs\<rbrakk> \<Longrightarrow> finite (rhs_subst rhs Y yrhs)"
by (auto simp:rhs_subst_def append_keeps_finite)

lemma eqs_subst_keeps_finite:
  assumes finite:"finite (ES:: (string set \<times> rhs_item set) set)"
  shows "finite (eqs_subst ES Y yrhs)"
proof -
  have "finite {(Ya, rhs_subst yrhsa Y yrhs) |Ya yrhsa. (Ya, yrhsa) \<in> ES}" 
                                                                  (is "finite ?A")
  proof-
    def eqns' \<equiv> "{((Ya::string set), yrhsa)| Ya yrhsa. (Ya, yrhsa) \<in> ES}"
    def h \<equiv> "\<lambda> ((Ya::string set), yrhsa). (Ya, rhs_subst yrhsa Y yrhs)"
    have "finite (h ` eqns')" using finite h_def eqns'_def by auto
    moreover have "?A = h ` eqns'" by (auto simp:h_def eqns'_def)
    ultimately show ?thesis by auto      
  qed
  thus ?thesis by (simp add:eqs_subst_def)
qed

lemma eqs_subst_keeps_finite_rhs:
  "\<lbrakk>finite_rhs ES; finite yrhs\<rbrakk> \<Longrightarrow> finite_rhs (eqs_subst ES Y yrhs)"
by (auto intro:rhs_subst_keeps_finite_rhs simp add:eqs_subst_def finite_rhs_def)

lemma append_rhs_keeps_cls:
  "classes_of (append_rhs_rexp rhs r) = classes_of rhs"
apply (auto simp:classes_of_def append_rhs_rexp_def)
apply (case_tac xa, auto simp:image_def)
by (rule_tac x = "SEQ ra r" in exI, rule_tac x = "Trn x ra" in bexI, simp+)

lemma arden_variate_removes_cl:
  "classes_of (arden_variate Y yrhs) = classes_of yrhs - {Y}"
apply (simp add:arden_variate_def append_rhs_keeps_cls items_of_def)
by (auto simp:classes_of_def)

lemma lefts_of_keeps_cls:
  "lefts_of (eqs_subst ES Y yrhs) = lefts_of ES"
by (auto simp:lefts_of_def eqs_subst_def)

lemma rhs_subst_updates_cls:
  "X \<notin> classes_of xrhs \<Longrightarrow> 
      classes_of (rhs_subst rhs X xrhs) = classes_of rhs \<union> classes_of xrhs - {X}"
apply (simp only:rhs_subst_def append_rhs_keeps_cls 
                              classes_of_union_distrib[THEN sym])
by (auto simp:classes_of_def items_of_def)

lemma eqs_subst_keeps_self_contained:
  fixes Y
  assumes sc: "self_contained (ES \<union> {(Y, yrhs)})" (is "self_contained ?A")
  shows "self_contained (eqs_subst ES Y (arden_variate Y yrhs))" 
                                                   (is "self_contained ?B")
proof-
  { fix X xrhs'
    assume "(X, xrhs') \<in> ?B"
    then obtain xrhs 
      where xrhs_xrhs': "xrhs' = rhs_subst xrhs Y (arden_variate Y yrhs)"
      and X_in: "(X, xrhs) \<in> ES" by (simp add:eqs_subst_def, blast)    
    have "classes_of xrhs' \<subseteq> lefts_of ?B"
    proof-
      have "lefts_of ?B = lefts_of ES" by (auto simp add:lefts_of_def eqs_subst_def)
      moreover have "classes_of xrhs' \<subseteq> lefts_of ES"
      proof-
        have "classes_of xrhs' \<subseteq> 
                        classes_of xrhs \<union> classes_of (arden_variate Y yrhs) - {Y}"
        proof-
          have "Y \<notin> classes_of (arden_variate Y yrhs)" 
            using arden_variate_removes_cl by simp
          thus ?thesis using xrhs_xrhs' by (auto simp:rhs_subst_updates_cls)
        qed
        moreover have "classes_of xrhs \<subseteq> lefts_of ES \<union> {Y}" using X_in sc
          apply (simp only:self_contained_def lefts_of_union_distrib[THEN sym])
          by (drule_tac x = "(X, xrhs)" in bspec, auto simp:lefts_of_def)
        moreover have "classes_of (arden_variate Y yrhs) \<subseteq> lefts_of ES \<union> {Y}" 
          using sc 
          by (auto simp add:arden_variate_removes_cl self_contained_def lefts_of_def)
        ultimately show ?thesis by auto
      qed
      ultimately show ?thesis by simp
    qed
  } thus ?thesis by (auto simp only:eqs_subst_def self_contained_def)
qed

lemma eqs_subst_satisfy_Inv:
  assumes Inv_ES: "Inv (ES \<union> {(Y, yrhs)})"
  shows "Inv (eqs_subst ES Y (arden_variate Y yrhs))"
proof -  
  have finite_yrhs: "finite yrhs" 
    using Inv_ES by (auto simp:Inv_def finite_rhs_def)
  have nonempty_yrhs: "rhs_nonempty yrhs" 
    using Inv_ES by (auto simp:Inv_def ardenable_def)
  have Y_eq_yrhs: "Y = L yrhs" 
    using Inv_ES by (simp only:Inv_def valid_eqns_def, blast)
  have "distinct_equas (eqs_subst ES Y (arden_variate Y yrhs))" 
    using Inv_ES
    by (auto simp:distinct_equas_def eqs_subst_def Inv_def)
  moreover have "finite (eqs_subst ES Y (arden_variate Y yrhs))" 
    using Inv_ES by (simp add:Inv_def eqs_subst_keeps_finite)
  moreover have "finite_rhs (eqs_subst ES Y (arden_variate Y yrhs))"
  proof-
    have "finite_rhs ES" using Inv_ES 
      by (simp add:Inv_def finite_rhs_def)
    moreover have "finite (arden_variate Y yrhs)"
    proof -
      have "finite yrhs" using Inv_ES 
        by (auto simp:Inv_def finite_rhs_def)
      thus ?thesis using arden_variate_keeps_finite by simp
    qed
    ultimately show ?thesis 
      by (simp add:eqs_subst_keeps_finite_rhs)
  qed
  moreover have "ardenable (eqs_subst ES Y (arden_variate Y yrhs))"
  proof - 
    { fix X rhs
      assume "(X, rhs) \<in> ES"
      hence "rhs_nonempty rhs"  using prems Inv_ES  
        by (simp add:Inv_def ardenable_def)
      with nonempty_yrhs 
      have "rhs_nonempty (rhs_subst rhs Y (arden_variate Y yrhs))"
        by (simp add:nonempty_yrhs 
               rhs_subst_keeps_nonempty arden_variate_keeps_nonempty)
    } thus ?thesis by (auto simp add:ardenable_def eqs_subst_def)
  qed
  moreover have "valid_eqns (eqs_subst ES Y (arden_variate Y yrhs))"
  proof-
    have "Y = L (arden_variate Y yrhs)" 
      using Y_eq_yrhs Inv_ES finite_yrhs nonempty_yrhs      
      by (rule_tac arden_variate_keeps_eq, (simp add:rexp_of_empty)+)
    thus ?thesis using Inv_ES 
      by (clarsimp simp add:valid_eqns_def 
              eqs_subst_def rhs_subst_keeps_eq Inv_def finite_rhs_def
                   simp del:L_rhs.simps)
  qed
  moreover have 
    non_empty_subst: "non_empty (eqs_subst ES Y (arden_variate Y yrhs))"
    using Inv_ES by (auto simp:Inv_def non_empty_def eqs_subst_def)
  moreover 
  have self_subst: "self_contained (eqs_subst ES Y (arden_variate Y yrhs))"
    using Inv_ES eqs_subst_keeps_self_contained by (simp add:Inv_def)
  ultimately show ?thesis using Inv_ES by (simp add:Inv_def)
qed

lemma eqs_subst_card_le: 
  assumes finite: "finite (ES::(string set \<times> rhs_item set) set)"
  shows "card (eqs_subst ES Y yrhs) <= card ES"
proof-
  def f \<equiv> "\<lambda> x. ((fst x)::string set, rhs_subst (snd x) Y yrhs)"
  have "eqs_subst ES Y yrhs = f ` ES" 
    apply (auto simp:eqs_subst_def f_def image_def)
    by (rule_tac x = "(Ya, yrhsa)" in bexI, simp+)
  thus ?thesis using finite by (auto intro:card_image_le)
qed

lemma eqs_subst_cls_remains: 
  "(X, xrhs) \<in> ES \<Longrightarrow> \<exists> xrhs'. (X, xrhs') \<in> (eqs_subst ES Y yrhs)"
by (auto simp:eqs_subst_def)

lemma card_noteq_1_has_more:
  assumes card:"card S \<noteq> 1"
  and e_in: "e \<in> S"
  and finite: "finite S"
  obtains e' where "e' \<in> S \<and> e \<noteq> e'" 
proof-
  have "card (S - {e}) > 0"
  proof -
    have "card S > 1" using card e_in finite  
      by (case_tac "card S", auto) 
    thus ?thesis using finite e_in by auto
  qed
  hence "S - {e} \<noteq> {}" using finite by (rule_tac notI, simp)
  thus "(\<And>e'. e' \<in> S \<and> e \<noteq> e' \<Longrightarrow> thesis) \<Longrightarrow> thesis" by auto
qed

lemma iteration_step: 
  assumes Inv_ES: "Inv ES"
  and    X_in_ES: "(X, xrhs) \<in> ES"
  and    not_T: "card ES \<noteq> 1"
  shows "\<exists> ES'. (Inv ES' \<and> (\<exists> xrhs'.(X, xrhs') \<in> ES')) \<and> 
                (card ES', card ES) \<in> less_than" (is "\<exists> ES'. ?P ES'")
proof -
  have finite_ES: "finite ES" using Inv_ES by (simp add:Inv_def)
  then obtain Y yrhs 
    where Y_in_ES: "(Y, yrhs) \<in> ES" and not_eq: "(X, xrhs) \<noteq> (Y, yrhs)" 
    using not_T X_in_ES by (drule_tac card_noteq_1_has_more, auto)
  def ES' == "ES - {(Y, yrhs)}"
  let ?ES'' = "eqs_subst ES' Y (arden_variate Y yrhs)"
  have "?P ?ES''"
  proof -
    have "Inv ?ES''" using Y_in_ES Inv_ES
      by (rule_tac eqs_subst_satisfy_Inv, simp add:ES'_def insert_absorb)
    moreover have "\<exists>xrhs'. (X, xrhs') \<in> ?ES''"  using not_eq X_in_ES
      by (rule_tac ES = ES' in eqs_subst_cls_remains, auto simp add:ES'_def)
    moreover have "(card ?ES'', card ES) \<in> less_than" 
    proof -
      have "finite ES'" using finite_ES ES'_def by auto
      moreover have "card ES' < card ES" using finite_ES Y_in_ES
        by (auto simp:ES'_def card_gt_0_iff intro:diff_Suc_less)
      ultimately show ?thesis 
        by (auto dest:eqs_subst_card_le elim:le_less_trans)
    qed
    ultimately show ?thesis by simp
  qed
  thus ?thesis by blast
qed

subsubsection {*
  Conclusion of the proof
  *}

text {*
  From this point until @{text "hard_direction"}, the hard direction is proved
  through a simple application of the iteration principle.
*}

lemma iteration_conc: 
  assumes history: "Inv ES"
  and    X_in_ES: "\<exists> xrhs. (X, xrhs) \<in> ES"
  shows 
  "\<exists> ES'. (Inv ES' \<and> (\<exists> xrhs'. (X, xrhs') \<in> ES')) \<and> card ES' = 1" 
                                                          (is "\<exists> ES'. ?P ES'")
proof (cases "card ES = 1")
  case True
  thus ?thesis using history X_in_ES
    by blast
next
  case False  
  thus ?thesis using history iteration_step X_in_ES
    by (rule_tac f = card in wf_iter, auto)
qed
  
lemma last_cl_exists_rexp:
  assumes ES_single: "ES = {(X, xrhs)}" 
  and Inv_ES: "Inv ES"
  shows "\<exists> (r::rexp). L r = X" (is "\<exists> r. ?P r")
proof-
  let ?A = "arden_variate X xrhs"
  have "?P (rexp_of_lam ?A)"
  proof -
    have "L (rexp_of_lam ?A) = L (lam_of ?A)"
    proof(rule rexp_of_lam_eq_lam_set)
      show "finite (arden_variate X xrhs)" using Inv_ES ES_single 
        by (rule_tac arden_variate_keeps_finite, 
                       auto simp add:Inv_def finite_rhs_def)
    qed
    also have "\<dots> = L ?A"
    proof-
      have "lam_of ?A = ?A"
      proof-
        have "classes_of ?A = {}" using Inv_ES ES_single
          by (simp add:arden_variate_removes_cl 
                       self_contained_def Inv_def lefts_of_def) 
        thus ?thesis 
          by (auto simp only:lam_of_def classes_of_def, case_tac x, auto)
      qed
      thus ?thesis by simp
    qed
    also have "\<dots> = X"
    proof(rule arden_variate_keeps_eq [THEN sym])
      show "X = L xrhs" using Inv_ES ES_single 
        by (auto simp only:Inv_def valid_eqns_def)  
    next
      from Inv_ES ES_single show "[] \<notin> L (rexp_of xrhs X)"
        by(simp add:Inv_def ardenable_def rexp_of_empty finite_rhs_def)
    next
      from Inv_ES ES_single show "finite xrhs" 
        by (simp add:Inv_def finite_rhs_def)
    qed
    finally show ?thesis by simp
  qed
  thus ?thesis by auto
qed
   
lemma every_eqcl_has_reg: 
  assumes finite_CS: "finite (UNIV // (\<approx>Lang))"
  and X_in_CS: "X \<in> (UNIV // (\<approx>Lang))"
  shows "\<exists> (reg::rexp). L reg = X" (is "\<exists> r. ?E r")
proof -
  from X_in_CS have "\<exists> xrhs. (X, xrhs) \<in> (eqs (UNIV  // (\<approx>Lang)))"
    by (auto simp:eqs_def init_rhs_def)
  then obtain ES xrhs where Inv_ES: "Inv ES" 
    and X_in_ES: "(X, xrhs) \<in> ES"
    and card_ES: "card ES = 1"
    using finite_CS X_in_CS init_ES_satisfy_Inv iteration_conc
    by blast
  hence ES_single_equa: "ES = {(X, xrhs)}" 
    by (auto simp:Inv_def dest!:card_Suc_Diff1 simp:card_eq_0_iff) 
  thus ?thesis using Inv_ES
    by (rule last_cl_exists_rexp)
qed

lemma finals_in_partitions:
  "finals Lang \<subseteq> (UNIV // (\<approx>Lang))"
  by (auto simp:finals_def quotient_def)   

theorem hard_direction: 
  assumes finite_CS: "finite (UNIV // (\<approx>Lang))"
  shows   "\<exists> (reg::rexp). Lang = L reg"
proof -
  have "\<forall> X \<in> (UNIV // (\<approx>Lang)). \<exists> (reg::rexp). X = L reg" 
    using finite_CS every_eqcl_has_reg by blast
  then obtain f 
    where f_prop: "\<forall> X \<in> (UNIV // (\<approx>Lang)). X = L ((f X)::rexp)" 
    by (auto dest:bchoice)
  def rs \<equiv> "f ` (finals Lang)"  
  have "Lang = \<Union> (finals Lang)" using lang_is_union_of_finals by auto
  also have "\<dots> = L (folds ALT NULL rs)" 
  proof -
    have "finite rs"
    proof -
      have "finite (finals Lang)" 
        using finite_CS finals_in_partitions[of "Lang"]   
        by (erule_tac finite_subset, simp)
      thus ?thesis using rs_def by auto
    qed
    thus ?thesis 
      using f_prop rs_def finals_in_partitions[of "Lang"] by auto
  qed
  finally show ?thesis by blast
qed 



section {* Direction: @{text "regular language \<Rightarrow> finite partitions"} *}

subsection {* The scheme for this direction *}

text {* 
  The following convenient notation @{text "x \<approx>Lang y"} means:
  string @{text "x"} and @{text "y"} are equivalent with respect to 
  language @{text "Lang"}.
  *}

definition
  str_eq ("_ \<approx>_ _")
where
  "x \<approx>Lang y \<equiv> (x, y) \<in> (\<approx>Lang)"

text {*
  The very basic scheme to show the finiteness of the partion generated by a
  language @{text "Lang"} is by attaching tags to every string. The set of
  tags are carefully choosen to make it finite.  If it can be proved that
  strings with the same tag are equivlent with respect @{text "Lang"}, then
  the partition given rise by @{text "Lang"} must be finite. The reason for
  this is a lemma in standard library (@{text "finite_imageD"}), which says:
  if the image of an injective function on a set @{text "A"} is finite, then
  @{text "A"} is finite. It can be shown that the function obtained by
  lifting @{text "tag"} to the level of equivalence classes (i.e. @{text "((op
  `) tag)"}) is injective (by lemma @{text "tag_image_injI"}) and the image of
  this function is finite (with the help of lemma @{text
  "finite_tag_imageI"}). This argument is formalized by the following lemma
  @{text "tag_finite_imageD"}.


  {\bf
  Theorems @{text "tag_image_injI"} and @{text
  "finite_tag_imageI"} do not exist. Can this comment be deleted?
  \marginpar{\bf COMMENT}
  }
*}

lemma tag_finite_imageD:
  fixes L1::"lang"
  assumes str_inj: "\<And> m n. tag m = tag n \<Longrightarrow> m \<approx>L1 n"
  and range: "finite (range tag)"
  shows "finite (UNIV // \<approx>L1)"
proof (rule_tac f = "(op `) tag" in finite_imageD)
  show "finite (op ` tag ` UNIV // \<approx>L1)" using range
    apply (rule_tac B = "Pow (tag ` UNIV)" in finite_subset)
    by (auto simp add:image_def Pow_def)
next
  show "inj_on (op ` tag) (UNIV // \<approx>L1)" 
  proof-
    { fix X Y
      assume X_in: "X \<in> UNIV // \<approx>L1"
        and  Y_in: "Y \<in> UNIV // \<approx>L1"
        and  tag_eq: "tag ` X = tag ` Y"
      then obtain x y where "x \<in> X" and "y \<in> Y" and "tag x = tag y"
        unfolding quotient_def Image_def str_eq_rel_def str_eq_def image_def
        apply simp by blast
      with X_in Y_in str_inj[of x y]
      have "X = Y" by (auto simp:quotient_def str_eq_rel_def str_eq_def) 
    } thus ?thesis unfolding inj_on_def by auto
  qed
qed

subsection {* Lemmas for basic cases *}

text {*
  The the final result of this direction is in @{text "rexp_imp_finite"},
  which is an induction on the structure of regular expressions. There is one
  case for each regular expression operator. For basic operators such as
  @{const NULL}, @{const EMPTY}, @{const CHAR}, the finiteness of their 
  language partition can be established directly with no need of tagging. 
  This section contains several technical lemma for these base cases.

  The inductive cases involve operators @{const ALT}, @{const SEQ} and @{const
  STAR}. Tagging functions need to be defined individually for each of
  them. There will be one dedicated section for each of these cases, and each
  section goes virtually the same way: gives definition of the tagging
  function and prove that strings with the same tag are equivalent.
*}

subsection {* The case for @{const "NULL"} *}

lemma quot_null_eq:
  shows "(UNIV // \<approx>{}) = ({UNIV}::lang set)"
  unfolding quotient_def Image_def str_eq_rel_def by auto

lemma quot_null_finiteI [intro]:
  shows "finite ((UNIV // \<approx>{})::lang set)"
unfolding quot_null_eq by simp


subsection {* The case for @{const "EMPTY"} *}


lemma quot_empty_subset:
  "UNIV // (\<approx>{[]}) \<subseteq> {{[]}, UNIV - {[]}}"
proof
  fix x
  assume "x \<in> UNIV // \<approx>{[]}"
  then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[]}}" 
    unfolding quotient_def Image_def by blast
  show "x \<in> {{[]}, UNIV - {[]}}"
  proof (cases "y = []")
    case True with h
    have "x = {[]}" by (auto simp: str_eq_rel_def)
    thus ?thesis by simp
  next
    case False with h
    have "x = UNIV - {[]}" by (auto simp: str_eq_rel_def)
    thus ?thesis by simp
  qed
qed

lemma quot_empty_finiteI [intro]:
  shows "finite (UNIV // (\<approx>{[]}))"
by (rule finite_subset[OF quot_empty_subset]) (simp)


subsection {* The case for @{const "CHAR"} *}

lemma quot_char_subset:
  "UNIV // (\<approx>{[c]}) \<subseteq> {{[]},{[c]}, UNIV - {[], [c]}}"
proof 
  fix x 
  assume "x \<in> UNIV // \<approx>{[c]}"
  then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[c]}}" 
    unfolding quotient_def Image_def by blast
  show "x \<in> {{[]},{[c]}, UNIV - {[], [c]}}"
  proof -
    { assume "y = []" hence "x = {[]}" using h 
        by (auto simp:str_eq_rel_def)
    } moreover {
      assume "y = [c]" hence "x = {[c]}" using h 
        by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def)
    } moreover {
      assume "y \<noteq> []" and "y \<noteq> [c]"
      hence "\<forall> z. (y @ z) \<noteq> [c]" by (case_tac y, auto)
      moreover have "\<And> p. (p \<noteq> [] \<and> p \<noteq> [c]) = (\<forall> q. p @ q \<noteq> [c])" 
        by (case_tac p, auto)
      ultimately have "x = UNIV - {[],[c]}" using h
        by (auto simp add:str_eq_rel_def)
    } ultimately show ?thesis by blast
  qed
qed

lemma quot_char_finiteI [intro]:
  shows "finite (UNIV // (\<approx>{[c]}))"
by (rule finite_subset[OF quot_char_subset]) (simp)


subsection {* The case for @{const "SEQ"}*}

definition 
  "tag_str_SEQ L\<^isub>1 L\<^isub>2 x \<equiv> 
       ((\<approx>L\<^isub>1) `` {x}, {(\<approx>L\<^isub>2) `` {x - xa}| xa.  xa \<le> x \<and> xa \<in> L\<^isub>1})"

lemma tag_str_seq_range_finite:
  "\<lbrakk>finite (UNIV // \<approx>L\<^isub>1); finite (UNIV // \<approx>L\<^isub>2)\<rbrakk> 
                              \<Longrightarrow> finite (range (tag_str_SEQ L\<^isub>1 L\<^isub>2))"
apply (rule_tac B = "(UNIV // \<approx>L\<^isub>1) \<times> (Pow (UNIV // \<approx>L\<^isub>2))" in finite_subset)
by (auto simp:tag_str_SEQ_def Image_def quotient_def split:if_splits)

lemma append_seq_elim:
  assumes "x @ y \<in> L\<^isub>1 ;; L\<^isub>2"
  shows "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2) \<or> 
          (\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2)"
proof-
  from assms obtain s\<^isub>1 s\<^isub>2 
    where "x @ y = s\<^isub>1 @ s\<^isub>2" 
    and in_seq: "s\<^isub>1 \<in> L\<^isub>1 \<and> s\<^isub>2 \<in> L\<^isub>2" 
    by (auto simp:Seq_def)
  hence "(x \<le> s\<^isub>1 \<and> (s\<^isub>1 - x) @ s\<^isub>2 = y) \<or> (s\<^isub>1 \<le> x \<and> (x - s\<^isub>1) @ y = s\<^isub>2)"
    using app_eq_dest by auto
  moreover have "\<lbrakk>x \<le> s\<^isub>1; (s\<^isub>1 - x) @ s\<^isub>2 = y\<rbrakk> \<Longrightarrow> 
                       \<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2" 
    using in_seq by (rule_tac x = "s\<^isub>1 - x" in exI, auto elim:prefixE)
  moreover have "\<lbrakk>s\<^isub>1 \<le> x; (x - s\<^isub>1) @ y = s\<^isub>2\<rbrakk> \<Longrightarrow> 
                    \<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2" 
    using in_seq by (rule_tac x = s\<^isub>1 in exI, auto)
  ultimately show ?thesis by blast
qed

lemma tag_str_SEQ_injI:
  "tag_str_SEQ L\<^isub>1 L\<^isub>2 m = tag_str_SEQ L\<^isub>1 L\<^isub>2 n \<Longrightarrow> m \<approx>(L\<^isub>1 ;; L\<^isub>2) n"
proof-
  { fix x y z
    assume xz_in_seq: "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"
    and tag_xy: "tag_str_SEQ L\<^isub>1 L\<^isub>2 x = tag_str_SEQ L\<^isub>1 L\<^isub>2 y"
    have"y @ z \<in> L\<^isub>1 ;; L\<^isub>2" 
    proof-
      have "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ z \<in> L\<^isub>2) \<or> 
               (\<exists> za \<le> z. (x @ za) \<in> L\<^isub>1 \<and> (z - za) \<in> L\<^isub>2)"
        using xz_in_seq append_seq_elim by simp
      moreover {
        fix xa
        assume h1: "xa \<le> x" and h2: "xa \<in> L\<^isub>1" and h3: "(x - xa) @ z \<in> L\<^isub>2"
        obtain ya where "ya \<le> y" and "ya \<in> L\<^isub>1" and "(y - ya) @ z \<in> L\<^isub>2" 
        proof -
          have "\<exists> ya.  ya \<le> y \<and> ya \<in> L\<^isub>1 \<and> (x - xa) \<approx>L\<^isub>2 (y - ya)"
          proof -
            have "{\<approx>L\<^isub>2 `` {x - xa} |xa. xa \<le> x \<and> xa \<in> L\<^isub>1} = 
                  {\<approx>L\<^isub>2 `` {y - xa} |xa. xa \<le> y \<and> xa \<in> L\<^isub>1}" 
                          (is "?Left = ?Right") 
              using h1 tag_xy by (auto simp:tag_str_SEQ_def)
            moreover have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Left" using h1 h2 by auto
            ultimately have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Right" by simp
            thus ?thesis by (auto simp:Image_def str_eq_rel_def str_eq_def)
          qed
          with prems show ?thesis by (auto simp:str_eq_rel_def str_eq_def)
        qed
        hence "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" by (erule_tac prefixE, auto simp:Seq_def)          
      } moreover {
        fix za
        assume h1: "za \<le> z" and h2: "(x @ za) \<in> L\<^isub>1" and h3: "z - za \<in> L\<^isub>2"
        hence "y @ za \<in> L\<^isub>1"
        proof-
          have "\<approx>L\<^isub>1 `` {x} = \<approx>L\<^isub>1 `` {y}" 
            using h1 tag_xy by (auto simp:tag_str_SEQ_def)
          with h2 show ?thesis 
            by (auto simp:Image_def str_eq_rel_def str_eq_def) 
        qed
        with h1 h3 have "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" 
          by (drule_tac A = L\<^isub>1 in seq_intro, auto elim:prefixE)
      }
      ultimately show ?thesis by blast
    qed
  } thus "tag_str_SEQ L\<^isub>1 L\<^isub>2 m = tag_str_SEQ L\<^isub>1 L\<^isub>2 n \<Longrightarrow> m \<approx>(L\<^isub>1 ;; L\<^isub>2) n" 
    by (auto simp add: str_eq_def str_eq_rel_def)
qed 

lemma quot_seq_finiteI [intro]:
  "\<lbrakk>finite (UNIV // \<approx>L\<^isub>1); finite (UNIV // \<approx>L\<^isub>2)\<rbrakk> 
  \<Longrightarrow> finite (UNIV // \<approx>(L\<^isub>1 ;; L\<^isub>2))"
  apply (rule_tac tag = "tag_str_SEQ L\<^isub>1 L\<^isub>2" in tag_finite_imageD)  
  by (auto intro:tag_str_SEQ_injI elim:tag_str_seq_range_finite)


subsection {* The case for @{const "ALT"} *}

definition 
  tag_str_ALT :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang)"
where
  "tag_str_ALT L1 L2 = (\<lambda>x. (\<approx>L1 `` {x}, \<approx>L2 `` {x}))"


lemma quot_union_finiteI [intro]:
  fixes L1 L2::"lang"
  assumes finite1: "finite (UNIV // \<approx>L1)"
  and     finite2: "finite (UNIV // \<approx>L2)"
  shows "finite (UNIV // \<approx>(L1 \<union> L2))"
proof (rule_tac tag = "tag_str_ALT L1 L2" in tag_finite_imageD)
  show "\<And>x y. tag_str_ALT L1 L2 x = tag_str_ALT L1 L2 y \<Longrightarrow> x \<approx>(L1 \<union> L2) y"
    unfolding tag_str_ALT_def 
    unfolding str_eq_def
    unfolding Image_def 
    unfolding str_eq_rel_def
    by auto
next
  have *: "finite ((UNIV // \<approx>L1) \<times> (UNIV // \<approx>L2))" using finite1 finite2 by auto
  show "finite (range (tag_str_ALT L1 L2))"
    unfolding tag_str_ALT_def
    apply(rule finite_subset[OF _ *])
    unfolding quotient_def
    by auto
qed


subsection {* The case for @{const "STAR"} *}

text {* 
  This turned out to be the trickiest case. 
  *} (* I will make some illustrations for it. *)

definition 
  "tag_str_STAR L\<^isub>1 x \<equiv> {(\<approx>L\<^isub>1) `` {x - xa} | xa. xa < x \<and> xa \<in> L\<^isub>1\<star>}"

lemma finite_set_has_max: "\<lbrakk>finite A; A \<noteq> {}\<rbrakk> \<Longrightarrow> 
           (\<exists> max \<in> A. \<forall> a \<in> A. f a <= (f max :: nat))"
proof (induct rule:finite.induct)
  case emptyI thus ?case by simp
next
  case (insertI A a)
  show ?case
  proof (cases "A = {}")
    case True thus ?thesis by (rule_tac x = a in bexI, auto)
  next
    case False
    with prems obtain max 
      where h1: "max \<in> A" 
      and h2: "\<forall>a\<in>A. f a \<le> f max" by blast
    show ?thesis
    proof (cases "f a \<le> f max")
      assume "f a \<le> f max"
      with h1 h2 show ?thesis by (rule_tac x = max in bexI, auto)
    next
      assume "\<not> (f a \<le> f max)"
      thus ?thesis using h2 by (rule_tac x = a in bexI, auto)
    qed
  qed
qed

lemma finite_strict_prefix_set: "finite {xa. xa < (x::string)}"
apply (induct x rule:rev_induct, simp)
apply (subgoal_tac "{xa. xa < xs @ [x]} = {xa. xa < xs} \<union> {xs}")
by (auto simp:strict_prefix_def)


lemma tag_str_star_range_finite:
  "finite (UNIV // \<approx>L\<^isub>1) \<Longrightarrow> finite (range (tag_str_STAR L\<^isub>1))"
apply (rule_tac B = "Pow (UNIV // \<approx>L\<^isub>1)" in finite_subset)
by (auto simp:tag_str_STAR_def Image_def 
                       quotient_def split:if_splits)

lemma tag_str_STAR_injI:
  "tag_str_STAR L\<^isub>1 m = tag_str_STAR L\<^isub>1 n \<Longrightarrow> m \<approx>(L\<^isub>1\<star>) n"
proof-
  { fix x y z
    assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>"
    and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y"
    have "y @ z \<in> L\<^isub>1\<star>"
    proof(cases "x = []")
      case True
      with tag_xy have "y = []" 
        by (auto simp:tag_str_STAR_def strict_prefix_def)
      thus ?thesis using xz_in_star True by simp
    next
      case False
      obtain x_max 
        where h1: "x_max < x" 
        and h2: "x_max \<in> L\<^isub>1\<star>" 
        and h3: "(x - x_max) @ z \<in> L\<^isub>1\<star>" 
        and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star> 
                                     \<longrightarrow> length xa \<le> length x_max"
      proof-
        let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}"
        have "finite ?S"
          by (rule_tac B = "{xa. xa < x}" in finite_subset, 
                                auto simp:finite_strict_prefix_set)
        moreover have "?S \<noteq> {}" using False xz_in_star
          by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def)
        ultimately have "\<exists> max \<in> ?S. \<forall> a \<in> ?S. length a \<le> length max" 
          using finite_set_has_max by blast
        with prems show ?thesis by blast
      qed
      obtain ya 
        where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" and h7: "(x - x_max) \<approx>L\<^isub>1 (y - ya)"
      proof-
        from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} = 
          {\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right")
          by (auto simp:tag_str_STAR_def)
        moreover have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?left" using h1 h2 by auto
        ultimately have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?right" by simp
        with prems show ?thesis apply 
          (simp add:Image_def str_eq_rel_def str_eq_def) by blast
      qed      
      have "(y - ya) @ z \<in> L\<^isub>1\<star>" 
      proof-
        from h3 h1 obtain a b where a_in: "a \<in> L\<^isub>1" 
          and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>" 
          and ab_max: "(x - x_max) @ z = a @ b" 
          by (drule_tac star_decom, auto simp:strict_prefix_def elim:prefixE)
        have "(x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z" 
        proof -
          have "((x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z) \<or> 
                            (a < (x - x_max) \<and> ((x - x_max) - a) @ z = b)" 
            using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def)
          moreover { 
            assume np: "a < (x - x_max)" and b_eqs: " ((x - x_max) - a) @ z = b"
            have "False"
            proof -
              let ?x_max' = "x_max @ a"
              have "?x_max' < x" 
                using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) 
              moreover have "?x_max' \<in> L\<^isub>1\<star>" 
                using a_in h2 by (simp add:star_intro3) 
              moreover have "(x - ?x_max') @ z \<in> L\<^isub>1\<star>" 
                using b_eqs b_in np h1 by (simp add:diff_diff_appd)
              moreover have "\<not> (length ?x_max' \<le> length x_max)" 
                using a_neq by simp
              ultimately show ?thesis using h4 by blast
            qed 
          } ultimately show ?thesis by blast
        qed
        then obtain za where z_decom: "z = za @ b" 
          and x_za: "(x - x_max) @ za \<in> L\<^isub>1" 
          using a_in by (auto elim:prefixE)        
        from x_za h7 have "(y - ya) @ za \<in> L\<^isub>1" 
          by (auto simp:str_eq_def str_eq_rel_def)
        with z_decom b_in show ?thesis by (auto dest!:step[of "(y - ya) @ za"])
      qed
      with h5 h6 show ?thesis 
        by (drule_tac star_intro1, auto simp:strict_prefix_def elim:prefixE)
    qed      
  } thus "tag_str_STAR L\<^isub>1 m = tag_str_STAR L\<^isub>1 n \<Longrightarrow> m \<approx>(L\<^isub>1\<star>) n"
    by (auto simp add:str_eq_def str_eq_rel_def)
qed

lemma quot_star_finiteI [intro]:
  "finite (UNIV // \<approx>L\<^isub>1) \<Longrightarrow> finite (UNIV // \<approx>(L\<^isub>1\<star>))"
  apply (rule_tac tag = "tag_str_STAR L\<^isub>1" in tag_finite_imageD)
  by (auto intro:tag_str_STAR_injI elim:tag_str_star_range_finite)


subsection {* The main lemma *}

lemma rexp_imp_finite:
  fixes r::"rexp"
  shows "finite (UNIV // \<approx>(L r))"
by (induct r) (auto)


end
author	urbanc
	Thu, 27 Jan 2011 00:51:46 +0000
changeset 39	a59473f0229d
parent 35	d2ddce8b36fd
child 40	50d00d7dc413
permissions	-rw-r--r--