theory Myhill
imports Main List_Prefix Prefix_subtract Prelude
begin
(*
text {*
\begin{figure}
\centering
\scalebox{0.95}{
\begin{tikzpicture}[->,>=latex,shorten >=1pt,auto,node distance=1.2cm, semithick]
\node[state,initial] (n1) {$1$};
\node[state,accepting] (n2) [right = 10em of n1] {$2$};
\path (n1) edge [bend left] node {$0$} (n2)
(n1) edge [loop above] node{$1$} (n1)
(n2) edge [loop above] node{$0$} (n2)
(n2) edge [bend left] node {$1$} (n1)
;
\end{tikzpicture}}
\caption{An example automaton (or partition)}\label{fig:example_automata}
\end{figure}
*}
*)
section {* Preliminary definitions *}
types lang = "string set"
text {*
Sequential composition of two languages @{text "L1"} and @{text "L2"}
*}
definition Seq :: "lang \<Rightarrow> lang \<Rightarrow> lang" ("_ ;; _" [100,100] 100)
where
"L1 ;; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"
text {* Transitive closure of language @{text "L"}. *}
inductive_set
Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
for L :: "string set"
where
start[intro]: "[] \<in> L\<star>"
| step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1@s2 \<in> L\<star>"
text {* Some properties of operator @{text ";;"}.*}
lemma seq_union_distrib:
"(A \<union> B) ;; C = (A ;; C) \<union> (B ;; C)"
by (auto simp:Seq_def)
lemma seq_intro:
"\<lbrakk>x \<in> A; y \<in> B\<rbrakk> \<Longrightarrow> x @ y \<in> A ;; B "
by (auto simp:Seq_def)
lemma seq_assoc:
"(A ;; B) ;; C = A ;; (B ;; C)"
apply(auto simp:Seq_def)
apply blast
by (metis append_assoc)
lemma star_intro1[rule_format]: "x \<in> lang\<star> \<Longrightarrow> \<forall> y. y \<in> lang\<star> \<longrightarrow> x @ y \<in> lang\<star>"
by (erule Star.induct, auto)
lemma star_intro2: "y \<in> lang \<Longrightarrow> y \<in> lang\<star>"
by (drule step[of y lang "[]"], auto simp:start)
lemma star_intro3[rule_format]:
"x \<in> lang\<star> \<Longrightarrow> \<forall>y . y \<in> lang \<longrightarrow> x @ y \<in> lang\<star>"
by (erule Star.induct, auto intro:star_intro2)
lemma star_decom:
"\<lbrakk>x \<in> lang\<star>; x \<noteq> []\<rbrakk> \<Longrightarrow>(\<exists> a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> lang \<and> b \<in> lang\<star>)"
by (induct x rule: Star.induct, simp, blast)
lemma star_decom':
"\<lbrakk>x \<in> lang\<star>; x \<noteq> []\<rbrakk> \<Longrightarrow> \<exists>a b. x = a @ b \<and> a \<in> lang\<star> \<and> b \<in> lang"
apply (induct x rule:Star.induct, simp)
apply (case_tac "s2 = []")
apply (rule_tac x = "[]" in exI, rule_tac x = s1 in exI, simp add:start)
apply (simp, (erule exE| erule conjE)+)
by (rule_tac x = "s1 @ a" in exI, rule_tac x = b in exI, simp add:step)
text {* Ardens lemma expressed at the level of language, rather than the level of regular expression. *}
theorem ardens_revised:
assumes nemp: "[] \<notin> A"
shows "(X = X ;; A \<union> B) \<longleftrightarrow> (X = B ;; A\<star>)"
proof
assume eq: "X = B ;; A\<star>"
have "A\<star> = {[]} \<union> A\<star> ;; A"
by (auto simp:Seq_def star_intro3 star_decom')
then have "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"
unfolding Seq_def by simp
also have "\<dots> = B \<union> B ;; (A\<star> ;; A)"
unfolding Seq_def by auto
also have "\<dots> = B \<union> (B ;; A\<star>) ;; A"
by (simp only:seq_assoc)
finally show "X = X ;; A \<union> B"
using eq by blast
next
assume eq': "X = X ;; A \<union> B"
hence c1': "\<And> x. x \<in> B \<Longrightarrow> x \<in> X"
and c2': "\<And> x y. \<lbrakk>x \<in> X; y \<in> A\<rbrakk> \<Longrightarrow> x @ y \<in> X"
using Seq_def by auto
show "X = B ;; A\<star>"
proof
show "B ;; A\<star> \<subseteq> X"
proof-
{ fix x y
have "\<lbrakk>y \<in> A\<star>; x \<in> X\<rbrakk> \<Longrightarrow> x @ y \<in> X "
apply (induct arbitrary:x rule:Star.induct, simp)
by (auto simp only:append_assoc[THEN sym] dest:c2')
} thus ?thesis using c1' by (auto simp:Seq_def)
qed
next
show "X \<subseteq> B ;; A\<star>"
proof-
{ fix x
have "x \<in> X \<Longrightarrow> x \<in> B ;; A\<star>"
proof (induct x taking:length rule:measure_induct)
fix z
assume hyps:
"\<forall>y. length y < length z \<longrightarrow> y \<in> X \<longrightarrow> y \<in> B ;; A\<star>"
and z_in: "z \<in> X"
show "z \<in> B ;; A\<star>"
proof (cases "z \<in> B")
case True thus ?thesis by (auto simp:Seq_def start)
next
case False hence "z \<in> X ;; A" using eq' z_in by auto
then obtain za zb where za_in: "za \<in> X"
and zab: "z = za @ zb \<and> zb \<in> A" and zbne: "zb \<noteq> []"
using nemp unfolding Seq_def by blast
from zbne zab have "length za < length z" by auto
with za_in hyps have "za \<in> B ;; A\<star>" by blast
hence "za @ zb \<in> B ;; A\<star>" using zab
by (clarsimp simp:Seq_def, blast dest:star_intro3)
thus ?thesis using zab by simp
qed
qed
} thus ?thesis by blast
qed
qed
qed
text {* The syntax of regular expressions is defined by the datatype @{text "rexp"}. *}
datatype rexp =
NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp
text {*
The following @{text "L"} is an overloaded operator, where @{text "L(x)"} evaluates to
the language represented by the syntactic object @{text "x"}.
*}
consts L:: "'a \<Rightarrow> string set"
text {*
The @{text "L(rexp)"} for regular expression @{text "rexp"} is defined by the
following overloading function @{text "L_rexp"}.
*}
overloading L_rexp \<equiv> "L:: rexp \<Rightarrow> string set"
begin
fun
L_rexp :: "rexp \<Rightarrow> string set"
where
"L_rexp (NULL) = {}"
| "L_rexp (EMPTY) = {[]}"
| "L_rexp (CHAR c) = {[c]}"
| "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)"
| "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
| "L_rexp (STAR r) = (L_rexp r)\<star>"
end
text {*
To obtain equational system out of finite set of equivalent classes, a fold operation
on finite set @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "fold"}
more robust than the @{text "fold"} in Isabelle library. The expression @{text "folds f"}
makes sense when @{text "f"} is not @{text "associative"} and @{text "commutitive"},
while @{text "fold f"} does not.
*}
definition
folds :: "('a \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> 'b \<Rightarrow> 'a set \<Rightarrow> 'b"
where
"folds f z S \<equiv> SOME x. fold_graph f z S x"
text {*
The following lemma assures that the arbitrary choice made by the @{text "SOME"} in @{text "folds"}
does not affect the @{text "L"}-value of the resultant regular expression.
*}
lemma folds_alt_simp [simp]:
"finite rs \<Longrightarrow> L (folds ALT NULL rs) = \<Union> (L ` rs)"
apply (rule set_eq_intro, simp add:folds_def)
apply (rule someI2_ex, erule finite_imp_fold_graph)
by (erule fold_graph.induct, auto)
(* Just a technical lemma. *)
lemma [simp]:
shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y"
by simp
text {*
@{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}.
*}
definition
str_eq_rel ("\<approx>_" [100] 100)
where
"\<approx>Lang \<equiv> {(x, y). (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}"
text {*
Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out
those which contains strings from @{text "Lang"}.
*}
definition
"finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}"
text {*
The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}.
*}
lemma lang_is_union_of_finals:
"Lang = \<Union> finals(Lang)"
proof
show "Lang \<subseteq> \<Union> (finals Lang)"
proof
fix x
assume "x \<in> Lang"
thus "x \<in> \<Union> (finals Lang)"
apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI)
by (auto simp:Image_def str_eq_rel_def)
qed
next
show "\<Union> (finals Lang) \<subseteq> Lang"
apply (clarsimp simp:finals_def str_eq_rel_def)
by (drule_tac x = "[]" in spec, auto)
qed
section {* Direction @{text "finite partition \<Rightarrow> regular language"}*}
text {*
The relationship between equivalent classes can be described by an
equational system.
For example, in equational system \eqref{example_eqns}, $X_0, X_1$ are equivalent
classes. The first equation says every string in $X_0$ is obtained either by
appending one $b$ to a string in $X_0$ or by appending one $a$ to a string in
$X_1$ or just be an empty string (represented by the regular expression $\lambda$). Similary,
the second equation tells how the strings inside $X_1$ are composed.
\begin{equation}\label{example_eqns}
\begin{aligned}
X_0 & = X_0 b + X_1 a + \lambda \\
X_1 & = X_0 a + X_1 b
\end{aligned}
\end{equation}
The summands on the right hand side is represented by the following data type
@{text "rhs_item"}, mnemonic for 'right hand side item'.
Generally, there are two kinds of right hand side items, one kind corresponds to
pure regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other kind corresponds to
transitions from one one equivalent class to another, like the $X_0 b, X_1 a$ etc.
*}
datatype rhs_item =
Lam "rexp" (* Lambda *)
| Trn "(string set)" "rexp" (* Transition *)
text {*
In this formalization, pure regular expressions like $\lambda$ is
repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is represented by $Trn~X_0~(CHAR~a)$.
*}
text {*
The functions @{text "the_r"} and @{text "the_Trn"} are used to extract
subcomponents from right hand side items.
*}
fun the_r :: "rhs_item \<Rightarrow> rexp"
where "the_r (Lam r) = r"
fun the_Trn:: "rhs_item \<Rightarrow> (string set \<times> rexp)"
where "the_Trn (Trn Y r) = (Y, r)"
text {*
Every right hand side item @{text "itm"} defines a string set given
@{text "L(itm)"}, defined as:
*}
overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> string set"
begin
fun L_rhs_e:: "rhs_item \<Rightarrow> string set"
where
"L_rhs_e (Lam r) = L r" |
"L_rhs_e (Trn X r) = X ;; L r"
end
text {*
The right hand side of every equation is represented by a set of
items. The string set defined by such a set @{text "itms"} is given
by @{text "L(itms)"}, defined as:
*}
overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> string set"
begin
fun L_rhs:: "rhs_item set \<Rightarrow> string set"
where "L_rhs rhs = \<Union> (L ` rhs)"
end
text {*
Given a set of equivalent classses @{text "CS"} and one equivalent class @{text "X"} among
@{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of
the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"}
is:
*}
definition
"init_rhs CS X \<equiv>
if ([] \<in> X) then
{Lam(EMPTY)} \<union> {Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}
else
{Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"
text {*
In the definition of @{text "init_rhs"}, the term
@{text "{Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"} appearing on both branches
describes the formation of strings in @{text "X"} out of transitions, while
the term @{text "{Lam(EMPTY)}"} describes the empty string which is intrinsically contained in
@{text "X"} rather than by transition. This @{text "{Lam(EMPTY)}"} corresponds to
the $\lambda$ in \eqref{example_eqns}.
With the help of @{text "init_rhs"}, the equitional system descrbing the formation of every
equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}.
*}
definition "eqs CS \<equiv> {(X, init_rhs CS X) | X. X \<in> CS}"
(************ arden's lemma variation ********************)
text {*
The following @{text "items_of rhs X"} returns all @{text "X"}-items in @{text "rhs"}.
*}
definition
"items_of rhs X \<equiv> {Trn X r | r. (Trn X r) \<in> rhs}"
text {*
The following @{text "rexp_of rhs X"} combines all regular expressions in @{text "X"}-items
using @{text "ALT"} to form a single regular expression.
It will be used later to implement @{text "arden_variate"} and @{text "rhs_subst"}.
*}
definition
"rexp_of rhs X \<equiv> folds ALT NULL ((snd o the_Trn) ` items_of rhs X)"
text {*
The following @{text "lam_of rhs"} returns all pure regular expression items in @{text "rhs"}.
*}
definition
"lam_of rhs \<equiv> {Lam r | r. Lam r \<in> rhs}"
text {*
The following @{text "rexp_of_lam rhs"} combines pure regular expression items in @{text "rhs"}
using @{text "ALT"} to form a single regular expression.
When all variables inside @{text "rhs"} are eliminated, @{text "rexp_of_lam rhs"}
is used to compute compute the regular expression corresponds to @{text "rhs"}.
*}
definition
"rexp_of_lam rhs \<equiv> folds ALT NULL (the_r ` lam_of rhs)"
text {*
The following @{text "attach_rexp rexp' itm"} attach
the regular expression @{text "rexp'"} to
the right of right hand side item @{text "itm"}.
*}
fun attach_rexp :: "rexp \<Rightarrow> rhs_item \<Rightarrow> rhs_item"
where
"attach_rexp rexp' (Lam rexp) = Lam (SEQ rexp rexp')"
| "attach_rexp rexp' (Trn X rexp) = Trn X (SEQ rexp rexp')"
text {*
The following @{text "append_rhs_rexp rhs rexp"} attaches
@{text "rexp"} to every item in @{text "rhs"}.
*}
definition
"append_rhs_rexp rhs rexp \<equiv> (attach_rexp rexp) ` rhs"
text {*
With the help of the two functions immediately above, Ardens'
transformation on right hand side @{text "rhs"} is implemented
by the following function @{text "arden_variate X rhs"}.
After this transformation, the recursive occurent of @{text "X"}
in @{text "rhs"} will be eliminated, while the
string set defined by @{text "rhs"} is kept unchanged.
*}
definition
"arden_variate X rhs \<equiv>
append_rhs_rexp (rhs - items_of rhs X) (STAR (rexp_of rhs X))"
(*********** substitution of ES *************)
text {*
Suppose the equation defining @{text "X"} is $X = xrhs$,
the purpose of @{text "rhs_subst"} is to substitute all occurences of @{text "X"} in
@{text "rhs"} by @{text "xrhs"}.
A litte thought may reveal that the final result
should be: first append $(a_1 | a_2 | \ldots | a_n)$ to every item of @{text "xrhs"} and then
union the result with all non-@{text "X"}-items of @{text "rhs"}.
*}
definition
"rhs_subst rhs X xrhs \<equiv>
(rhs - (items_of rhs X)) \<union> (append_rhs_rexp xrhs (rexp_of rhs X))"
text {*
Suppose the equation defining @{text "X"} is $X = xrhs$, the follwing
@{text "eqs_subst ES X xrhs"} substitute @{text "xrhs"} into every equation
of the equational system @{text "ES"}.
*}
definition
"eqs_subst ES X xrhs \<equiv> {(Y, rhs_subst yrhs X xrhs) | Y yrhs. (Y, yrhs) \<in> ES}"
text {*
The computation of regular expressions for equivalent classes is accomplished
using a iteration principle given by the following lemma.
*}
lemma wf_iter [rule_format]:
fixes f
assumes step: "\<And> e. \<lbrakk>P e; \<not> Q e\<rbrakk> \<Longrightarrow> (\<exists> e'. P e' \<and> (f(e'), f(e)) \<in> less_than)"
shows pe: "P e \<longrightarrow> (\<exists> e'. P e' \<and> Q e')"
proof(induct e rule: wf_induct
[OF wf_inv_image[OF wf_less_than, where f = "f"]], clarify)
fix x
assume h [rule_format]:
"\<forall>y. (y, x) \<in> inv_image less_than f \<longrightarrow> P y \<longrightarrow> (\<exists>e'. P e' \<and> Q e')"
and px: "P x"
show "\<exists>e'. P e' \<and> Q e'"
proof(cases "Q x")
assume "Q x" with px show ?thesis by blast
next
assume nq: "\<not> Q x"
from step [OF px nq]
obtain e' where pe': "P e'" and ltf: "(f e', f x) \<in> less_than" by auto
show ?thesis
proof(rule h)
from ltf show "(e', x) \<in> inv_image less_than f"
by (simp add:inv_image_def)
next
from pe' show "P e'" .
qed
qed
qed
text {*
The @{text "P"} in lemma @{text "wf_iter"} is an invaiant kept throughout the iteration procedure.
The particular invariant used to solve our problem is defined by function @{text "Inv(ES)"},
an invariant over equal system @{text "ES"}.
Every definition starting next till @{text "Inv"} stipulates a property to be satisfied by @{text "ES"}.
*}
text {*
Every variable is defined at most onece in @{text "ES"}.
*}
definition
"distinct_equas ES \<equiv>
\<forall> X rhs rhs'. (X, rhs) \<in> ES \<and> (X, rhs') \<in> ES \<longrightarrow> rhs = rhs'"
text {*
Every equation in @{text "ES"} (represented by @{text "(X, rhs)"}) is valid, i.e. @{text "(X = L rhs)"}.
*}
definition
"valid_eqns ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> (X = L rhs)"
text {*
The following @{text "rhs_nonempty rhs"} requires regular expressions occuring in transitional
items of @{text "rhs"} does not contain empty string. This is necessary for
the application of Arden's transformation to @{text "rhs"}.
*}
definition
"rhs_nonempty rhs \<equiv> (\<forall> Y r. Trn Y r \<in> rhs \<longrightarrow> [] \<notin> L r)"
text {*
The following @{text "ardenable ES"} requires that Arden's transformation is applicable
to every equation of equational system @{text "ES"}.
*}
definition
"ardenable ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> rhs_nonempty rhs"
(* The following non_empty seems useless. *)
definition
"non_empty ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> X \<noteq> {}"
text {*
The following @{text "finite_rhs ES"} requires every equation in @{text "rhs"} be finite.
*}
definition
"finite_rhs ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> finite rhs"
text {*
The following @{text "classes_of rhs"} returns all variables (or equivalent classes)
occuring in @{text "rhs"}.
*}
definition
"classes_of rhs \<equiv> {X. \<exists> r. Trn X r \<in> rhs}"
text {*
The following @{text "lefts_of ES"} returns all variables
defined by equational system @{text "ES"}.
*}
definition
"lefts_of ES \<equiv> {Y | Y yrhs. (Y, yrhs) \<in> ES}"
text {*
The following @{text "self_contained ES"} requires that every
variable occuring on the right hand side of equations is already defined by some
equation in @{text "ES"}.
*}
definition
"self_contained ES \<equiv> \<forall> (X, xrhs) \<in> ES. classes_of xrhs \<subseteq> lefts_of ES"
text {*
The invariant @{text "Inv(ES)"} is a conjunction of all the previously defined constaints.
*}
definition
"Inv ES \<equiv> valid_eqns ES \<and> finite ES \<and> distinct_equas ES \<and> ardenable ES \<and>
non_empty ES \<and> finite_rhs ES \<and> self_contained ES"
subsection {* The proof of this direction *}
subsubsection {* Basic properties *}
text {*
The following are some basic properties of the above definitions.
*}
lemma L_rhs_union_distrib:
" L (A::rhs_item set) \<union> L B = L (A \<union> B)"
by simp
lemma finite_snd_Trn:
assumes finite:"finite rhs"
shows "finite {r\<^isub>2. Trn Y r\<^isub>2 \<in> rhs}" (is "finite ?B")
proof-
def rhs' \<equiv> "{e \<in> rhs. \<exists> r. e = Trn Y r}"
have "?B = (snd o the_Trn) ` rhs'" using rhs'_def by (auto simp:image_def)
moreover have "finite rhs'" using finite rhs'_def by auto
ultimately show ?thesis by simp
qed
lemma rexp_of_empty:
assumes finite:"finite rhs"
and nonempty:"rhs_nonempty rhs"
shows "[] \<notin> L (rexp_of rhs X)"
using finite nonempty rhs_nonempty_def
by (drule_tac finite_snd_Trn[where Y = X], auto simp:rexp_of_def items_of_def)
lemma [intro!]:
"P (Trn X r) \<Longrightarrow> (\<exists>a. (\<exists>r. a = Trn X r \<and> P a))" by auto
lemma finite_items_of:
"finite rhs \<Longrightarrow> finite (items_of rhs X)"
by (auto simp:items_of_def intro:finite_subset)
lemma lang_of_rexp_of:
assumes finite:"finite rhs"
shows "L (items_of rhs X) = X ;; (L (rexp_of rhs X))"
proof -
have "finite ((snd \<circ> the_Trn) ` items_of rhs X)" using finite_items_of[OF finite] by auto
thus ?thesis
apply (auto simp:rexp_of_def Seq_def items_of_def)
apply (rule_tac x = s1 in exI, rule_tac x = s2 in exI, auto)
by (rule_tac x= "Trn X r" in exI, auto simp:Seq_def)
qed
lemma rexp_of_lam_eq_lam_set:
assumes finite: "finite rhs"
shows "L (rexp_of_lam rhs) = L (lam_of rhs)"
proof -
have "finite (the_r ` {Lam r |r. Lam r \<in> rhs})" using finite
by (rule_tac finite_imageI, auto intro:finite_subset)
thus ?thesis by (auto simp:rexp_of_lam_def lam_of_def)
qed
lemma [simp]:
" L (attach_rexp r xb) = L xb ;; L r"
apply (cases xb, auto simp:Seq_def)
by (rule_tac x = "s1 @ s1a" in exI, rule_tac x = s2a in exI,auto simp:Seq_def)
lemma lang_of_append_rhs:
"L (append_rhs_rexp rhs r) = L rhs ;; L r"
apply (auto simp:append_rhs_rexp_def image_def)
apply (auto simp:Seq_def)
apply (rule_tac x = "L xb ;; L r" in exI, auto simp add:Seq_def)
by (rule_tac x = "attach_rexp r xb" in exI, auto simp:Seq_def)
lemma classes_of_union_distrib:
"classes_of A \<union> classes_of B = classes_of (A \<union> B)"
by (auto simp add:classes_of_def)
lemma lefts_of_union_distrib:
"lefts_of A \<union> lefts_of B = lefts_of (A \<union> B)"
by (auto simp:lefts_of_def)
subsubsection {* Intialization *}
text {*
The following several lemmas until @{text "init_ES_satisfy_Inv"} shows that
the initial equational system satisfies invariant @{text "Inv"}.
*}
lemma defined_by_str:
"\<lbrakk>s \<in> X; X \<in> UNIV // (\<approx>Lang)\<rbrakk> \<Longrightarrow> X = (\<approx>Lang) `` {s}"
by (auto simp:quotient_def Image_def str_eq_rel_def)
lemma every_eqclass_has_transition:
assumes has_str: "s @ [c] \<in> X"
and in_CS: "X \<in> UNIV // (\<approx>Lang)"
obtains Y where "Y \<in> UNIV // (\<approx>Lang)" and "Y ;; {[c]} \<subseteq> X" and "s \<in> Y"
proof -
def Y \<equiv> "(\<approx>Lang) `` {s}"
have "Y \<in> UNIV // (\<approx>Lang)"
unfolding Y_def quotient_def by auto
moreover
have "X = (\<approx>Lang) `` {s @ [c]}"
using has_str in_CS defined_by_str by blast
then have "Y ;; {[c]} \<subseteq> X"
unfolding Y_def Image_def Seq_def
unfolding str_eq_rel_def
by clarsimp
moreover
have "s \<in> Y" unfolding Y_def
unfolding Image_def str_eq_rel_def by simp
ultimately show thesis by (blast intro: that)
qed
lemma l_eq_r_in_eqs:
assumes X_in_eqs: "(X, xrhs) \<in> (eqs (UNIV // (\<approx>Lang)))"
shows "X = L xrhs"
proof
show "X \<subseteq> L xrhs"
proof
fix x
assume "(1)": "x \<in> X"
show "x \<in> L xrhs"
proof (cases "x = []")
assume empty: "x = []"
thus ?thesis using X_in_eqs "(1)"
by (auto simp:eqs_def init_rhs_def)
next
assume not_empty: "x \<noteq> []"
then obtain clist c where decom: "x = clist @ [c]"
by (case_tac x rule:rev_cases, auto)
have "X \<in> UNIV // (\<approx>Lang)" using X_in_eqs by (auto simp:eqs_def)
then obtain Y
where "Y \<in> UNIV // (\<approx>Lang)"
and "Y ;; {[c]} \<subseteq> X"
and "clist \<in> Y"
using decom "(1)" every_eqclass_has_transition by blast
hence
"x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y ;; {[c]} \<subseteq> X}"
using "(1)" decom
by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def)
thus ?thesis using X_in_eqs "(1)"
by (simp add:eqs_def init_rhs_def)
qed
qed
next
show "L xrhs \<subseteq> X" using X_in_eqs
by (auto simp:eqs_def init_rhs_def)
qed
lemma finite_init_rhs:
assumes finite: "finite CS"
shows "finite (init_rhs CS X)"
proof-
have "finite {Trn Y (CHAR c) |Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}" (is "finite ?A")
proof -
def S \<equiv> "{(Y, c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"
def h \<equiv> "\<lambda> (Y, c). Trn Y (CHAR c)"
have "finite (CS \<times> (UNIV::char set))" using finite by auto
hence "finite S" using S_def
by (rule_tac B = "CS \<times> UNIV" in finite_subset, auto)
moreover have "?A = h ` S" by (auto simp: S_def h_def image_def)
ultimately show ?thesis
by auto
qed
thus ?thesis by (simp add:init_rhs_def)
qed
lemma init_ES_satisfy_Inv:
assumes finite_CS: "finite (UNIV // (\<approx>Lang))"
shows "Inv (eqs (UNIV // (\<approx>Lang)))"
proof -
have "finite (eqs (UNIV // (\<approx>Lang)))" using finite_CS
by (simp add:eqs_def)
moreover have "distinct_equas (eqs (UNIV // (\<approx>Lang)))"
by (simp add:distinct_equas_def eqs_def)
moreover have "ardenable (eqs (UNIV // (\<approx>Lang)))"
by (auto simp add:ardenable_def eqs_def init_rhs_def rhs_nonempty_def del:L_rhs.simps)
moreover have "valid_eqns (eqs (UNIV // (\<approx>Lang)))"
using l_eq_r_in_eqs by (simp add:valid_eqns_def)
moreover have "non_empty (eqs (UNIV // (\<approx>Lang)))"
by (auto simp:non_empty_def eqs_def quotient_def Image_def str_eq_rel_def)
moreover have "finite_rhs (eqs (UNIV // (\<approx>Lang)))"
using finite_init_rhs[OF finite_CS]
by (auto simp:finite_rhs_def eqs_def)
moreover have "self_contained (eqs (UNIV // (\<approx>Lang)))"
by (auto simp:self_contained_def eqs_def init_rhs_def classes_of_def lefts_of_def)
ultimately show ?thesis by (simp add:Inv_def)
qed
subsubsection {*
Interation step
*}
text {*
From this point until @{text "iteration_step"}, it is proved
that there exists iteration steps which keep @{text "Inv(ES)"} while
decreasing the size of @{text "ES"}.
*}
lemma arden_variate_keeps_eq:
assumes l_eq_r: "X = L rhs"
and not_empty: "[] \<notin> L (rexp_of rhs X)"
and finite: "finite rhs"
shows "X = L (arden_variate X rhs)"
proof -
def A \<equiv> "L (rexp_of rhs X)"
def b \<equiv> "rhs - items_of rhs X"
def B \<equiv> "L b"
have "X = B ;; A\<star>"
proof-
have "rhs = items_of rhs X \<union> b" by (auto simp:b_def items_of_def)
hence "L rhs = L(items_of rhs X \<union> b)" by simp
hence "L rhs = L(items_of rhs X) \<union> B" by (simp only:L_rhs_union_distrib B_def)
with lang_of_rexp_of
have "L rhs = X ;; A \<union> B " using finite by (simp only:B_def b_def A_def)
thus ?thesis
using l_eq_r not_empty
apply (drule_tac B = B and X = X in ardens_revised)
by (auto simp:A_def simp del:L_rhs.simps)
qed
moreover have "L (arden_variate X rhs) = (B ;; A\<star>)" (is "?L = ?R")
by (simp only:arden_variate_def L_rhs_union_distrib lang_of_append_rhs
B_def A_def b_def L_rexp.simps seq_union_distrib)
ultimately show ?thesis by simp
qed
lemma append_keeps_finite:
"finite rhs \<Longrightarrow> finite (append_rhs_rexp rhs r)"
by (auto simp:append_rhs_rexp_def)
lemma arden_variate_keeps_finite:
"finite rhs \<Longrightarrow> finite (arden_variate X rhs)"
by (auto simp:arden_variate_def append_keeps_finite)
lemma append_keeps_nonempty:
"rhs_nonempty rhs \<Longrightarrow> rhs_nonempty (append_rhs_rexp rhs r)"
apply (auto simp:rhs_nonempty_def append_rhs_rexp_def)
by (case_tac x, auto simp:Seq_def)
lemma nonempty_set_sub:
"rhs_nonempty rhs \<Longrightarrow> rhs_nonempty (rhs - A)"
by (auto simp:rhs_nonempty_def)
lemma nonempty_set_union:
"\<lbrakk>rhs_nonempty rhs; rhs_nonempty rhs'\<rbrakk> \<Longrightarrow> rhs_nonempty (rhs \<union> rhs')"
by (auto simp:rhs_nonempty_def)
lemma arden_variate_keeps_nonempty:
"rhs_nonempty rhs \<Longrightarrow> rhs_nonempty (arden_variate X rhs)"
by (simp only:arden_variate_def append_keeps_nonempty nonempty_set_sub)
lemma rhs_subst_keeps_nonempty:
"\<lbrakk>rhs_nonempty rhs; rhs_nonempty xrhs\<rbrakk> \<Longrightarrow> rhs_nonempty (rhs_subst rhs X xrhs)"
by (simp only:rhs_subst_def append_keeps_nonempty nonempty_set_union nonempty_set_sub)
lemma rhs_subst_keeps_eq:
assumes substor: "X = L xrhs"
and finite: "finite rhs"
shows "L (rhs_subst rhs X xrhs) = L rhs" (is "?Left = ?Right")
proof-
def A \<equiv> "L (rhs - items_of rhs X)"
have "?Left = A \<union> L (append_rhs_rexp xrhs (rexp_of rhs X))"
by (simp only:rhs_subst_def L_rhs_union_distrib A_def)
moreover have "?Right = A \<union> L (items_of rhs X)"
proof-
have "rhs = (rhs - items_of rhs X) \<union> (items_of rhs X)" by (auto simp:items_of_def)
thus ?thesis by (simp only:L_rhs_union_distrib A_def)
qed
moreover have "L (append_rhs_rexp xrhs (rexp_of rhs X)) = L (items_of rhs X)"
using finite substor by (simp only:lang_of_append_rhs lang_of_rexp_of)
ultimately show ?thesis by simp
qed
lemma rhs_subst_keeps_finite_rhs:
"\<lbrakk>finite rhs; finite yrhs\<rbrakk> \<Longrightarrow> finite (rhs_subst rhs Y yrhs)"
by (auto simp:rhs_subst_def append_keeps_finite)
lemma eqs_subst_keeps_finite:
assumes finite:"finite (ES:: (string set \<times> rhs_item set) set)"
shows "finite (eqs_subst ES Y yrhs)"
proof -
have "finite {(Ya, rhs_subst yrhsa Y yrhs) |Ya yrhsa. (Ya, yrhsa) \<in> ES}"
(is "finite ?A")
proof-
def eqns' \<equiv> "{((Ya::string set), yrhsa)| Ya yrhsa. (Ya, yrhsa) \<in> ES}"
def h \<equiv> "\<lambda> ((Ya::string set), yrhsa). (Ya, rhs_subst yrhsa Y yrhs)"
have "finite (h ` eqns')" using finite h_def eqns'_def by auto
moreover have "?A = h ` eqns'" by (auto simp:h_def eqns'_def)
ultimately show ?thesis by auto
qed
thus ?thesis by (simp add:eqs_subst_def)
qed
lemma eqs_subst_keeps_finite_rhs:
"\<lbrakk>finite_rhs ES; finite yrhs\<rbrakk> \<Longrightarrow> finite_rhs (eqs_subst ES Y yrhs)"
by (auto intro:rhs_subst_keeps_finite_rhs simp add:eqs_subst_def finite_rhs_def)
lemma append_rhs_keeps_cls:
"classes_of (append_rhs_rexp rhs r) = classes_of rhs"
apply (auto simp:classes_of_def append_rhs_rexp_def)
apply (case_tac xa, auto simp:image_def)
by (rule_tac x = "SEQ ra r" in exI, rule_tac x = "Trn x ra" in bexI, simp+)
lemma arden_variate_removes_cl:
"classes_of (arden_variate Y yrhs) = classes_of yrhs - {Y}"
apply (simp add:arden_variate_def append_rhs_keeps_cls items_of_def)
by (auto simp:classes_of_def)
lemma lefts_of_keeps_cls:
"lefts_of (eqs_subst ES Y yrhs) = lefts_of ES"
by (auto simp:lefts_of_def eqs_subst_def)
lemma rhs_subst_updates_cls:
"X \<notin> classes_of xrhs \<Longrightarrow>
classes_of (rhs_subst rhs X xrhs) = classes_of rhs \<union> classes_of xrhs - {X}"
apply (simp only:rhs_subst_def append_rhs_keeps_cls
classes_of_union_distrib[THEN sym])
by (auto simp:classes_of_def items_of_def)
lemma eqs_subst_keeps_self_contained:
fixes Y
assumes sc: "self_contained (ES \<union> {(Y, yrhs)})" (is "self_contained ?A")
shows "self_contained (eqs_subst ES Y (arden_variate Y yrhs))"
(is "self_contained ?B")
proof-
{ fix X xrhs'
assume "(X, xrhs') \<in> ?B"
then obtain xrhs
where xrhs_xrhs': "xrhs' = rhs_subst xrhs Y (arden_variate Y yrhs)"
and X_in: "(X, xrhs) \<in> ES" by (simp add:eqs_subst_def, blast)
have "classes_of xrhs' \<subseteq> lefts_of ?B"
proof-
have "lefts_of ?B = lefts_of ES" by (auto simp add:lefts_of_def eqs_subst_def)
moreover have "classes_of xrhs' \<subseteq> lefts_of ES"
proof-
have "classes_of xrhs' \<subseteq>
classes_of xrhs \<union> classes_of (arden_variate Y yrhs) - {Y}"
proof-
have "Y \<notin> classes_of (arden_variate Y yrhs)"
using arden_variate_removes_cl by simp
thus ?thesis using xrhs_xrhs' by (auto simp:rhs_subst_updates_cls)
qed
moreover have "classes_of xrhs \<subseteq> lefts_of ES \<union> {Y}" using X_in sc
apply (simp only:self_contained_def lefts_of_union_distrib[THEN sym])
by (drule_tac x = "(X, xrhs)" in bspec, auto simp:lefts_of_def)
moreover have "classes_of (arden_variate Y yrhs) \<subseteq> lefts_of ES \<union> {Y}"
using sc
by (auto simp add:arden_variate_removes_cl self_contained_def lefts_of_def)
ultimately show ?thesis by auto
qed
ultimately show ?thesis by simp
qed
} thus ?thesis by (auto simp only:eqs_subst_def self_contained_def)
qed
lemma eqs_subst_satisfy_Inv:
assumes Inv_ES: "Inv (ES \<union> {(Y, yrhs)})"
shows "Inv (eqs_subst ES Y (arden_variate Y yrhs))"
proof -
have finite_yrhs: "finite yrhs"
using Inv_ES by (auto simp:Inv_def finite_rhs_def)
have nonempty_yrhs: "rhs_nonempty yrhs"
using Inv_ES by (auto simp:Inv_def ardenable_def)
have Y_eq_yrhs: "Y = L yrhs"
using Inv_ES by (simp only:Inv_def valid_eqns_def, blast)
have "distinct_equas (eqs_subst ES Y (arden_variate Y yrhs))"
using Inv_ES
by (auto simp:distinct_equas_def eqs_subst_def Inv_def)
moreover have "finite (eqs_subst ES Y (arden_variate Y yrhs))"
using Inv_ES by (simp add:Inv_def eqs_subst_keeps_finite)
moreover have "finite_rhs (eqs_subst ES Y (arden_variate Y yrhs))"
proof-
have "finite_rhs ES" using Inv_ES
by (simp add:Inv_def finite_rhs_def)
moreover have "finite (arden_variate Y yrhs)"
proof -
have "finite yrhs" using Inv_ES
by (auto simp:Inv_def finite_rhs_def)
thus ?thesis using arden_variate_keeps_finite by simp
qed
ultimately show ?thesis
by (simp add:eqs_subst_keeps_finite_rhs)
qed
moreover have "ardenable (eqs_subst ES Y (arden_variate Y yrhs))"
proof -
{ fix X rhs
assume "(X, rhs) \<in> ES"
hence "rhs_nonempty rhs" using prems Inv_ES
by (simp add:Inv_def ardenable_def)
with nonempty_yrhs
have "rhs_nonempty (rhs_subst rhs Y (arden_variate Y yrhs))"
by (simp add:nonempty_yrhs
rhs_subst_keeps_nonempty arden_variate_keeps_nonempty)
} thus ?thesis by (auto simp add:ardenable_def eqs_subst_def)
qed
moreover have "valid_eqns (eqs_subst ES Y (arden_variate Y yrhs))"
proof-
have "Y = L (arden_variate Y yrhs)"
using Y_eq_yrhs Inv_ES finite_yrhs nonempty_yrhs
by (rule_tac arden_variate_keeps_eq, (simp add:rexp_of_empty)+)
thus ?thesis using Inv_ES
by (clarsimp simp add:valid_eqns_def
eqs_subst_def rhs_subst_keeps_eq Inv_def finite_rhs_def
simp del:L_rhs.simps)
qed
moreover have
non_empty_subst: "non_empty (eqs_subst ES Y (arden_variate Y yrhs))"
using Inv_ES by (auto simp:Inv_def non_empty_def eqs_subst_def)
moreover
have self_subst: "self_contained (eqs_subst ES Y (arden_variate Y yrhs))"
using Inv_ES eqs_subst_keeps_self_contained by (simp add:Inv_def)
ultimately show ?thesis using Inv_ES by (simp add:Inv_def)
qed
lemma eqs_subst_card_le:
assumes finite: "finite (ES::(string set \<times> rhs_item set) set)"
shows "card (eqs_subst ES Y yrhs) <= card ES"
proof-
def f \<equiv> "\<lambda> x. ((fst x)::string set, rhs_subst (snd x) Y yrhs)"
have "eqs_subst ES Y yrhs = f ` ES"
apply (auto simp:eqs_subst_def f_def image_def)
by (rule_tac x = "(Ya, yrhsa)" in bexI, simp+)
thus ?thesis using finite by (auto intro:card_image_le)
qed
lemma eqs_subst_cls_remains:
"(X, xrhs) \<in> ES \<Longrightarrow> \<exists> xrhs'. (X, xrhs') \<in> (eqs_subst ES Y yrhs)"
by (auto simp:eqs_subst_def)
lemma card_noteq_1_has_more:
assumes card:"card S \<noteq> 1"
and e_in: "e \<in> S"
and finite: "finite S"
obtains e' where "e' \<in> S \<and> e \<noteq> e'"
proof-
have "card (S - {e}) > 0"
proof -
have "card S > 1" using card e_in finite
by (case_tac "card S", auto)
thus ?thesis using finite e_in by auto
qed
hence "S - {e} \<noteq> {}" using finite by (rule_tac notI, simp)
thus "(\<And>e'. e' \<in> S \<and> e \<noteq> e' \<Longrightarrow> thesis) \<Longrightarrow> thesis" by auto
qed
lemma iteration_step:
assumes Inv_ES: "Inv ES"
and X_in_ES: "(X, xrhs) \<in> ES"
and not_T: "card ES \<noteq> 1"
shows "\<exists> ES'. (Inv ES' \<and> (\<exists> xrhs'.(X, xrhs') \<in> ES')) \<and>
(card ES', card ES) \<in> less_than" (is "\<exists> ES'. ?P ES'")
proof -
have finite_ES: "finite ES" using Inv_ES by (simp add:Inv_def)
then obtain Y yrhs
where Y_in_ES: "(Y, yrhs) \<in> ES" and not_eq: "(X, xrhs) \<noteq> (Y, yrhs)"
using not_T X_in_ES by (drule_tac card_noteq_1_has_more, auto)
def ES' == "ES - {(Y, yrhs)}"
let ?ES'' = "eqs_subst ES' Y (arden_variate Y yrhs)"
have "?P ?ES''"
proof -
have "Inv ?ES''" using Y_in_ES Inv_ES
by (rule_tac eqs_subst_satisfy_Inv, simp add:ES'_def insert_absorb)
moreover have "\<exists>xrhs'. (X, xrhs') \<in> ?ES''" using not_eq X_in_ES
by (rule_tac ES = ES' in eqs_subst_cls_remains, auto simp add:ES'_def)
moreover have "(card ?ES'', card ES) \<in> less_than"
proof -
have "finite ES'" using finite_ES ES'_def by auto
moreover have "card ES' < card ES" using finite_ES Y_in_ES
by (auto simp:ES'_def card_gt_0_iff intro:diff_Suc_less)
ultimately show ?thesis
by (auto dest:eqs_subst_card_le elim:le_less_trans)
qed
ultimately show ?thesis by simp
qed
thus ?thesis by blast
qed
subsubsection {*
Conclusion of the proof
*}
text {*
From this point until @{text "hard_direction"}, the hard direction is proved
through a simple application of the iteration principle.
*}
lemma iteration_conc:
assumes history: "Inv ES"
and X_in_ES: "\<exists> xrhs. (X, xrhs) \<in> ES"
shows
"\<exists> ES'. (Inv ES' \<and> (\<exists> xrhs'. (X, xrhs') \<in> ES')) \<and> card ES' = 1"
(is "\<exists> ES'. ?P ES'")
proof (cases "card ES = 1")
case True
thus ?thesis using history X_in_ES
by blast
next
case False
thus ?thesis using history iteration_step X_in_ES
by (rule_tac f = card in wf_iter, auto)
qed
lemma last_cl_exists_rexp:
assumes ES_single: "ES = {(X, xrhs)}"
and Inv_ES: "Inv ES"
shows "\<exists> (r::rexp). L r = X" (is "\<exists> r. ?P r")
proof-
let ?A = "arden_variate X xrhs"
have "?P (rexp_of_lam ?A)"
proof -
have "L (rexp_of_lam ?A) = L (lam_of ?A)"
proof(rule rexp_of_lam_eq_lam_set)
show "finite (arden_variate X xrhs)" using Inv_ES ES_single
by (rule_tac arden_variate_keeps_finite,
auto simp add:Inv_def finite_rhs_def)
qed
also have "\<dots> = L ?A"
proof-
have "lam_of ?A = ?A"
proof-
have "classes_of ?A = {}" using Inv_ES ES_single
by (simp add:arden_variate_removes_cl
self_contained_def Inv_def lefts_of_def)
thus ?thesis
by (auto simp only:lam_of_def classes_of_def, case_tac x, auto)
qed
thus ?thesis by simp
qed
also have "\<dots> = X"
proof(rule arden_variate_keeps_eq [THEN sym])
show "X = L xrhs" using Inv_ES ES_single
by (auto simp only:Inv_def valid_eqns_def)
next
from Inv_ES ES_single show "[] \<notin> L (rexp_of xrhs X)"
by(simp add:Inv_def ardenable_def rexp_of_empty finite_rhs_def)
next
from Inv_ES ES_single show "finite xrhs"
by (simp add:Inv_def finite_rhs_def)
qed
finally show ?thesis by simp
qed
thus ?thesis by auto
qed
lemma every_eqcl_has_reg:
assumes finite_CS: "finite (UNIV // (\<approx>Lang))"
and X_in_CS: "X \<in> (UNIV // (\<approx>Lang))"
shows "\<exists> (reg::rexp). L reg = X" (is "\<exists> r. ?E r")
proof -
from X_in_CS have "\<exists> xrhs. (X, xrhs) \<in> (eqs (UNIV // (\<approx>Lang)))"
by (auto simp:eqs_def init_rhs_def)
then obtain ES xrhs where Inv_ES: "Inv ES"
and X_in_ES: "(X, xrhs) \<in> ES"
and card_ES: "card ES = 1"
using finite_CS X_in_CS init_ES_satisfy_Inv iteration_conc
by blast
hence ES_single_equa: "ES = {(X, xrhs)}"
by (auto simp:Inv_def dest!:card_Suc_Diff1 simp:card_eq_0_iff)
thus ?thesis using Inv_ES
by (rule last_cl_exists_rexp)
qed
lemma finals_in_partitions:
"finals Lang \<subseteq> (UNIV // (\<approx>Lang))"
by (auto simp:finals_def quotient_def)
theorem hard_direction:
assumes finite_CS: "finite (UNIV // (\<approx>Lang))"
shows "\<exists> (reg::rexp). Lang = L reg"
proof -
have "\<forall> X \<in> (UNIV // (\<approx>Lang)). \<exists> (reg::rexp). X = L reg"
using finite_CS every_eqcl_has_reg by blast
then obtain f
where f_prop: "\<forall> X \<in> (UNIV // (\<approx>Lang)). X = L ((f X)::rexp)"
by (auto dest:bchoice)
def rs \<equiv> "f ` (finals Lang)"
have "Lang = \<Union> (finals Lang)" using lang_is_union_of_finals by auto
also have "\<dots> = L (folds ALT NULL rs)"
proof -
have "finite rs"
proof -
have "finite (finals Lang)"
using finite_CS finals_in_partitions[of "Lang"]
by (erule_tac finite_subset, simp)
thus ?thesis using rs_def by auto
qed
thus ?thesis
using f_prop rs_def finals_in_partitions[of "Lang"] by auto
qed
finally show ?thesis by blast
qed
section {* Direction: @{text "regular language \<Rightarrow> finite partitions"} *}
subsection {* The scheme for this direction *}
text {*
The following convenient notation @{text "x \<approx>Lang y"} means:
string @{text "x"} and @{text "y"} are equivalent with respect to
language @{text "Lang"}.
*}
definition
str_eq ("_ \<approx>_ _")
where
"x \<approx>Lang y \<equiv> (x, y) \<in> (\<approx>Lang)"
text {*
The very basic scheme to show the finiteness of the partion generated by a
language @{text "Lang"} is by attaching tags to every string. The set of
tags are carefully choosen to make it finite. If it can be proved that
strings with the same tag are equivlent with respect @{text "Lang"}, then
the partition given rise by @{text "Lang"} must be finite. The reason for
this is a lemma in standard library (@{text "finite_imageD"}), which says:
if the image of an injective function on a set @{text "A"} is finite, then
@{text "A"} is finite. It can be shown that the function obtained by
lifting @{text "tag"} to the level of equivalence classes (i.e. @{text "((op
`) tag)"}) is injective (by lemma @{text "tag_image_injI"}) and the image of
this function is finite (with the help of lemma @{text
"finite_tag_imageI"}). This argument is formalized by the following lemma
@{text "tag_finite_imageD"}.
{\bf
Theorems @{text "tag_image_injI"} and @{text
"finite_tag_imageI"} do not exist. Can this comment be deleted?
\marginpar{\bf COMMENT}
}
*}
lemma tag_finite_imageD:
fixes L1::"lang"
assumes str_inj: "\<And> m n. tag m = tag n \<Longrightarrow> m \<approx>L1 n"
and range: "finite (range tag)"
shows "finite (UNIV // \<approx>L1)"
proof (rule_tac f = "(op `) tag" in finite_imageD)
show "finite (op ` tag ` UNIV // \<approx>L1)" using range
apply (rule_tac B = "Pow (tag ` UNIV)" in finite_subset)
by (auto simp add:image_def Pow_def)
next
show "inj_on (op ` tag) (UNIV // \<approx>L1)"
proof-
{ fix X Y
assume X_in: "X \<in> UNIV // \<approx>L1"
and Y_in: "Y \<in> UNIV // \<approx>L1"
and tag_eq: "tag ` X = tag ` Y"
then obtain x y where "x \<in> X" and "y \<in> Y" and "tag x = tag y"
unfolding quotient_def Image_def str_eq_rel_def str_eq_def image_def
apply simp by blast
with X_in Y_in str_inj[of x y]
have "X = Y" by (auto simp:quotient_def str_eq_rel_def str_eq_def)
} thus ?thesis unfolding inj_on_def by auto
qed
qed
subsection {* Lemmas for basic cases *}
text {*
The the final result of this direction is in @{text "rexp_imp_finite"},
which is an induction on the structure of regular expressions. There is one
case for each regular expression operator. For basic operators such as
@{const NULL}, @{const EMPTY}, @{const CHAR}, the finiteness of their
language partition can be established directly with no need of tagging.
This section contains several technical lemma for these base cases.
The inductive cases involve operators @{const ALT}, @{const SEQ} and @{const
STAR}. Tagging functions need to be defined individually for each of
them. There will be one dedicated section for each of these cases, and each
section goes virtually the same way: gives definition of the tagging
function and prove that strings with the same tag are equivalent.
*}
subsection {* The case for @{const "NULL"} *}
lemma quot_null_eq:
shows "(UNIV // \<approx>{}) = ({UNIV}::lang set)"
unfolding quotient_def Image_def str_eq_rel_def by auto
lemma quot_null_finiteI [intro]:
shows "finite ((UNIV // \<approx>{})::lang set)"
unfolding quot_null_eq by simp
subsection {* The case for @{const "EMPTY"} *}
lemma quot_empty_subset:
"UNIV // (\<approx>{[]}) \<subseteq> {{[]}, UNIV - {[]}}"
proof
fix x
assume "x \<in> UNIV // \<approx>{[]}"
then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[]}}"
unfolding quotient_def Image_def by blast
show "x \<in> {{[]}, UNIV - {[]}}"
proof (cases "y = []")
case True with h
have "x = {[]}" by (auto simp: str_eq_rel_def)
thus ?thesis by simp
next
case False with h
have "x = UNIV - {[]}" by (auto simp: str_eq_rel_def)
thus ?thesis by simp
qed
qed
lemma quot_empty_finiteI [intro]:
shows "finite (UNIV // (\<approx>{[]}))"
by (rule finite_subset[OF quot_empty_subset]) (simp)
subsection {* The case for @{const "CHAR"} *}
lemma quot_char_subset:
"UNIV // (\<approx>{[c]}) \<subseteq> {{[]},{[c]}, UNIV - {[], [c]}}"
proof
fix x
assume "x \<in> UNIV // \<approx>{[c]}"
then obtain y where h: "x = {z. (y, z) \<in> \<approx>{[c]}}"
unfolding quotient_def Image_def by blast
show "x \<in> {{[]},{[c]}, UNIV - {[], [c]}}"
proof -
{ assume "y = []" hence "x = {[]}" using h
by (auto simp:str_eq_rel_def)
} moreover {
assume "y = [c]" hence "x = {[c]}" using h
by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def)
} moreover {
assume "y \<noteq> []" and "y \<noteq> [c]"
hence "\<forall> z. (y @ z) \<noteq> [c]" by (case_tac y, auto)
moreover have "\<And> p. (p \<noteq> [] \<and> p \<noteq> [c]) = (\<forall> q. p @ q \<noteq> [c])"
by (case_tac p, auto)
ultimately have "x = UNIV - {[],[c]}" using h
by (auto simp add:str_eq_rel_def)
} ultimately show ?thesis by blast
qed
qed
lemma quot_char_finiteI [intro]:
shows "finite (UNIV // (\<approx>{[c]}))"
by (rule finite_subset[OF quot_char_subset]) (simp)
subsection {* The case for @{const "SEQ"}*}
definition
"tag_str_SEQ L\<^isub>1 L\<^isub>2 x \<equiv>
((\<approx>L\<^isub>1) `` {x}, {(\<approx>L\<^isub>2) `` {x - xa}| xa. xa \<le> x \<and> xa \<in> L\<^isub>1})"
lemma tag_str_seq_range_finite:
"\<lbrakk>finite (UNIV // \<approx>L\<^isub>1); finite (UNIV // \<approx>L\<^isub>2)\<rbrakk>
\<Longrightarrow> finite (range (tag_str_SEQ L\<^isub>1 L\<^isub>2))"
apply (rule_tac B = "(UNIV // \<approx>L\<^isub>1) \<times> (Pow (UNIV // \<approx>L\<^isub>2))" in finite_subset)
by (auto simp:tag_str_SEQ_def Image_def quotient_def split:if_splits)
lemma append_seq_elim:
assumes "x @ y \<in> L\<^isub>1 ;; L\<^isub>2"
shows "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2) \<or>
(\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2)"
proof-
from assms obtain s\<^isub>1 s\<^isub>2
where "x @ y = s\<^isub>1 @ s\<^isub>2"
and in_seq: "s\<^isub>1 \<in> L\<^isub>1 \<and> s\<^isub>2 \<in> L\<^isub>2"
by (auto simp:Seq_def)
hence "(x \<le> s\<^isub>1 \<and> (s\<^isub>1 - x) @ s\<^isub>2 = y) \<or> (s\<^isub>1 \<le> x \<and> (x - s\<^isub>1) @ y = s\<^isub>2)"
using app_eq_dest by auto
moreover have "\<lbrakk>x \<le> s\<^isub>1; (s\<^isub>1 - x) @ s\<^isub>2 = y\<rbrakk> \<Longrightarrow>
\<exists> ya \<le> y. (x @ ya) \<in> L\<^isub>1 \<and> (y - ya) \<in> L\<^isub>2"
using in_seq by (rule_tac x = "s\<^isub>1 - x" in exI, auto elim:prefixE)
moreover have "\<lbrakk>s\<^isub>1 \<le> x; (x - s\<^isub>1) @ y = s\<^isub>2\<rbrakk> \<Longrightarrow>
\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ y \<in> L\<^isub>2"
using in_seq by (rule_tac x = s\<^isub>1 in exI, auto)
ultimately show ?thesis by blast
qed
lemma tag_str_SEQ_injI:
"tag_str_SEQ L\<^isub>1 L\<^isub>2 m = tag_str_SEQ L\<^isub>1 L\<^isub>2 n \<Longrightarrow> m \<approx>(L\<^isub>1 ;; L\<^isub>2) n"
proof-
{ fix x y z
assume xz_in_seq: "x @ z \<in> L\<^isub>1 ;; L\<^isub>2"
and tag_xy: "tag_str_SEQ L\<^isub>1 L\<^isub>2 x = tag_str_SEQ L\<^isub>1 L\<^isub>2 y"
have"y @ z \<in> L\<^isub>1 ;; L\<^isub>2"
proof-
have "(\<exists> xa \<le> x. xa \<in> L\<^isub>1 \<and> (x - xa) @ z \<in> L\<^isub>2) \<or>
(\<exists> za \<le> z. (x @ za) \<in> L\<^isub>1 \<and> (z - za) \<in> L\<^isub>2)"
using xz_in_seq append_seq_elim by simp
moreover {
fix xa
assume h1: "xa \<le> x" and h2: "xa \<in> L\<^isub>1" and h3: "(x - xa) @ z \<in> L\<^isub>2"
obtain ya where "ya \<le> y" and "ya \<in> L\<^isub>1" and "(y - ya) @ z \<in> L\<^isub>2"
proof -
have "\<exists> ya. ya \<le> y \<and> ya \<in> L\<^isub>1 \<and> (x - xa) \<approx>L\<^isub>2 (y - ya)"
proof -
have "{\<approx>L\<^isub>2 `` {x - xa} |xa. xa \<le> x \<and> xa \<in> L\<^isub>1} =
{\<approx>L\<^isub>2 `` {y - xa} |xa. xa \<le> y \<and> xa \<in> L\<^isub>1}"
(is "?Left = ?Right")
using h1 tag_xy by (auto simp:tag_str_SEQ_def)
moreover have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Left" using h1 h2 by auto
ultimately have "\<approx>L\<^isub>2 `` {x - xa} \<in> ?Right" by simp
thus ?thesis by (auto simp:Image_def str_eq_rel_def str_eq_def)
qed
with prems show ?thesis by (auto simp:str_eq_rel_def str_eq_def)
qed
hence "y @ z \<in> L\<^isub>1 ;; L\<^isub>2" by (erule_tac prefixE, auto simp:Seq_def)
} moreover {
fix za
assume h1: "za \<le> z" and h2: "(x @ za) \<in> L\<^isub>1" and h3: "z - za \<in> L\<^isub>2"
hence "y @ za \<in> L\<^isub>1"
proof-
have "\<approx>L\<^isub>1 `` {x} = \<approx>L\<^isub>1 `` {y}"
using h1 tag_xy by (auto simp:tag_str_SEQ_def)
with h2 show ?thesis
by (auto simp:Image_def str_eq_rel_def str_eq_def)
qed
with h1 h3 have "y @ z \<in> L\<^isub>1 ;; L\<^isub>2"
by (drule_tac A = L\<^isub>1 in seq_intro, auto elim:prefixE)
}
ultimately show ?thesis by blast
qed
} thus "tag_str_SEQ L\<^isub>1 L\<^isub>2 m = tag_str_SEQ L\<^isub>1 L\<^isub>2 n \<Longrightarrow> m \<approx>(L\<^isub>1 ;; L\<^isub>2) n"
by (auto simp add: str_eq_def str_eq_rel_def)
qed
lemma quot_seq_finiteI [intro]:
"\<lbrakk>finite (UNIV // \<approx>L\<^isub>1); finite (UNIV // \<approx>L\<^isub>2)\<rbrakk>
\<Longrightarrow> finite (UNIV // \<approx>(L\<^isub>1 ;; L\<^isub>2))"
apply (rule_tac tag = "tag_str_SEQ L\<^isub>1 L\<^isub>2" in tag_finite_imageD)
by (auto intro:tag_str_SEQ_injI elim:tag_str_seq_range_finite)
subsection {* The case for @{const "ALT"} *}
definition
tag_str_ALT :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang)"
where
"tag_str_ALT L1 L2 = (\<lambda>x. (\<approx>L1 `` {x}, \<approx>L2 `` {x}))"
lemma quot_union_finiteI [intro]:
fixes L1 L2::"lang"
assumes finite1: "finite (UNIV // \<approx>L1)"
and finite2: "finite (UNIV // \<approx>L2)"
shows "finite (UNIV // \<approx>(L1 \<union> L2))"
proof (rule_tac tag = "tag_str_ALT L1 L2" in tag_finite_imageD)
show "\<And>x y. tag_str_ALT L1 L2 x = tag_str_ALT L1 L2 y \<Longrightarrow> x \<approx>(L1 \<union> L2) y"
unfolding tag_str_ALT_def
unfolding str_eq_def
unfolding Image_def
unfolding str_eq_rel_def
by auto
next
have *: "finite ((UNIV // \<approx>L1) \<times> (UNIV // \<approx>L2))" using finite1 finite2 by auto
show "finite (range (tag_str_ALT L1 L2))"
unfolding tag_str_ALT_def
apply(rule finite_subset[OF _ *])
unfolding quotient_def
by auto
qed
subsection {* The case for @{const "STAR"} *}
text {*
This turned out to be the trickiest case.
*} (* I will make some illustrations for it. *)
definition
"tag_str_STAR L\<^isub>1 x \<equiv> {(\<approx>L\<^isub>1) `` {x - xa} | xa. xa < x \<and> xa \<in> L\<^isub>1\<star>}"
lemma finite_set_has_max: "\<lbrakk>finite A; A \<noteq> {}\<rbrakk> \<Longrightarrow>
(\<exists> max \<in> A. \<forall> a \<in> A. f a <= (f max :: nat))"
proof (induct rule:finite.induct)
case emptyI thus ?case by simp
next
case (insertI A a)
show ?case
proof (cases "A = {}")
case True thus ?thesis by (rule_tac x = a in bexI, auto)
next
case False
with prems obtain max
where h1: "max \<in> A"
and h2: "\<forall>a\<in>A. f a \<le> f max" by blast
show ?thesis
proof (cases "f a \<le> f max")
assume "f a \<le> f max"
with h1 h2 show ?thesis by (rule_tac x = max in bexI, auto)
next
assume "\<not> (f a \<le> f max)"
thus ?thesis using h2 by (rule_tac x = a in bexI, auto)
qed
qed
qed
lemma finite_strict_prefix_set: "finite {xa. xa < (x::string)}"
apply (induct x rule:rev_induct, simp)
apply (subgoal_tac "{xa. xa < xs @ [x]} = {xa. xa < xs} \<union> {xs}")
by (auto simp:strict_prefix_def)
lemma tag_str_star_range_finite:
"finite (UNIV // \<approx>L\<^isub>1) \<Longrightarrow> finite (range (tag_str_STAR L\<^isub>1))"
apply (rule_tac B = "Pow (UNIV // \<approx>L\<^isub>1)" in finite_subset)
by (auto simp:tag_str_STAR_def Image_def
quotient_def split:if_splits)
lemma tag_str_STAR_injI:
"tag_str_STAR L\<^isub>1 m = tag_str_STAR L\<^isub>1 n \<Longrightarrow> m \<approx>(L\<^isub>1\<star>) n"
proof-
{ fix x y z
assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>"
and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y"
have "y @ z \<in> L\<^isub>1\<star>"
proof(cases "x = []")
case True
with tag_xy have "y = []"
by (auto simp:tag_str_STAR_def strict_prefix_def)
thus ?thesis using xz_in_star True by simp
next
case False
obtain x_max
where h1: "x_max < x"
and h2: "x_max \<in> L\<^isub>1\<star>"
and h3: "(x - x_max) @ z \<in> L\<^isub>1\<star>"
and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>
\<longrightarrow> length xa \<le> length x_max"
proof-
let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}"
have "finite ?S"
by (rule_tac B = "{xa. xa < x}" in finite_subset,
auto simp:finite_strict_prefix_set)
moreover have "?S \<noteq> {}" using False xz_in_star
by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def)
ultimately have "\<exists> max \<in> ?S. \<forall> a \<in> ?S. length a \<le> length max"
using finite_set_has_max by blast
with prems show ?thesis by blast
qed
obtain ya
where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" and h7: "(x - x_max) \<approx>L\<^isub>1 (y - ya)"
proof-
from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} =
{\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right")
by (auto simp:tag_str_STAR_def)
moreover have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?left" using h1 h2 by auto
ultimately have "\<approx>L\<^isub>1 `` {x - x_max} \<in> ?right" by simp
with prems show ?thesis apply
(simp add:Image_def str_eq_rel_def str_eq_def) by blast
qed
have "(y - ya) @ z \<in> L\<^isub>1\<star>"
proof-
from h3 h1 obtain a b where a_in: "a \<in> L\<^isub>1"
and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>"
and ab_max: "(x - x_max) @ z = a @ b"
by (drule_tac star_decom, auto simp:strict_prefix_def elim:prefixE)
have "(x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z"
proof -
have "((x - x_max) \<le> a \<and> (a - (x - x_max)) @ b = z) \<or>
(a < (x - x_max) \<and> ((x - x_max) - a) @ z = b)"
using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def)
moreover {
assume np: "a < (x - x_max)" and b_eqs: " ((x - x_max) - a) @ z = b"
have "False"
proof -
let ?x_max' = "x_max @ a"
have "?x_max' < x"
using np h1 by (clarsimp simp:strict_prefix_def diff_prefix)
moreover have "?x_max' \<in> L\<^isub>1\<star>"
using a_in h2 by (simp add:star_intro3)
moreover have "(x - ?x_max') @ z \<in> L\<^isub>1\<star>"
using b_eqs b_in np h1 by (simp add:diff_diff_appd)
moreover have "\<not> (length ?x_max' \<le> length x_max)"
using a_neq by simp
ultimately show ?thesis using h4 by blast
qed
} ultimately show ?thesis by blast
qed
then obtain za where z_decom: "z = za @ b"
and x_za: "(x - x_max) @ za \<in> L\<^isub>1"
using a_in by (auto elim:prefixE)
from x_za h7 have "(y - ya) @ za \<in> L\<^isub>1"
by (auto simp:str_eq_def str_eq_rel_def)
with z_decom b_in show ?thesis by (auto dest!:step[of "(y - ya) @ za"])
qed
with h5 h6 show ?thesis
by (drule_tac star_intro1, auto simp:strict_prefix_def elim:prefixE)
qed
} thus "tag_str_STAR L\<^isub>1 m = tag_str_STAR L\<^isub>1 n \<Longrightarrow> m \<approx>(L\<^isub>1\<star>) n"
by (auto simp add:str_eq_def str_eq_rel_def)
qed
lemma quot_star_finiteI [intro]:
"finite (UNIV // \<approx>L\<^isub>1) \<Longrightarrow> finite (UNIV // \<approx>(L\<^isub>1\<star>))"
apply (rule_tac tag = "tag_str_STAR L\<^isub>1" in tag_finite_imageD)
by (auto intro:tag_str_STAR_injI elim:tag_str_star_range_finite)
subsection {* The main lemma *}
lemma rexp_imp_finite:
fixes r::"rexp"
shows "finite (UNIV // \<approx>(L r))"
by (induct r) (auto)
end