regexp: comparison Myhill

equal deleted inserted replaced

-:ecf6c61a4541
+:8ab3a06577cf
 section {* Preliminary definitions *}
 types lang = "string set"
-text {*
+text {*  Sequential composition of two languages *}
-Sequential composition of two languages @{text "L1"} and @{text "L2"}
-*}
 (* Language concatenation.  "A ;; B" is the set of all strings s1 @ s2
    with s1 taken from A and s2 taken from B.  The operator is declared as
    a right-associative infix with priority 100. *)
 definition
 Seq :: "lang \<Rightarrow> lang \<Rightarrow> lang" (infixr ";;" 100)
 where
 "A ;; B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"
 text {* Some properties of operator @{text ";;"}. *}
 lemma seq_add_left:
 assumes a: "A = B"
 (* Concatenation distributes over union in its right argument.  Proved by
    unfolding the definition of ";;" and letting auto finish. *)
 lemma seq_union_distrib_left:
 shows "C ;; (A \<union> B) = (C ;; A) \<union> (C ;; B)"
 unfolding Seq_def by  auto
 lemma seq_intro:
-"\<lbrakk>x \<in> A; y \<in> B\<rbrakk> \<Longrightarrow> x @ y \<in> A ;; B "
+assumes a: "x \<in> A" "y \<in> B"
-by (auto simp:Seq_def)
+shows "x @ y \<in> A ;; B "
+using a by (auto simp: Seq_def)
 lemma seq_assoc:
 shows "(A ;; B) ;; C = A ;; (B ;; C)"
 unfolding Seq_def
 apply(auto)
 (* The language {[]}, containing only the empty string, is both a right
    and a left unit for ";;".  Both equations are declared as simp rules. *)
 lemma seq_empty [simp]:
 shows "A ;; {[]} = A"
 and   "{[]} ;; A = A"
 by (simp_all add: Seq_def)
+text {* Power and Star of a language *}
 (* n-fold power of a language, written "A \<up> n" (left-associative infix,
    priority 100).  The 0th power is the language containing only the
    empty string; each successor step puts one more copy of A in front
    using ";;". *)
 fun
 pow :: "lang \<Rightarrow> nat \<Rightarrow> lang" (infixl "\<up>" 100)
 where
 "A \<up> 0 = {[]}"
 | "A \<up> (Suc n) =  A ;; (A \<up> n)"
 (* Kleene star, written as a postfix "\<star>": the union of the powers
    "A \<up> n" over all n. *)
 definition
 Star :: "lang \<Rightarrow> lang" ("_\<star>" [101] 102)
 where
 "A\<star> \<equiv> (\<Union>n. A \<up> n)"
 lemma star_start[intro]:
 shows "[] \<in> A\<star>"
 proof -
 have "[] \<in> A \<up> 0" by auto
 qed
 section {* A slightly modified version of Arden's lemma *}
-text {*
-Arden's lemma expressed at the level of languages, rather
+text {*  A helper lemma for Arden *}
-than the level of regular expression.
-*}
 lemma ardens_helper:
 assumes eq: "X = X ;; A \<union> B"
 shows "X = X ;; (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B ;; (A \<up> m))"
 proof (induct n)
 ultimately
 show "X = B ;; A\<star>" by simp
 qed
+section {* Regular Expressions *}
-text {* The syntax of regular expressions is defined by the datatype @{text "rexp"}. *}
 datatype rexp =
 NULL
 | EMPTY
 | CHAR char
 | SEQ rexp rexp
 text {*
 The following @{text "L"} is an overloaded operator, where @{text "L(x)"} evaluates to
 the language represented by the syntactic object @{text "x"}.
 *}
-consts L:: "'a \<Rightarrow> string set"
+consts L:: "'a \<Rightarrow> lang"
-text {*
+text {* The @{text "L (rexp)"} for regular expressions. *}
-The @{text "L(rexp)"} for regular expression @{text "rexp"} is defined by the
-following overloading function @{text "L_rexp"}.
+overloading L_rexp \<equiv> "L::  rexp \<Rightarrow> lang"
-*}
-overloading L_rexp \<equiv> "L::  rexp \<Rightarrow> string set"
 begin
 fun
 L_rexp :: "rexp \<Rightarrow> string set"
 where
 "L_rexp (NULL) = {}"
 | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)"
 | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
 | "L_rexp (STAR r) = (L_rexp r)\<star>"
 end
-text {*
-To obtain equational system out of finite set of equivalent classes, a fold operation
+section {* Folds for Sets *}
-on finite set @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "fold"}
-more robust than the @{text "fold"} in Isabelle library. The expression @{text "folds f"}
+text {*
+To obtain equational system out of finite set of equivalence classes, a fold operation
+on finite sets @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "folds"}
+more robust than the @{text "fold"} in the Isabelle library. The expression @{text "folds f"}
 makes sense when @{text "f"} is not @{text "associative"} and @{text "commutative"},
 while @{text "fold f"} does not.
 *}
 (* "folds f z S" selects, by Hilbert choice (SOME), a value x for which
    "fold_graph f z S x" holds.
    NOTE(review): when no such x exists (presumably for infinite S) the
    result is an unspecified value -- confirm against the library
    definition of fold_graph. *)
 definition
 folds :: "('a \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> 'b \<Rightarrow> 'a set \<Rightarrow> 'b"
 where
 "folds f z S \<equiv> SOME x. fold_graph f z S x"
 text {*
-The following lemma assures that the arbitrary choice made by the @{text "SOME"} in @{text "folds"}
+The following lemma ensures that the arbitrary choice made by the
-does not affect the @{text "L"}-value of the resultant regular expression.
+@{text "SOME"} in @{text "folds"} does not affect the @{text "L"}-value
-*}
+of the resultant regular expression.
+*}
 lemma folds_alt_simp [simp]:
-"finite rs \<Longrightarrow> L (folds ALT NULL rs) = \<Union> (L ` rs)"
+assumes a: "finite rs"
-apply (rule set_eq_intro, simp add:folds_def)
+shows "L (folds ALT NULL rs) = \<Union> (L ` rs)"
-apply (rule someI2_ex, erule finite_imp_fold_graph)
+apply(rule set_eq_intro)
-by (erule fold_graph.induct, auto)
+apply(simp add: folds_def)
+apply(rule someI2_ex)
-(* Just a technical lemma. *)
+apply(rule_tac finite_imp_fold_graph[OF a])
+apply(erule fold_graph.induct)
+apply(auto)
+done
+text {* Just a technical lemma for collections and pairs *}
 lemma [simp]:
 shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y"
 by simp
 text {*
-@{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}.
+@{text "\<approx>A"} is an equivalence class defined by language @{text "A"}.
 *}
 definition
 str_eq_rel ("\<approx>_" [100] 100)
 where
-"\<approx>Lang \<equiv> {(x, y).  (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}"
+"\<approx>A \<equiv> {(x, y).  (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)}"
 text {*
-Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out
+Among the equivalence classes of @{text "\<approx>A"}, the set @{text "finals A"} singles out
-those which contains strings from @{text "Lang"}.
+those which contain the strings from @{text "A"}.
 *}
 definition
-"finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}"
+"finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}"
 text {*
-The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}.
+The following lemma establishes the relationship between
+@{text "finals A"} and @{text "A"}.
 *}
 lemma lang_is_union_of_finals:
-"Lang = \<Union> finals(Lang)"
+shows "A = \<Union> finals A"
-proof
+unfolding finals_def
-show "Lang \<subseteq> \<Union> (finals Lang)"
+unfolding Image_def
-proof
+unfolding str_eq_rel_def
-fix x
+apply(auto)
-assume "x \<in> Lang"
+apply(drule_tac x = "[]" in spec)
-thus "x \<in> \<Union> (finals Lang)"
+apply(auto)
-apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI)
+done
-by (auto simp:Image_def str_eq_rel_def)
-qed
-next
-show "\<Union> (finals Lang) \<subseteq> Lang"
-apply (clarsimp simp:finals_def str_eq_rel_def)
-by (drule_tac x = "[]" in spec, auto)
-qed
 section {* Direction @{text "finite partition \<Rightarrow> regular language"}*}
 text {*
 The relationship between equivalent classes can be described by an
-equational system.
+equational system.  For example, in equational system \eqref{example_eqns},
-For example, in equational system \eqref{example_eqns},  $X_0, X_1$ are equivalent
+$X_0, X_1$ are equivalent classes. The first equation says every string in
-classes. The first equation says every string in $X_0$ is obtained either by
+$X_0$ is obtained either by appending one $b$ to a string in $X_0$ or by
-appending one $b$ to a string in $X_0$ or by appending one $a$ to a string in
+appending one $a$ to a string in $X_1$ or is just the empty string
-$X_1$ or just be an empty string (represented by the regular expression $\lambda$). Similary,
+(represented by the regular expression $\lambda$). Similarly, the second
-the second equation tells how the strings inside $X_1$ are composed.
+equation tells how the strings inside $X_1$ are composed.
 \begin{equation}\label{example_eqns}
 \begin{aligned}
 X_0 & = X_0 b + X_1 a + \lambda \\
 X_1 & = X_0 a + X_1 b
 \end{aligned}
 \end{equation}
-The summands on the right hand side is represented by the following data type
-@{text "rhs_item"}, mnemonic for 'right hand side item'.
+\noindent
-Generally, there are two kinds of right hand side items, one kind corresponds to
+The summands on the right hand side are represented by the following data
-pure regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other kind corresponds to
+type @{text "rhs_item"}, mnemonic for 'right hand side item'.  Generally,
-transitions from one one equivalent class to another, like the $X_0 b, X_1 a$ etc.
+there are two kinds of right hand side items, one kind corresponds to pure
-*}
+regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other
+kind corresponds to transitions from one equivalent class to another,
+like the $X_0 b, X_1 a$ etc.
+*}
 datatype rhs_item =
-Lam "rexp"                           (* Lambda *)
+Lam "rexp"            (* Lambda *)
-| Trn "(string set)" "rexp"              (* Transition *)
+| Trn "lang" "rexp"     (* Transition *)
 text {*
 In this formalization, pure regular expressions like $\lambda$ are
-repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is represented by $Trn~X_0~(CHAR~a)$.
+represented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ are
-*}
+represented by @{term "Trn X\<^isub>0 (CHAR a)"}.
+*}
 text {*
 The functions @{text "the_r"} and @{text "the_Trn"} are used to extract
 subcomponents from right hand side items.
 *}
-fun the_r :: "rhs_item \<Rightarrow> rexp"
+fun
-where "the_r (Lam r) = r"
+the_r :: "rhs_item \<Rightarrow> rexp"
+where
-fun the_Trn:: "rhs_item \<Rightarrow> (string set \<times> rexp)"
+"the_r (Lam r) = r"
-where "the_Trn (Trn Y r) = (Y, r)"
+fun
-text {*
+the_Trn:: "rhs_item \<Rightarrow> (lang \<times> rexp)"
-Every right hand side item @{text "itm"} defines a string set given
+where
-@{text "L(itm)"}, defined as:
+"the_Trn (Trn Y r) = (Y, r)"
+text {*
+Every right-hand side item @{text "itm"} defines a language given
+by @{text "L(itm)"}, defined as:
 *}
-overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> string set"
+overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> lang"
 begin
-fun L_rhs_e:: "rhs_item \<Rightarrow> string set"
+fun L_rhs_e:: "rhs_item \<Rightarrow> lang"
 where
-"L_rhs_e (Lam r) = L r" |
+"L_rhs_e (Lam r) = L r"
-"L_rhs_e (Trn X r) = X ;; L r"
+| "L_rhs_e (Trn X r) = X ;; L r"
 end
 text {*
 The right hand side of every equation is represented by a set of
 items. The string set defined by such a set @{text "itms"} is given
 by @{text "L(itms)"}, defined as:
 *}
-overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> string set"
+overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> lang"
 begin
-fun L_rhs:: "rhs_item set \<Rightarrow> string set"
+fun L_rhs:: "rhs_item set \<Rightarrow> lang"
-where "L_rhs rhs = \<Union> (L ` rhs)"
+where
+"L_rhs rhs = \<Union> (L ` rhs)"
 end
 text {*
-Given a set of equivalent classses @{text "CS"} and one equivalent class @{text "X"} among
+Given a set of equivalence classes @{text "CS"} and one equivalence class @{text "X"} among
 @{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of
 the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"}
 is:
 *}
 With the help of @{text "init_rhs"}, the equational system describing the formation of every
 equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}.
 *}
 (* The equational system for CS: one pair (X, init_rhs CS X) for every
    class X in CS. *)
 definition "eqs CS \<equiv> {(X, init_rhs CS X) | X.  X \<in> CS}"
 (************ arden's lemma variation ********************)
 text {*
 The following @{text "items_of rhs X"} returns all @{text "X"}-items in @{text "rhs"}.
 *}
 (* The set of all items of the form "Trn X r" that occur in rhs; items
    of any other form are left out. *)
 definition
 "items_of rhs X \<equiv> {Trn X r | r. (Trn X r) \<in> rhs}"
 text {*
 The following @{text "rexp_of rhs X"} combines all regular expressions in @{text "X"}-items
 The following @{text "attach_rexp rexp' itm"} attaches
 the regular expression @{text "rexp'"} to
 the right of right hand side item @{text "itm"}.
 *}
-fun attach_rexp :: "rexp \<Rightarrow> rhs_item \<Rightarrow> rhs_item"
+fun
+attach_rexp :: "rexp \<Rightarrow> rhs_item \<Rightarrow> rhs_item"
 where
 "attach_rexp rexp' (Lam rexp)   = Lam (SEQ rexp rexp')"
 | "attach_rexp rexp' (Trn X rexp) = Trn X (SEQ rexp rexp')"
 text {*
 @{text "rhs"} by @{text "xrhs"}.
 A little thought may reveal that the final result
 should be: first append $(a_1 | a_2 | \ldots | a_n)$ to every item of @{text "xrhs"} and then
 union the result with all non-@{text "X"}-items of @{text "rhs"}.
 *}
 (* Removes every X-item from rhs (via items_of) and adds the result of
    append_rhs_rexp applied to xrhs and "rexp_of rhs X".
    NOTE(review): append_rhs_rexp and rexp_of are defined outside this
    excerpt; check their exact meaning there. *)
 definition
 "rhs_subst rhs X xrhs \<equiv>
 (rhs - (items_of rhs X)) \<union> (append_rhs_rexp xrhs (rexp_of rhs X))"
 text {*
 Every variable is defined at most once in @{text "ES"}.
 *}
 definition
 "distinct_equas ES \<equiv>
 \<forall> X rhs rhs'. (X, rhs) \<in> ES \<and> (X, rhs') \<in> ES \<longrightarrow> rhs = rhs'"
 text {*
 Every equation in @{text "ES"} (represented by @{text "(X, rhs)"}) is valid, i.e. @{text "(X = L rhs)"}.
 *}
 definition
 "valid_eqns ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> (X = L rhs)"
 text {*
 The following @{text "rhs_nonempty rhs"} requires that the regular expressions occurring in transitional
 items of @{text "rhs"} do not contain the empty string. This is necessary for
 the application of Arden's transformation to @{text "rhs"}.
 *}
 definition
 "rhs_nonempty rhs \<equiv> (\<forall> Y r. Trn Y r \<in> rhs \<longrightarrow> [] \<notin> L r)"
 text {*
 The following @{text "ardenable ES"} requires that Arden's transformation is applicable
 to every equation of equational system @{text "ES"}.
 *}
 definition
 "ardenable ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> rhs_nonempty rhs"
 (* The following non_empty seems useless. *)
 definition
 text {*
 The following are some basic properties of the above definitions.
 *}
 lemma L_rhs_union_distrib:
-" L (A::rhs_item set) \<union> L B = L (A \<union> B)"
+fixes A B::"rhs_item set"
+shows "L A \<union> L B = L (A \<union> B)"
 by simp
 lemma finite_snd_Trn:
 assumes finite:"finite rhs"
 shows "finite {r\<^isub>2. Trn Y r\<^isub>2 \<in> rhs}" (is "finite ?B")
 by (rule_tac finite_imageI, auto intro:finite_subset)
 thus ?thesis by (auto simp:rexp_of_lam_def lam_of_def)
 qed
 lemma [simp]:
-" L (attach_rexp r xb) = L xb ;; L r"
+"L (attach_rexp r xb) = L xb ;; L r"
 apply (cases xb, auto simp:Seq_def)
 apply(rule_tac x = "s\<^isub>1 @ s\<^isub>1'" in exI, rule_tac x = "s\<^isub>2'" in exI)
 apply(auto simp: Seq_def)
 done
 thus ?thesis using Inv_ES
 by (rule last_cl_exists_rexp)
 qed
 (* Every member of "finals A" is one of the classes of the quotient of
    UNIV by the relation \<approx>A. *)
 lemma finals_in_partitions:
-"finals Lang \<subseteq> (UNIV // (\<approx>Lang))"
+shows "finals A \<subseteq> (UNIV // \<approx>A)"
-by (auto simp:finals_def quotient_def)
+unfolding finals_def
+unfolding quotient_def
+by auto
 (* Main result of this direction: if the quotient of UNIV by \<approx>A is
    finite, then A is the language of some regular expression.
    Proof outline:
      1. every_eqcl_has_reg gives a regular expression for each class of
         the quotient; bchoice turns this into a function f.
      2. rs is the image of "finals A" under f.  It is finite because
         "finals A" is a subset of the finite quotient
         (finals_in_partitions).
      3. A is the union of "finals A" (lang_is_union_of_finals), and this
         union is shown equal to "L (folds ALT NULL rs)". *)
 theorem hard_direction:
-assumes finite_CS: "finite (UNIV // \<approx>Lang)"
+assumes finite_CS: "finite (UNIV // \<approx>A)"
-shows   "\<exists> (r::rexp). Lang = L r"
+shows   "\<exists>r::rexp. A = L r"
 proof -
-have "\<forall> X \<in> (UNIV // (\<approx>Lang)). \<exists> (reg::rexp). X = L reg"
+have "\<forall> X \<in> (UNIV // \<approx>A). \<exists>reg::rexp. X = L reg"
 using finite_CS every_eqcl_has_reg by blast
 then obtain f
-where f_prop: "\<forall> X \<in> (UNIV // (\<approx>Lang)). X = L ((f X)::rexp)"
+where f_prop: "\<forall> X \<in> (UNIV // \<approx>A). X = L ((f X)::rexp)"
-by (auto dest:bchoice)
+by (auto dest: bchoice)
-def rs \<equiv> "f ` (finals Lang)"
+def rs \<equiv> "f ` (finals A)"
-have "Lang = \<Union> (finals Lang)" using lang_is_union_of_finals by auto
+have "A = \<Union> (finals A)" using lang_is_union_of_finals by auto
 also have "\<dots> = L (folds ALT NULL rs)"
 proof -
 have "finite rs"
 proof -
-have "finite (finals Lang)"
+have "finite (finals A)"
-using finite_CS finals_in_partitions[of "Lang"]
+using finite_CS finals_in_partitions[of "A"]
 by (erule_tac finite_subset, simp)
 thus ?thesis using rs_def by auto
 qed
 thus ?thesis
-using f_prop rs_def finals_in_partitions[of "Lang"] by auto
+using f_prop rs_def finals_in_partitions[of "A"] by auto
 qed
 finally show ?thesis by blast
 qed
 end

changeset 70	8ab3a06577cf
parent 66	828ea293b61f
child 71	426070e68b21