--- a/Myhill_1.thy Sun Feb 06 10:28:29 2011 +0000
+++ b/Myhill_1.thy Sun Feb 06 11:21:12 2011 +0000
@@ -28,15 +28,14 @@
types lang = "string set"
-text {*
- Sequential composition of two languages @{text "L1"} and @{text "L2"}
-*}
+text {* Sequential composition of two languages *}
definition
Seq :: "lang \<Rightarrow> lang \<Rightarrow> lang" (infixr ";;" 100)
where
"A ;; B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"
+
text {* Some properties of operator @{text ";;"}. *}
lemma seq_add_left:
@@ -53,8 +52,9 @@
unfolding Seq_def by auto
lemma seq_intro:
- "\<lbrakk>x \<in> A; y \<in> B\<rbrakk> \<Longrightarrow> x @ y \<in> A ;; B "
-by (auto simp:Seq_def)
+ assumes a: "x \<in> A" "y \<in> B"
+ shows "x @ y \<in> A ;; B "
+using a by (auto simp: Seq_def)
lemma seq_assoc:
shows "(A ;; B) ;; C = A ;; (B ;; C)"
@@ -68,6 +68,9 @@
and "{[]} ;; A = A"
by (simp_all add: Seq_def)
+
+text {* Power and Star of a language *}
+
fun
pow :: "lang \<Rightarrow> nat \<Rightarrow> lang" (infixl "\<up>" 100)
where
@@ -79,6 +82,7 @@
where
"A\<star> \<equiv> (\<Union>n. A \<up> n)"
+
lemma star_start[intro]:
shows "[] \<in> A\<star>"
proof -
@@ -205,10 +209,8 @@
section {* A slightly modified version of Arden's lemma *}
-text {*
- Arden's lemma expressed at the level of languages, rather
- than the level of regular expression.
-*}
+
+text {* A helper lemma for Arden *}
lemma ardens_helper:
assumes eq: "X = X ;; A \<union> B"
@@ -272,8 +274,8 @@
qed
+section {* Regular Expressions *}
-text {* The syntax of regular expressions is defined by the datatype @{text "rexp"}. *}
datatype rexp =
NULL
| EMPTY
@@ -287,14 +289,12 @@
The following @{text "L"} is an overloaded operator, where @{text "L(x)"} evaluates to
the language represented by the syntactic object @{text "x"}.
*}
-consts L:: "'a \<Rightarrow> string set"
+consts L:: "'a \<Rightarrow> lang"
-text {*
- The @{text "L(rexp)"} for regular expression @{text "rexp"} is defined by the
- following overloading function @{text "L_rexp"}.
-*}
-overloading L_rexp \<equiv> "L:: rexp \<Rightarrow> string set"
+text {* The @{text "L (rexp)"} for regular expressions. *}
+
+overloading L_rexp \<equiv> "L:: rexp \<Rightarrow> lang"
begin
fun
L_rexp :: "rexp \<Rightarrow> string set"
@@ -307,10 +307,13 @@
| "L_rexp (STAR r) = (L_rexp r)\<star>"
end
+
+section {* Folds for Sets *}
+
text {*
- To obtain equational system out of finite set of equivalent classes, a fold operation
- on finite set @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "fold"}
- more robust than the @{text "fold"} in Isabelle library. The expression @{text "folds f"}
+ To obtain equational system out of finite set of equivalence classes, a fold operation
+ on finite sets @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "folds"}
+ more robust than the @{text "fold"} in the Isabelle library. The expression @{text "folds f"}
makes sense when @{text "f"} is not @{text "associative"} and @{text "commutitive"},
while @{text "fold f"} does not.
*}
@@ -321,109 +324,126 @@
"folds f z S \<equiv> SOME x. fold_graph f z S x"
text {*
- The following lemma assures that the arbitrary choice made by the @{text "SOME"} in @{text "folds"}
- does not affect the @{text "L"}-value of the resultant regular expression.
+ The following lemma ensures that the arbitrary choice made by the
+ @{text "SOME"} in @{text "folds"} does not affect the @{text "L"}-value
+ of the resultant regular expression.
*}
+
lemma folds_alt_simp [simp]:
- "finite rs \<Longrightarrow> L (folds ALT NULL rs) = \<Union> (L ` rs)"
-apply (rule set_eq_intro, simp add:folds_def)
-apply (rule someI2_ex, erule finite_imp_fold_graph)
-by (erule fold_graph.induct, auto)
+ assumes a: "finite rs"
+ shows "L (folds ALT NULL rs) = \<Union> (L ` rs)"
+apply(rule set_eq_intro)
+apply(simp add: folds_def)
+apply(rule someI2_ex)
+apply(rule_tac finite_imp_fold_graph[OF a])
+apply(erule fold_graph.induct)
+apply(auto)
+done
-(* Just a technical lemma. *)
+
+text {* Just a technical lemma for collections and pairs *}
+
lemma [simp]:
shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y"
by simp
text {*
- @{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}.
+ @{text "\<approx>A"} is an equivalence class defined by language @{text "A"}.
*}
definition
str_eq_rel ("\<approx>_" [100] 100)
where
- "\<approx>Lang \<equiv> {(x, y). (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}"
+ "\<approx>A \<equiv> {(x, y). (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)}"
text {*
- Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out
- those which contains strings from @{text "Lang"}.
+ Among the equivalence clases of @{text "\<approx>A"}, the set @{text "finals A"} singles out
+ those which contains the strings from @{text "A"}.
*}
definition
- "finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}"
+ "finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}"
text {*
- The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}.
+ The following lemma establishes the relationshipt between
+ @{text "finals A"} and @{text "A"}.
*}
+
lemma lang_is_union_of_finals:
- "Lang = \<Union> finals(Lang)"
-proof
- show "Lang \<subseteq> \<Union> (finals Lang)"
- proof
- fix x
- assume "x \<in> Lang"
- thus "x \<in> \<Union> (finals Lang)"
- apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI)
- by (auto simp:Image_def str_eq_rel_def)
- qed
-next
- show "\<Union> (finals Lang) \<subseteq> Lang"
- apply (clarsimp simp:finals_def str_eq_rel_def)
- by (drule_tac x = "[]" in spec, auto)
-qed
+ shows "A = \<Union> finals A"
+unfolding finals_def
+unfolding Image_def
+unfolding str_eq_rel_def
+apply(auto)
+apply(drule_tac x = "[]" in spec)
+apply(auto)
+done
+
section {* Direction @{text "finite partition \<Rightarrow> regular language"}*}
text {*
The relationship between equivalent classes can be described by an
- equational system.
- For example, in equational system \eqref{example_eqns}, $X_0, X_1$ are equivalent
- classes. The first equation says every string in $X_0$ is obtained either by
- appending one $b$ to a string in $X_0$ or by appending one $a$ to a string in
- $X_1$ or just be an empty string (represented by the regular expression $\lambda$). Similary,
- the second equation tells how the strings inside $X_1$ are composed.
+ equational system. For example, in equational system \eqref{example_eqns},
+ $X_0, X_1$ are equivalent classes. The first equation says every string in
+ $X_0$ is obtained either by appending one $b$ to a string in $X_0$ or by
+ appending one $a$ to a string in $X_1$ or just be an empty string
+ (represented by the regular expression $\lambda$). Similary, the second
+ equation tells how the strings inside $X_1$ are composed.
+
\begin{equation}\label{example_eqns}
\begin{aligned}
X_0 & = X_0 b + X_1 a + \lambda \\
X_1 & = X_0 a + X_1 b
\end{aligned}
\end{equation}
- The summands on the right hand side is represented by the following data type
- @{text "rhs_item"}, mnemonic for 'right hand side item'.
- Generally, there are two kinds of right hand side items, one kind corresponds to
- pure regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other kind corresponds to
- transitions from one one equivalent class to another, like the $X_0 b, X_1 a$ etc.
- *}
+
+ \noindent
+ The summands on the right hand side is represented by the following data
+ type @{text "rhs_item"}, mnemonic for 'right hand side item'. Generally,
+ there are two kinds of right hand side items, one kind corresponds to pure
+ regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other
+ kind corresponds to transitions from one one equivalent class to another,
+ like the $X_0 b, X_1 a$ etc.
+
+*}
datatype rhs_item =
- Lam "rexp" (* Lambda *)
- | Trn "(string set)" "rexp" (* Transition *)
+ Lam "rexp" (* Lambda *)
+ | Trn "lang" "rexp" (* Transition *)
+
text {*
In this formalization, pure regular expressions like $\lambda$ is
- repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is represented by $Trn~X_0~(CHAR~a)$.
- *}
+ repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is
+ represented by @{term "Trn X\<^isub>0 (CHAR a)"}.
+*}
text {*
The functions @{text "the_r"} and @{text "the_Trn"} are used to extract
subcomponents from right hand side items.
*}
-fun the_r :: "rhs_item \<Rightarrow> rexp"
-where "the_r (Lam r) = r"
+fun
+ the_r :: "rhs_item \<Rightarrow> rexp"
+where
+ "the_r (Lam r) = r"
-fun the_Trn:: "rhs_item \<Rightarrow> (string set \<times> rexp)"
-where "the_Trn (Trn Y r) = (Y, r)"
+fun
+ the_Trn:: "rhs_item \<Rightarrow> (lang \<times> rexp)"
+where
+ "the_Trn (Trn Y r) = (Y, r)"
text {*
- Every right hand side item @{text "itm"} defines a string set given
- @{text "L(itm)"}, defined as:
+ Every right-hand side item @{text "itm"} defines a language given
+ by @{text "L(itm)"}, defined as:
*}
-overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> string set"
+
+overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> lang"
begin
- fun L_rhs_e:: "rhs_item \<Rightarrow> string set"
+ fun L_rhs_e:: "rhs_item \<Rightarrow> lang"
where
- "L_rhs_e (Lam r) = L r" |
- "L_rhs_e (Trn X r) = X ;; L r"
+ "L_rhs_e (Lam r) = L r"
+ | "L_rhs_e (Trn X r) = X ;; L r"
end
text {*
@@ -432,14 +452,15 @@
by @{text "L(itms)"}, defined as:
*}
-overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> string set"
+overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> lang"
begin
- fun L_rhs:: "rhs_item set \<Rightarrow> string set"
- where "L_rhs rhs = \<Union> (L ` rhs)"
+ fun L_rhs:: "rhs_item set \<Rightarrow> lang"
+ where
+ "L_rhs rhs = \<Union> (L ` rhs)"
end
text {*
- Given a set of equivalent classses @{text "CS"} and one equivalent class @{text "X"} among
+ Given a set of equivalence classes @{text "CS"} and one equivalence class @{text "X"} among
@{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of
the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"}
is:
@@ -464,12 +485,14 @@
equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}.
*}
+
definition "eqs CS \<equiv> {(X, init_rhs CS X) | X. X \<in> CS}"
(************ arden's lemma variation ********************)
text {*
The following @{text "items_of rhs X"} returns all @{text "X"}-items in @{text "rhs"}.
*}
+
definition
"items_of rhs X \<equiv> {Trn X r | r. (Trn X r) \<in> rhs}"
@@ -505,7 +528,8 @@
the right of right hand side item @{text "itm"}.
*}
-fun attach_rexp :: "rexp \<Rightarrow> rhs_item \<Rightarrow> rhs_item"
+fun
+ attach_rexp :: "rexp \<Rightarrow> rhs_item \<Rightarrow> rhs_item"
where
"attach_rexp rexp' (Lam rexp) = Lam (SEQ rexp rexp')"
| "attach_rexp rexp' (Trn X rexp) = Trn X (SEQ rexp rexp')"
@@ -541,6 +565,7 @@
should be: first append $(a_1 | a_2 | \ldots | a_n)$ to every item of @{text "xrhs"} and then
union the result with all non-@{text "X"}-items of @{text "rhs"}.
*}
+
definition
"rhs_subst rhs X xrhs \<equiv>
(rhs - (items_of rhs X)) \<union> (append_rhs_rexp xrhs (rexp_of rhs X))"
@@ -599,6 +624,7 @@
definition
"distinct_equas ES \<equiv>
\<forall> X rhs rhs'. (X, rhs) \<in> ES \<and> (X, rhs') \<in> ES \<longrightarrow> rhs = rhs'"
+
text {*
Every equation in @{text "ES"} (represented by @{text "(X, rhs)"}) is valid, i.e. @{text "(X = L rhs)"}.
*}
@@ -610,6 +636,7 @@
items of @{text "rhs"} does not contain empty string. This is necessary for
the application of Arden's transformation to @{text "rhs"}.
*}
+
definition
"rhs_nonempty rhs \<equiv> (\<forall> Y r. Trn Y r \<in> rhs \<longrightarrow> [] \<notin> L r)"
@@ -617,6 +644,7 @@
The following @{text "ardenable ES"} requires that Arden's transformation is applicable
to every equation of equational system @{text "ES"}.
*}
+
definition
"ardenable ES \<equiv> \<forall> X rhs. (X, rhs) \<in> ES \<longrightarrow> rhs_nonempty rhs"
@@ -669,7 +697,8 @@
*}
lemma L_rhs_union_distrib:
- " L (A::rhs_item set) \<union> L B = L (A \<union> B)"
+ fixes A B::"rhs_item set"
+ shows "L A \<union> L B = L (A \<union> B)"
by simp
lemma finite_snd_Trn:
@@ -717,7 +746,7 @@
qed
lemma [simp]:
- " L (attach_rexp r xb) = L xb ;; L r"
+ "L (attach_rexp r xb) = L xb ;; L r"
apply (cases xb, auto simp:Seq_def)
apply(rule_tac x = "s\<^isub>1 @ s\<^isub>1'" in exI, rule_tac x = "s\<^isub>2'" in exI)
apply(auto simp: Seq_def)
@@ -1221,31 +1250,33 @@
qed
lemma finals_in_partitions:
- "finals Lang \<subseteq> (UNIV // (\<approx>Lang))"
- by (auto simp:finals_def quotient_def)
+ shows "finals A \<subseteq> (UNIV // \<approx>A)"
+unfolding finals_def
+unfolding quotient_def
+by auto
theorem hard_direction:
- assumes finite_CS: "finite (UNIV // \<approx>Lang)"
- shows "\<exists> (r::rexp). Lang = L r"
+ assumes finite_CS: "finite (UNIV // \<approx>A)"
+ shows "\<exists>r::rexp. A = L r"
proof -
- have "\<forall> X \<in> (UNIV // (\<approx>Lang)). \<exists> (reg::rexp). X = L reg"
+ have "\<forall> X \<in> (UNIV // \<approx>A). \<exists>reg::rexp. X = L reg"
using finite_CS every_eqcl_has_reg by blast
then obtain f
- where f_prop: "\<forall> X \<in> (UNIV // (\<approx>Lang)). X = L ((f X)::rexp)"
- by (auto dest:bchoice)
- def rs \<equiv> "f ` (finals Lang)"
- have "Lang = \<Union> (finals Lang)" using lang_is_union_of_finals by auto
+ where f_prop: "\<forall> X \<in> (UNIV // \<approx>A). X = L ((f X)::rexp)"
+ by (auto dest: bchoice)
+ def rs \<equiv> "f ` (finals A)"
+ have "A = \<Union> (finals A)" using lang_is_union_of_finals by auto
also have "\<dots> = L (folds ALT NULL rs)"
proof -
have "finite rs"
proof -
- have "finite (finals Lang)"
- using finite_CS finals_in_partitions[of "Lang"]
+ have "finite (finals A)"
+ using finite_CS finals_in_partitions[of "A"]
by (erule_tac finite_subset, simp)
thus ?thesis using rs_def by auto
qed
thus ?thesis
- using f_prop rs_def finals_in_partitions[of "Lang"] by auto
+ using f_prop rs_def finals_in_partitions[of "A"] by auto
qed
finally show ?thesis by blast
qed
--- a/Paper/Paper.thy Sun Feb 06 10:28:29 2011 +0000
+++ b/Paper/Paper.thy Sun Feb 06 11:21:12 2011 +0000
@@ -9,6 +9,8 @@
REL :: "(string \<times> string) \<Rightarrow> bool"
UPLUS :: "'a set \<Rightarrow> 'a set \<Rightarrow> (nat \<times> 'a) set"
+abbreviation
+ "EClass x R \<equiv> R `` {x}"
notation (latex output)
str_eq_rel ("\<approx>\<^bsub>_\<^esub>") and
@@ -19,9 +21,11 @@
quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and
REL ("\<approx>") and
UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
- L ("L '(_')" [0] 101)
+ L ("L '(_')" [0] 101) and
+ EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100)
(*>*)
+
section {* Introduction *}
text {*
@@ -41,8 +45,8 @@
the accepting and non-accepting states in the corresponding automaton to
obtain an automaton for the complement language. The problem, however, lies with
formalising such reasoning in a HOL-based theorem prover, in our case
- Isabelle/HOL. Automata consist of states and transitions. They need to be represented
- as graphs or matrices, neither
+ Isabelle/HOL. Automata are build up from states and transitions that
+ need to be represented as graphs or matrices, neither
of which can be defined as inductive datatype.\footnote{In some works
functions are used to represent state transitions, but also they are not
inductive datatypes.} This means we have to build our own reasoning
@@ -125,7 +129,7 @@
automata, since there is no type quantification available in HOL. An
alternative, which provides us with a single type for automata, is to give every
state node an identity, for example a natural
- number, and then be careful renaming these identities apart whenever
+ number, and then be careful to rename these identities apart whenever
connecting two automata. This results in clunky proofs
establishing that properties are invariant under renaming. Similarly,
connecting two automata represented as matrices results in very adhoc
@@ -134,7 +138,7 @@
Because of these problems to do with representing automata, there seems
to be no substantial formalisation of automata theory and regular languages
carried out in a HOL-based theorem prover. We are only aware of the
- large formalisation of the automata theory in Nuprl \cite{Constable00} and
+ large formalisation of automata theory in Nuprl \cite{Constable00} and
some smaller formalisations in Coq, for example \cite{Filliatre97}.
In this paper, we will not attempt to formalise automata theory, but take a completely
@@ -150,10 +154,10 @@
\noindent
The reason is that regular expressions, unlike graphs and matrices, can
be easily defined as inductive datatype. Therefore a corresponding reasoning
- infrastructure comes for free. This has recently been used for formalising regular
- expression matching in HOL4 \cite{OwensSlind08}. The purpose of this paper is to
+ infrastructure comes for free. This has recently been used in HOL4 for formalising regular
+ expression matching based on derivatives \cite{OwensSlind08}. The purpose of this paper is to
show that a central result about regular languages, the Myhill-Nerode theorem,
- can be recreated by only using regular expressions. This theorem give a necessary
+ can be recreated by only using regular expressions. This theorem gives a necessary
and sufficient condition for when a language is regular. As a corollary of this
theorem we can easily establish the usual closure properties, including
complementation, for regular languages.\smallskip
@@ -261,14 +265,27 @@
\end{tabular}
\end{tabular}
\end{center}
+
*}
section {* Finite Partitions Imply Regularity of a Language *}
text {*
+ \begin{definition}[Myhill-Nerode Relation]\mbox{}\\
+ @{thm str_eq_rel_def[simplified]}
+ \end{definition}
+
+ \begin{definition} @{text "finals A"} are the equivalence classes that contain
+ strings from @{text A}\\
+ @{thm finals_def}
+ \end{definition}
+
+ @{thm lang_is_union_of_finals}
+
+
\begin{theorem}
Given a language @{text A}.
- @{thm[mode=IfThen] hard_direction[where Lang="A"]}
+ @{thm[mode=IfThen] hard_direction}
\end{theorem}
*}