--- a/Myhill_1.thy Sun Feb 06 11:21:12 2011 +0000
+++ b/Myhill_1.thy Mon Feb 07 10:23:23 2011 +0000
@@ -133,27 +133,29 @@
shows "x @ y \<in> A\<star>"
using a b by (blast intro: star_intro1 star_intro2)
+lemma star_cases:
+ shows "A\<star> = {[]} \<union> A ;; A\<star>"
+proof
+ { fix x
+ have "x \<in> A\<star> \<Longrightarrow> x \<in> {[]} \<union> A ;; A\<star>"
+ unfolding Seq_def
+ by (induct rule: star_induct) (auto)
+ }
+ then show "A\<star> \<subseteq> {[]} \<union> A ;; A\<star>" by auto
+next
+ show "{[]} \<union> A ;; A\<star> \<subseteq> A\<star>"
+ unfolding Seq_def by auto
+qed
+
lemma star_decom:
- "\<lbrakk>x \<in> A\<star>; x \<noteq> []\<rbrakk> \<Longrightarrow>(\<exists> a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> A \<and> b \<in> A\<star>)"
+ assumes a: "x \<in> A\<star>" "x \<noteq> []"
+ shows "\<exists>a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> A \<and> b \<in> A\<star>"
+using a
apply(induct rule: star_induct)
apply(simp)
apply(blast)
done
-lemma lang_star_cases:
- shows "L\<star> = {[]} \<union> L ;; L\<star>"
-proof
- { fix x
- have "x \<in> L\<star> \<Longrightarrow> x \<in> {[]} \<union> L ;; L\<star>"
- unfolding Seq_def
- by (induct rule: star_induct) (auto)
- }
- then show "L\<star> \<subseteq> {[]} \<union> L ;; L\<star>" by auto
-next
- show "{[]} \<union> L ;; L\<star> \<subseteq> L\<star>"
- unfolding Seq_def by auto
-qed
-
lemma
shows seq_Union_left: "B ;; (\<Union>n. A \<up> n) = (\<Union>n. B ;; (A \<up> n))"
and seq_Union_right: "(\<Union>n. A \<up> n) ;; B = (\<Union>n. (A \<up> n) ;; B)"
@@ -237,7 +239,7 @@
assume eq: "X = B ;; A\<star>"
have "A\<star> = {[]} \<union> A\<star> ;; A"
unfolding seq_star_comm[symmetric]
- by (rule lang_star_cases)
+ by (rule star_cases)
then have "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"
by (rule seq_add_left)
also have "\<dots> = B \<union> B ;; (A\<star> ;; A)"
@@ -351,7 +353,7 @@
@{text "\<approx>A"} is an equivalence class defined by language @{text "A"}.
*}
definition
- str_eq_rel ("\<approx>_" [100] 100)
+ str_eq_rel :: "lang \<Rightarrow> (string \<times> string) set" ("\<approx>_" [100] 100)
where
"\<approx>A \<equiv> {(x, y). (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)}"
@@ -361,7 +363,9 @@
*}
definition
- "finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}"
+ finals :: "lang \<Rightarrow> lang set"
+where
+ "finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}"
text {*
The following lemma establishes the relationshipt between
@@ -464,14 +468,19 @@
@{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of
the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"}
is:
- *}
+*}
+
+definition
+ transition :: "lang \<Rightarrow> char \<Rightarrow> lang \<Rightarrow> bool" ("_ \<Turnstile>_\<Rightarrow>_" [100,100,100] 100)
+where
+ "Y \<Turnstile>c\<Rightarrow> X \<equiv> Y ;; {[c]} \<subseteq> X"
definition
"init_rhs CS X \<equiv>
if ([] \<in> X) then
- {Lam(EMPTY)} \<union> {Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}
+ {Lam EMPTY} \<union> {Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}
else
- {Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"
+ {Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}"
text {*
In the definition of @{text "init_rhs"}, the term
@@ -483,7 +492,7 @@
With the help of @{text "init_rhs"}, the equitional system descrbing the formation of every
equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}.
- *}
+*}
definition "eqs CS \<equiv> {(X, init_rhs CS X) | X. X \<in> CS}"
@@ -546,10 +555,11 @@
With the help of the two functions immediately above, Ardens'
transformation on right hand side @{text "rhs"} is implemented
by the following function @{text "arden_variate X rhs"}.
- After this transformation, the recursive occurent of @{text "X"}
- in @{text "rhs"} will be eliminated, while the
- string set defined by @{text "rhs"} is kept unchanged.
- *}
+ After this transformation, the recursive occurence of @{text "X"}
+ in @{text "rhs"} will be eliminated, while the string set defined
+ by @{text "rhs"} is kept unchanged.
+*}
+
definition
"arden_variate X rhs \<equiv>
append_rhs_rexp (rhs - items_of rhs X) (STAR (rexp_of rhs X))"
@@ -580,9 +590,9 @@
"eqs_subst ES X xrhs \<equiv> {(Y, rhs_subst yrhs X xrhs) | Y yrhs. (Y, yrhs) \<in> ES}"
text {*
- The computation of regular expressions for equivalent classes is accomplished
+ The computation of regular expressions for equivalence classes is accomplished
using a iteration principle given by the following lemma.
- *}
+*}
lemma wf_iter [rule_format]:
fixes f
@@ -773,7 +783,7 @@
text {*
The following several lemmas until @{text "init_ES_satisfy_Inv"} shows that
the initial equational system satisfies invariant @{text "Inv"}.
- *}
+*}
lemma defined_by_str:
"\<lbrakk>s \<in> X; X \<in> UNIV // (\<approx>Lang)\<rbrakk> \<Longrightarrow> X = (\<approx>Lang) `` {s}"
@@ -824,16 +834,17 @@
and "clist \<in> Y"
using decom "(1)" every_eqclass_has_transition by blast
hence
- "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y ;; {[c]} \<subseteq> X}"
- using "(1)" decom
+ "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y \<Turnstile>c\<Rightarrow> X}"
+ unfolding transition_def
+ using "(1)" decom
by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def)
- thus ?thesis using X_in_eqs "(1)"
- by (simp add:eqs_def init_rhs_def)
+ thus ?thesis using X_in_eqs "(1)"
+ by (simp add: eqs_def init_rhs_def)
qed
qed
next
show "L xrhs \<subseteq> X" using X_in_eqs
- by (auto simp:eqs_def init_rhs_def)
+ by (auto simp:eqs_def init_rhs_def transition_def)
qed
lemma finite_init_rhs:
@@ -851,7 +862,7 @@
ultimately show ?thesis
by auto
qed
- thus ?thesis by (simp add:init_rhs_def)
+ thus ?thesis by (simp add:init_rhs_def transition_def)
qed
lemma init_ES_satisfy_Inv:
@@ -884,7 +895,8 @@
From this point until @{text "iteration_step"}, it is proved
that there exists iteration steps which keep @{text "Inv(ES)"} while
decreasing the size of @{text "ES"}.
- *}
+*}
+
lemma arden_variate_keeps_eq:
assumes l_eq_r: "X = L rhs"
and not_empty: "[] \<notin> L (rexp_of rhs X)"
--- a/Paper/Paper.thy Sun Feb 06 11:21:12 2011 +0000
+++ b/Paper/Paper.thy Mon Feb 07 10:23:23 2011 +0000
@@ -22,7 +22,8 @@
REL ("\<approx>") and
UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
L ("L '(_')" [0] 101) and
- EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100)
+ EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and
+ transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longrightarrow}}> _" [100, 100, 100] 100)
(*>*)
@@ -40,7 +41,7 @@
introduce finite automata and then define everything in terms of them. For
example, a regular language is normally defined as one whose strings are
recognised by a finite deterministic automaton. This approach has many
- benefits. Among them is that it is easy to convince oneself from the fact that
+ benefits. Among them is the fact that it is easy to convince oneself that
regular languages are closed under complementation: one just has to exchange
the accepting and non-accepting states in the corresponding automaton to
obtain an automaton for the complement language. The problem, however, lies with
@@ -139,7 +140,7 @@
to be no substantial formalisation of automata theory and regular languages
carried out in a HOL-based theorem prover. We are only aware of the
large formalisation of automata theory in Nuprl \cite{Constable00} and
- some smaller formalisations in Coq, for example \cite{Filliatre97}.
+ some smaller formalisations in Coq (for example \cite{Filliatre97}).
In this paper, we will not attempt to formalise automata theory, but take a completely
different approach to regular languages. Instead of defining a regular language as one
@@ -153,12 +154,12 @@
\noindent
The reason is that regular expressions, unlike graphs and matrices, can
- be easily defined as inductive datatype. Therefore a corresponding reasoning
- infrastructure comes for free. This has recently been used in HOL4 for formalising regular
- expression matching based on derivatives \cite{OwensSlind08}. The purpose of this paper is to
- show that a central result about regular languages, the Myhill-Nerode theorem,
- can be recreated by only using regular expressions. This theorem gives a necessary
- and sufficient condition for when a language is regular. As a corollary of this
+ be easily defined as inductive datatype. Consequently a corresponding reasoning
+ infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation
+ of regular expression matching based on derivatives \cite{OwensSlind08}. The purpose of this paper is to
+ show that a central result about regular languages---the Myhill-Nerode theorem---can
+ be recreated by only using regular expressions. This theorem gives necessary
+ and sufficient conditions for when a language is regular. As a corollary of this
theorem we can easily establish the usual closure properties, including
complementation, for regular languages.\smallskip
@@ -176,11 +177,9 @@
Strings in Isabelle/HOL are lists of characters with the \emph{empty string}
being represented by the empty list, written @{term "[]"}. \emph{Languages}
are sets of strings. The language containing all strings is written in
- Isabelle/HOL as @{term "UNIV::string set"}. The notation for the quotient
- of a language @{text A} according to a relation @{term REL} is @{term "A //
- REL"}. The concatenation of two languages is written @{term "A ;; B"}; a
- language raised to the power $n$ is written @{term "A \<up> n"}. Both concepts
- are defined as
+ Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages
+ is written @{term "A ;; B"} and a language raised to the power $n$ is written
+ @{term "A \<up> n"}. Their definitions are
\begin{center}
@{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]}
@@ -192,10 +191,31 @@
\noindent
where @{text "@"} is the usual list-append operation. The Kleene-star of a language @{text A}
- is defined as the union over all powers, namely @{thm Star_def}.
+ is defined as the union over all powers, namely @{thm Star_def}. In the paper
+ we will often make use of the following properties.
+ \begin{proposition}\label{langprops}\mbox{}\\
+ \begin{tabular}{@ {}ll@ {\hspace{10mm}}ll}
+ (i) & @{thm star_cases} & (ii) & @{thm[mode=IfThen] pow_length}\\
+ (iii) & @{thm seq_Union_left} &
+ \end{tabular}
+ \end{proposition}
+
+ \noindent
+ We omit the proofs of these properties, but invite the reader to consult
+ our formalisation.\footnote{Available at ???}
+
+
+ The notation for the quotient of a language @{text A} according to an
+ equivalence relation @{term REL} is @{term "A // REL"}. We will write
+ @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined
+ as @{text "{y | y \<approx> x}"}.
+
+
Central to our proof will be the solution of equational systems
- involving regular expressions. For this we will use the following ``reverse''
+ involving regular expressions. For this we will use Arden's lemma \cite{}
+ which solves equations of the form @{term "X = A ;; X \<union> B"} provided
+ @{term "[] \<notin> A"}. However we will need the following ``reverse''
version of Arden's lemma.
\begin{lemma}[Reverse Arden's Lemma]\mbox{}\\
@@ -206,7 +226,8 @@
\begin{proof}
For the right-to-left direction we assume @{thm (rhs) ardens_revised} and show
- that @{thm (lhs) ardens_revised} holds. From Lemma ??? we have @{term "A\<star> = {[]} \<union> A ;; A\<star>"},
+ that @{thm (lhs) ardens_revised} holds. From Prop.~\ref{langprops}$(i)$
+ we have @{term "A\<star> = {[]} \<union> A ;; A\<star>"},
which is equal to @{term "A\<star> = {[]} \<union> A\<star> ;; A"}. Adding @{text B} to both
sides gives @{term "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"}, whose right-hand side
is equal to @{term "(B ;; A\<star>) ;; A \<union> B"}. This completes this direction.
@@ -220,15 +241,17 @@
\noindent
Using this property we can show that @{term "B ;; (A \<up> n) \<subseteq> X"} holds for
- all @{text n}. From this we can infer @{term "B ;; A\<star> \<subseteq> X"} using Lemma ???.
+ all @{text n}. From this we can infer @{term "B ;; A\<star> \<subseteq> X"} using the definition
+ of @{text "\<star>"}.
For the inclusion in the other direction we assume a string @{text s}
with length @{text k} is element in @{text X}. Since @{thm (prem 1) ardens_revised}
- we know that @{term "s \<notin> X ;; (A \<up> Suc k)"} since its length is only @{text k}
+ we know by Prop.~\ref{langprops}$(ii)$ that
+ @{term "s \<notin> X ;; (A \<up> Suc k)"} since its length is only @{text k}
(the strings in @{term "X ;; (A \<up> Suc k)"} are all longer).
From @{text "(*)"} it follows then that
@{term s} must be element in @{term "(\<Union>m\<in>{0..k}. B ;; (A \<up> m))"}. This in turn
- implies that @{term s} is in @{term "(\<Union>n. B ;; (A \<up> n))"}. Using Lemma ??? this
- is equal to @{term "B ;; A\<star>"}, as we needed to show.\qed
+ implies that @{term s} is in @{term "(\<Union>n. B ;; (A \<up> n))"}. Using Prop.~\ref{langprops}$(iii)$
+ this is equal to @{term "B ;; A\<star>"}, as we needed to show.\qed
\end{proof}
\noindent
@@ -275,13 +298,27 @@
@{thm str_eq_rel_def[simplified]}
\end{definition}
- \begin{definition} @{text "finals A"} are the equivalence classes that contain
- strings from @{text A}\\
+ \noindent
+ It is easy to see that @{term "\<approx>A"} is an equivalence relation, which partitions
+ the set of all string, @{text "UNIV"}, into a set of equivalence classed. We define
+ the set @{term "finals A"} as those equivalence classes that contain strings of
+ @{text A}, namely
+
+ \begin{equation}
@{thm finals_def}
- \end{definition}
+ \end{equation}
+
+ \noindent
+ It is easy to show that @{thm lang_is_union_of_finals} holds. We can also define
+ a notion of \emph{transition} between equivalence classes as
- @{thm lang_is_union_of_finals}
+ \begin{equation}
+ @{thm transition_def}
+ \end{equation}
+ \noindent
+ which means if we add the character @{text c} to all strings in the equivalence
+ class @{text Y} HERE
\begin{theorem}
Given a language @{text A}.
Binary file tphols-2011/myhill.pdf has changed