--- a/Myhill_1.thy Sun Feb 13 10:36:53 2011 +0000
+++ b/Myhill_1.thy Mon Feb 14 07:42:16 2011 +0000
@@ -487,14 +487,6 @@
definition
"Solve X ES \<equiv> while Test (Iter X) ES"
-
-(* test *)
-partial_function (tailrec)
- solve
-where
- "solve X ES = (if (card ES = 1) then ES else solve X (Iter X ES))"
-
-
text {*
Since the @{text "while"} combinator from HOL library is used to implement @{text "Solve X ES"},
the induction principle @{thm [source] while_rule} is used to proved the desired properties
@@ -666,19 +658,22 @@
*}
lemma defined_by_str:
- "\<lbrakk>s \<in> X; X \<in> UNIV // (\<approx>Lang)\<rbrakk> \<Longrightarrow> X = (\<approx>Lang) `` {s}"
-by (auto simp:quotient_def Image_def str_eq_rel_def)
+ assumes "s \<in> X" "X \<in> UNIV // \<approx>A"
+ shows "X = \<approx>A `` {s}"
+using assms
+unfolding quotient_def Image_def str_eq_rel_def
+by auto
lemma every_eqclass_has_transition:
assumes has_str: "s @ [c] \<in> X"
- and in_CS: "X \<in> UNIV // (\<approx>Lang)"
- obtains Y where "Y \<in> UNIV // (\<approx>Lang)" and "Y ;; {[c]} \<subseteq> X" and "s \<in> Y"
+ and in_CS: "X \<in> UNIV // \<approx>A"
+ obtains Y where "Y \<in> UNIV // \<approx>A" and "Y ;; {[c]} \<subseteq> X" and "s \<in> Y"
proof -
- def Y \<equiv> "(\<approx>Lang) `` {s}"
- have "Y \<in> UNIV // (\<approx>Lang)"
+ def Y \<equiv> "\<approx>A `` {s}"
+ have "Y \<in> UNIV // \<approx>A"
unfolding Y_def quotient_def by auto
moreover
- have "X = (\<approx>Lang) `` {s @ [c]}"
+ have "X = \<approx>A `` {s @ [c]}"
using has_str in_CS defined_by_str by blast
then have "Y ;; {[c]} \<subseteq> X"
unfolding Y_def Image_def Seq_def
@@ -687,18 +682,18 @@
moreover
have "s \<in> Y" unfolding Y_def
unfolding Image_def str_eq_rel_def by simp
- ultimately show thesis by (blast intro: that)
+ ultimately show thesis using that by blast
qed
lemma l_eq_r_in_eqs:
- assumes X_in_eqs: "(X, xrhs) \<in> (Init (UNIV // (\<approx>Lang)))"
- shows "X = L xrhs"
+ assumes X_in_eqs: "(X, rhs) \<in> Init (UNIV // \<approx>A)"
+ shows "X = L rhs"
proof
- show "X \<subseteq> L xrhs"
+ show "X \<subseteq> L rhs"
proof
fix x
assume "(1)": "x \<in> X"
- show "x \<in> L xrhs"
+ show "x \<in> L rhs"
proof (cases "x = []")
assume empty: "x = []"
thus ?thesis using X_in_eqs "(1)"
@@ -707,14 +702,14 @@
assume not_empty: "x \<noteq> []"
then obtain clist c where decom: "x = clist @ [c]"
by (case_tac x rule:rev_cases, auto)
- have "X \<in> UNIV // (\<approx>Lang)" using X_in_eqs by (auto simp:Init_def)
+ have "X \<in> UNIV // \<approx>A" using X_in_eqs by (auto simp:Init_def)
then obtain Y
- where "Y \<in> UNIV // (\<approx>Lang)"
+ where "Y \<in> UNIV // \<approx>A"
and "Y ;; {[c]} \<subseteq> X"
and "clist \<in> Y"
using decom "(1)" every_eqclass_has_transition by blast
hence
- "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y \<Turnstile>c\<Rightarrow> X}"
+ "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // \<approx>A \<and> Y \<Turnstile>c\<Rightarrow> X}"
unfolding transition_def
using "(1)" decom
by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def)
@@ -723,10 +718,17 @@
qed
qed
next
- show "L xrhs \<subseteq> X" using X_in_eqs
+ show "L rhs \<subseteq> X" using X_in_eqs
by (auto simp:Init_def Init_rhs_def transition_def)
qed
+lemma test:
+ assumes X_in_eqs: "(X, rhs) \<in> Init (UNIV // \<approx>A)"
+ shows "X = \<Union> (L ` rhs)"
+using assms
+by (drule_tac l_eq_r_in_eqs) (simp)
+
+
lemma finite_Init_rhs:
assumes finite: "finite CS"
shows "finite (Init_rhs CS X)"
--- a/Paper/Paper.thy Sun Feb 13 10:36:53 2011 +0000
+++ b/Paper/Paper.thy Mon Feb 14 07:42:16 2011 +0000
@@ -154,7 +154,7 @@
\it%
\begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
`` & If the reader finds the above treatment in terms of bit lists revoltingly
- concrete, I cannot disagree. A more abstract approach is clearly desirable.''\\
+ concrete, I cannot disagree. A more abstract approach is clearly desirable.''\smallskip\\
`` & All lemmas appear obvious given a picture of the composition of automata\ldots
Yet their proofs require a painful amount of detail.''
\end{tabular}
@@ -171,7 +171,7 @@
to be no substantial formalisation of automata theory and regular languages
carried out in HOL-based theorem provers. Nipkow establishes in
\cite{Nipkow98} the link between regular expressions and automata in
- the restricted context of lexing. The only larger formalisations of automata theory
+ the context of lexing. The only larger formalisations of automata theory
are carried out in Nuprl \cite{Constable00} and in Coq (for example
\cite{Filliatre97}).
@@ -239,10 +239,11 @@
\end{proposition}
\noindent
- In @{text "(ii)"} we use the notation @{term "length s"} for the length of a string.
- We omit the proofs for these properties, but invite the reader to consult
- our formalisation.\footnote{Available at ???}
-
+ In @{text "(ii)"} we use the notation @{term "length s"} for the length of a
+ string. This property states that if @{term "[] \<notin> A"} then the lengths of
+ the strings in @{term "A \<up> (Suc n)"} must be longer than @{text n}. We omit
+ the proofs for these properties, but invite the reader to consult our
+ formalisation.\footnote{Available at ???}
The notation in Isabelle/HOL for the quotient of a language @{text A} according to an
equivalence relation @{term REL} is @{term "A // REL"}. We will write
@@ -327,11 +328,11 @@
\end{tabular}
\end{center}
- Given a set of regular expressions @{text rs}, we will make use of the operation of generating
+ Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating
a regular expression that matches all languages of @{text rs}. We only need to know the existence
of such a regular expression and therefore we use Isabelle/HOL's @{const "fold_graph"} and Hilbert's
@{text "\<epsilon>"} to define @{term "\<Uplus>rs"}. This operation, roughly speaking, folds @{const ALT} over the
- set @{text rs} with @{const NULL} for the empty set. We can prove that for finite sets @{text rs}
+ set @{text rs} with @{const NULL} for the empty set. We can prove that for a finite set @{text rs}
\begin{center}
@{thm (lhs) folds_alt_simp} @{text "= \<Union> (\<calL> ` rs)"}
@@ -342,7 +343,7 @@
image of the set @{text rs} under function @{text "\<calL>"}.
*}
-section {* Finite Partitions Imply Regularity of a Language *}
+section {* The Myhill-Nerode Theorem, First Part *}
text {*
The key definition in the Myhill-Nerode theorem is the
@@ -378,7 +379,7 @@
\noindent
To prove this theorem, we first define the set @{term "finals A"} as those equivalence
- classes that contain strings of @{text A}, namely
+ classes from @{term "UNIV // \<approx>A"} that contain strings of @{text A}, namely
%
\begin{equation}
@{thm finals_def}
@@ -389,7 +390,7 @@
It is straightforward to show that in general @{thm lang_is_union_of_finals} and
@{thm finals_in_partitions} hold.
Therefore if we know that there exists a regular expression for every
- equivalence class in @{term "finals A"} (which by assumption must be
+ equivalence class in \mbox{@{term "finals A"}} (which by assumption must be
a finite set), then we can use @{text "\<bigplus>"} to obtain a regular expression
that matches every string in @{text A}.
@@ -412,7 +413,7 @@
@{term "X\<^isub>1 \<Turnstile>c\<Rightarrow> X\<^isub>2"}, @{term "X\<^isub>1 \<Turnstile>d\<Rightarrow> X\<^isub>3"} with @{text d} being any
other character than @{text c}, and @{term "X\<^isub>3 \<Turnstile>d\<Rightarrow> X\<^isub>3"} for any @{text d}.
- Next we build an equational system that
+ Next we build an \emph{initial} equational system that
contains an equation for each equivalence class. Suppose we have
the equivalence classes @{text "X\<^isub>1,\<dots>,X\<^isub>n"}, there must be one and only one that
contains the empty string @{text "[]"} (since equivalence classes are disjoint).
@@ -428,22 +429,21 @@
\end{center}
\noindent
- where the terms @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} stand for all transitions
- @{term "Y\<^isub>i\<^isub>j \<Turnstile>c\<^isub>i\<^isub>j\<Rightarrow> X\<^isub>i"}. Our internal represeantation for the right-hand
- sides are sets of terms.
- There can only be finitely many such
- terms since there are only finitely many equivalence classes
- and only finitely many characters.
- The term @{text "\<lambda>(EMPTY)"} in the first equation acts as a marker for the equivalence
- class containing @{text "[]"}.\footnote{Note that we mark, roughly speaking, the
+ where the terms @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"}
+ stand for all transitions @{term "Y\<^isub>i\<^isub>j \<Turnstile>c\<^isub>i\<^isub>j\<Rightarrow>
+ X\<^isub>i"}. There can only be
+ finitely many such terms in a right-hand side since there are only finitely many
+ equivalence classes and only finitely many characters. The term @{text
+ "\<lambda>(EMPTY)"} in the first equation acts as a marker for the equivalence class
+ containing @{text "[]"}.\footnote{Note that we mark, roughly speaking, the
single ``initial'' state in the equational system, which is different from
- the method by Brzozowski \cite{Brzozowski64}, where he marks the ``terminal''
- states. We are forced to set up the equational system in our way, because
- the Myhill-Nerode relation determines the ``direction'' of the transitions.
- The successor ``state'' of an equivalence class @{text Y} can be reached by adding
- characters to the end of @{text Y}. This is also the reason why we have to use
- our reverse version of Arden's lemma.}
- Overloading the function @{text \<calL>} for the two kinds of terms in the
+ the method by Brzozowski \cite{Brzozowski64}, where he marks the
+ ``terminal'' states. We are forced to set up the equational system in our
+ way, because the Myhill-Nerode relation determines the ``direction'' of the
+ transitions. The successor ``state'' of an equivalence class @{text Y} can
+ be reached by adding characters to the end of @{text Y}. This is also the
+ reason why we have to use our reverse version of Arden's lemma.}
+ Overloading the function @{text \<calL>} for the two kinds of terms in the
equational system, we have
\begin{center}
@@ -453,7 +453,7 @@
\end{center}
\noindent
- we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations
+ and we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations
%
\begin{equation}\label{inv1}
@{text "X\<^isub>i = \<calL>(Y\<^isub>i\<^isub>1, CHAR c\<^isub>i\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>i\<^isub>q, CHAR c\<^isub>i\<^isub>q)"}.
@@ -469,21 +469,42 @@
\noindent
The reason for adding the @{text \<lambda>}-marker to our equational system is
to obtain this equation: it only holds in this form since none of
- the other terms contain the empty string. Since we use sets for representing
- the right-hans side we can write \eqref{inv1} and \eqref{inv2} more
- concisely for an equation of the form @{text "X = rhs"} as
+ the other terms contain the empty string.
+
+ Our represeantation of the equations are pairs,
+ where the first component is an equivalence class and the second component
+ is a set of terms standing for the right-hand side. Given a set of equivalence
+ classes @{text CS}, our initial equational system @{term "Init CS"} is thus
+ defined as
+
+ \begin{center}
+ \begin{tabular}{rcl}
+ @{thm (lhs) Init_rhs_def} & @{text "\<equiv>"} &
+ @{text "if"}~@{term "[] \<in> X"}\\
+ & & @{text "then"}~@{term "{Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X} \<union> {Lam EMPTY}"}\\
+ & & @{text "else"}~@{term "{Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}"}\\
+ @{thm (lhs) Init_def} & @{text "\<equiv>"} & @{thm (rhs) Init_def}
+ \end{tabular}
+ \end{center}
+
+
+
+ \noindent
+ Because we use sets of terms
+ for representing the right-hand sides in the equational system we can
+ prove \eqref{inv1} and \eqref{inv2} more concisely as
%
- \begin{equation}\label{inv}
- \mbox{@{text "X = \<Union> (\<calL> ` rhs)"}}
- \end{equation}
+ \begin{lemma}\label{inv}
+ If @{thm (prem 1) test} then @{text "X = \<Union> \<calL> ` rhs"}.
+ \end{lemma}
\noindent
Our proof of Thm.~\ref{myhillnerodeone} will proceed by transforming the
- equational system into a \emph{solved form} maintaining the invariant
- \eqref{inv}. From the solved form we will be able to read
+ initial equational system into one in \emph{solved form} maintaining the invariant
+ in Lemma \ref{inv}. From the solved form we will be able to read
off the regular expressions.
- In order to transform an equational system into solved form, we have two main
+ In order to transform an equational system into solved form, we have two
operations: one that takes an equation of the form @{text "X = rhs"} and removes
the recursive occurences of @{text X} in the @{text rhs} using our variant of Arden's
Lemma. The other operation takes an equation @{text "X = rhs"}
@@ -535,10 +556,11 @@
the substitution operation we will arrange it so that @{text "xrhs"} does not contain
any occurence of @{text X}.
- We lift these two operation to work over equational systems @{text ES}: @{const Subst_all}
+ With these two operation in place, we can define the operation that removes one equation
+ from an equational systems @{text ES}. The operation @{const Subst_all}
substitutes an equation @{text "X = xrhs"} throughout an equational system @{text ES};
- @{const Remove} completely removes such an equaution from @{text ES} by substituting
- it to the rest of the equational system, but first removing all recursive occurences
+ @{const Remove} then completely removes such an equation from @{text ES} by substituting
+ it to the rest of the equational system, but first eliminating all recursive occurences
of @{text X} by applying @{const Arden} to @{text "xrhs"}.
\begin{center}
@@ -547,9 +569,42 @@
@{thm (lhs) Remove_def} & @{text "\<equiv>"} & @{thm (rhs) Remove_def}
\end{tabular}
\end{center}
+
+ \noindent
+ Finially, we can define how an equational system should be solved. For this
+ we will iterate the elimination of an equation until only one equation
+ will be left in the system. However, we not just want to have any equation
+ as being the last one, but the one for which we want to calculate the regular
+ expression. Therefore we define the iteration step so that it chooses an
+ equation with an equivalence class that is not @{text X}. This allows us to
+ control, which equation will be the last. We use again Hilbert's choice operator,
+ written @{text SOME}, to chose an equation in a equational system @{text ES}.
+
+ \begin{center}
+ \begin{tabular}{rc@ {\hspace{4mm}}r@ {\hspace{1mm}}l}
+ @{thm (lhs) Iter_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "let"}}\\
+ & & @{text "(Y, yrhs) ="} & @{term "SOME (Y, yrhs). (Y, yrhs) \<in> ES \<and> X \<noteq> Y"} \\
+ & & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "in"}~~@{term "Remove ES Y yrhs"}}\\
+ \end{tabular}
+ \end{center}
+
+ \noindent
+ The last definition in our
+
+ \begin{center}
+ @{thm Solve_def}
+ \end{center}
+
+
+ \begin{center}
+ @{thm while_rule}
+ \end{center}
*}
-section {* Regular Expressions Generate Finitely Many Partitions *}
+
+
+
+section {* Myhill-Nerode, Second Part *}
text {*