Journal/Paper.thy
changeset 174 2b414a8a7132
parent 173 d371536861bc
child 175 edc642266a82
--- a/Journal/Paper.thy	Tue Jul 26 10:58:26 2011 +0000
+++ b/Journal/Paper.thy	Tue Jul 26 18:12:07 2011 +0000
@@ -48,6 +48,7 @@
   REL ("\<approx>") and
   UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
   lang ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
+  lang_trm ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
   Lam ("\<lambda>'(_')" [100] 100) and 
   Trn ("'(_, _')" [100, 100] 100) and 
   EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and
@@ -57,12 +58,15 @@
   Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and
   
   uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and
-  tag_str_Plus ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and
-  tag_str_Plus ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and
+  tag_str_Plus ("tag\<^bsub>PLUS\<^esub> _ _" [100, 100] 100) and
+  tag_str_Plus ("tag\<^bsub>PLUS\<^esub> _ _ _" [100, 100, 100] 100) and
   tag_str_Times ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and
   tag_str_Times ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and
   tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and
-  tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100)
+  tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100) and
+  tag_eq_rel ("\<^raw:$\threesim$>\<^bsub>_\<^esub>") and
+  Delta ("\<Delta>'(_')") and
+  nullable ("\<delta>'(_')")
 
 lemma meta_eq_app:
   shows "f \<equiv> \<lambda>x. g x \<Longrightarrow> f x \<equiv> g x"
@@ -227,8 +231,7 @@
   pairs. Using this definition for disjoint union means we do not have a
   single type for automata. As a result we will not be able to define a regular
   language as one for which there exists an automaton that recognises all its
-  strings. Similarly, we cannot state properties about \emph{all} automata,
-  since there is no type quantification available in HOL (unlike in Coq, for
+  strings, since there is no type quantification available in HOL (unlike in Coq, for
   example).
 
   An alternative, which provides us with a single type for automata, is to give every 
@@ -318,25 +321,27 @@
   This has recently been exploited in HOL4 with a formalisation of
   regular expression matching based on derivatives \cite{OwensSlind08} and
   with an equivalence checker for regular expressions in Isabelle/HOL
-  \cite{KraussNipkow11}.  The purpose of this paper is to show that a central
+  \cite{KraussNipkow11}.  The main purpose of this paper is to show that a central
   result about regular languages---the Myhill-Nerode theorem---can be
   recreated by only using regular expressions. This theorem gives necessary
   and sufficient conditions for when a language is regular. As a corollary of
   this theorem we can easily establish the usual closure properties, including
   complementation, for regular languages.\medskip
   
-  \noindent
-  {\bf Contributions:} There is an extensive literature on regular languages.
-  To our best knowledge, our proof of the Myhill-Nerode theorem is the first
-  that is based on regular expressions, only. The part of this theorem stating
-  that finitely many partitions imply regularity of the language is proved by 
-  an argument about solving equational sytems.  This argument seems to be folklore.
-  For the other part, we give two proofs: a
-  direct proof using certain tagging-functions, and an indirect proof using
-  Antimirov's partial derivatives \cite{Antimirov95} (also earlier russion work). 
-  Again to our best knowledge, the tagging-functions have not been used before to
-  establish the Myhill-Nerode theorem. 
-
+  \noindent 
+  {\bf Contributions:} There is an extensive literature on regular
+  languages.  To our best knowledge, our proof of the Myhill-Nerode theorem is
+  the first that is based on regular expressions, only. The part of this
+  theorem stating that finitely many partitions imply regularity of the
+  language is proved by an argument about solving equational sytems.  This
+  argument appears to be folklore. For the other part, we give two proofs: one
+  direct proof using certain tagging-functions, and another indirect proof
+  using Antimirov's partial derivatives \cite{Antimirov95}. Again to our best
+  knowledge, the tagging-functions have not been used before to establish the
+  Myhill-Nerode theorem. Derivatives of regular expressions have been used
+  extensively in the literature, unlike partial derivatives. However, partial
+  derivatives are more suitable in the context of the Myhill-Nerode theorem,
+  since it is easier to establish formally their finiteness result.
 *}
 
 section {* Preliminaries *}
@@ -424,7 +429,7 @@
   From @{text "(*)"} it follows then that
   @{term s} must be an element in @{term "(\<Union>m\<in>{0..k}. B \<cdot> (A \<up> m))"}. This in turn
   implies that @{term s} is in @{term "(\<Union>n. B \<cdot> (A \<up> n))"}. Using Prop.~\ref{langprops}@{text "(iii)"} 
-  this is equal to @{term "B \<cdot> A\<star>"}, as we needed to show.\qed
+  this is equal to @{term "B \<cdot> A\<star>"}, as we needed to show.
   \end{proof}
 
   \noindent
@@ -482,8 +487,7 @@
 section {* The Myhill-Nerode Theorem, First Part *}
 
 text {*
-  Folklore: Henzinger (arden-DFA-regexp.pdf)
-
+  \footnote{Folklore: Henzinger (arden-DFA-regexp.pdf); Hofmann}
 
   \noindent
   The key definition in the Myhill-Nerode theorem is the
@@ -491,7 +495,8 @@
   strings are related, provided there is no distinguishing extension in this
   language. This can be defined as a tertiary relation.
 
-  \begin{dfntn}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and
+  \begin{dfntn}[Myhill-Nerode Relation]\label{myhillneroderel} 
+  Given a language @{text A}, two strings @{text x} and
   @{text y} are Myhill-Nerode related provided
   \begin{center}
   @{thm str_eq_def[simplified str_eq_rel_def Pair_Collect]}
@@ -532,8 +537,15 @@
   \noindent
   In our running example, @{text "X\<^isub>2"} is the only 
   equivalence class in @{term "finals {[c]}"}.
-  It is straightforward to show that in general @{thm lang_is_union_of_finals} and 
-  @{thm finals_in_partitions} hold. 
+  It is straightforward to show that in general 
+
+  \begin{center}
+  @{thm lang_is_union_of_finals}\hspace{15mm} 
+  @{thm finals_in_partitions} 
+  \end{center}
+
+  \noindent
+  hold. 
   Therefore if we know that there exists a regular expression for every
   equivalence class in \mbox{@{term "finals A"}} (which by assumption must be
   a finite set), then we can use @{text "\<bigplus>"} to obtain a regular expression 
@@ -858,7 +870,7 @@
   initial equational system. Soundness is proved in Lem.~\ref{inv}. Distinctness
   follows from the fact that the equivalence classes are disjoint. The @{text ardenable}
   property also follows from the setup of the initial equational system, as does 
-  validity.\qed
+  validity.
   \end{proof}
 
   \noindent
@@ -890,7 +902,7 @@
   Having proved the implication above, we can instantiate @{text "ES"} with @{text "ES - {(Y, yrhs)}"}
   which matches with our proof-obligation of @{const "Subst_all"}. Since
   \mbox{@{term "ES = ES - {(Y, yrhs)} \<union> {(Y, yrhs)}"}}, we can use the assumption 
-  to complete the proof.\qed 
+  to complete the proof.
   \end{proof}
 
   \noindent
@@ -906,7 +918,7 @@
   @{term "(Y, yrhs) \<noteq> (X, rhs)"}. Using the distinctness property we can infer
   that @{term "Y \<noteq> X"}. We further know that @{text "Remove ES Y yrhs"}
   removes the equation @{text "Y = yrhs"} from the system, and therefore 
-  the cardinality of @{const Iter} strictly decreases.\qed
+  the cardinality of @{const Iter} strictly decreases.
   \end{proof}
 
   \noindent
@@ -938,7 +950,7 @@
   of @{text ES} is @{text 1}. This means @{text "ES"} must actually be the set @{text "{(X, rhs)}"},
   for which the invariant holds. This allows us to conclude that 
   @{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"} hold,
-  as needed.\qed
+  as needed.
   \end{proof}
 
   \noindent
@@ -960,7 +972,7 @@
   removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}.
   This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}.
   So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = L (\<Uplus>rs)"}.
-  With this we can conclude the proof.\qed
+  With this we can conclude the proof.
   \end{proof}
 
   \noindent
@@ -976,7 +988,7 @@
   set of regular expressions @{text "rs"} such that
   @{term "\<Union>(finals A) = L (\<Uplus>rs)"}.
   Since the left-hand side is equal to @{text A}, we can use @{term "\<Uplus>rs"} 
-  as the regular expression that is needed in the theorem.\qed
+  as the regular expression that is needed in the theorem.
   \end{proof}
 *}
 
@@ -987,15 +999,15 @@
 
 text {*
   \noindent
-  We will prove in this section the second part of the Myhill-Nerode
-  theorem. It can be formulated in our setting as follows:
+  In this section and the next we will give two proofs for establishing the second 
+  part of the Myhill-Nerode theorem. It can be formulated in our setting as follows:
 
   \begin{thrm}
   Given @{text "r"} is a regular expression, then @{thm Myhill_Nerode2}.
   \end{thrm}  
 
   \noindent
-  The proof will be by induction on the structure of @{text r}. It turns out
+  The first proof will be by induction on the structure of @{text r}. It turns out
   the base cases are straightforward.
 
 
@@ -1012,28 +1024,35 @@
   \end{center}
 
   \noindent
-  hold, which shows that @{term "UNIV // \<approx>(L r)"} must be finite.\qed
+  hold, which shows that @{term "UNIV // \<approx>(lang r)"} must be finite.
   \end{proof}
 
   \noindent
   Much more interesting, however, are the inductive cases. They seem hard to solve 
   directly. The reader is invited to try. 
 
-  Our proof will rely on some
+  Our first proof will rely on some
   \emph{tagging-functions} defined over strings. Given the inductive hypothesis, it will 
-  be easy to prove that the \emph{range} of these tagging-functions is finite
-  (the range of a function @{text f} is defined as @{text "range f \<equiv> f ` UNIV"}).
+  be easy to prove that the \emph{range} of these tagging-functions is finite. 
+  The range of a function @{text f} is defined as 
+  
+  \begin{center}
+  @{text "range f \<equiv> f ` UNIV"}
+  \end{center}
+
+  \noindent
+  that means we take the image of @{text f} w.r.t.~all elements in the domain. 
   With this we will be able to infer that the tagging-functions, seen as relations,
   give rise to finitely many equivalence classes of @{const UNIV}. Finally we 
-  will show that the tagging-relations are more refined than @{term "\<approx>(L r)"}, which
-  implies that @{term "UNIV // \<approx>(L r)"} must also be finite (a relation @{text "R\<^isub>1"} 
-  is said to \emph{refine} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \<subseteq> R\<^isub>2"}).
+  will show that the tagging-relations are more refined than @{term "\<approx>(lang r)"}, which
+  implies that @{term "UNIV // \<approx>(lang r)"} must also be finite---a relation @{text "R\<^isub>1"} 
+  is said to \emph{refine} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \<subseteq> R\<^isub>2"}.
   We formally define the notion of a \emph{tagging-relation} as follows.
 
   \begin{dfntn}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x}
   and @{text y} are \emph{tag-related} provided
   \begin{center}
-  @{text "x =tag= y \<equiv> tag x = tag y"}\;.
+  @{text "x \<^raw:$\threesim$>\<^bsub>tag\<^esub> y \<equiv> tag x = tag y"}\;.
   \end{center}
   \end{dfntn}
 
@@ -1063,7 +1082,7 @@
   From the assumptions we can obtain @{text "x \<in> X"} and @{text "y \<in> Y"} with 
   @{text "tag x = tag y"}. Since @{text x} and @{text y} are tag-related, this in 
   turn means that the equivalence classes @{text X}
-  and @{text Y} must be equal.\qed
+  and @{text Y} must be equal.
   \end{proof}
 
   \begin{lmm}\label{fintwo} 
@@ -1077,7 +1096,7 @@
   We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to
   be @{text "X \<mapsto>"}~@{term "{R\<^isub>1 `` {x} | x. x \<in> X}"}. It is easy to see that 
   @{term "finite (f ` (UNIV // R\<^isub>2))"} because it is a subset of @{term "Pow (UNIV // R\<^isub>1)"},
-  which is finite by assumption. What remains to be shown is that @{text f} is injective
+  which must be finite by assumption. What remains to be shown is that @{text f} is injective
   on @{term "UNIV // R\<^isub>2"}. This is equivalent to showing that two equivalence 
   classes, say @{text "X"} and @{text Y}, in @{term "UNIV // R\<^isub>2"} are equal, provided
   @{text "f X = f Y"}. For @{text "X = Y"} to be equal, we have to find two elements
@@ -1087,13 +1106,13 @@
   and further @{term "R\<^isub>1 ``{x} \<in> f Y"}. This means we can obtain a @{text y}
   such that @{term "R\<^isub>1 `` {x} = R\<^isub>1 `` {y}"} holds. Consequently @{text x} and @{text y}
   are @{text "R\<^isub>1"}-related. Since by assumption @{text "R\<^isub>1"} refines @{text "R\<^isub>2"},
-  they must also be @{text "R\<^isub>2"}-related, as we need to show.\qed
+  they must also be @{text "R\<^isub>2"}-related, as we need to show.
   \end{proof}
 
   \noindent
   Chaining Lem.~\ref{finone} and \ref{fintwo} together, means in order to show
-  that @{term "UNIV // \<approx>(L r)"} is finite, we have to find a tagging-function whose
-  range can be shown to be finite and whose tagging-relation refines @{term "\<approx>(L r)"}.
+  that @{term "UNIV // \<approx>(lang r)"} is finite, we have to find a tagging-function whose
+  range can be shown to be finite and whose tagging-relation refines @{term "\<approx>(lang r)"}.
   Let us attempt the @{const PLUS}-case first.
 
   \begin{proof}[@{const "PLUS"}-Case] 
@@ -1107,7 +1126,7 @@
   where @{text "A"} and @{text "B"} are some arbitrary languages.
   We can show in general, if @{term "finite (UNIV // \<approx>A)"} and @{term "finite (UNIV // \<approx>B)"}
   then @{term "finite ((UNIV // \<approx>A) \<times> (UNIV // \<approx>B))"} holds. The range of
-  @{term "tag_str_PLUS A B"} is a subset of this product set---so finite. It remains to be shown
+  @{term "tag_str_Plus A B"} is a subset of this product set---so finite. It remains to be shown
   that @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} refines @{term "\<approx>(A \<union> B)"}. This amounts to 
   showing
   %
@@ -1129,7 +1148,7 @@
   The definition of the tagging-function will give us in each case the
   information to infer that @{text "y @ z \<in> A \<union> B"}.
   Finally we
-  can discharge this case by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed
+  can discharge this case by setting @{text A} to @{term "lang r\<^isub>1"} and @{text B} to @{term "lang r\<^isub>2"}.
   \end{proof}
 
 
@@ -1279,7 +1298,7 @@
   @{term "\<approx>A `` {x} = \<approx>A `` {y}"} and thus @{term "x \<approx>A y"}. Which means by the Myhill-Nerode
   relation that @{term "y @ z' \<in> A"} holds. Using @{text "z - z' \<in> B"}, we can conclude also in this case
   with @{term "y @ z \<in> A \<cdot> B"}. We again can complete the @{const TIMES}-case
-  by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed 
+  by setting @{text A} to @{term "lang r\<^isub>1"} and @{text B} to @{term "lang r\<^isub>2"}. 
   \end{proof}
   
   \noindent
@@ -1383,8 +1402,8 @@
   and also know @{term "(x - x'\<^isub>m\<^isub>a\<^isub>x) \<approx>A (y - y')"}. Unfolding the Myhill-Nerode
   relation we know @{term "(y - y') @ z\<^isub>a \<in> A"}. We also know that @{term "z\<^isub>b \<in> A\<star>"}.
   Therefore @{term "y' @ ((y - y') @ z\<^isub>a) @ z\<^isub>b \<in> A\<star>"}, which means
-  @{term "y @ z \<in> A\<star>"}. As the last step we have to set @{text "A"} to @{term "L r"} and
-  complete the proof.\qed
+  @{term "y @ z \<in> A\<star>"}. As the last step we have to set @{text "A"} to @{term "lang r"} and
+  complete the proof.
   \end{proof}
 *}
 
@@ -1394,9 +1413,137 @@
   \noindent
   As we have seen in the previous section, in order to establish
   the second direction of the Myhill-Nerode theorem, we need to find 
-  a more refined relation (more refined than ??) for which we can
-  show that there are only finitely many equivalence classes.
-  Brzozowski presented in the Appendix of~\cite{Brzozowski64} 
+  a more refined relation than @{term "\<approx>(lang r)"} for which we can
+  show that there are only finitely many equivalence classes. So far we 
+  showed this by induction on @{text "r"}. However, there is also 
+  an indirect method to come up with such a refined relation. Assume
+  the following two definitions for a left-quotient of a language, which 
+  we write as @{term "Der c A"} and @{term "Ders s A"} where 
+  @{text c} is a character and @{text s} a string:
+
+  \begin{center}
+  \begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{2mm}}l}
+  @{thm (lhs) Der_def}  & @{text "\<equiv>"} & @{thm (rhs) Der_def}\\
+  @{thm (lhs) Ders_def} & @{text "\<equiv>"} & @{thm (rhs) Ders_def}\\
+  \end{tabular}
+  \end{center}
+
+  \noindent
+  Now clearly we have the following relation between the Myhill-Nerode relation
+  (Definition~\ref{myhillneroderel}) and left-quotients
+
+  \begin{equation}\label{mhders}
+  @{term "x \<approx>A y"} \hspace{4mm}\text{if and only if}\hspace{4mm} @{term "Ders x A = Ders y A"}
+  \end{equation}
+
+  \noindent
+  It is realtively easy to establish the following identidies for left-quotients:
+  
+  \begin{center}
+  \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{2mm}}l}
+  @{thm (lhs) Der_zero}  & $=$ & @{thm (rhs) Der_zero}\\
+  @{thm (lhs) Der_one}   & $=$ & @{thm (rhs) Der_one}\\
+  @{thm (lhs) Der_atom}  & $=$ & @{thm (rhs) Der_atom}\\
+  @{thm (lhs) Der_union} & $=$ & @{thm (rhs) Der_union}\\
+  @{thm (lhs) Der_conc}  & $=$ & @{thm (rhs) Der_conc}\\
+  @{thm (lhs) Der_star}  & $=$ & @{thm (rhs) Der_star}\\
+  \end{tabular}
+  \end{center}
+
+  \noindent
+  where @{text "\<Delta>"} is a function that tests whether the empty string
+  is in the language and returns @{term "{[]}"} or @{term "{}"}, respectively.
+  The only interesting case above is the last one where we use Prop.~\ref{langprops}
+  in order to infer that @{term "Der c (A\<star>) = Der c (A \<cdot> A\<star>)"}. We can 
+  then complete the proof by observing that @{term "Delta A \<cdot> Der c (A\<star>) \<subseteq> (Der c A) \<cdot> A\<star>"}.
+  
+  Brzozowski observed that the left-quotients for languages of regular
+  expressions can be calculated directly via the notion of \emph{derivatives
+  of a regular expressions} \cite{Brzozowski64} which we define in Isabelle/HOL as 
+  follows:
+
+  \begin{center}
+  \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
+  @{thm (lhs) der.simps(1)}  & @{text "\<equiv>"} & @{thm (rhs) der.simps(1)}\\
+  @{thm (lhs) der.simps(2)}  & @{text "\<equiv>"} & @{thm (rhs) der.simps(2)}\\
+  @{thm (lhs) der.simps(3)[where c'="d"]}  & @{text "\<equiv>"} & @{thm (rhs) der.simps(3)[where c'="d"]}\\
+  @{thm (lhs) der.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}  
+     & @{text "\<equiv>"} & @{thm (rhs) der.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
+  @{thm (lhs) der.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}  
+     & @{text "\<equiv>"}\\ 
+  \multicolumn{3}{@ {\hspace{5mm}}l@ {}}{@{thm (rhs) der.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}}\\
+  @{thm (lhs) der.simps(6)}  & @{text "\<equiv>"} & @{thm (rhs) der.simps(6)}\smallskip\\
+  @{thm (lhs) ders.simps(1)}  & @{text "\<equiv>"} & @{thm (rhs) ders.simps(1)}\\
+  @{thm (lhs) ders.simps(2)}  & @{text "\<equiv>"} & @{thm (rhs) ders.simps(2)}\\
+  \end{tabular}
+  \end{center}
+
+  \noindent
+  The function @{term "nullable r"} tests whether the regular expression
+  can recognise the empty string:
+
+  \begin{center}
+  \begin{tabular}{cc}
+  \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
+  @{thm (lhs) nullable.simps(1)}  & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(1)}\\
+  @{thm (lhs) nullable.simps(2)}  & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(2)}\\
+  @{thm (lhs) nullable.simps(3)}  & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(3)}\\
+  \end{tabular} &
+  \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
+  @{thm (lhs) nullable.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}  
+     & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
+  @{thm (lhs) nullable.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}  
+     & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
+  @{thm (lhs) nullable.simps(6)}  & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(6)}\\
+  \end{tabular}
+  \end{tabular}
+  \end{center}
+
+  \noindent
+  Brzozowski proved 
+
+  \begin{equation}\label{Dersders}
+  \mbox{\begin{tabular}{l}
+  @{thm Der_der}\\
+  @{thm Ders_ders}
+  \end{tabular}}
+  \end{equation}
+
+  \noindent
+  where the first is by induction on @{text r} and the second by a simple
+  calculation.
+
+  The importance in the context of the Myhill-Nerode theorem is that 
+  we can use \eqref{mhders} and \eqref{Dersders} in order to derive
+
+  \begin{center}
+  @{term "x \<approx>(lang r) y"} \hspace{4mm}if and only if\hspace{4mm} 
+  @{term "lang (ders x r) = lang (ders y r)"}
+  \end{center}
+
+  \noindent
+  which means @{term "x \<approx>(lang r) y"} provided @{term "ders x r = ders y r"}.
+  Consequently, we can use as the tagging relation 
+  @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. ders x r)\<^esub>"}, which we know refines @{term "\<approx>(lang r)"}. 
+  This almost helps us because Brozowski also proved that there for every 
+  language there are only 
+  finitely `dissimilar' derivatives for a regular expression. Two regulare
+  expressions are similar if they can be identified using the using the 
+  ACI-identities
+
+  \begin{center}
+  \begin{tabular}{cl}
+  (A) & @{term "Plus (Plus r\<^isub>1 r\<^isub>2) r\<^isub>3"} $\equiv$ @{term "Plus r\<^isub>1 (Plus r\<^isub>2 r\<^isub>3)"}\\
+  (C) & @{term "Plus r\<^isub>1 r\<^isub>2"} $\equiv$ @{term "Plus r\<^isub>2 r\<^isub>1"}\\
+  (I) & @{term "Plus r r"} $\equiv$ @{term "r"}\\
+  \end{tabular}
+  \end{center}
+
+  \noindent
+  Without the indentification, we unfortunately obtain infinitely many
+  derivations (an example is given in \cite[Page~141]{Sakarovitch09}).
+  Reasoning modulo ACI can be done, but it is very painful in a theorem prover.
+
 
   in order to prove the second
   direction of the Myhill-Nerode theorem. There he calculates the