a final polishing before submitting later this week
(*<*)theory Paperimports "../Closures2" "../Attic/Prefix_subtract" begindeclare [[show_question_marks = false]]consts REL :: "(string \<times> string) \<Rightarrow> bool" UPLUS :: "'a set \<Rightarrow> 'a set \<Rightarrow> (nat \<times> 'a) set"abbreviation "EClass x R \<equiv> R `` {x}"abbreviation "Append_rexp2 r_itm r \<equiv> Append_rexp r r_itm"abbreviation "pow" (infixl "\<up>" 100)where "A \<up> n \<equiv> A ^^ n"syntax (latex output) "_Collect" :: "pttrn => bool => 'a set" ("(1{_ | _})") "_CollectIn" :: "pttrn => 'a set => bool => 'a set" ("(1{_ \<in> _ | _})")translations "_Collect p P" <= "{p. P}" "_Collect p P" <= "{p|xs. P}" "_CollectIn p A P" <= "{p : A. P}"syntax (latex output) "_UNION_le" :: "'a \<Rightarrow> 'a => 'b set => 'b set" ("(3\<Union>(00\<^bsub>_ \<le> _\<^esub>)/ _)" [0, 0, 10] 10)abbreviation "ZERO \<equiv> Zero"abbreviation "ONE \<equiv> One"abbreviation "ATOM \<equiv> Atom"abbreviation "PLUS \<equiv> Plus"abbreviation "TIMES \<equiv> Times"abbreviation "TIMESS \<equiv> Timess"abbreviation "STAR \<equiv> Star"abbreviation "ALLS \<equiv> Star Allreg"definition Delta :: "'a lang \<Rightarrow> 'a lang"where "Delta A = (if [] \<in> A then {[]} else {})"notation (latex output) str_eq ("\<approx>\<^bsub>_\<^esub>") and str_eq_applied ("_ \<approx>\<^bsub>_\<^esub> _") and conc (infixr "\<cdot>" 100) and star ("_\<^bsup>\<star>\<^esup>" [101] 200) and pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and Suc ("_+1" [100] 100) and quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and REL ("\<approx>") and UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and lang ("\<^raw:\ensuremath{\cal{L}}>" 101) and lang ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and lang_trm ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and Lam ("\<lambda>'(_')" [100] 100) and Trn ("'(_, _')" [100, 100] 100) and EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and uminus ("\<^raw:\ensuremath{\overline{\isa{>_\<^raw:}}}>" [100] 100) and tag_Plus ("+tag _ _" [100, 100] 100) and tag_Plus ("+tag _ _ _" [100, 100, 100] 100) and tag_Times ("\<times>tag _ _" [100, 100] 100) and tag_Times ("\<times>tag _ _ _" [100, 100, 100] 100) and tag_Star ("\<star>tag _" [100] 100) and tag_Star ("\<star>tag _ _" [100, 100] 100) and tag_eq ("\<^raw:$\threesim$>\<^bsub>_\<^esub>" [100] 100) and Delta ("\<Delta>'(_')") and nullable ("\<delta>'(_')") and Cons ("_ :: _" [100, 100] 100) and Rev ("Rev _" [1000] 100) and Deriv ("Der _ _" [1000, 1000] 100) and Derivs ("Ders") and ONE ("ONE" 999) and ZERO ("ZERO" 999) and pderivs_lang ("pdersl") and UNIV1 ("UNIV\<^isup>+") and Deriv_lang ("Dersl") and Derivss ("Derss") and deriv ("der") and derivs ("ders") and pderiv ("pder") and pderivs ("pders") and pderivs_set ("pderss") and SUBSEQ ("Sub") and SUPSEQ ("Sup") and UP ("'(_')\<up>") and ALLS ("ALL")lemmas Deriv_simps = Deriv_empty Deriv_epsilon Deriv_char Deriv_uniondefinition Der :: "'a \<Rightarrow> 'a lang \<Rightarrow> 'a lang"where "Der c A \<equiv> {s. [c] @ s \<in> A}"definition Ders :: "'a list \<Rightarrow> 'a lang \<Rightarrow> 'a lang"where "Ders s A \<equiv> {s'. s @ s' \<in> A}"lemma meta_eq_app: shows "f \<equiv> \<lambda>x. g x \<Longrightarrow> f x \<equiv> g x" by autolemma str_eq_def': shows "x \<approx>A y \<equiv> (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)"unfolding str_eq_def by simplemma conc_def': "A \<cdot> B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"unfolding conc_def by simplemma conc_Union_left: shows "B \<cdot> (\<Union>n. A \<up> n) = (\<Union>n. B \<cdot> (A \<up> n))"unfolding conc_def by autolemma test: assumes X_in_eqs: "(X, rhs) \<in> Init (UNIV // \<approx>A)" shows "X = \<Union> (lang_trm ` rhs)"using assms l_eq_r_in_eqs by (simp)abbreviation notprec ("_ \<^raw:\mbox{$\not\preceq$}>_")where "notprec x y \<equiv> \<not>(x \<preceq> y)"abbreviation minimal_syn ("min\<^bsub>_\<^esub> _")where "minimal_syn A x \<equiv> minimal x A" (* THEOREMS *)notation (Rule output) "==>" ("\<^raw:\mbox{}\inferrule{\mbox{>_\<^raw:}}>\<^raw:{\mbox{>_\<^raw:}}>")syntax (Rule output) "_bigimpl" :: "asms \<Rightarrow> prop \<Rightarrow> prop" ("\<^raw:\mbox{}\inferrule{>_\<^raw:}>\<^raw:{\mbox{>_\<^raw:}}>") "_asms" :: "prop \<Rightarrow> asms \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}\\>/ _") "_asm" :: "prop \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}>")notation (Axiom output) "Trueprop" ("\<^raw:\mbox{}\inferrule{\mbox{}}{\mbox{>_\<^raw:}}>")notation (IfThen output) "==>" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _/ \<^raw:{\normalsize \,>then\<^raw:\,}>/ _.")syntax (IfThen output) "_bigimpl" :: "asms \<Rightarrow> prop \<Rightarrow> prop" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _ /\<^raw:{\normalsize \,>then\<^raw:\,}>/ _.") "_asms" :: "prop \<Rightarrow> asms \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}> /\<^raw:{\normalsize \,>and\<^raw:\,}>/ _") "_asm" :: "prop \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}>")notation (IfThenNoBox output) "==>" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _/ \<^raw:{\normalsize \,>then\<^raw:\,}>/ _.")syntax (IfThenNoBox output) "_bigimpl" :: "asms \<Rightarrow> prop \<Rightarrow> prop" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _ /\<^raw:{\normalsize \,>then\<^raw:\,}>/ _.") "_asms" :: "prop \<Rightarrow> asms \<Rightarrow> asms" ("_ /\<^raw:{\normalsize \,>and\<^raw:\,}>/ _") "_asm" :: "prop \<Rightarrow> asms" ("_")lemma pow_length: assumes a: "[] \<notin> A" and b: "s \<in> A \<up> Suc n" shows "n < length s"using bproof (induct n arbitrary: s) case 0 have "s \<in> A \<up> Suc 0" by fact with a have "s \<noteq> []" by auto then show "0 < length s" by autonext case (Suc n) have ih: "\<And>s. s \<in> A \<up> Suc n \<Longrightarrow> n < length s" by fact have "s \<in> A \<up> Suc (Suc n)" by fact then obtain s1 s2 where eq: "s = s1 @ s2" and *: "s1 \<in> A" and **: "s2 \<in> A \<up> Suc n" by (auto simp add: Seq_def) from ih ** have "n < length s2" by simp moreover have "0 < length s1" using * a by auto ultimately show "Suc n < length s" unfolding eq by (simp only: length_append)qed(*>*)section {* Introduction *}text {* \noindent Regular languages are an important and well-understood subject in Computer Science, with many beautiful theorems and many useful algorithms. There is a wide range of textbooks on this subject, many of which are aimed at students and contain very detailed `pencil-and-paper' proofs (e.g.~\cite{Kozen97, HopcroftUllman69}). It seems natural to exercise theorem provers by formalising the theorems and by verifying formally the algorithms. A popular choice for a theorem prover would be one based on Higher-Order Logic (HOL), for example HOL4, HOLlight or Isabelle/HOL. For the development presented in this paper we will use the Isabelle/HOL. HOL is a predicate calculus that allows quantification over predicate variables. Its type system is based on Church's Simple Theory of Types \cite{Church40}. Although many mathematical concepts can be conveniently expressed in HOL, there are some limitations that hurt badly when attempting a simple-minded formalisation of regular languages in it. The typical approach (for example \cite{HopcroftUllman69,Kozen97}) to regular languages is to introduce finite deterministic automata and then define everything in terms of them. For example, a regular language is normally defined as: \begin{dfntn}\label{baddef} A language @{text A} is \emph{regular}, provided there is a finite deterministic automaton that recognises all strings of @{text "A"}. \end{dfntn} \noindent This approach has many benefits. Among them is the fact that it is easy to convince oneself that regular languages are closed under complementation: one just has to exchange the accepting and non-accepting states in the corresponding automaton to obtain an automaton for the complement language. The problem, however, lies with formalising such reasoning in a HOL-based theorem prover. Automata are built up from states and transitions that need to be represented as graphs, matrices or functions, none of which can be defined as an inductive datatype. In case of graphs and matrices, this means we have to build our own reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor HOLlight support them with libraries. Even worse, reasoning about graphs and matrices can be a real hassle in HOL-based theorem provers, because we have to be able to combine automata. Consider for example the operation of sequencing two automata, say $A_1$ and $A_2$, by connecting the accepting states of $A_1$ to the initial state of $A_2$: \begin{center} \begin{tabular}{ccc} \begin{tikzpicture}[scale=1] %\draw[step=2mm] (-1,-1) grid (1,1); \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3); \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \draw (-0.6,0.0) node {\small$A_1$}; \draw ( 0.6,0.0) node {\small$A_2$}; \end{tikzpicture} & \raisebox{2.1mm}{\bf\Large$\;\;\;\Rightarrow\,\;\;$} & \begin{tikzpicture}[scale=1] %\draw[step=2mm] (-1,-1) grid (1,1); \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3); \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; \draw (C) to [very thick, bend left=45] (B); \draw (D) to [very thick, bend right=45] (B); \draw (-0.6,0.0) node {\small$A_1$}; \draw ( 0.6,0.0) node {\small$A_2$}; \end{tikzpicture} \end{tabular} \end{center} \noindent On `paper' we can define the corresponding graph in terms of the disjoint union of the state nodes. Unfortunately in HOL, the standard definition for disjoint union, namely % \begin{equation}\label{disjointunion} @{text "A\<^isub>1 \<uplus> A\<^isub>2 \<equiv> {(1, x) | x \<in> A\<^isub>1} \<union> {(2, y) | y \<in> A\<^isub>2}"} \end{equation} \noindent changes the type---the disjoint union is not a set, but a set of pairs. Using this definition for disjoint union means we do not have a single type for the states of automata. As a result we will not be able to define a regular language as one for which there exists an automaton that recognises all its strings (Definition~\ref{baddef}). This is because we cannot make a definition in HOL that is only polymorphic in the state type, but not in the predicate for regularity; and there is no type quantification available in HOL (unlike in Coq, for example).\footnote{Slind already pointed out this problem in an email to the HOL4 mailing list on 21st April 2005.} An alternative, which provides us with a single type for states of automata, is to give every state node an identity, for example a natural number, and then be careful to rename these identities apart whenever connecting two automata. This results in clunky proofs establishing that properties are invariant under renaming. Similarly, connecting two automata represented as matrices results in very adhoc constructions, which are not pleasant to reason about. Functions are much better supported in Isabelle/HOL, but they still lead to similar problems as with graphs. Composing, for example, two non-deterministic automata in parallel requires also the formalisation of disjoint unions. Nipkow \cite{Nipkow98} dismisses for this the option of using identities, because it leads according to him to ``messy proofs''. Since he does not need to define what regular languages are, Nipkow opts for a variant of \eqref{disjointunion} using bit lists, but writes\smallskip \begin{quote} \it% \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}} `` & All lemmas appear obvious given a picture of the composition of automata\ldots Yet their proofs require a painful amount of detail.'' \end{tabular} \end{quote}\smallskip \noindent and\smallskip \begin{quote} \it% \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}} `` & If the reader finds the above treatment in terms of bit lists revoltingly concrete, I cannot disagree. A more abstract approach is clearly desirable.'' \end{tabular} \end{quote}\smallskip \noindent Moreover, it is not so clear how to conveniently impose a finiteness condition upon functions in order to represent \emph{finite} automata. The best is probably to resort to more advanced reasoning frameworks, such as \emph{locales} or \emph{type classes}, which are \emph{not} available in all HOL-based theorem provers. Because of these problems to do with representing automata, there seems to be no substantial formalisation of automata theory and regular languages carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes the link between regular expressions and automata in the context of lexing. Berghofer and Reiter \cite{BerghoferReiter09} formalise automata working over bit strings in the context of Presburger arithmetic. The only larger formalisations of automata theory are carried out in Nuprl \cite{Constable00} and in Coq, e.g.~\cite{Filliatre97,Almeidaetal10}. Also, one might consider automata as just convenient `vehicles' for establishing properties about regular languages. However, paper proofs about automata often involve subtle side-conditions which are easily overlooked, but which make formal reasoning rather painful. For example Kozen's proof of the Myhill-Nerode Theorem requires that automata do not have inaccessible states \cite{Kozen97}. Another subtle side-condition is completeness of automata, that is automata need to have total transition functions and at most one `sink' state from which there is no connection to a final state (Brzozowski mentions this side-condition in the context of state complexity of automata \cite{Brzozowski10}). Such side-conditions mean that if we define a regular language as one for which there exists \emph{a} finite automaton that recognises all its strings (see Definition~\ref{baddef}), then we need a lemma which ensures that another equivalent one can be found satisfying the side-condition, and also need to make sure our operations on automata preserve them. Unfortunately, such `little' and `obvious' lemmas make formalisations of automata theory hair-pulling experiences. In this paper, we will not attempt to formalise automata theory in Isabelle/HOL nor will we attempt to formalise automata proofs from the literature, but take a different approach to regular languages than is usually taken. Instead of defining a regular language as one where there exists an automaton that recognises all its strings, we define a regular language as: \begin{dfntn}\label{regular} A language @{text A} is \emph{regular}, provided there is a regular expression that matches all strings of @{text "A"}. \end{dfntn} \noindent And then `forget' automata. The reason is that regular expressions, unlike graphs, matrices and functions, can be easily defined as an inductive datatype. A reasoning infrastructure (like induction and recursion) comes for free in HOL. Moreover, no side-conditions will be needed for regular expressions, like we need for automata. This convenience of regular expressions has recently been exploited in HOL4 with a formalisation of regular expression matching based on derivatives \cite{OwensSlind08} and with an equivalence checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}. The main purpose of this paper is to show that a central result about regular languages---the Myhill-Nerode Theorem---can be recreated by only using regular expressions. This theorem gives necessary and sufficient conditions for when a language is regular. As a corollary of this theorem we can easily establish the usual closure properties, including complementation, for regular languages. We use the Continuation Lemma \cite{Rosenberg06}, which is also a corollary of the Myhill-Nerode Theorem, for establishing the non-regularity of the language @{text "a\<^isup>nb\<^isup>n"}.\medskip \noindent {\bf Contributions:} There is an extensive literature on regular languages. To our best knowledge, our proof of the Myhill-Nerode Theorem is the first that is based on regular expressions, only. The part of this theorem stating that finitely many partitions imply regularity of the language is proved by an argument about solving equational systems. This argument appears to be folklore. For the other part, we give two proofs: one direct proof using certain tagging-functions, and another indirect proof using Antimirov's partial derivatives \cite{Antimirov95}. Again to our best knowledge, the tagging-functions have not been used before for establishing the Myhill-Nerode Theorem. Derivatives of regular expressions have been used recently quite widely in the literature; partial derivatives, in contrast, attract much less attention. However, partial derivatives are more suitable in the context of the Myhill-Nerode Theorem, since it is easier to establish formally their finiteness result. We are not aware of any proof that uses either of them for proving the Myhill-Nerode Theorem.*}section {* Preliminaries *}text {* \noindent Strings in Isabelle/HOL are lists of characters with the \emph{empty string} being represented by the empty list, written @{term "[]"}. We assume there are only finitely many different characters. \emph{Languages} are sets of strings. The language containing all strings is written in Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages is written @{term "A \<cdot> B"} and a language raised to the power @{text n} is written @{term "A \<up> n"}. They are defined as usual \begin{center} \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{thm (lhs) conc_def'[THEN eq_reflection, where A1="A" and B1="B"]} & @{text "\<equiv>"} & @{thm (rhs) conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}\\ @{thm (lhs) lang_pow.simps(1)[THEN eq_reflection, where A1="A"]} & @{text "\<equiv>"} & @{thm (rhs) lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}\\ @{thm (lhs) lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} & @{text "\<equiv>"} & @{thm (rhs) lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} \end{tabular} \end{center} \noindent where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A} is defined as the union over all powers, namely @{thm star_def}. In the paper we will make use of the following properties of these constructions. \begin{prpstn}\label{langprops}\mbox{}\\ \begin{tabular}{@ {}lp{10cm}} (i) & @{thm star_unfold_left} \\ (ii) & @{thm[mode=IfThen] pow_length}\\ (iii) & @{thm conc_Union_left} \\ (iv) & If @{thm (prem 1) star_decom} and @{thm (prem 2) star_decom} then there exists an @{text "x\<^isub>p"} and @{text "x\<^isub>s"} with @{text "x = x\<^isub>p @ x\<^isub>s"} and @{term "x\<^isub>p \<noteq> []"} such that @{term "x\<^isub>p \<in> A"} and @{term "x\<^isub>s \<in> A\<star>"}. \end{tabular} \end{prpstn} \noindent In @{text "(ii)"} we use the notation @{term "length s"} for the length of a string; this property states that if \mbox{@{term "[] \<notin> A"}} then the lengths of the strings in @{term "A \<up> (Suc n)"} must be longer than @{text n}. Property @{text "(iv)"} states that a non-empty string in @{term "A\<star>"} can always be split up into a non-empty prefix belonging to @{text "A"} and the rest being in @{term "A\<star>"}. We omit the proofs for these properties, but invite the reader to consult our formalisation.\footnote{Available in the Archive of Formal Proofs at \url{http://afp.sourceforge.net/devel-entries/Myhill-Nerode.shtml} \cite{myhillnerodeafp11}.} The notation in Isabelle/HOL for the quotient of a language @{text A} according to an equivalence relation @{term REL} is @{term "A // REL"}. We will write @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined as \mbox{@{text "{y | y \<approx> x}"}}, and have @{text "x \<approx> y"} if and only if @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx> = \<lbrakk>y\<rbrakk>\<^isub>\<approx>"}. Central to our proof will be the solution of equational systems involving equivalence classes of languages. For this we will use Arden's Lemma (see for example \cite[Page 100]{Sakarovitch09}), which solves equations of the form @{term "X = A \<cdot> X \<union> B"} provided @{term "[] \<notin> A"}. However we will need the following `reversed' version of Arden's Lemma (`reversed' in the sense of changing the order of @{term "A \<cdot> X"} to \mbox{@{term "X \<cdot> A"}}). \begin{lmm}[Reversed Arden's Lemma]\label{arden}\mbox{}\\ If @{thm (prem 1) reversed_Arden} then @{thm (lhs) reversed_Arden} if and only if @{thm (rhs) reversed_Arden}. \end{lmm} \begin{proof} For the right-to-left direction we assume @{thm (rhs) reversed_Arden} and show that @{thm (lhs) reversed_Arden} holds. From Property~\ref{langprops}@{text "(i)"} we have @{term "A\<star> = A \<cdot> A\<star> \<union> {[]}"}, which is equal to @{term "A\<star> = A\<star> \<cdot> A \<union> {[]}"}. Adding @{text B} to both sides gives @{term "B \<cdot> A\<star> = B \<cdot> (A\<star> \<cdot> A \<union> {[]})"}, whose right-hand side is equal to @{term "(B \<cdot> A\<star>) \<cdot> A \<union> B"}. Applying the assumed equation completes this direction. For the other direction we assume @{thm (lhs) reversed_Arden}. By a simple induction on @{text n}, we can establish the property \begin{center} @{text "(*)"}\hspace{5mm} @{thm (concl) reversed_arden_helper} \end{center} \noindent Using this property we can show that @{term "B \<cdot> (A \<up> n) \<subseteq> X"} holds for all @{text n}. From this we can infer @{term "B \<cdot> A\<star> \<subseteq> X"} using the definition of @{text "\<star>"}. For the inclusion in the other direction we assume a string @{text s} with length @{text k} is an element in @{text X}. Since @{thm (prem 1) reversed_Arden} we know by Property~\ref{langprops}@{text "(ii)"} that @{term "s \<notin> X \<cdot> (A \<up> Suc k)"} since its length is only @{text k} (the strings in @{term "X \<cdot> (A \<up> Suc k)"} are all longer). From @{text "(*)"} it follows then that @{term s} must be an element in @{term "(\<Union>m\<le>k. B \<cdot> (A \<up> m))"}. This in turn implies that @{term s} is in @{term "(\<Union>n. B \<cdot> (A \<up> n))"}. Using Property~\ref{langprops}@{text "(iii)"} this is equal to @{term "B \<cdot> A\<star>"}, as we needed to show. \end{proof} \noindent Regular expressions are defined as the inductive datatype \begin{center} \begin{tabular}{rcl} @{text r} & @{text "::="} & @{term ZERO}\\ & @{text"|"} & @{term One}\\ & @{text"|"} & @{term "Atom c"}\\ & @{text"|"} & @{term "Times r r"}\\ & @{text"|"} & @{term "Plus r r"}\\ & @{text"|"} & @{term "Star r"} \end{tabular} \end{center} \noindent and the language matched by a regular expression is defined as \begin{center} \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} @{thm (lhs) lang.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(1)}\\ @{thm (lhs) lang.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(2)}\\ @{thm (lhs) lang.simps(3)[where a="c"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(3)[where a="c"]}\\ @{thm (lhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\ @{thm (lhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\ @{thm (lhs) lang.simps(6)[where r="r"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(6)[where r="r"]}\\ \end{tabular} \end{center} Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating a regular expression that matches the union of all languages of @{text rs}. We only need to know the existence of such a regular expression and therefore we use Isabelle/HOL's @{const "fold_graph"} and Hilbert's @{text "\<epsilon>"} to define @{term "\<Uplus>rs"}. This operation, roughly speaking, folds @{const PLUS} over the set @{text rs} with @{const ZERO} for the empty set. We can prove that for a finite set @{text rs} % \begin{equation}\label{uplus} \mbox{@{thm (lhs) folds_plus_simp} @{text "= \<Union> (\<calL> ` rs)"}} \end{equation} \noindent holds, whereby @{text "\<calL> ` rs"} stands for the image of the set @{text rs} under function @{text "\<calL>"} defined as \begin{center} @{term "lang ` rs \<equiv> {lang r | r. r \<in> rs}"} \end{center} \noindent In what follows we shall use this convenient short-hand notation for images of sets also with other functions.*}section {* The Myhill-Nerode Theorem, First Part *}text {* \noindent The key definition in the Myhill-Nerode Theorem is the \emph{Myhill-Nerode Relation}, which states that w.r.t.~a language two strings are related, provided there is no distinguishing extension in this language. This can be defined as a tertiary relation. \begin{dfntn}[Myhill-Nerode Relation]\label{myhillneroderel} Given a language @{text A}, two strings @{text x} and @{text y} are Myhill-Nerode related provided \begin{center} @{thm str_eq_def'} \end{center} \end{dfntn} \noindent It is easy to see that @{term "\<approx>A"} is an equivalence relation, which partitions the set of all strings, @{text "UNIV"}, into a set of disjoint equivalence classes. To illustrate this quotient construction, let us give a simple example: consider the regular language containing just the string @{text "[c]"}. The relation @{term "\<approx>({[c]})"} partitions @{text UNIV} into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and @{text "X\<^isub>3"} as follows \begin{center} \begin{tabular}{l} @{text "X\<^isub>1 = {[]}"}\\ @{text "X\<^isub>2 = {[c]}"}\\ @{text "X\<^isub>3 = UNIV - {[], [c]}"} \end{tabular} \end{center} One direction of the Myhill-Nerode Theorem establishes that if there are finitely many equivalence classes, like in the example above, then the language is regular. In our setting we therefore have to show: \begin{thrm}\label{myhillnerodeone} @{thm[mode=IfThen] Myhill_Nerode1} \end{thrm} \noindent To prove this theorem, we first define the set @{term "finals A"} as those equivalence classes from @{term "UNIV // \<approx>A"} that contain strings of @{text A}, namely % \begin{equation} @{thm finals_def} \end{equation} \noindent In our running example, @{text "X\<^isub>2"} is the only equivalence class in @{term "finals {[c]}"}. It is straightforward to show that in general \begin{equation}\label{finalprops} @{thm lang_is_union_of_finals}\hspace{15mm} @{thm finals_in_partitions} \end{equation} \noindent hold. Therefore if we know that there exists a regular expression for every equivalence class in \mbox{@{term "finals A"}} (which by assumption must be a finite set), then we can use @{text "\<bigplus>"} to obtain a regular expression that matches every string in @{text A}. Our proof of Theorem~\ref{myhillnerodeone} relies on a method that can calculate a regular expression for \emph{every} equivalence class, not just the ones in @{term "finals A"}. We first define the notion of \emph{one-character-transition} between two equivalence classes % \begin{equation} @{thm transition_def} \end{equation} \noindent which means that if we concatenate the character @{text c} to the end of all strings in the equivalence class @{text Y}, we obtain a subset of @{text X}. Note that we do not define an automaton here, we merely relate two sets (with the help of a character). In our concrete example we have @{term "X\<^isub>1 \<Turnstile>c\<Rightarrow> X\<^isub>2"}, @{term "X\<^isub>1 \<Turnstile>d\<^isub>i\<Rightarrow> X\<^isub>3"} with @{text "d\<^isub>i"} being any other character than @{text c}, and @{term "X\<^isub>3 \<Turnstile>c\<^isub>j\<Rightarrow> X\<^isub>3"} for any character @{text "c\<^isub>j"}. Next we construct an \emph{initial equational system} that contains an equation for each equivalence class. We first give an informal description of this construction. Suppose we have the equivalence classes @{text "X\<^isub>1,\<dots>,X\<^isub>n"}, there must be one and only one that contains the empty string @{text "[]"} (since equivalence classes are disjoint). Let us assume @{text "[] \<in> X\<^isub>1"}. We build the following equational system \begin{center} \begin{tabular}{rcl} @{text "X\<^isub>1"} & @{text "="} & @{text "(Y\<^isub>1\<^isub>1, ATOM c\<^isub>1\<^isub>1) + \<dots> + (Y\<^isub>1\<^isub>p, ATOM c\<^isub>1\<^isub>p) + \<lambda>(ONE)"} \\ @{text "X\<^isub>2"} & @{text "="} & @{text "(Y\<^isub>2\<^isub>1, ATOM c\<^isub>2\<^isub>1) + \<dots> + (Y\<^isub>2\<^isub>o, ATOM c\<^isub>2\<^isub>o)"} \\ & $\vdots$ \\ @{text "X\<^isub>n"} & @{text "="} & @{text "(Y\<^isub>n\<^isub>1, ATOM c\<^isub>n\<^isub>1) + \<dots> + (Y\<^isub>n\<^isub>q, ATOM c\<^isub>n\<^isub>q)"}\\ \end{tabular} \end{center} \noindent where the terms @{text "(Y\<^isub>i\<^isub>j, ATOM c\<^isub>i\<^isub>j)"} stand for all transitions @{term "Y\<^isub>i\<^isub>j \<Turnstile>c\<^isub>i\<^isub>j\<Rightarrow> X\<^isub>i"}. %The intuition behind the equational system is that every %equation @{text "X\<^isub>i = rhs\<^isub>i"} in this system %corresponds roughly to a state of an automaton whose name is @{text X\<^isub>i} and its predecessor states %are the @{text "Y\<^isub>i\<^isub>j"}; the @{text "c\<^isub>i\<^isub>j"} are the labels of the transitions from these %predecessor states to @{text X\<^isub>i}. There can only be finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, ATOM c\<^isub>i\<^isub>j)"} in a right-hand side since by assumption there are only finitely many equivalence classes and only finitely many characters. The term @{text "\<lambda>(ONE)"} in the first equation acts as a marker for the initial state, that is the equivalence class containing the empty string @{text "[]"}.\footnote{Note that we mark, roughly speaking, the single `initial' state in the equational system, which is different from the method by Brzozowski \cite{Brzozowski64}, where he marks the `terminal' states. We are forced to set up the equational system in our way, because the Myhill-Nerode Relation determines the `direction' of the transitions---the successor `state' of an equivalence class @{text Y} can be reached by adding a character to the end of @{text Y}. This is also the reason why we have to use our reversed version of Arden's Lemma.} In our running example we have the initial equational system \begin{equation}\label{exmpcs} \mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{term "X\<^isub>1"} & @{text "="} & @{text "\<lambda>(ONE)"}\\ @{term "X\<^isub>2"} & @{text "="} & @{text "(X\<^isub>1, ATOM c)"}\\ @{term "X\<^isub>3"} & @{text "="} & @{text "(X\<^isub>1, ATOM d\<^isub>1) + \<dots> + (X\<^isub>1, ATOM d\<^isub>n)"}\\ & & \mbox{}\hspace{10mm}@{text "+ (X\<^isub>3, ATOM c\<^isub>1) + \<dots> + (X\<^isub>3, ATOM c\<^isub>m)"} \end{tabular}} \end{equation} \noindent where @{text "d\<^isub>1\<dots>d\<^isub>n"} is the sequence of all characters but not containing @{text c}, and @{text "c\<^isub>1\<dots>c\<^isub>m"} is the sequence of all characters. Overloading the function @{text \<calL>} for the two kinds of terms in the equational system, we have \begin{center} @{text "\<calL>(Y, r) \<equiv>"} % @{thm (rhs) lang_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} @{thm lang_trm.simps(1)[where r="r", THEN eq_reflection]} \end{center} \noindent and we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations % \begin{equation}\label{inv1} @{text "X\<^isub>i = \<calL>(Y\<^isub>i\<^isub>1, ATOM c\<^isub>i\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>i\<^isub>q, ATOM c\<^isub>i\<^isub>q)"}. \end{equation} \noindent hold. Similarly for @{text "X\<^isub>1"} we can show the following equation % \begin{equation}\label{inv2} @{text "X\<^isub>1 = \<calL>(Y\<^isub>1\<^isub>1, ATOM c\<^isub>1\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>1\<^isub>p, ATOM c\<^isub>1\<^isub>p) \<union> \<calL>(\<lambda>(ONE))"}. \end{equation} \noindent holds. The reason for adding the @{text \<lambda>}-marker to our initial equational system is to obtain this equation: it only holds with the marker, since none of the other terms contain the empty string. The point of the initial equational system is that solving it means we will be able to extract a regular expression for every equivalence class. Our representation for the equations in Isabelle/HOL are pairs, where the first component is an equivalence class (a set of strings) and the second component is a set of terms. Given a set of equivalence classes @{text CS}, our initial equational system @{term "Init CS"} is thus formally defined as % \begin{equation}\label{initcs} \mbox{\begin{tabular}{rcl} @{thm (lhs) Init_rhs_def} & @{text "\<equiv>"} & @{text "if"}~@{term "[] \<in> X"}\\ & & @{text "then"}~@{term "{Trn Y (ATOM c) | Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X} \<union> {Lam ONE}"}\\ & & @{text "else"}~@{term "{Trn Y (ATOM c)| Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}"}\\ @{thm (lhs) Init_def} & @{text "\<equiv>"} & @{thm (rhs) Init_def} \end{tabular}} \end{equation} \noindent Because we use sets of terms for representing the right-hand sides of equations, we can prove \eqref{inv1} and \eqref{inv2} more concisely as % \begin{lmm}\label{inv} If @{thm (prem 1) test} then @{text "X = \<Union> \<calL> ` rhs"}. \end{lmm} \noindent Our proof of Theorem~\ref{myhillnerodeone} will proceed by transforming the initial equational system into one in \emph{solved form} maintaining the invariant in Lemma~\ref{inv}. From the solved form we will be able to read off the regular expressions. In order to transform an equational system into solved form, we have two operations: one that takes an equation of the form @{text "X = rhs"} and removes any recursive occurrences of @{text X} in the @{text rhs} using our variant of Arden's Lemma. The other operation takes an equation @{text "X = rhs"} and substitutes @{text X} throughout the rest of the equational system adjusting the remaining regular expressions appropriately. To define this adjustment we define the \emph{append-operation} taking a term and a regular expression as argument \begin{center} \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} @{thm (lhs) Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} & @{text "\<equiv>"} & @{thm (rhs) Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\\ @{thm (lhs) Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} & @{text "\<equiv>"} & @{thm (rhs) Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} \end{tabular} \end{center} \noindent We lift this operation to entire right-hand sides of equations, written as @{thm (lhs) Append_rexp_rhs_def[where rexp="r"]}. With this we can define the \emph{arden-operation} for an equation of the form @{text "X = rhs"} as: % \begin{equation}\label{arden_def} \mbox{\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} @{thm (lhs) Arden_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \<in> rhs}"} \\ & & @{text "r' ="} & @{term "Star (\<Uplus> {r. Trn X r \<in> rhs})"}\\ & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "Append_rexp_rhs rhs' r'"}}\\ \end{tabular}} \end{equation} \noindent In this definition, we first delete all terms of the form @{text "(X, r)"} from @{text rhs}; then we calculate the combined regular expressions for all @{text r} coming from the deleted @{text "(X, r)"}, and take the @{const Star} of it; finally we append this regular expression to @{text rhs'}. If we apply this operation to the right-hand side of @{text "X\<^isub>3"} in \eqref{exmpcs}, we obtain the equation: \begin{center} \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{term "X\<^isub>3"} & @{text "="} & @{text "(X\<^isub>1, TIMES (ATOM d\<^isub>1) (STAR \<^raw:\ensuremath{\bigplus}>{ATOM c\<^isub>1,\<dots>, ATOM c\<^isub>m})) + \<dots> "}\\ & & \mbox{}\hspace{13mm} @{text "\<dots> + (X\<^isub>1, TIMES (ATOM d\<^isub>n) (STAR \<^raw:\ensuremath{\bigplus}>{ATOM c\<^isub>1,\<dots>, ATOM c\<^isub>m}))"} \end{tabular} \end{center} \noindent That means we eliminated the recursive occurrence of @{text "X\<^isub>3"} on the right-hand side. Note we used the abbreviation @{text "\<^raw:\ensuremath{\bigplus}>{ATOM c\<^isub>1,\<dots>, ATOM c\<^isub>m}"} to stand for a regular expression that matches with every character. In our algorithm we are only interested in the existence of such a regular expression and do not specify it any further. It can be easily seen that the @{text "Arden"}-operation mimics Arden's Lemma on the level of equations. To ensure the non-emptiness condition of Arden's Lemma we say that a right-hand side is @{text ardenable} provided \begin{center} @{thm ardenable_def} \end{center} \noindent This allows us to prove a version of Arden's Lemma on the level of equations. \begin{lmm}\label{ardenable} Given an equation @{text "X = rhs"}. If @{text "X = \<Union>\<calL> ` rhs"}, @{thm (prem 2) Arden_preserves_soundness}, and @{thm (prem 3) Arden_preserves_soundness}, then @{text "X = \<Union>\<calL> ` (Arden X rhs)"}. \end{lmm} \noindent Our @{text ardenable} condition is slightly stronger than needed for applying Arden's Lemma, but we can still ensure that it holds throughout our algorithm of transforming equations into solved form. The \emph{substitution-operation} takes an equation of the form @{text "X = xrhs"} and substitutes it into the right-hand side @{text rhs}. \begin{center} \begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} @{thm (lhs) Subst_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \<in> rhs}"} \\ & & @{text "r' ="} & @{term "\<Uplus> {r. Trn X r \<in> rhs}"}\\ & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \<union> Append_rexp_rhs xrhs r'"}}\\ \end{tabular} \end{center} \noindent We again delete first all occurrences of @{text "(X, r)"} in @{text rhs}; we then calculate the regular expression corresponding to the deleted terms; finally we append this regular expression to @{text "xrhs"} and union it up with @{text rhs'}. When we use the substitution operation we will arrange it so that @{text "xrhs"} does not contain any occurrence of @{text X}. For example substituting the first equation in \eqref{exmpcs} into the right-hand side of the second, thus eliminating the equivalence class @{text "X\<^isub>1"}, gives us the equation \begin{equation}\label{exmpresult} \mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{term "X\<^isub>2"} & @{text "="} & @{text "\<lambda>(TIMES ONE (ATOM c))"}\\ \end{tabular}} \end{equation} With these two operations in place, we can define the operation that removes one equation from an equational systems @{text ES}. The operation @{const Subst_all} substitutes an equation @{text "X = xrhs"} throughout an equational system @{text ES}; @{const Remove} then completely removes such an equation from @{text ES} by substituting it to the rest of the equational system, but first eliminating all recursive occurrences of @{text X} by applying @{const Arden} to @{text "xrhs"}. \begin{center} \begin{tabular}{rcl} @{thm (lhs) Subst_all_def} & @{text "\<equiv>"} & @{thm (rhs) Subst_all_def}\\ @{thm (lhs) Remove_def} & @{text "\<equiv>"} & @{thm (rhs) Remove_def} \end{tabular} \end{center} \noindent Finally, we can define how an equational system should be solved. For this we will need to iterate the process of eliminating equations until only one equation will be left in the system. However, we do not just want to have any equation as being the last one, but the one involving the equivalence class for which we want to calculate the regular expression. Let us suppose this equivalence class is @{text X}. Since @{text X} is the one to be solved, in every iteration step we have to pick an equation to be eliminated that is different from @{text X}. In this way @{text X} is kept to the final step. The choice is implemented using Hilbert's choice operator, written @{text SOME} in the definition below. \begin{center} \begin{tabular}{rc@ {\hspace{4mm}}r@ {\hspace{1mm}}l} @{thm (lhs) Iter_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "let"}}\\ & & @{text "(Y, yrhs) ="} & @{term "SOME (Y, yrhs). (Y, yrhs) \<in> ES \<and> X \<noteq> Y"} \\ & & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "in"}~~@{term "Remove ES Y yrhs"}}\\ \end{tabular} \end{center} \noindent The last definition we need applies @{term Iter} over and over until a condition @{text Cond} is \emph{not} satisfied anymore. This condition states that there are more than one equation left in the equational system @{text ES}. To solve an equational system we use Isabelle/HOL's @{text while}-operator as follows: \begin{center} @{thm Solve_def} \end{center} \noindent We are not concerned here with the definition of this operator (see Berghofer and Nipkow \cite{BerghoferNipkow00} for example), but note that we eliminate in each @{const Iter}-step a single equation, and therefore have a well-founded termination order by taking the cardinality of the equational system @{text ES}. This enables us to prove properties about our definition of @{const Solve} when we `call' it with the equivalence class @{text X} and the initial equational system @{term "Init (UNIV // \<approx>A)"} from \eqref{initcs} using the principle: \begin{equation}\label{whileprinciple} \mbox{\begin{tabular}{l} @{term "invariant (Init (UNIV // \<approx>A))"} \\ @{term "\<forall>ES. invariant ES \<and> Cond ES \<longrightarrow> invariant (Iter X ES)"}\\ @{term "\<forall>ES. invariant ES \<and> Cond ES \<longrightarrow> card (Iter X ES) < card ES"}\\ @{term "\<forall>ES. invariant ES \<and> \<not> Cond ES \<longrightarrow> P ES"}\\ \hline \multicolumn{1}{c}{@{term "P (Solve X (Init (UNIV // \<approx>A)))"}} \end{tabular}} \end{equation} \noindent This principle states that given an invariant (which we will specify below) we can prove a property @{text "P"} involving @{const Solve}. For this we have to discharge the following proof obligations: first the initial equational system satisfies the invariant; second the iteration step @{text "Iter"} preserves the invariant as long as the condition @{term Cond} holds; third @{text "Iter"} decreases the termination order, and fourth that once the condition does not hold anymore then the property @{text P} must hold. The property @{term P} in our proof will state that @{term "Solve X (Init (UNIV // \<approx>A))"} returns with a single equation @{text "X = xrhs"} for some @{text "xrhs"}, and that this equational system still satisfies the invariant. In order to get the proof through, the invariant is composed of the following six properties: \begin{center} \begin{tabular}{@ {}rcl@ {\hspace{-13mm}}l @ {}} @{text "invariant ES"} & @{text "\<equiv>"} & @{term "finite ES"} & @{text "(finiteness)"}\\ & @{text "\<and>"} & @{thm (rhs) finite_rhs_def} & @{text "(finiteness rhs)"}\\ & @{text "\<and>"} & @{text "\<forall>(X, rhs)\<in>ES. X = \<Union>\<calL> ` rhs"} & @{text "(soundness)"}\\ & @{text "\<and>"} & @{thm (rhs) distinctness_def}\\ & & & @{text "(distinctness)"}\\ & @{text "\<and>"} & @{thm (rhs) ardenable_all_def} & @{text "(ardenable)"}\\ & @{text "\<and>"} & @{thm (rhs) validity_def} & @{text "(validity)"}\\ \end{tabular} \end{center} \noindent The first two ensure that the equational system is always finite (number of equations and number of terms in each equation); the third makes sure the `meaning' of the equations is preserved under our transformations. The other properties are a bit more technical, but are needed to get our proof through. Distinctness states that every equation in the system is distinct. @{text Ardenable} ensures that we can always apply the @{text Arden} operation. The last property states that every @{text rhs} can only contain equivalence classes for which there is an equation. Therefore @{text lhss} is just the set containing the first components of an equational system, while @{text "rhss"} collects all equivalence classes @{text X} in the terms of the form @{term "Trn X r"}. That means formally @{thm (lhs) lhss_def}~@{text "\<equiv> {X | (X, rhs) \<in> ES}"} and @{thm (lhs) rhss_def}~@{text "\<equiv> {X | (X, r) \<in> rhs}"}. It is straightforward to prove that the initial equational system satisfies the invariant. \begin{lmm}\label{invzero} @{thm[mode=IfThen] Init_ES_satisfies_invariant} \end{lmm} \begin{proof} Finiteness is given by the assumption and the way how we set up the initial equational system. Soundness is proved in Lemma~\ref{inv}. Distinctness follows from the fact that the equivalence classes are disjoint. The @{text ardenable} property also follows from the setup of the initial equational system, as does validity. \end{proof} \noindent Next we show that @{text Iter} preserves the invariant. \begin{lmm}\label{iterone} @{thm[mode=IfThen] iteration_step_invariant[where xrhs="rhs"]} \end{lmm} \begin{proof} The argument boils down to choosing an equation @{text "Y = yrhs"} to be eliminated and to show that @{term "Subst_all (ES - {(Y, yrhs)}) Y (Arden Y yrhs)"} preserves the invariant. We prove this as follows: \begin{center} \begin{tabular}{@ {}l@ {}} @{text "\<forall> ES."}\\ \mbox{}\hspace{5mm}@{thm (prem 1) Subst_all_satisfies_invariant} implies @{thm (concl) Subst_all_satisfies_invariant} \end{tabular} \end{center} \noindent Finiteness is straightforward, as the @{const Subst} and @{const Arden} operations keep the equational system finite. These operations also preserve soundness and distinctness (we proved soundness for @{const Arden} in Lemma~\ref{ardenable}). The property @{text ardenable} is clearly preserved because the append-operation cannot make a regular expression to match the empty string. Validity is given because @{const Arden} removes an equivalence class from @{text yrhs} and then @{const Subst_all} removes @{text Y} from the equational system. Having proved the implication above, we can instantiate @{text "ES"} with @{text "ES - {(Y, yrhs)}"} which matches with our proof-obligation of @{const "Subst_all"}. Since \mbox{@{term "ES = ES - {(Y, yrhs)} \<union> {(Y, yrhs)}"}}, we can use the assumption to complete the proof. \end{proof} \noindent We also need the fact that @{text Iter} decreases the termination measure. \begin{lmm}\label{itertwo} @{thm[mode=IfThen] iteration_step_measure[simplified (no_asm), where xrhs="rhs"]} \end{lmm} \begin{proof} By assumption we know that @{text "ES"} is finite and has more than one element. Therefore there must be an element @{term "(Y, yrhs) \<in> ES"} with @{term "(Y, yrhs) \<noteq> (X, rhs)"}. Using the distinctness property we can infer that @{term "Y \<noteq> X"}. We further know that @{text "Remove ES Y yrhs"} removes the equation @{text "Y = yrhs"} from the system, and therefore the cardinality of @{const Iter} strictly decreases. \end{proof} \noindent This brings us to our property we want to establish for @{text Solve}. \begin{lmm} If @{thm (prem 1) Solve} and @{thm (prem 2) Solve} then there exists a @{text rhs} such that @{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"}. \end{lmm} \begin{proof} In order to prove this lemma using \eqref{whileprinciple}, we have to use a slightly stronger invariant since Lemma~\ref{iterone} and \ref{itertwo} have the precondition that @{term "(X, rhs) \<in> ES"} for some @{text rhs}. This precondition is needed in order to choose in the @{const Iter}-step an equation that is not \mbox{@{term "X = rhs"}}. Therefore our invariant cannot be just @{term "invariant ES"}, but must be @{term "invariant ES \<and> (\<exists>rhs. (X, rhs) \<in> ES)"}. By assumption @{thm (prem 2) Solve} and Lemma~\ref{invzero}, the more general invariant holds for the initial equational system. This is premise 1 of~\eqref{whileprinciple}. Premise 2 is given by Lemma~\ref{iterone} and the fact that @{const Iter} might modify the @{text rhs} in the equation @{term "X = rhs"}, but does not remove it. Premise 3 of~\eqref{whileprinciple} is by Lemma~\ref{itertwo}. Now in premise 4 we like to show that there exists a @{text rhs} such that @{term "ES = {(X, rhs)}"} and that @{text "invariant {(X, rhs)}"} holds, provided the condition @{text "Cond"} does not holds. By the stronger invariant we know there exists such a @{text "rhs"} with @{term "(X, rhs) \<in> ES"}. Because @{text Cond} is not true, we know the cardinality of @{text ES} is @{text 1}. This means @{text "ES"} must actually be the set @{text "{(X, rhs)}"}, for which the invariant holds. This allows us to conclude that @{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"} hold, as needed. \end{proof} \noindent With this lemma in place we can show that for every equivalence class in @{term "UNIV // \<approx>A"} there exists a regular expression. \begin{lmm}\label{every_eqcl_has_reg} @{thm[mode=IfThen] every_eqcl_has_reg} \end{lmm} \begin{proof} By the preceding lemma, we know that there exists a @{text "rhs"} such that @{term "Solve X (Init (UNIV // \<approx>A))"} returns the equation @{text "X = rhs"}, and that the invariant holds for this equation. That means we know @{text "X = \<Union>\<calL> ` rhs"}. We further know that this is equal to \mbox{@{text "\<Union>\<calL> ` (Arden X rhs)"}} using the properties of the invariant and Lemma~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"}, we can infer that @{term "rhss rhs \<subseteq> {X}"} and because the @{text Arden} operation removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}. This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}. So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = lang (\<Uplus>rs)"}. With this we can conclude the proof. \end{proof} \noindent Lemma~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction of the Myhill-Nerode Theorem. \begin{proof}[Proof of Theorem~\ref{myhillnerodeone}] By Lemma~\ref{every_eqcl_has_reg} we know that there exists a regular expression for every equivalence class in @{term "UNIV // \<approx>A"}. Since @{text "finals A"} is a subset of @{term "UNIV // \<approx>A"}, we also know that for every equivalence class in @{term "finals A"} there exists a regular expression. Moreover by assumption we know that @{term "finals A"} must be finite, and therefore there must be a finite set of regular expressions @{text "rs"} such that @{term "\<Union>(finals A) = lang (\<Uplus>rs)"}. Since the left-hand side is equal to @{text A}, we can use @{term "\<Uplus>rs"} as the regular expression that is needed in the theorem. \end{proof} \noindent Note that our algorithm for solving equational systems provides also a method for calculating a regular expression for the complement of a regular language: if we combine all regular expressions corresponding to equivalence classes not in @{term "finals A"}, then we obtain a regular expression for the complement language @{term "- A"}. This is similar to the usual construction of a `complement automaton'.*}section {* Myhill-Nerode, Second Part *}text {* \noindent In this section we will give a proof for establishing the second part of the Myhill-Nerode Theorem. It can be formulated in our setting as follows: \begin{thrm}\label{myhillnerodetwo} Given @{text "r"} is a regular expression, then @{thm Myhill_Nerode2}. \end{thrm} \noindent The proof will be by induction on the structure of @{text r}. It turns out the base cases are straightforward. \begin{proof}[Base Cases] The cases for @{const ZERO}, @{const ONE} and @{const ATOM} are routine, because we can easily establish that \begin{center} \begin{tabular}{l} @{thm quot_zero_eq}\\ @{thm quot_one_subset}\\ @{thm quot_atom_subset} \end{tabular} \end{center} \noindent hold, which shows that @{term "UNIV // \<approx>(lang r)"} must be finite. \end{proof} \noindent Much more interesting, however, are the inductive cases. They seem hard to be solved directly. The reader is invited to try. In order to see how our proof proceeds consider the following suggestive picture taken from Constable et al \cite{Constable00}: \begin{equation}\label{pics} \mbox{\begin{tabular}{c@ {\hspace{10mm}}c@ {\hspace{10mm}}c} \begin{tikzpicture}[scale=1] %Circle \draw[thick] (0,0) circle (1.1); \end{tikzpicture} & \begin{tikzpicture}[scale=1] %Circle \draw[thick] (0,0) circle (1.1); %Main rays \foreach \a in {0, 90,...,359} \draw[very thick] (0, 0) -- (\a:1.1); \foreach \a / \l in {45/1, 135/2, 225/3, 315/4} \draw (\a: 0.65) node {$a_\l$}; \end{tikzpicture} & \begin{tikzpicture}[scale=1] %Circle \draw[thick] (0,0) circle (1.1); %Main rays \foreach \a in {0, 45,...,359} \draw[very thick] (0, 0) -- (\a:1.1); \foreach \a / \l in {22.5/1.1, 67.5/1.2, 112.5/2.1, 157.5/2.2, 202.4/3.1, 247.5/3.2, 292.5/4.1, 337.5/4.2} \draw (\a: 0.77) node {$a_{\l}$}; \end{tikzpicture}\\ @{term UNIV} & @{term "UNIV // (\<approx>(lang r))"} & @{term "UNIV // R"} \end{tabular}} \end{equation} \noindent The relation @{term "\<approx>(lang r)"} partitions the set of all strings, @{term UNIV}, into some equivalence classes. To show that there are only finitely many of them, it suffices to show in each induction step that another relation, say @{text R}, has finitely many equivalence classes and refines @{term "\<approx>(lang r)"}. \begin{dfntn} A relation @{text "R\<^isub>1"} \emph{refines} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \<subseteq> R\<^isub>2"}. \end{dfntn} \noindent For constructing @{text R}, we will rely on some \emph{tagging-functions} defined over strings. Given the inductive hypothesis, it will be easy to prove that the \emph{range} of these tagging-functions is finite. The range of a function @{text f} is defined as \begin{center} @{text "range f \<equiv> f ` UNIV"} \end{center} \noindent that means we take the image of @{text f} w.r.t.~all elements in the domain. With this we will be able to infer that the tagging-functions, seen as relations, give rise to finitely many equivalence classes. Finally we will show that the tagging-relations are more refined than @{term "\<approx>(lang r)"}, which implies that @{term "UNIV // \<approx>(lang r)"} must also be finite. We formally define the notion of a \emph{tagging-relation} as follows. \begin{dfntn}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x} and @{text y} are \emph{tag-related} provided \begin{center} @{text "x \<^raw:$\threesim$>\<^bsub>tag\<^esub> y \<equiv> tag x = tag y"}\;. \end{center} \end{dfntn} In order to establish finiteness of a set @{text A}, we shall use the following powerful principle from Isabelle/HOL's library. % \begin{equation}\label{finiteimageD} @{thm[mode=IfThen] finite_imageD} \end{equation} \noindent It states that if an image of a set under an injective function @{text f} (injective over this set) is finite, then the set @{text A} itself must be finite. We can use it to establish the following two lemmas. \begin{lmm}\label{finone} @{thm[mode=IfThen] finite_eq_tag_rel} \end{lmm} \begin{proof} We set in \eqref{finiteimageD}, @{text f} to be @{text "X \<mapsto> tag ` X"}. We have @{text "range f"} to be a subset of @{term "Pow (range tag)"}, which we know must be finite by assumption. Now @{term "f ` (UNIV // =tag=)"} is a subset of @{text "range f"}, and so also finite. Injectivity amounts to showing that @{text "X = Y"} under the assumptions that @{text "X, Y \<in> "}~@{term "UNIV // =tag="} and @{text "f X = f Y"}. From the assumptions we obtain \mbox{@{text "x \<in> X"}} and @{text "y \<in> Y"} with @{text "tag x = tag y"}. Since @{text x} and @{text y} are tag-related, this in turn means that the equivalence classes @{text X} and @{text Y} must be equal. Therefore \eqref{finiteimageD} allows us to conclude with @{thm (concl) finite_eq_tag_rel}. \end{proof} \begin{lmm}\label{fintwo} Given two equivalence relations @{text "R\<^isub>1"} and @{text "R\<^isub>2"}, whereby @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}. If @{thm (prem 1) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]} then @{thm (concl) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}. \end{lmm} \begin{proof} We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to be @{text "X \<mapsto>"}~@{term "{R\<^isub>1 `` {x} | x. x \<in> X}"}. It is easy to see that @{term "finite (f ` (UNIV // R\<^isub>2))"} because it is a subset of @{term "Pow (UNIV // R\<^isub>1)"}, which must be finite by assumption. What remains to be shown is that @{text f} is injective on @{term "UNIV // R\<^isub>2"}. This is equivalent to showing that two equivalence classes, say @{text "X"} and @{text Y}, in @{term "UNIV // R\<^isub>2"} are equal, provided @{text "f X = f Y"}. For @{text "X = Y"} to be equal, we have to find two elements @{text "x \<in> X"} and @{text "y \<in> Y"} such that they are @{text R\<^isub>2} related. We know there exists a @{text "x \<in> X"} with \mbox{@{term "X = R\<^isub>2 `` {x}"}}. From the latter fact we can infer that @{term "R\<^isub>1 ``{x} \<in> f X"} and further @{term "R\<^isub>1 ``{x} \<in> f Y"}. This means we can obtain a @{text y} such that @{term "R\<^isub>1 `` {x} = R\<^isub>1 `` {y}"} holds. Consequently @{text x} and @{text y} are @{text "R\<^isub>1"}-related. Since by assumption @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}, they must also be @{text "R\<^isub>2"}-related, as we need to show. \end{proof} \noindent Chaining Lemma~\ref{finone} and \ref{fintwo} together, means in order to show that @{term "UNIV // \<approx>(lang r)"} is finite, we have to construct a tagging-function whose range can be shown to be finite and whose tagging-relation refines @{term "\<approx>(lang r)"}. Let us attempt the @{const PLUS}-case first. We take as tagging-function \begin{center} @{thm tag_Plus_def[where A="A" and B="B", THEN meta_eq_app]} \end{center} \noindent where @{text "A"} and @{text "B"} are some arbitrary languages. The reason for this choice is that we need to establish that @{term "=(tag_Plus A B)="} refines @{term "\<approx>(A \<union> B)"}. This amounts to showing @{term "x \<approx>A y"} or @{term "x \<approx>B y"} under the assumption @{term "x"}~@{term "=(tag_Plus A B)="}~@{term y}. As we shall see, this definition will provide us with just the right assumptions in order to get the proof through. \begin{proof}[@{const "PLUS"}-Case] We can show in general, if @{term "finite (UNIV // \<approx>A)"} and @{term "finite (UNIV // \<approx>B)"} then @{term "finite ((UNIV // \<approx>A) \<times> (UNIV // \<approx>B))"} holds. The range of @{term "tag_Plus A B"} is a subset of this product set---so finite. For the refinement proof-obligation, we know that @{term "(\<approx>A `` {x}, \<approx>B `` {x}) = (\<approx>A `` {y}, \<approx>B `` {y})"} holds by assumption. Then clearly either @{term "x \<approx>A y"} or @{term "x \<approx>B y"}, as we needed to show. Finally we can discharge this case by setting @{text A} to @{term "lang r\<^isub>1"} and @{text B} to @{term "lang r\<^isub>2"}. \end{proof} \noindent The @{const TIMES}-case is slightly more complicated. We first prove the following lemma, which will aid the proof about refinement. \begin{lmm}\label{refinement} The relation @{text "\<^raw:$\threesim$>\<^bsub>tag\<^esub>"} refines @{term "\<approx>A"}, provided for all strings @{text x}, @{text y} and @{text z} we have that \mbox{@{text "x \<^raw:$\threesim$>\<^bsub>tag\<^esub> y"}} and @{term "x @ z \<in> A"} imply @{text "y @ z \<in> A"}. \end{lmm} \noindent We therefore can analyse how the strings @{text "x @ z"} are in the language @{text A} and then construct an appropriate tagging-function to infer that @{term "y @ z"} are also in @{text A}. For this we will use the notion of the set of all possible \emph{partitions} of a string: \begin{equation} @{thm Partitions_def} \end{equation} \noindent If we know that @{text "(x\<^isub>p, x\<^isub>s) \<in> Partitions x"}, we will refer to @{text "x\<^isub>p"} as the \emph{prefix} of the string @{text x}, and respectively to @{text "x\<^isub>s"} as the \emph{suffix}. Now assuming @{term "x @ z \<in> A \<cdot> B"}, there are only two possible ways of how to `split' this string to be in @{term "A \<cdot> B"}: % \begin{center} \begin{tabular}{c} \scalebox{1}{ \begin{tikzpicture}[fill=gray!20] \node[draw,minimum height=3.8ex, fill] (x) { $\hspace{4.8em}@{text x}\hspace{4.8em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of x, fill] (za) { $\hspace{0.6em}@{text "z\<^isub>p"}\hspace{0.6em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of za, fill] (zza) { $\hspace{2.6em}@{text "z\<^isub>s"}\hspace{2.6em}$ }; \draw[decoration={brace,transform={yscale=3}},decorate] (x.north west) -- ($(za.north west)+(0em,0em)$) node[midway, above=0.5em]{@{text x}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(za.north west)+(0em,0ex)$) -- ($(zza.north east)+(0em,0ex)$) node[midway, above=0.5em]{@{text z}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(x.north west)+(0em,3ex)$) -- ($(zza.north east)+(0em,3ex)$) node[midway, above=0.8em]{@{term "x @ z \<in> A \<cdot> B"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(za.south east)+(0em,0ex)$) -- ($(x.south west)+(0em,0ex)$) node[midway, below=0.5em]{@{text "x @ z\<^isub>p \<in> A"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(zza.south east)+(0em,0ex)$) -- ($(za.south east)+(0em,0ex)$) node[midway, below=0.5em]{@{text "z\<^isub>s \<in> B"}}; \end{tikzpicture}} \\[2mm] \scalebox{1}{ \begin{tikzpicture}[fill=gray!20] \node[draw,minimum height=3.8ex, fill] (xa) { $\hspace{3em}@{text "x\<^isub>p"}\hspace{3em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of xa, fill] (xxa) { $\hspace{0.2em}@{text "x\<^isub>s"}\hspace{0.2em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of xxa, fill] (z) { $\hspace{5em}@{text z}\hspace{5em}$ }; \draw[decoration={brace,transform={yscale=3}},decorate] (xa.north west) -- ($(xxa.north east)+(0em,0em)$) node[midway, above=0.5em]{@{text x}}; \draw[decoration={brace,transform={yscale=3}},decorate] (z.north west) -- ($(z.north east)+(0em,0em)$) node[midway, above=0.5em]{@{text z}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(xa.north west)+(0em,3ex)$) -- ($(z.north east)+(0em,3ex)$) node[midway, above=0.8em]{@{term "x @ z \<in> A \<cdot> B"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(z.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) node[midway, below=0.5em]{@{term "x\<^isub>s @ z \<in> B"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$) node[midway, below=0.5em]{@{term "x\<^isub>p \<in> A"}}; \end{tikzpicture}} \end{tabular} \end{center} % \noindent Either @{text x} and a prefix of @{text "z"} is in @{text A} and the rest in @{text B} (first picture) or there is a prefix of @{text x} in @{text A} and the rest is in @{text B} (second picture). In both cases we have to show that @{term "y @ z \<in> A \<cdot> B"}. The first case we will only go through if we know that @{term "x \<approx>A y"} holds @{text "(*)"}. Because then we can infer from @{term "x @ z\<^isub>p \<in> A"} that @{term "y @ z\<^isub>p \<in> A"} holds for all @{text "z\<^isub>p"}. In the second case we only know that @{text "x\<^isub>p"} and @{text "x\<^isub>s"} is one possible partition of the string @{text x}. We have to know that both @{text "x\<^isub>p"} and the corresponding partition @{text "y\<^isub>p"} are in @{text "A"}, and that @{text "x\<^isub>s"} is `@{text B}-related' to @{text "y\<^isub>s"} @{text "(**)"}. From the latter fact we can infer that @{text "y\<^isub>s @ z \<in> B"}. This will solve the second case. Taking the two requirements, @{text "(*)"} and @{text "(**)"}, together we define the tagging-function in the @{const Times}-case as: \begin{center} @{thm (lhs) tag_Times_def[where ?A="A" and ?B="B"]}~@{text "\<equiv>"}~ @{text "(\<lbrakk>x\<rbrakk>\<^bsub>\<approx>A\<^esub>, {\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> | x\<^isub>p \<in> A \<and> (x\<^isub>p, x\<^isub>s) \<in> Partitions x})"} \end{center} \noindent Note that we have to make the assumption for all suffixes @{text "x\<^isub>s"}, since we do not know anything about how the string @{term x} is partitioned. With this definition in place, let us prove the @{const "Times"}-case. \begin{proof}[@{const TIMES}-Case] If @{term "finite (UNIV // \<approx>A)"} and @{term "finite (UNIV // \<approx>B)"} then @{term "finite ((UNIV // \<approx>A) \<times> (Pow (UNIV // \<approx>B)))"} holds. The range of @{term "tag_Times A B"} is a subset of this product set, and therefore finite. For the refinement of @{term "\<approx>(A \<cdot> B)"} and @{text "\<^raw:$\threesim$>\<^bsub>\<times>tag A B\<^esub>"}, we have by Lemma \ref{refinement} \begin{center} @{term "tag_Times A B x = tag_Times A B y"} \end{center} \noindent and @{term "x @ z \<in> A \<cdot> B"}, and have to establish @{term "y @ z \<in> A \<cdot> B"}. As shown in the pictures above, there are two cases to be considered. First, there exists a @{text "z\<^isub>p"} and @{text "z\<^isub>s"} such that @{term "x @ z\<^isub>p \<in> A"} and @{text "z\<^isub>s \<in> B"}. By the assumption about @{term "tag_Times A B"} we have @{term "\<approx>A `` {x} = \<approx>A `` {y}"} and thus @{term "x \<approx>A y"}. Hence by the Myhill-Nerode Relation @{term "y @ z\<^isub>p \<in> A"} holds. Using @{text "z\<^isub>s \<in> B"}, we can conclude in this case with @{term "y @ z \<in> A \<cdot> B"} (recall @{text "z\<^isub>p @ z\<^isub>s = z"}). Second there exists a partition @{text "x\<^isub>p"} and @{text "x\<^isub>s"} with @{text "x\<^isub>p \<in> A"} and @{text "x\<^isub>s @ z \<in> B"}. We therefore have \begin{center} @{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> \<in> {\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> | x\<^isub>p \<in> A \<and> (x\<^isub>p, x\<^isub>s) \<in> Partitions x}"} \end{center} \noindent and by the assumption about @{term "tag_Times A B"} also \begin{center} @{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> \<in> {\<lbrakk>y\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> | y\<^isub>p \<in> A \<and> (y\<^isub>p, y\<^isub>s) \<in> Partitions y}"} \end{center} \noindent This means there must be a partition @{text "y\<^isub>p"} and @{text "y\<^isub>s"} such that @{term "y\<^isub>p \<in> A"} and @{term "\<approx>B `` {x\<^isub>s} = \<approx>B `` {y\<^isub>s}"}. Unfolding the Myhill-Nerode Relation and together with the facts that @{text "x\<^isub>p \<in> A"} and \mbox{@{text "x\<^isub>s @ z \<in> B"}}, we obtain @{term "y\<^isub>p \<in> A"} and @{text "y\<^isub>s @ z \<in> B"}, as needed in this case. We again can complete the @{const TIMES}-case by setting @{text A} to @{term "lang r\<^isub>1"} and @{text B} to @{term "lang r\<^isub>2"}. \end{proof} \noindent The case for @{const Star} is similar to @{const TIMES}, but poses a few extra challenges. To deal with them, we define first the notion of a \emph{string prefix} and a \emph{strict string prefix}: \begin{center} \begin{tabular}{l} @{text "x \<le> y \<equiv> \<exists>z. y = x @ z"}\\ @{text "x < y \<equiv> x \<le> y \<and> x \<noteq> y"} \end{tabular} \end{center} When analysing the case of @{text "x @ z"} being an element in @{term "A\<star>"} and @{text x} is not the empty string, we have the following picture: \begin{center} \scalebox{1}{ \begin{tikzpicture}[fill=gray!20] \node[draw,minimum height=3.8ex, fill] (xa) { $\hspace{4em}@{text "x\<^bsub>pmax\<^esub>"}\hspace{4em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of xa, fill] (xxa) { $\hspace{0.5em}@{text "x\<^bsub>s\<^esub>"}\hspace{0.5em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of xxa, fill] (za) { $\hspace{2em}@{text "z\<^isub>a"}\hspace{2em}$ }; \node[draw,minimum height=3.8ex, right=-0.03em of za, fill] (zb) { $\hspace{7em}@{text "z\<^isub>b"}\hspace{7em}$ }; \draw[decoration={brace,transform={yscale=3}},decorate] (xa.north west) -- ($(xxa.north east)+(0em,0em)$) node[midway, above=0.5em]{@{text x}}; \draw[decoration={brace,transform={yscale=3}},decorate] (za.north west) -- ($(zb.north east)+(0em,0em)$) node[midway, above=0.5em]{@{text z}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(xa.north west)+(0em,3ex)$) -- ($(zb.north east)+(0em,3ex)$) node[midway, above=0.8em]{@{term "x @ z \<in> A\<star>"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(za.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) node[midway, below=0.5em]{@{term "x\<^isub>s @ z\<^isub>a \<in> A"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$) node[midway, below=0.5em]{@{text "x\<^bsub>pmax\<^esub> \<in> A\<^isup>\<star>"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(zb.south east)+(0em,0ex)$) -- ($(zb.south west)+(0em,0ex)$) node[midway, below=0.5em]{@{term "z\<^isub>b \<in> A\<star>"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(zb.south east)+(0em,-4ex)$) -- ($(xxa.south west)+(0em,-4ex)$) node[midway, below=0.5em]{@{term "x\<^isub>s @ z \<in> A\<star>"}}; \end{tikzpicture}} \end{center} % \noindent We can find a strict prefix @{text "x\<^isub>p"} of @{text x} such that @{term "x\<^isub>p \<in> A\<star>"}, @{text "x\<^isub>p < x"} and the rest @{term "x\<^isub>s @ z \<in> A\<star>"}. For example the empty string @{text "[]"} would do (recall @{term "x \<noteq> []"}). There are potentially many such prefixes, but there can only be finitely many of them (the string @{text x} is finite). Let us therefore choose the longest one and call it @{text "x\<^bsub>pmax\<^esub>"}. Now for the rest of the string @{text "x\<^isub>s @ z"} we know it is in @{term "A\<star>"} and cannot be the empty string. By Property~\ref{langprops}@{text "(iv)"}, we can separate this string into two parts, say @{text "a"} and @{text "b"}, such that @{text "a \<noteq> []"}, @{text "a \<in> A"} and @{term "b \<in> A\<star>"}. Now @{text a} must be strictly longer than @{text "x\<^isub>s"}, otherwise @{text "x\<^bsub>pmax\<^esub>"} is not the longest prefix. That means @{text a} `overlaps' with @{text z}, splitting it into two components @{text "z\<^isub>a"} and @{text "z\<^isub>b"}. For this we know that @{text "x\<^isub>s @ z\<^isub>a \<in> A"} and @{term "z\<^isub>b \<in> A\<star>"}. To cut a story short, we have divided @{term "x @ z \<in> A\<star>"} such that we have a string @{text a} with @{text "a \<in> A"} that lies just on the `border' of @{text x} and @{text z}. This string is @{text "x\<^isub>s @ z\<^isub>a"}. In order to show that @{term "x @ z \<in> A\<star>"} implies @{term "y @ z \<in> A\<star>"}, we use the following tagging-function: % \begin{center} @{thm (lhs) tag_Star_def[where ?A="A", THEN meta_eq_app]}~@{text "\<equiv>"}~ @{text "{\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> | x\<^isub>p < x \<and> x\<^isub>p \<in> A\<^isup>\<star> \<and> (x\<^isub>s, x\<^isub>p) \<in> Partitions x}"} \end{center} \begin{proof}[@{const Star}-Case] If @{term "finite (UNIV // \<approx>A)"} then @{term "finite (Pow (UNIV // \<approx>A))"} holds. The range of @{term "tag_Star A"} is a subset of this set, and therefore finite. Again we have to show under the assumption @{term "x"}~@{term "=(tag_Star A)="}~@{term y} that @{term "x @ z \<in> A\<star>"} implies @{term "y @ z \<in> A\<star>"}. We first need to consider the case that @{text x} is the empty string. From the assumption about strict prefixes in @{text "\<^raw:$\threesim$>\<^bsub>\<star>tag A\<^esub>"}, we can infer @{text y} is the empty string and then clearly have @{term "y @ z \<in> A\<star>"}. In case @{text x} is not the empty string, we can divide the string @{text "x @ z"} as shown in the picture above. By the tagging-function and the facts @{text "x\<^bsub>pmax\<^esub> \<in> A\<^isup>\<star>"} and @{text "x\<^bsub>pmax\<^esub> < x"}, we have \begin{center} @{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> \<in> {\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> | x\<^bsub>pmax\<^esub> < x \<and> x\<^bsub>pmax\<^esub> \<in> A\<^isup>\<star> \<and> (x\<^bsub>pmax\<^esub>, x\<^isub>s) \<in> Partitions x}"} \end{center} \noindent which by assumption is equal to \begin{center} @{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> \<in> {\<lbrakk>y\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> | y\<^bsub>p\<^esub> < y \<and> y\<^bsub>p\<^esub> \<in> A\<^isup>\<star> \<and> (y\<^bsub>p\<^esub>, y\<^isub>s) \<in> Partitions y}"} \end{center} \noindent From this we know there exist a partition @{text "y\<^isub>p"} and @{text "y\<^isub>s"} with @{term "y\<^isub>p \<in> A\<star>"} and also @{term "x\<^isub>s \<approx>A y\<^isub>s"}. Unfolding the Myhill-Nerode Relation we know @{term "y\<^isub>s @ z\<^isub>a \<in> A"}. We also know that @{term "z\<^isub>b \<in> A\<star>"}. Therefore @{term "y\<^isub>p @ (y\<^isub>s @ z\<^isub>a) @ z\<^isub>b \<in> A\<star>"}, which means @{term "y @ z \<in> A\<star>"}. The last step is to set @{text "A"} to @{term "lang r"} and thus complete the proof. \end{proof}*}section {* Second Part proved using Partial Derivatives\label{derivatives} *}text {* \noindent As we have seen in the previous section, in order to establish the second direction of the Myhill-Nerode Theorem, it is sufficient to find a more refined relation than @{term "\<approx>(lang r)"} for which we can show that there are only finitely many equivalence classes. So far we showed this directly by induction on @{text "r"} using tagging-functions. However, there is also an indirect method to come up with such a refined relation by using derivatives of regular expressions~\cite{Brzozowski64}. Assume the following two definitions for the \emph{left-quotient} of a language, which we write as @{term "Deriv c A"} and @{term "Derivs s A"} where @{text c} is a character and @{text s} a string, respectively: \begin{center} \begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{2mm}}l} @{thm (lhs) Der_def} & @{text "\<equiv>"} & @{thm (rhs) Der_def}\\ @{thm (lhs) Ders_def} & @{text "\<equiv>"} & @{thm (rhs) Ders_def}\\ \end{tabular} \end{center} \noindent In order to aid readability, we shall make use of the following abbreviation \begin{center} @{abbrev "Derivss s As"} \end{center} \noindent where we apply the left-quotient to a set of languages and then combine the results. Clearly we have the following equivalence between the Myhill-Nerode Relation (Definition~\ref{myhillneroderel}) and left-quotients \begin{equation}\label{mhders} @{term "x \<approx>A y"} \hspace{4mm}\text{if and only if}\hspace{4mm} @{term "Derivs x A = Derivs y A"} \end{equation} \noindent It is also straightforward to establish the following properties of left-quotients \begin{equation} \mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{2mm}}l} @{thm (lhs) Deriv_simps(1)} & $=$ & @{thm (rhs) Deriv_simps(1)}\\ @{thm (lhs) Deriv_simps(2)} & $=$ & @{thm (rhs) Deriv_simps(2)}\\ @{thm (lhs) Deriv_simps(3)} & $=$ & @{thm (rhs) Deriv_simps(3)}\\ @{thm (lhs) Deriv_simps(4)} & $=$ & @{thm (rhs) Deriv_simps(4)}\\ @{thm (lhs) Der_conc} & $=$ & @{thm (rhs) Der_conc}\\ @{thm (lhs) Deriv_star} & $=$ & @{thm (rhs) Deriv_star}\\ @{thm (lhs) Derivs_simps(1)} & $=$ & @{thm (rhs) Derivs_simps(1)}\\ @{thm (lhs) Derivs_simps(2)} & $=$ & @{thm (rhs) Derivs_simps(2)}\\ %@{thm (lhs) Derivs_simps(3)[where ?s1.0="s\<^isub>1" and ?s2.0="s\<^isub>2"]} & $=$ % & @{thm (rhs) Derivs_simps(3)[where ?s1.0="s\<^isub>1" and ?s2.0="s\<^isub>2"]}\\ \end{tabular}} \end{equation} \noindent Note that in the last equation we use the list-cons operator written \mbox{@{text "_ :: _"}}. The only interesting case is the case of @{term "A\<star>"} where we use Property~\ref{langprops}@{text "(i)"} in order to infer that @{term "Deriv c (A\<star>) = Deriv c (A \<cdot> A\<star>)"}. We can then complete the proof by using the fifth equation and noting that @{term "Deriv c (A\<star>) \<subseteq> (Deriv c A) \<cdot> A\<star>"} provided @{text "[] \<in> A"}. Brzozowski observed that the left-quotients for languages of regular expressions can be calculated directly using the notion of \emph{derivatives of a regular expression} \cite{Brzozowski64}. We define this notion in Isabelle/HOL as follows: \begin{center} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}} @{thm (lhs) deriv.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) deriv.simps(1)}\\ @{thm (lhs) deriv.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) deriv.simps(2)}\\ @{thm (lhs) deriv.simps(3)[where c'="d"]} & @{text "\<equiv>"} & @{thm (rhs) deriv.simps(3)[where c'="d"]}\\ @{thm (lhs) deriv.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) deriv.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) deriv.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{text "if"}~@{term "nullable r\<^isub>1"}~@{text "then"}~% @{term "Plus (Times (deriv c r\<^isub>1) r\<^isub>2) (deriv c r\<^isub>2)"}\\ & & \phantom{@{text "if"}~@{term "nullable r\<^isub>1"}~}@{text "else"}~% @{term "Times (deriv c r\<^isub>1) r\<^isub>2"}\\ @{thm (lhs) deriv.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) deriv.simps(6)}\smallskip\\ @{thm (lhs) derivs.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) derivs.simps(1)}\\ @{thm (lhs) derivs.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) derivs.simps(2)}\\ \end{tabular} \end{center} \noindent The last two clauses extend derivatives from characters to strings. The boolean function @{term "nullable r"} needed in the @{const Times}-case tests whether a regular expression can recognise the empty string. It can be defined as follows. \begin{center} \begin{tabular}{c} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}} @{thm (lhs) nullable.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(1)}\\ @{thm (lhs) nullable.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(2)}\\ @{thm (lhs) nullable.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(3)}\\ @{thm (lhs) nullable.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) nullable.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) nullable.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(6)}\\ \end{tabular} \end{tabular} \end{center} \noindent By induction on the regular expression @{text r}, respectively on the string @{text s}, one can easily show that left-quotients and derivatives of regular expressions relate as follows (see for example~\cite{Sakarovitch09}): \begin{equation}\label{Dersders} \mbox{\begin{tabular}{c} @{thm Deriv_deriv}\\ @{thm Derivs_derivs} \end{tabular}} \end{equation} \noindent The importance of this fact in the context of the Myhill-Nerode Theorem is that we can use \eqref{mhders} and \eqref{Dersders} in order to establish that \begin{center} @{term "x \<approx>(lang r) y"} \hspace{4mm}if and only if\hspace{4mm} @{term "lang (derivs x r) = lang (derivs y r)"}. \end{center} \noindent holds and hence \begin{equation} @{term "x \<approx>(lang r) y"}\hspace{4mm}\mbox{provided}\hspace{4mm}@{term "derivs x r = derivs y r"} \end{equation} \noindent This means the right-hand side (seen as a relation) refines the Myhill-Nerode Relation. Consequently, we can use @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. ders x r)\<^esub>"} as a tagging-relation. However, in order to be useful for the second part of the Myhill-Nerode Theorem, we have to be able to establish that for the corresponding language there are only finitely many derivatives---thus ensuring that there are only finitely many equivalence classes. Unfortunately, this is not true in general. Sakarovitch gives an example where a regular expression has infinitely many derivatives w.r.t.~the language @{text "(ab)\<^isup>\<star> \<union> (ab)\<^isup>\<star>a"}, which is formally written in our notation as \mbox{@{text "{[a,b]}\<^isup>\<star> \<union> ({[a,b]}\<^isup>\<star> \<cdot> {[a]})"}} (see \cite[Page~141]{Sakarovitch09}). What Brzozowski \cite{Brzozowski64} established is that for every language there \emph{are} only finitely `dissimilar' derivatives for a regular expression. Two regular expressions are said to be \emph{similar} provided they can be identified using the using the @{text "ACI"}-identities: \begin{equation}\label{ACI} \mbox{\begin{tabular}{cl} (@{text A}) & @{term "Plus (Plus r\<^isub>1 r\<^isub>2) r\<^isub>3"} $\equiv$ @{term "Plus r\<^isub>1 (Plus r\<^isub>2 r\<^isub>3)"}\\ (@{text C}) & @{term "Plus r\<^isub>1 r\<^isub>2"} $\equiv$ @{term "Plus r\<^isub>2 r\<^isub>1"}\\ (@{text I}) & @{term "Plus r r"} $\equiv$ @{term "r"}\\ \end{tabular}} \end{equation} \noindent Carrying this idea through, we must not consider the set of all derivatives, but the one modulo @{text "ACI"}. In principle, this can be done formally, but it is very painful in a theorem prover (since there is no direct characterisation of the set of dissimilar derivatives). Fortunately, there is a much simpler approach using \emph{partial derivatives}. They were introduced by Antimirov \cite{Antimirov95} and can be defined in Isabelle/HOL as follows: \begin{center} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}} @{thm (lhs) pderiv.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) pderiv.simps(1)}\\ @{thm (lhs) pderiv.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) pderiv.simps(2)}\\ @{thm (lhs) pderiv.simps(3)[where c'="d"]} & @{text "\<equiv>"} & @{thm (rhs) pderiv.simps(3)[where c'="d"]}\\ @{thm (lhs) pderiv.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) pderiv.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) pderiv.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{text "if"}~@{term "nullable r\<^isub>1"}~@{text "then"}~% @{term "(Timess (pderiv c r\<^isub>1) r\<^isub>2) \<union> (pderiv c r\<^isub>2)"}\\ & & \phantom{@{text "if"}~@{term "nullable r\<^isub>1"}~}@{text "else"}~% @{term "Timess (pderiv c r\<^isub>1) r\<^isub>2"}\\ @{thm (lhs) pderiv.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) pderiv.simps(6)}\smallskip\\ @{thm (lhs) pderivs.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) pderivs.simps(1)}\\ @{thm (lhs) pderivs.simps(2)} & @{text "\<equiv>"} & @{text "\<Union> (pders s) ` (pder c r)"}\\ \end{tabular} \end{center} \noindent Again the last two clauses extend partial derivatives from characters to strings. Unlike `simple' derivatives, the functions for partial derivatives return sets of regular expressions. In the @{const Times} and @{const Star} cases we therefore use the auxiliary definition \begin{center} @{text "TIMESS rs r \<equiv> {TIMES r' r | r' \<in> rs}"} \end{center} \noindent in order to `sequence' a regular expression with a set of regular expressions. Note that in the last clause we first build the set of partial derivatives w.r.t~the character @{text c}, then build the image of this set under the function @{term "pderivs s"} and finally `union up' all resulting sets. It will be convenient to introduce for this the following abbreviation \begin{center} @{abbrev "pderivs_set s rs"} \end{center} \noindent which simplifies the last clause of @{const "pderivs"} to \begin{center} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}} @{thm (lhs) pderivs.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) pderivs.simps(2)}\\ \end{tabular} \end{center} Partial derivatives can be seen as having the @{text "ACI"}-identities already built in: taking the partial derivatives of the regular expressions in \eqref{ACI} gives us in each case equal sets. Antimirov \cite{Antimirov95} showed a similar result to \eqref{Dersders} for partial derivatives, namely \begin{equation}\label{Derspders} \mbox{\begin{tabular}{lc} @{text "(i)"} & @{thm Deriv_pderiv}\\ @{text "(ii)"} & @{thm Derivs_pderivs} \end{tabular}} \end{equation} \begin{proof} The first fact is by a simple induction on @{text r}. For the second we slightly modify Antimirov's proof by performing an induction on @{text s} where we generalise over all @{text r}. That means in the @{text "cons"}-case the induction hypothesis is \begin{center} @{text "(IH)"}\hspace{3mm}@{term "\<forall>r. Derivs s (lang r) = \<Union> lang ` (pderivs s r)"} \end{center} \noindent With this we can establish \begin{center} \begin{tabular}{r@ {\hspace{1.5mm}}c@ {\hspace{1.5mm}}ll} @{term "Derivs (c # s) (lang r)"} & @{text "="} & @{term "Derivs s (Deriv c (lang r))"} & by def.\\ & @{text "="} & @{term "Derivs s (\<Union> lang ` (pderiv c r))"} & by @{text "("}\ref{Derspders}@{text ".i)"}\\ & @{text "="} & @{term "\<Union> (Derivs s) ` (lang ` (pderiv c r))"} & by def.~of @{text "Ders"}\\ & @{text "="} & @{term "\<Union> lang ` (\<Union> pderivs s ` (pderiv c r))"} & by IH\\ & @{text "="} & @{term "\<Union> lang ` (pderivs (c # s) r)"} & by def.\\ \end{tabular} \end{center} \noindent Note that in order to apply the induction hypothesis in the fourth equation, we need the generalisation over all regular expressions @{text r}. The case for the empty string is routine and omitted. \end{proof} \noindent Taking \eqref{Dersders} and \eqref{Derspders} together gives the relationship between languages of derivatives and partial derivatives \begin{equation} \mbox{\begin{tabular}{lc} @{text "(i)"} & @{thm deriv_pderiv[symmetric]}\\ @{text "(ii)"} & @{thm derivs_pderivs[symmetric]} \end{tabular}} \end{equation} \noindent These two properties confirm the observation made earlier that by using sets, partial derivatives have the @{text "ACI"}-identities of derivatives already built in. Antimirov also proved that for every language and every regular expression there are only finitely many partial derivatives, whereby the set of partial derivatives of @{text r} w.r.t.~a language @{text A} is defined as \begin{equation}\label{Pdersdef} @{thm pderivs_lang_def} \end{equation} \begin{thrm}[Antimirov \cite{Antimirov95}]\label{antimirov} For every language @{text A} and every regular expression @{text r}, \mbox{@{thm finite_pderivs_lang}}. \end{thrm} \noindent Antimirov's proof first establishes this theorem for the language @{term UNIV1}, which is the set of all non-empty strings. For this he proves: \begin{equation}\label{pdersunivone} \mbox{\begin{tabular}{l} @{thm pderivs_lang_Zero}\\ @{thm pderivs_lang_One}\\ @{thm pderivs_lang_Atom}\\ @{thm pderivs_lang_Plus[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm pderivs_lang_Times[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm pderivs_lang_Star}\\ \end{tabular}} \end{equation} \noindent from which one can deduce by induction on @{text r} that \begin{center} @{thm finite_pderivs_lang_UNIV1} \end{center} \noindent holds. Now Antimirov's theorem follows because \begin{center} @{thm pderivs_lang_UNIV}\\ \end{center} \noindent and for all languages @{text "A"}, @{term "pderivs_lang A r"} is a subset of @{term "pderivs_lang UNIV r"}. Since we follow Antimirov's proof quite closely in our formalisation (only the last two cases of \eqref{pdersunivone} involve some non-routine induction arguments), we omit the details. Let us now return to our proof for the second direction in the Myhill-Nerode Theorem. The point of the above calculations is to use @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. pders x r)\<^esub>"} as tagging-relation. \begin{proof}[Proof of Theorem~\ref{myhillnerodetwo} (second version)] Using \eqref{mhders} and \eqref{Derspders} we can easily infer that \begin{center} @{term "x \<approx>(lang r) y"}\hspace{4mm}\mbox{provided}\hspace{4mm}@{term "pderivs x r = pderivs y r"} \end{center} \noindent which means the tagging-relation @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. pders x r)\<^esub>"} refines @{term "\<approx>(lang r)"}. So we know by Lemma~\ref{fintwo}, \mbox{@{term "finite (UNIV // (\<approx>(lang r)))"}} holds if @{term "finite (UNIV // (=(\<lambda>x. pderivs x r)=))"}. In order to establish the latter, we can use Lemma~\ref{finone} and show that the range of the tagging-function \mbox{@{term "\<lambda>x. pderivs x r"}} is finite. For this recall Definition \ref{Pdersdef}, which gives us that \begin{center} @{thm pderivs_lang_def[where A="UNIV", simplified]} \end{center} \noindent Now the range of @{term "\<lambda>x. pderivs x r"} is a subset of @{term "Pow (pderivs_lang UNIV r)"}, which we know is finite by Theorem~\ref{antimirov}. Consequently there are only finitely many equivalence classes of @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. pders x r)\<^esub>"}. This relation refines @{term "\<approx>(lang r)"}, and therefore we can again conclude the second part of the Myhill-Nerode Theorem. \end{proof}*}section {* Closure Properties of Regular Languages\label{closure} *}text {* \noindent The beauty of regular languages is that they are closed under many set operations. Closure under union, concatenation and Kleene-star are trivial to establish given our definition of regularity (recall Definition~\ref{regular}). More interesting in our setting is the closure under complement, because it seems difficult to construct a regular expression for the complement language by direct means. However the existence of such a regular expression can now be easily proved using both parts of the Myhill-Nerode Theorem, since \begin{center} @{term "s\<^isub>1 \<approx>A s\<^isub>2"} if and only if @{term "s\<^isub>1 \<approx>(-A) s\<^isub>2"} \end{center} \noindent holds for any strings @{text "s\<^isub>1"} and @{text "s\<^isub>2"}. Therefore @{text A} and the complement language @{term "-A"} give rise to the same partitions. So if one is finite, the other is too, and vice versa. As noted earlier, our algorithm for solving equational systems actually calculates the regular expression for the complement language. Calculating this regular expression via automata using the standard method would be quite involved. It includes the steps: regular expression @{text "\<Rightarrow>"} non-deterministic automaton @{text "\<Rightarrow>"} deterministic automaton @{text "\<Rightarrow>"} complement automaton @{text "\<Rightarrow>"} regular expression. Clearly not something you want to formalise in a theorem prover in which it is cumbersome to reason about automata. Once closure under complement is established, closure under intersection and set difference is also easy, because \begin{center} \begin{tabular}{c} @{term "A \<inter> B = - (- A \<union> - B)"}\\ @{term "A - B = - (- A \<union> B)"} \end{tabular} \end{center} \noindent Since all finite languages are regular, then by closure under complement also all co-finite languages. Closure of regular languages under reversal, that is \begin{center} @{text "A\<^bsup>-1\<^esup> \<equiv> {s\<^bsup>-1\<^esup> | s \<in> A}"} \end{center} \noindent can be shown with the help of the following operation defined recursively over regular expressions \begin{center} \begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{thm (lhs) Rev.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(1)}\\ @{thm (lhs) Rev.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(2)}\\ @{thm (lhs) Rev.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(3)}\\ @{thm (lhs) Rev.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) Rev.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) Rev.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(6)}\\ \end{tabular} \end{center} \noindent For this operation we can show \begin{center} @{text "(\<calL>(r))\<^bsup>-1\<^esup>"}~@{text "="}~@{thm (rhs) rev_lang} \end{center} \noindent from which closure under reversal of regular languages follows. A perhaps surprising fact is that regular languages are closed under any left-quotient. Define \begin{center} @{abbrev "Deriv_lang B A"} \end{center} \noindent and assume @{text B} is any language and @{text A} is regular, then @{term "Deriv_lang B A"} is regular. To see this consider the following argument using partial derivatives: From @{text A} being regular we know there exists a regular expression @{text r} such that @{term "A = lang r"}. We also know that @{term "pderivs_lang B r"} is finite for every language @{text B} and regular expression @{text r} (recall Theorem~\ref{antimirov}). By definition and \eqref{Derspders} we have \begin{equation}\label{eqq} @{term "Deriv_lang B (lang r) = (\<Union> lang ` (pderivs_lang B r))"} \end{equation} \noindent Since there are only finitely many regular expressions in @{term "pderivs_lang B r"}, we know by \eqref{uplus} that there exists a regular expression so that the right-hand side of \eqref{eqq} is equal to the language \mbox{@{term "lang (\<Uplus>(pderivs_lang B r))"}}. Thus the regular expression @{term "\<Uplus>(pderivs_lang B r)"} verifies that @{term "Deriv_lang B A"} is regular. Even more surprising is the fact that for \emph{every} language @{text A}, the language consisting of all substrings of @{text A} is regular \cite{Haines69} (see also \cite{Shallit08, Gasarch09}). A \emph{substring} can be obtained by striking out zero or more characters from a string. This can be defined inductively in Isabelle/HOL by the following three rules: \begin{center} @{thm[mode=Axiom] emb0[where bs="x"]}\hspace{10mm} @{thm[mode=Rule] emb1[where as="x" and b="c" and bs="y"]}\hspace{10mm} @{thm[mode=Rule] emb2[where as="x" and a="c" and bs="y"]} \end{center} \noindent It is straightforward to prove that @{text "\<preceq>"} is a partial order. Now define the \emph{language of substrings} and \emph{superstrings} of a language @{text A} respectively as \begin{center} \begin{tabular}{l} @{thm SUBSEQ_def}\\ @{thm SUPSEQ_def} \end{tabular} \end{center} \noindent We like to establish \begin{thrm}[Haines \cite{Haines69}]\label{subseqreg} For every language @{text A}, the languages @{text "(i)"} @{term "SUBSEQ A"} and @{text "(ii)"} @{term "SUPSEQ A"} are regular. \end{thrm} \noindent Our proof follows the one given in \cite[Pages 92--95]{Shallit08}, except that we use Higman's Lemma, which is already proved in the Isabelle/HOL library \cite{Berghofer03}.\footnote{Unfortunately, Berghofer's formalisation of Higman's Lemma is restricted to 2-letter alphabets, which means also our formalisation of Theorem~\ref{subseqreg} is `tainted' with this constraint. However our methodology is applicable to any alphabet of finite size.} Higman's Lemma allows us to infer that every set @{text A} of antichains, satisfying \begin{equation}\label{higman} @{text "\<forall>x, y \<in> A."}~@{term "x \<noteq> y \<longrightarrow> \<not>(x \<preceq> y) \<and> \<not>(y \<preceq> x)"} \end{equation} \noindent is finite. The first step in our proof of Theorem~\ref{subseqreg} is to establish the following simple properties for @{term SUPSEQ} \begin{equation}\label{supseqprops} \mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{thm (lhs) SUPSEQ_simps(1)} & @{text "\<equiv>"} & @{thm (rhs) SUPSEQ_simps(1)}\\ @{thm (lhs) SUPSEQ_simps(2)} & @{text "\<equiv>"} & @{thm (rhs) SUPSEQ_simps(2)}\\ @{thm (lhs) SUPSEQ_atom} & @{text "\<equiv>"} & @{thm (rhs) SUPSEQ_atom}\\ @{thm (lhs) SUPSEQ_union} & @{text "\<equiv>"} & @{thm (rhs) SUPSEQ_union}\\ @{thm (lhs) SUPSEQ_conc} & @{text "\<equiv>"} & @{thm (rhs) SUPSEQ_conc}\\ @{thm (lhs) SUPSEQ_star} & @{text "\<equiv>"} & @{thm (rhs) SUPSEQ_star} \end{tabular}} \end{equation} \noindent whereby the last equation follows from the fact that @{term "A\<star>"} contains the empty string. With these properties at our disposal we can establish the lemma \begin{lmm} If @{text A} is regular, then also @{term "SUPSEQ A"}. \end{lmm} \begin{proof} Since our alphabet is finite, we have a regular expression, written @{text ALL}, that matches every string. Using this regular expression we can inductively define the operation @{text "r\<up>"} \begin{center} \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} @{thm (lhs) UP.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) UP.simps(1)}\\ @{thm (lhs) UP.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) UP.simps(2)}\\ @{thm (lhs) UP.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) UP.simps(3)}\\ @{thm (lhs) UP.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) UP.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) UP.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} & @{thm (rhs) UP.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) UP.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) UP.simps(6)} \end{tabular} \end{center} \noindent and use \eqref{supseqprops} to establish that @{thm lang_UP} holds. This shows that @{term "SUBSEQ A"} is regular, provided @{text A} is. \end{proof} \noindent Now we can prove the main lemma w.r.t.~@{const "SUPSEQ"}, namely \begin{lmm}\label{mset} For every language @{text A}, there exists a finite language @{text M} such that \begin{center} \mbox{@{term "SUPSEQ M = SUPSEQ A"}}\;. \end{center} \end{lmm} \begin{proof} For @{text M} we take the set of all minimal elements of @{text A}. An element @{text x} is said to be \emph{minimal} in @{text A} provided \begin{center} @{thm minimal_def} \end{center} \noindent By Higman's Lemma \eqref{higman} we know that @{term "M \<equiv> {x \<in> A. minimal x A}"} is finite, since every minimal element is incomparable, except with itself. It is also straightforward to show that @{term "SUPSEQ M \<subseteq> SUPSEQ A"}. For the other direction we have @{term "x \<in> SUPSEQ A"}. From this we obtain a @{text y} such that @{term "y \<in> A"} and @{term "y \<preceq> x"}. Since we have that the relation \mbox{@{term "{(y, x). y \<preceq> x \<and> x \<noteq> y}"}} is well-founded, there must be a minimal element @{text "z"} such that @{term "z \<in> A"} and @{term "z \<preceq> y"}, and hence by transitivity also \mbox{@{term "z \<preceq> x"}} (here we deviate from the argument given in \cite{Shallit08}, because Isabelle/HOL provides already an extensive infrastructure for reasoning about well-foundedness). Since @{term "z"} is minimal and an element in @{term A}, we also know that @{term z} is in @{term M}. From this together with \mbox{@{term "z \<preceq> x"}}, we can infer that @{term x} is in @{term "SUPSEQ M"}, as required. \end{proof} \noindent This lemma allows us to establish the second part of Theorem~\ref{subseqreg}. \begin{proof}[Proof of the Second Part of Theorem~\ref{subseqreg}] Given any language @{text A}, by Lemma~\ref{mset} we know there exists a finite, and thus regular, language @{text M}. We further have @{term "SUPSEQ M = SUPSEQ A"}, which establishes the second part. \end{proof} \noindent In order to establish the first part of this theorem, we use the property proved in \cite{Shallit08}, namely that \begin{equation}\label{compl} @{thm SUBSEQ_complement} \end{equation} \noindent holds. Now the first part of Theorem~\ref{subseqreg} is a simple consequence of the second part. \begin{proof}[Proof of the First Part of Theorem~\ref{subseqreg}] By the second part, we know the right-hand side of \eqref{compl} is regular, which means @{term "- SUBSEQ A"} is regular. But since we established already that regularity is preserved under complement, also @{term "SUBSEQ A"} must be regular. \end{proof} Finally we like to show that the Myhill-Nerode Theorem is also convenient for establishing the non-regularity of languages. For this we use the following version of the Continuation Lemma (see for example~\cite{Rosenberg06}). \begin{lmm}[Continuation Lemma] If a language @{text A} is regular and a set @{text B} is infinite, then there exist two distinct strings @{text x} and @{text y} in @{text B} such that @{term "x \<approx>A y"}. \end{lmm} \noindent This lemma can be easily deduced from the Myhill-Nerode Theorem and the Pigeonhole Principle: Since @{text A} is regular, there can be only finitely many equivalence classes. Hence an infinite set must contain at least two strings that are in the same equivalence class, that is they need to be related by the Myhill-Nerode Relation. Using this lemma, it is straightforward to establish that the language \mbox{@{text "A \<equiv> \<Union>\<^isub>n a\<^sup>n @ b\<^sup>n"}} is not regular (@{text "a\<^sup>n"} stands for the strings consisting of @{text n} times the character a; similarly for @{text "b\<^isup>n"}). For this consider the infinite set @{text "B \<equiv> \<Union>\<^isub>n a\<^sup>n"}. \begin{lmm} No two distinct strings in set @{text "B"} are Myhill-Nerode related by language @{text A}. \end{lmm} \begin{proof} After unfolding the definitions, we need to establish that for @{term "i \<noteq> j"}, the equality \mbox{@{text "a\<^sup>i @ b\<^sup>j = a\<^sup>n @ b\<^sup>n"}} leads to a contradiction. This is clearly the case if we test that the two strings have the same amount of @{text a}'s and @{text b}'s; the string on the right-hand side satisfies this property, but not the one on the left-hand side. Therefore the strings cannot be equal and we have a contradiction. \end{proof} \noindent To conclude the proof on non-regularity of language @{text A}, the Continuation Lemma and the lemma above lead to a contradiction assuming @{text A} is regular. Therefore the language @{text A} is not regular, as we wanted to show.*}section {* Conclusion and Related Work *}text {* \noindent In this paper we took the view that a regular language is one where there exists a regular expression that matches all of its strings. Regular expressions can conveniently be defined as a datatype in HOL-based theorem provers. For us it was therefore interesting to find out how far we can push this point of view. We have established in Isabelle/HOL both directions of the Myhill-Nerode Theorem. % \begin{thrm}[The Myhill-Nerode Theorem]\mbox{}\\ A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}. \end{thrm} \noindent Having formalised this theorem means we pushed our point of view quite far. Using this theorem we can obviously prove when a language is \emph{not} regular---by establishing that it has infinitely many equivalence classes generated by the Myhill-Nerode Relation (this is usually the purpose of the Pumping Lemma \cite{Kozen97}). We can also use it to establish the standard textbook results about closure properties of regular languages. Interesting is the case of closure under complement, because it seems difficult to construct a regular expression for the complement language by direct means. However the existence of such a regular expression can be easily proved using the Myhill-Nerode Theorem. Our insistence on regular expressions for proving the Myhill-Nerode Theorem arose from the limitations of HOL, which is the logic underlying the popular theorem provers HOL4, HOLlight and Isabelle/HOL. In order to guarantee consistency, formalisations in HOL can only extend the logic with definitions that introduce a new concept in terms of already existing notions. A convenient definition for automata (based on graphs) uses a polymorphic type for the state nodes. This allows us to use the standard operation for disjoint union whenever we need to compose two automata. Unfortunately, we cannot use such a polymorphic definition in HOL as part of the definition for regularity of a language (a predicate over sets of strings). Consider for example the following attempt: \begin{center} @{text "is_regular A \<equiv> \<exists>M(\<alpha>). is_dfa (M) \<and> \<calL>(M) = A"} \end{center} \noindent In this definifion, the definiens is polymorphic in the type of the automata @{text "M"} (indicated by dependency on the type-variable @{text "\<alpha>"}), but the definiendum @{text "is_regular"} is not. Such definitions are excluded from HOL, because they can lead easily to inconsistencies (see \cite{PittsHOL4} for a simple example). Also HOL does not contain type-quantifiers which would allow us to get rid of the polymorphism by quantifying over the type-variable @{text "\<alpha>"}. Therefore when defining regularity in terms of automata, the only natural way out in HOL is to resort to state nodes with an identity, for example a natural number. Unfortunatly, the consequence is that we have to be careful when combining two automata so that there is no clash between two such states. This makes formalisations quite fiddly and rather unpleasant. Regular expressions proved much more convenient for reasoning in HOL since they can be defined as inductive datatype and a reasoning infrastructure comes for free. The definition of regularity in terms of regular expressions poses no problem at all for HOL. We showed in this paper that they can be used for establishing the central result in regular language theory---the Myhill-Nerode Theorem. While regular expressions are convenient, they have some limitations. One is that there seems to be no method of calculating a minimal regular expression (for example in terms of length) for a regular language, like there is for automata. On the other hand, efficient regular expression matching, without using automata, poses no problem \cite{OwensReppyTuron09}. For an implementation of a simple regular expression matcher, whose correctness has been formally established, we refer the reader to Owens and Slind \cite{OwensSlind08}. In our opinion, their formalisation is considerably slicker than for example the approach to regular expression matching taken in \cite{Harper99} and \cite{Yi06}. Our proof of the first direction is very much inspired by \emph{Brzozowski's algebraic method} used to convert a finite automaton to a regular expression \cite{Brzozowski64}. The close connection can be seen by considering the equivalence classes as the states of the minimal automaton for the regular language. However there are some subtle differences. Because our equivalence classes (or correspondingly states) arise from the Myhill-Nerode Relation, the most natural choice is to characterise each state with the set of strings starting from the initial state leading up to that state. Usually, however, the states are characterised as the strings starting from that state leading to the terminal states. The first choice has consequences about how the initial equational system is set up. We have the $\lambda$-term on our `initial state', while Brzozowski has it on the terminal states. This means we also need to reverse the direction of Arden's Lemma. We have not found anything in the `pencil-and-paper-reasoning' literature about our way of proving the first direction of the Myhill-Nerode Theorem, but it appears to be folklore. We presented two proofs for the second direction of the Myhill-Nerode Theorem. One direct proof using tagging-functions and another using partial derivatives. This part of our work is where our method using regular expressions shines, because we can completely side-step the standard argument \cite{Kozen97} where automata need to be composed. However, it is also the direction where we had to spend most of the `conceptual' time, as our first proof based on tagging-functions is new for establishing the Myhill-Nerode Theorem. All standard proofs of this direction proceed by arguments over automata. The indirect proof for the second direction arose from our interest in Brzozowski's derivatives for regular expression matching. While Brzozowski already established that there are only finitely many dissimilar derivatives for every regular expression, this result is not as straightforward to formalise in a theorem prover as one might wish. The reason is that the set of dissimilar derivatives is not defined inductively, but in terms of an ACI-equivalence relation. This difficulty prevented for example Krauss and Nipkow to prove termination of their equivalence checker for regular expressions \cite{KraussNipkow11}. Their checker is based on Brzozowski's derivatives and for their argument the lack of a formal proof of termination is not crucial (it merely lets them ``sleep better'' \cite{KraussNipkow11}). We expect that their development simplifies by using partial derivatives, instead of derivatives, and that the termination of the algorithm can be formally established (the main ingredient is Theorem~\ref{antimirov}). However, since partial derivatives use sets of regular expressions, one needs to carefully analyse whether the resulting algorithm is still executable. Given the existing infrastructure for executable sets in Isabelle/HOL \cite{Haftmann09}, it should. Our formalisation of the Myhill-Nerode Theorem consists of 780 lines of Isabelle/Isar code for the first direction and 460 for the second (the one based on tagging-functions), plus around 300 lines of standard material about regular languages. The formalisation of derivatives and partial derivatives shown in Section~\ref{derivatives} consists of 390 lines of code. The closure properties in Section~\ref{closure} (except Theorem~\ref{subseqreg}) can be established in 190 lines of code. The Continuation Lemma and the non-regularity of @{text "a\<^sup>n b\<^sup>n"} require 80 lines of code. The algorithm for solving equational systems, which we used in the first direction, is conceptually relatively simple. Still the use of sets over which the algorithm operates means it is not as easy to formalise as one might hope. However, it seems sets cannot be avoided since the `input' of the algorithm consists of equivalence classes and we cannot see how to reformulate the theory so that we can use lists or matrices. Lists would be much easier to reason about, since we can define functions over them by recursion. For sets we have to use set-comprehensions, which is slightly unwieldy. Matrices would allow us to use the slick formalisation by Nipkow of the Gauss-Jordan algorithm \cite{Nipkow11}. While our formalisation might appear large, it should be seen in the context of the work done by Constable at al \cite{Constable00} who formalised the Myhill-Nerode Theorem in Nuprl using automata. They write that their four-member team needed something on the magnitude of 18 months for their formalisation. It is hard to gauge the size of a formalisation in Nurpl, but from what is shown in the Nuprl Math Library about their development it seems substantially larger than ours. We attribute this to our use of regular expressions, which meant we did not need to `fight' the theorem prover. Also, Filli\^atre reports that his formalisation in Coq of automata theory and Kleene's theorem is ``rather big'' \cite{Filliatre97}. More recently, Almeida et al reported about another formalisation of regular languages in Coq \cite{Almeidaetal10}. Their main result is the correctness of Mirkin's construction of an automaton from a regular expression using partial derivatives. This took approximately 10600 lines of code. In terms of time, the estimate for our formalisation is that we needed approximately 3 months and this included the time to find our proof arguments. Unlike Constable et al, who were able to follow the Myhill-Nerode proof from \cite{HopcroftUllman69}, we had to find our own arguments. So for us the formalisation was not the bottleneck. The code of our formalisation can be found in the Archive of Formal Proofs at \mbox{\url{http://afp.sourceforge.net/devel-entries/Myhill-Nerode.shtml}} \cite{myhillnerodeafp11}.\medskip \noindent {\bf Acknowledgements:} We are grateful for the comments we received from Larry Paulson. Tobias Nipkow made us aware of the properties in Theorem~\ref{subseqreg} and Tjark Weber helped us with proving them.*}(*<*)end(*>*)