(*<*)
theory Paper
imports "../Closures" "../Attic/Prefix_subtract"
begin
declare [[show_question_marks = false]]
consts
REL :: "(string \<times> string) \<Rightarrow> bool"
UPLUS :: "'a set \<Rightarrow> 'a set \<Rightarrow> (nat \<times> 'a) set"
abbreviation
"EClass x R \<equiv> R `` {x}"
abbreviation
"Append_rexp2 r_itm r \<equiv> Append_rexp r r_itm"
abbreviation
"pow" (infixl "\<up>" 100)
where
"A \<up> n \<equiv> A ^^ n"
syntax (latex output)
"_Collect" :: "pttrn => bool => 'a set" ("(1{_ | _})")
"_CollectIn" :: "pttrn => 'a set => bool => 'a set" ("(1{_ \<in> _ | _})")
translations
"_Collect p P" <= "{p. P}"
"_Collect p P" <= "{p|xs. P}"
"_CollectIn p A P" <= "{p : A. P}"
abbreviation "ZERO \<equiv> Zero"
abbreviation "ONE \<equiv> One"
abbreviation "ATOM \<equiv> Atom"
abbreviation "PLUS \<equiv> Plus"
abbreviation "TIMES \<equiv> Times"
abbreviation "TIMESS \<equiv> Timess"
abbreviation "STAR \<equiv> Star"
notation (latex output)
str_eq ("\<approx>\<^bsub>_\<^esub>") and
str_eq_applied ("_ \<approx>\<^bsub>_\<^esub> _") and
conc (infixr "\<cdot>" 100) and
star ("_\<^bsup>\<star>\<^esup>" [101] 200) and
pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and
Suc ("_+1" [100] 100) and
quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and
REL ("\<approx>") and
UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
lang ("\<^raw:\ensuremath{\cal{L}}>" 101) and
lang ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
lang_trm ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
Lam ("\<lambda>'(_')" [100] 100) and
Trn ("'(_, _')" [100, 100] 100) and
EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and
transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and
Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and
Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and
Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and
uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and
tag_Plus ("+tag _ _" [100, 100] 100) and
tag_Plus ("+tag _ _ _" [100, 100, 100] 100) and
tag_Times ("\<times>tag _ _" [100, 100] 100) and
tag_Times ("\<times>tag _ _ _" [100, 100, 100] 100) and
tag_Star ("\<star>tag _" [100] 100) and
tag_Star ("\<star>tag _ _" [100, 100] 100) and
tag_eq ("\<^raw:$\threesim$>\<^bsub>_\<^esub>" [100] 100) and
Delta ("\<Delta>'(_')") and
nullable ("\<delta>'(_')") and
Cons ("_ :: _" [100, 100] 100) and
Rev ("Rev _" [1000] 100) and
Der ("Der _ _" [1000, 1000] 100) and
ONE ("ONE" 999) and
ZERO ("ZERO" 999) and
pders_lang ("pderl") and
UNIV1 ("UNIV\<^isup>+") and
Ders_lang ("Derl")
lemma meta_eq_app:
shows "f \<equiv> \<lambda>x. g x \<Longrightarrow> f x \<equiv> g x"
by auto
lemma str_eq_def':
shows "x \<approx>A y \<equiv> (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)"
unfolding str_eq_def by simp
lemma conc_def':
"A \<cdot> B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"
unfolding conc_def by simp
lemma conc_Union_left:
shows "B \<cdot> (\<Union>n. A \<up> n) = (\<Union>n. B \<cdot> (A \<up> n))"
unfolding conc_def by auto
lemma test:
assumes X_in_eqs: "(X, rhs) \<in> Init (UNIV // \<approx>A)"
shows "X = \<Union> (lang_trm ` rhs)"
using assms l_eq_r_in_eqs by (simp)
(* THEOREMS *)
notation (Rule output)
"==>" ("\<^raw:\mbox{}\inferrule{\mbox{>_\<^raw:}}>\<^raw:{\mbox{>_\<^raw:}}>")
syntax (Rule output)
"_bigimpl" :: "asms \<Rightarrow> prop \<Rightarrow> prop"
("\<^raw:\mbox{}\inferrule{>_\<^raw:}>\<^raw:{\mbox{>_\<^raw:}}>")
"_asms" :: "prop \<Rightarrow> asms \<Rightarrow> asms"
("\<^raw:\mbox{>_\<^raw:}\\>/ _")
"_asm" :: "prop \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}>")
notation (Axiom output)
"Trueprop" ("\<^raw:\mbox{}\inferrule{\mbox{}}{\mbox{>_\<^raw:}}>")
notation (IfThen output)
"==>" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _/ \<^raw:{\normalsize \,>then\<^raw:\,}>/ _.")
syntax (IfThen output)
"_bigimpl" :: "asms \<Rightarrow> prop \<Rightarrow> prop"
("\<^raw:{\normalsize{}>If\<^raw:\,}> _ /\<^raw:{\normalsize \,>then\<^raw:\,}>/ _.")
"_asms" :: "prop \<Rightarrow> asms \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}> /\<^raw:{\normalsize \,>and\<^raw:\,}>/ _")
"_asm" :: "prop \<Rightarrow> asms" ("\<^raw:\mbox{>_\<^raw:}>")
notation (IfThenNoBox output)
"==>" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _/ \<^raw:{\normalsize \,>then\<^raw:\,}>/ _.")
syntax (IfThenNoBox output)
"_bigimpl" :: "asms \<Rightarrow> prop \<Rightarrow> prop"
("\<^raw:{\normalsize{}>If\<^raw:\,}> _ /\<^raw:{\normalsize \,>then\<^raw:\,}>/ _.")
"_asms" :: "prop \<Rightarrow> asms \<Rightarrow> asms" ("_ /\<^raw:{\normalsize \,>and\<^raw:\,}>/ _")
"_asm" :: "prop \<Rightarrow> asms" ("_")
(*>*)
section {* Introduction *}
text {*
\noindent
Regular languages are an important and well-understood subject in Computer
Science, with many beautiful theorems and many useful algorithms. There is a
wide range of textbooks on this subject, many of which are aimed at students
and contain very detailed `pencil-and-paper' proofs (e.g.~\cite{Kozen97,
HopcroftUllman69}). It seems natural to exercise theorem provers by
formalising these theorems and by formally verifying these algorithms.
A popular choice for a theorem prover would be one based on Higher-Order
Logic (HOL), for example HOL4, HOLlight or Isabelle/HOL. For the development
presented in this paper we will use the latter. HOL is a predicate calculus
that allows quantification over predicate variables. Its type system is
based on Church's Simple Theory of Types \cite{Church40}. Although many
mathematical concepts can be conveniently expressed in HOL, there are some
limitations that hurt badly if one attempts a simple-minded formalisation
of regular languages in it.
The typical approach to regular languages is to
introduce finite automata and then define everything in terms of them
\cite{Kozen97}. For example, a regular language is normally defined as:
\begin{dfntn}\label{baddef}
A language @{text A} is \emph{regular}, provided there is a
finite deterministic automaton that recognises all strings of @{text "A"}.
\end{dfntn}
\noindent
This approach has many benefits. Among them is the fact that it is easy to
convince oneself that regular languages are closed under complementation:
one just has to exchange the accepting and non-accepting states in the
corresponding automaton to obtain an automaton for the complement language.
The problem, however, lies with formalising such reasoning in a HOL-based
theorem prover. Automata are built up from states and transitions that need
to be represented as graphs, matrices or functions, none of which can be
defined as an inductive datatype.
In the case of graphs and matrices, this means we have to build our own
reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor
HOLlight supports them with libraries. Even worse, reasoning about graphs and
matrices can be a real hassle in HOL-based theorem provers, because
we have to be able to combine automata. Consider for
example the operation of sequencing two automata, say $A_1$ and $A_2$, by
connecting the accepting states of $A_1$ to the initial state of $A_2$:
%
\begin{center}
\begin{tabular}{ccc}
\begin{tikzpicture}[scale=1]
%\draw[step=2mm] (-1,-1) grid (1,1);
\draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3);
\draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3);
\node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\draw (-0.6,0.0) node {\small$A_1$};
\draw ( 0.6,0.0) node {\small$A_2$};
\end{tikzpicture}
&
\raisebox{2.1mm}{\bf\Large$\;\;\;\Rightarrow\,\;\;$}
&
\begin{tikzpicture}[scale=1]
%\draw[step=2mm] (-1,-1) grid (1,1);
\draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3);
\draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3);
\node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
\draw (C) to [very thick, bend left=45] (B);
\draw (D) to [very thick, bend right=45] (B);
\draw (-0.6,0.0) node {\small$A_1$};
\draw ( 0.6,0.0) node {\small$A_2$};
\end{tikzpicture}
\end{tabular}
\end{center}
\noindent
On `paper' we can define the corresponding
graph in terms of the disjoint
union of the state nodes. Unfortunately in HOL, the standard definition for disjoint
union, namely
%
\begin{equation}\label{disjointunion}
@{text "A\<^isub>1 \<uplus> A\<^isub>2 \<equiv> {(1, x) | x \<in> A\<^isub>1} \<union> {(2, y) | y \<in> A\<^isub>2}"}
\end{equation}
\noindent
changes the type---the disjoint union is not a set, but a set of
pairs. Using this definition for disjoint union means we do not have a
single type for automata. As a result we will not be able to define a regular
language as one for which there exists an automaton that recognises all its
strings (Definition~\ref{baddef}). This is because we cannot make a definition in HOL that is polymorphic in
the state type and there is no type quantification available in HOL (unlike
in Coq, for example).\footnote{Slind already pointed out this problem in an email
to the HOL4 mailing list on 21st April 2005.}
An alternative, which provides us with a single type for automata, is to give every
state node an identity, for example a natural
number, and then be careful to rename these identities apart whenever
connecting two automata. This results in clunky proofs
establishing that properties are invariant under renaming. Similarly,
connecting two automata represented as matrices results in very ad hoc
constructions, which are not pleasant to reason about.
Functions are much better supported in Isabelle/HOL, but they still lead to
problems similar to those with graphs. Composing, for example, two non-deterministic automata in parallel
also requires the formalisation of disjoint unions. Nipkow \cite{Nipkow98}
dismisses for this purpose the option of using identities, because according to
him it leads to ``messy proofs''. Since he does not need to define what regular
languages are, Nipkow opts for a variant of \eqref{disjointunion} using bit lists, but writes
\begin{quote}
\it%
\begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
`` & All lemmas appear obvious given a picture of the composition of automata\ldots
Yet their proofs require a painful amount of detail.''
\end{tabular}
\end{quote}
\noindent
and
\begin{quote}
\it%
\begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
`` & If the reader finds the above treatment in terms of bit lists revoltingly
concrete, I cannot disagree. A more abstract approach is clearly desirable.''
\end{tabular}
\end{quote}
\noindent
Moreover, it is not so clear how to conveniently impose a finiteness
condition upon functions in order to represent \emph{finite} automata. The
best option is probably to resort to more advanced reasoning frameworks, such as
\emph{locales} or \emph{type classes}, which are \emph{not} available in all
HOL-based theorem provers.
Because of these problems with representing automata, there seems to
be no substantial formalisation of automata theory and regular languages
carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes
the link between regular expressions and automata in the context of
lexing. Berghofer and Reiter \cite{BerghoferReiter09} formalise automata
working over bit strings in the context of Presburger arithmetic. The only
larger formalisations of automata theory are carried out in Nuprl
\cite{Constable00} and in Coq \cite{Filliatre97}.
Also, one might consider automata theory and regular languages to be a well-worn
stock subject where everything is crystal clear. However, paper proofs about
automata often involve subtle side-conditions which are easily overlooked,
but which make formal reasoning rather painful. For example, Kozen's proof of
the Myhill-Nerode theorem requires that automata do not have inaccessible
states \cite{Kozen97}. Another subtle side-condition is completeness of
automata, that is automata need to have total transition functions and at
most one `sink' state from which there is no connection to a final state
(Brzozowski mentions this side-condition in the context of state complexity
of automata \cite{Brzozowski10}). Such side-conditions mean that if we
define a regular language as one for which there exists \emph{a} finite
automaton that recognises all its strings (see Definition~\ref{baddef}), then we
need a lemma which ensures that another equivalent one can be found
satisfying the side-condition. Unfortunately, such `little' and `obvious'
lemmas make a formalisation of automata theory a hair-pulling experience.
In this paper, we will not attempt to formalise automata theory in
Isabelle/HOL nor will we attempt to formalise automata proofs from the
literature, but take a different approach to regular languages than is
usually taken. Instead of defining a regular language as one where there
exists an automaton that recognises all its strings, we define a
regular language as:
\begin{dfntn}\label{regular}
A language @{text A} is \emph{regular}, provided there is a regular expression
that matches all strings of @{text "A"}.
\end{dfntn}
\noindent
The reason is that regular expressions, unlike graphs, matrices and
functions, can be easily defined as an inductive datatype. A reasoning
infrastructure (like induction and recursion) then comes for free in
HOL. Moreover, no side-conditions will be needed for regular expressions,
like we need for automata. This convenience of regular expressions has
recently been exploited in HOL4 with a formalisation of regular expression
matching based on derivatives \cite{OwensSlind08} and with an equivalence
checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}. The
main purpose of this paper is to show that a central result about regular
languages---the Myhill-Nerode theorem---can be recreated by only using
regular expressions. This theorem gives necessary and sufficient conditions
for when a language is regular. As a corollary of this theorem we can easily
establish the usual closure properties, including complementation, for
regular languages.\medskip
\noindent
{\bf Contributions:} There is an extensive literature on regular languages.
To the best of our knowledge, our proof of the Myhill-Nerode theorem is the first
that is based only on regular expressions. The part of this theorem stating
that finitely many partitions imply regularity of the language is proved by
an argument about solving equational systems. This argument appears to be
folklore. For the other part, we give two proofs: one direct proof using
certain tagging-functions, and another indirect proof using Antimirov's
partial derivatives \cite{Antimirov95}. Again to the best of our knowledge, the
tagging-functions have not been used before to establish the Myhill-Nerode
theorem. Derivatives of regular expressions have recently been used quite
widely in the literature; partial derivatives, in contrast, have attracted much
less attention. However, partial derivatives are more suitable in the
context of the Myhill-Nerode theorem, since it is easier to establish
their finiteness result formally. We are not aware of any proof that uses
either of them for proving the Myhill-Nerode theorem.
*}
section {* Preliminaries *}
text {*
\noindent
Strings in Isabelle/HOL are lists of characters with the \emph{empty string}
being represented by the empty list, written @{term "[]"}. We assume there
are only finitely many different characters. \emph{Languages} are sets of
strings. The language containing all strings is written in Isabelle/HOL as
@{term "UNIV::string set"}. The concatenation of two languages is written
@{term "A \<cdot> B"} and a language raised to the power @{text n} is written
@{term "A \<up> n"}. They are defined as usual
\begin{center}
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
@{thm (lhs) conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}
& @{text "\<equiv>"} & @{thm (rhs) conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}\\
@{thm (lhs) lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}
& @{text "\<equiv>"} & @{thm (rhs) lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}\\
@{thm (lhs) lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
& @{text "\<equiv>"} & @{thm (rhs) lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
\end{tabular}
\end{center}
\noindent
where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A}
is defined as the union over all powers, namely @{thm star_def}. In the paper
we will make use of the following properties of these constructions.
\begin{prpstn}\label{langprops}\mbox{}\\
\begin{tabular}{@ {}lp{10cm}}
(i) & @{thm star_unfold_left} \\
(ii) & @{thm[mode=IfThen] pow_length}\\
(iii) & @{thm conc_Union_left} \\
(iv) & If @{thm (prem 1) star_decom} and @{thm (prem 2) star_decom} then
there exists an @{text "x\<^isub>p"} and @{text "x\<^isub>s"} with @{text "x = x\<^isub>p @ x\<^isub>s"}
and @{term "x\<^isub>p \<noteq> []"} such that @{term "x\<^isub>p \<in> A"} and @{term "x\<^isub>s \<in> A\<star>"}.
\end{tabular}
\end{prpstn}
\noindent
In @{text "(ii)"} we use the notation @{term "length s"} for the length of a
string; this property states that if \mbox{@{term "[] \<notin> A"}} then the lengths of
the strings in @{term "A \<up> (Suc n)"} must be longer than @{text n}.
Property @{text "(iv)"} states that a non-empty string in @{term "A\<star>"} can
always be split up into a non-empty prefix belonging to @{text "A"} and the
rest being in @{term "A\<star>"}. We omit
the proofs for these properties, but invite the reader to consult our
formalisation.\footnote{Available at \url{http://www4.in.tum.de/~urbanc/regexp.html}}
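For instance, if @{text "A = {[a], [b]}"} for two characters @{text a} and @{text b}, then
the definitions above yield @{text "A \<cdot> A = {[a, a], [a, b], [b, a], [b, b]}"} and
@{text "A \<up> 0 = {[]}"}; moreover, since @{term "A\<star>"} is the union of all powers, we have
for example @{text "[a, b, a] \<in> A\<star>"}.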
The notation in Isabelle/HOL for the quotient of a language @{text A}
according to an equivalence relation @{term REL} is @{term "A // REL"}. We
will write @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined as
\mbox{@{text "{y | y \<approx> x}"}}, and have @{text "x \<approx> y"} if and only if @{text
"\<lbrakk>x\<rbrakk>\<^isub>\<approx> = \<lbrakk>y\<rbrakk>\<^isub>\<approx>"}.
Central to our proof will be the solution of equational systems
involving equivalence classes of languages. For this we will use Arden's Lemma
(see \cite[Page 100]{Sakarovitch09}),
which solves equations of the form @{term "X = A \<cdot> X \<union> B"} provided
@{term "[] \<notin> A"}. However we will need the following `reverse'
version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A \<cdot> X"} to
\mbox{@{term "X \<cdot> A"}}).
\begin{lmm}[Reverse Arden's Lemma]\label{arden}\mbox{}\\
If @{thm (prem 1) arden} then
@{thm (lhs) arden} if and only if
@{thm (rhs) arden}.
\end{lmm}
\begin{proof}
For the right-to-left direction we assume @{thm (rhs) arden} and show
that @{thm (lhs) arden} holds. From Prop.~\ref{langprops}@{text "(i)"}
we have @{term "A\<star> = A \<cdot> A\<star> \<union> {[]}"},
which is equal to @{term "A\<star> = A\<star> \<cdot> A \<union> {[]}"}. Adding @{text B} to both
sides gives @{term "B \<cdot> A\<star> = B \<cdot> (A\<star> \<cdot> A \<union> {[]})"}, whose right-hand side
is equal to @{term "(B \<cdot> A\<star>) \<cdot> A \<union> B"}. This completes this direction.
For the other direction we assume @{thm (lhs) arden}. By a simple induction
on @{text n}, we can establish the property
\begin{center}
@{text "(*)"}\hspace{5mm} @{thm (concl) arden_helper}
\end{center}
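\noindent
For instance, for @{text "n = 1"} property @{text "(*)"} is obtained by unfolding the
assumption @{text "X = X \<cdot> A \<union> B"} twice:
@{text "X = (X \<cdot> A \<union> B) \<cdot> A \<union> B = X \<cdot> (A \<up> 2) \<union> B \<cdot> (A \<up> 1) \<union> B \<cdot> (A \<up> 0)"}.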
\noindent
Using this property we can show that @{term "B \<cdot> (A \<up> n) \<subseteq> X"} holds for
all @{text n}. From this we can infer @{term "B \<cdot> A\<star> \<subseteq> X"} using the definition
of @{text "\<star>"}.
For the inclusion in the other direction we assume a string @{text s}
with length @{text k} is an element in @{text X}. Since @{thm (prem 1) arden}
we know by Prop.~\ref{langprops}@{text "(ii)"} that
@{term "s \<notin> X \<cdot> (A \<up> Suc k)"} since its length is only @{text k}
(the strings in @{term "X \<cdot> (A \<up> Suc k)"} are all longer).
From @{text "(*)"} it follows then that
@{term s} must be an element in @{term "(\<Union>m\<in>{0..k}. B \<cdot> (A \<up> m))"}. This in turn
implies that @{term s} is in @{term "(\<Union>n. B \<cdot> (A \<up> n))"}. Using Prop.~\ref{langprops}@{text "(iii)"}
this is equal to @{term "B \<cdot> A\<star>"}, as we needed to show.
\end{proof}
\noindent
Regular expressions are defined as the inductive datatype
\begin{center}
\begin{tabular}{rcl}
@{text r} & @{text "::="} & @{term ZERO}\\
& @{text"|"} & @{term One}\\
& @{text"|"} & @{term "Atom c"}\\
& @{text"|"} & @{term "Times r r"}\\
& @{text"|"} & @{term "Plus r r"}\\
& @{text"|"} & @{term "Star r"}
\end{tabular}
\end{center}
\noindent
and the language matched by a regular expression is defined as
\begin{center}
\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
@{thm (lhs) lang.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(1)}\\
@{thm (lhs) lang.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(2)}\\
@{thm (lhs) lang.simps(3)[where a="c"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(3)[where a="c"]}\\
@{thm (lhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
@{thm (rhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
@{thm (lhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
@{thm (rhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
@{thm (lhs) lang.simps(6)[where r="r"]} & @{text "\<equiv>"} &
@{thm (rhs) lang.simps(6)[where r="r"]}\\
\end{tabular}
\end{center}
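\noindent
Both definitions can be given to Isabelle/HOL essentially verbatim. The following sketch
shows the idea (the library we build on is actually polymorphic in the alphabet, a detail
we gloss over here):
\begin{center}
\begin{tabular}{l}
@{text "datatype rexp = Zero | One | Atom char"}\\
@{text "              | Times rexp rexp | Plus rexp rexp | Star rexp"}\\[1mm]
@{text "primrec lang :: rexp \<Rightarrow> string set"}
\end{tabular}
\end{center}
\noindent
with the six equations above as the clauses of the primitive recursion. The structural
induction principle Isabelle/HOL generates for this datatype is precisely the reasoning
infrastructure that comes `for free' here.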
Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating
a regular expression that matches the union of all languages of @{text rs}. We only need to know the
existence
of such a regular expression and therefore we use Isabelle/HOL's @{const "fold_graph"} and Hilbert's
@{text "\<epsilon>"} to define @{term "\<Uplus>rs"}. This operation, roughly speaking, folds @{const PLUS} over the
set @{text rs} with @{const ZERO} for the empty set. We can prove that for a finite set @{text rs}
%
\begin{equation}\label{uplus}
\mbox{@{thm (lhs) folds_alt_simp} @{text "= \<Union> (\<calL> ` rs)"}}
\end{equation}
\noindent
holds, whereby @{text "\<calL> ` rs"} stands for the
image of the set @{text rs} under function @{text "\<calL>"} defined as
\begin{center}
@{term "lang ` rs \<equiv> {lang r | r. r \<in> rs}"}
\end{center}
\noindent
In what follows we shall use this convenient short-hand notation for images of sets
also with other functions.
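Returning to @{term "\<Uplus>rs"}: spelled out, the fold and the alternative-operator can be
defined along the following lines (a sketch of our definitions, where @{text SOME} is
Hilbert's choice operator):
\begin{center}
\begin{tabular}{l}
@{text "folds f z S \<equiv> SOME x. fold_graph f z S x"}\\
@{text "\<Uplus>rs \<equiv> folds PLUS ZERO rs"}
\end{tabular}
\end{center}
\noindent
Since @{const "fold_graph"} relates a set to the results of folding it in every possible
order, and @{text SOME} merely picks one such result, establishing \eqref{uplus} boils down
to showing that every such result matches the union of the languages in @{text rs}.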
*}
section {* The Myhill-Nerode Theorem, First Part *}
text {*
\noindent
The key definition in the Myhill-Nerode theorem is the
\emph{Myhill-Nerode relation}, which states that w.r.t.~a language two
strings are related, provided there is no distinguishing extension in this
language. This can be defined as a ternary relation.
\begin{dfntn}[Myhill-Nerode Relation]\label{myhillneroderel}
Given a language @{text A}, two strings @{text x} and
@{text y} are Myhill-Nerode related provided
\begin{center}
@{thm str_eq_def'}
\end{center}
\end{dfntn}
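\noindent
Viewed as a set of pairs, this relation can be written down in Isabelle/HOL along the
following lines (a sketch of the underlying definition):
\begin{center}
@{text "\<approx>A \<equiv> {(x, y). \<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A}"}
\end{center}
\noindent
The set-of-pairs form is what allows us to build the quotient @{term "UNIV // \<approx>A"} below.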
\noindent
It is easy to see that @{term "\<approx>A"} is an equivalence relation, which
partitions the set of all strings, @{text "UNIV"}, into a set of disjoint
equivalence classes. To illustrate this quotient construction, let us give a simple
example: consider the regular language containing just
the string @{text "[c]"}. The relation @{term "\<approx>({[c]})"} partitions @{text UNIV}
into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and @{text "X\<^isub>3"}
as follows
\begin{center}
\begin{tabular}{l}
@{text "X\<^isub>1 = {[]}"}\\
@{text "X\<^isub>2 = {[c]}"}\\
@{text "X\<^isub>3 = UNIV - {[], [c]}"}
\end{tabular}
\end{center}
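\noindent
To convince ourselves of this partitioning, note that the extension @{text "z = [c]"}
distinguishes @{text "[]"} from @{text "[c]"}: we have @{text "[] @ z \<in> {[c]}"}, but
@{text "[c] @ z \<notin> {[c]}"}. In contrast, any two strings in @{text "X\<^isub>3"} are related,
since no extension can turn a string in @{text "X\<^isub>3"} into @{text "[c]"} (or into @{text "[]"}).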
One direction of the Myhill-Nerode theorem establishes
that if there are finitely many equivalence classes, as in the example above, then
the language is regular. In our setting we therefore have to show:
\begin{thrm}\label{myhillnerodeone}
@{thm[mode=IfThen] Myhill_Nerode1}
\end{thrm}
\noindent
To prove this theorem, we first define the set @{term "finals A"} as those equivalence
classes from @{term "UNIV // \<approx>A"} that contain strings of @{text A}, namely
%
\begin{equation}
@{thm finals_def}
\end{equation}
\noindent
In our running example, @{text "X\<^isub>2"} is the only
equivalence class in @{term "finals {[c]}"}.
It is straightforward to show that in general
\begin{equation}\label{finalprops}
@{thm lang_is_union_of_finals}\hspace{15mm}
@{thm finals_in_partitions}
\end{equation}
\noindent
hold.
Therefore if we know that there exists a regular expression for every
equivalence class in \mbox{@{term "finals A"}} (which by assumption must be
a finite set), then we can use @{text "\<bigplus>"} to obtain a regular expression
that matches every string in @{text A}.
Our proof of Thm.~\ref{myhillnerodeone} relies on a method that can calculate a
regular expression for \emph{every} equivalence class, not just the ones
in @{term "finals A"}. We
first define the notion of \emph{one-character-transition} between
two equivalence classes
%
\begin{equation}
@{thm transition_def}
\end{equation}
\noindent
which means that if we concatenate the character @{text c} to the end of all
strings in the equivalence class @{text Y}, we obtain a subset of
@{text X}. Note that we do not define an automaton here; we merely relate two sets
(with the help of a character). In our concrete example we have
@{term "X\<^isub>1 \<Turnstile>c\<Rightarrow> X\<^isub>2"}, @{term "X\<^isub>1 \<Turnstile>d\<^isub>i\<Rightarrow> X\<^isub>3"} with @{text "d\<^isub>i"} being any
other character than @{text c}, and @{term "X\<^isub>3 \<Turnstile>c\<^isub>j\<Rightarrow> X\<^isub>3"} for any
character @{text "c\<^isub>j"}.
Next we construct an \emph{initial equational system} that
contains an equation for each equivalence class. We first give
an informal description of this construction. Suppose we have
the equivalence classes @{text "X\<^isub>1,\<dots>,X\<^isub>n"}; there must be one and only one that
contains the empty string @{text "[]"} (since equivalence classes are disjoint).
Let us assume @{text "[] \<in> X\<^isub>1"}. We build the following equational system
\begin{center}
\begin{tabular}{rcl}
@{text "X\<^isub>1"} & @{text "="} & @{text "(Y\<^isub>1\<^isub>1, ATOM c\<^isub>1\<^isub>1) + \<dots> + (Y\<^isub>1\<^isub>p, ATOM c\<^isub>1\<^isub>p) + \<lambda>(ONE)"} \\
@{text "X\<^isub>2"} & @{text "="} & @{text "(Y\<^isub>2\<^isub>1, ATOM c\<^isub>2\<^isub>1) + \<dots> + (Y\<^isub>2\<^isub>o, ATOM c\<^isub>2\<^isub>o)"} \\
& $\vdots$ \\
@{text "X\<^isub>n"} & @{text "="} & @{text "(Y\<^isub>n\<^isub>1, ATOM c\<^isub>n\<^isub>1) + \<dots> + (Y\<^isub>n\<^isub>q, ATOM c\<^isub>n\<^isub>q)"}\\
\end{tabular}
\end{center}
\noindent
where the terms @{text "(Y\<^isub>i\<^isub>j, ATOM c\<^isub>i\<^isub>j)"}
stand for all transitions @{term "Y\<^isub>i\<^isub>j \<Turnstile>c\<^isub>i\<^isub>j\<Rightarrow>
X\<^isub>i"}.
%The intuition behind the equational system is that every
%equation @{text "X\<^isub>i = rhs\<^isub>i"} in this system
%corresponds roughly to a state of an automaton whose name is @{text X\<^isub>i} and its predecessor states
%are the @{text "Y\<^isub>i\<^isub>j"}; the @{text "c\<^isub>i\<^isub>j"} are the labels of the transitions from these
%predecessor states to @{text X\<^isub>i}.
There can only be
finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, ATOM c\<^isub>i\<^isub>j)"} in a right-hand side
since by assumption there are only finitely many
equivalence classes and only finitely many characters.
The term @{text "\<lambda>(ONE)"} in the first equation acts as a marker for the initial state, that
is the equivalence class
containing @{text "[]"}.\footnote{Note that we mark, roughly speaking, the
single `initial' state in the equational system, which is different from
the method by Brzozowski \cite{Brzozowski64}, where he marks the
`terminal' states. We are forced to set up the equational system in our
way, because the Myhill-Nerode relation determines the `direction' of the
transitions---the successor `state' of an equivalence class @{text Y} can
be reached by adding a character to the end of @{text Y}. This is also the
reason why we have to use our reverse version of Arden's Lemma.}
In our running example we have the initial equational system
\begin{equation}\label{exmpcs}
\mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
@{term "X\<^isub>1"} & @{text "="} & @{text "\<lambda>(ONE)"}\\
@{term "X\<^isub>2"} & @{text "="} & @{text "(X\<^isub>1, ATOM c)"}\\
@{term "X\<^isub>3"} & @{text "="} & @{text "(X\<^isub>1, ATOM d\<^isub>1) + \<dots> + (X\<^isub>1, ATOM d\<^isub>n)"}\\
& & \mbox{}\hspace{10mm}@{text "+ (X\<^isub>3, ATOM c\<^isub>1) + \<dots> + (X\<^isub>3, ATOM c\<^isub>m)"}
\end{tabular}}
\end{equation}
\noindent
where @{text "d\<^isub>1\<dots>d\<^isub>n"} is the sequence of all characters
but not containing @{text c}, and @{text "c\<^isub>1\<dots>c\<^isub>m"} is the sequence of all
characters.
Overloading the function @{text \<calL>} for the two kinds of terms in the
equational system, we have
\begin{center}
@{text "\<calL>(Y, r) \<equiv>"} %
@{thm (rhs) lang_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm}
@{thm lang_trm.simps(1)[where r="r", THEN eq_reflection]}
\end{center}
\noindent
and we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations
%
\begin{equation}\label{inv1}
@{text "X\<^isub>i = \<calL>(Y\<^isub>i\<^isub>1, ATOM c\<^isub>i\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>i\<^isub>q, ATOM c\<^isub>i\<^isub>q)"}.
\end{equation}
\noindent
hold. Similarly for @{text "X\<^isub>1"} we can show the following equation
%
\begin{equation}\label{inv2}
@{text "X\<^isub>1 = \<calL>(Y\<^isub>1\<^isub>1, ATOM c\<^isub>1\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>1\<^isub>p, ATOM c\<^isub>1\<^isub>p) \<union> \<calL>(\<lambda>(ONE))"}.
\end{equation}
\noindent
holds. The reason for adding the @{text \<lambda>}-marker to our initial equational system is
to obtain this equation: it only holds with the marker, since none of
the other terms contain the empty string. The point of the initial equational system is
that solving it means we will be able to extract a regular expression for every equivalence class.
Our representation for the equations in Isabelle/HOL are pairs,
where the first component is an equivalence class (a set of strings)
and the second component
is a set of terms. Given a set of equivalence
classes @{text CS}, our initial equational system @{term "Init CS"} is thus
formally defined as
%
\begin{equation}\label{initcs}
\mbox{\begin{tabular}{rcl}
@{thm (lhs) Init_rhs_def} & @{text "\<equiv>"} &
@{text "if"}~@{term "[] \<in> X"}\\
& & @{text "then"}~@{term "{Trn Y (ATOM c) | Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X} \<union> {Lam ONE}"}\\
& & @{text "else"}~@{term "{Trn Y (ATOM c)| Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}"}\\
@{thm (lhs) Init_def} & @{text "\<equiv>"} & @{thm (rhs) Init_def}
\end{tabular}}
\end{equation}
\noindent
Because we use sets of terms
for representing the right-hand sides of equations, we can
prove \eqref{inv1} and \eqref{inv2} more concisely as
%
\begin{lmm}\label{inv}
If @{thm (prem 1) test} then @{text "X = \<Union> \<calL> ` rhs"}.
\end{lmm}
\noindent
Our proof of Thm.~\ref{myhillnerodeone} will proceed by transforming the
initial equational system into one in \emph{solved form} maintaining the invariant
in Lem.~\ref{inv}. From the solved form we will be able to read
off the regular expressions.
In order to transform an equational system into solved form, we have two
operations: one that takes an equation of the form @{text "X = rhs"} and removes
any recursive occurrences of @{text X} in the @{text rhs} using our variant of Arden's
Lemma. The other operation takes an equation @{text "X = rhs"}
and substitutes @{text X} throughout the rest of the equational system,
adjusting the remaining regular expressions appropriately. To define this adjustment
we define the \emph{append-operation} taking a term and a regular expression as argument
\begin{center}
\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
@{thm (lhs) Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}
& @{text "\<equiv>"} &
@{thm (rhs) Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\\
@{thm (lhs) Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}
& @{text "\<equiv>"} &
@{thm (rhs) Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}
\end{tabular}
\end{center}
\noindent
We lift this operation to entire right-hand sides of equations, written as
@{thm (lhs) Append_rexp_rhs_def[where rexp="r"]}. With this we can define
the \emph{arden-operation} for an equation of the form @{text "X = rhs"} as:
%
\begin{equation}\label{arden_def}
\mbox{\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l}
@{thm (lhs) Arden_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\
& & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \<in> rhs}"} \\
& & @{text "r' ="} & @{term "Star (\<Uplus> {r. Trn X r \<in> rhs})"}\\
& & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "Append_rexp_rhs rhs' r'"}}\\
\end{tabular}}
\end{equation}
\noindent
In this definition, we first delete all terms of the form @{text "(X, r)"} from @{text rhs};
then we calculate the combined regular expression for all @{text r} coming
from the deleted @{text "(X, r)"}, and take the @{const Star} of it;
finally we append this regular expression to @{text rhs'}. If we apply this
operation to the right-hand side of @{text "X\<^isub>3"} in \eqref{exmpcs}, we obtain
the equation:
\begin{center}
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
@{term "X\<^isub>3"} & @{text "="} &
@{text "(X\<^isub>1, TIMES (ATOM d\<^isub>1) (STAR \<^raw:\ensuremath{\bigplus}>{ATOM c\<^isub>1,\<dots>, ATOM c\<^isub>m})) + \<dots> "}\\
& & \mbox{}\hspace{13mm}
@{text "\<dots> + (X\<^isub>1, TIMES (ATOM d\<^isub>n) (STAR \<^raw:\ensuremath{\bigplus}>{ATOM c\<^isub>1,\<dots>, ATOM c\<^isub>m}))"}
\end{tabular}
\end{center}
\noindent
That means we have eliminated the recursive occurrence of @{text "X\<^isub>3"} on the
right-hand side. Note that we used the abbreviation
@{text "\<^raw:\ensuremath{\bigplus}>{ATOM c\<^isub>1,\<dots>, ATOM c\<^isub>m}"}
to stand for a regular expression that matches every character. In
our algorithm we are only interested in the existence of such a regular expression
and do not specify it any further.
It can be easily seen that the @{text "Arden"}-operation mimics Arden's
Lemma on the level of equations. To ensure the non-emptiness condition of
Arden's Lemma we say that a right-hand side is @{text ardenable} provided
\begin{center}
@{thm ardenable_def}
\end{center}
\noindent
This allows us to prove a version of Arden's Lemma on the level of equations.
\begin{lmm}\label{ardenable}
Given an equation @{text "X = rhs"}.
If @{text "X = \<Union>\<calL> ` rhs"},
@{thm (prem 2) Arden_preserves_soundness}, and
@{thm (prem 3) Arden_preserves_soundness}, then
@{text "X = \<Union>\<calL> ` (Arden X rhs)"}.
\end{lmm}
\noindent
Our @{text ardenable} condition is slightly stronger than needed for applying Arden's Lemma,
but we can still ensure that it holds throughout our algorithm of transforming equations
into solved form. The \emph{substitution-operation} takes an equation
of the form @{text "X = xrhs"} and substitutes it into the right-hand side @{text rhs}.
\begin{center}
\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l}
@{thm (lhs) Subst_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\
& & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \<in> rhs}"} \\
& & @{text "r' ="} & @{term "\<Uplus> {r. Trn X r \<in> rhs}"}\\
& & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \<union> Append_rexp_rhs xrhs r'"}}\\
\end{tabular}
\end{center}
\noindent
We again first delete all occurrences of @{text "(X, r)"} in @{text rhs}; we then calculate
the regular expression corresponding to the deleted terms; finally we append this
regular expression to @{text "xrhs"} and take the union with @{text rhs'}. When we use
the substitution operation we will arrange it so that @{text "xrhs"} does not contain
any occurrence of @{text X}. For example substituting the first equation in
\eqref{exmpcs} into the right-hand side of the second, thus eliminating the equivalence
class @{text "X\<^isub>1"}, gives us the equation
\begin{equation}\label{exmpresult}
\mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
@{term "X\<^isub>2"} & @{text "="} & @{text "\<lambda>(TIMES ONE (ATOM c))"}\\
\end{tabular}}
\end{equation}
With these two operations in place, we can define the operation that removes one equation
from an equational system @{text ES}. The operation @{const Subst_all}
substitutes an equation @{text "X = xrhs"} throughout an equational system @{text ES};
@{const Remove} then completely removes such an equation from @{text ES} by substituting
it into the rest of the equational system, but first eliminating all recursive occurrences
of @{text X} by applying @{const Arden} to @{text "xrhs"}.
\begin{center}
\begin{tabular}{rcl}
@{thm (lhs) Subst_all_def} & @{text "\<equiv>"} & @{thm (rhs) Subst_all_def}\\
@{thm (lhs) Remove_def} & @{text "\<equiv>"} & @{thm (rhs) Remove_def}
\end{tabular}
\end{center}
\noindent
Finally, we can define how an equational system should be solved. For this
we will need to iterate the process of eliminating equations until only one equation
is left in the system. However, we do not want just any equation
to be the last one, but the one involving the equivalence class for
which we want to calculate the regular
expression. Let us suppose this equivalence class is @{text X}.
Since @{text X} is the one to be solved, in every iteration step we have to pick an
equation to be eliminated that is different from @{text X}. In this way
@{text X} is kept until the final step. The choice is implemented using Hilbert's choice
operator, written @{text SOME} in the definition below.
\begin{center}
\begin{tabular}{rc@ {\hspace{4mm}}r@ {\hspace{1mm}}l}
@{thm (lhs) Iter_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "let"}}\\
& & @{text "(Y, yrhs) ="} & @{term "SOME (Y, yrhs). (Y, yrhs) \<in> ES \<and> X \<noteq> Y"} \\
& & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "in"}~~@{term "Remove ES Y yrhs"}}\\
\end{tabular}
\end{center}
\noindent
The last definition we need applies @{term Iter} over and over until a condition
@{text Cond} is \emph{not} satisfied anymore. This condition states that there
is more than one equation left in the equational system @{text ES}. To solve
an equational system we use Isabelle/HOL's @{text while}-operator as follows:
\begin{center}
@{thm Solve_def}
\end{center}
\noindent
We are not concerned here with the definition of this operator
(see Berghofer and Nipkow \cite{BerghoferNipkow00}), but note that we eliminate
in each @{const Iter}-step a single equation, and therefore
have a well-founded termination order by taking the cardinality
of the equational system @{text ES}. This enables us to prove
properties about our definition of @{const Solve} when we `call' it with
the equivalence class @{text X} and the initial equational system
@{term "Init (UNIV // \<approx>A)"} from
\eqref{initcs} using the principle:
%
\begin{equation}\label{whileprinciple}
\mbox{\begin{tabular}{l}
@{term "invariant (Init (UNIV // \<approx>A))"} \\
@{term "\<forall>ES. invariant ES \<and> Cond ES \<longrightarrow> invariant (Iter X ES)"}\\
@{term "\<forall>ES. invariant ES \<and> Cond ES \<longrightarrow> card (Iter X ES) < card ES"}\\
@{term "\<forall>ES. invariant ES \<and> \<not> Cond ES \<longrightarrow> P ES"}\\
\hline
\multicolumn{1}{c}{@{term "P (Solve X (Init (UNIV // \<approx>A)))"}}
\end{tabular}}
\end{equation}
\noindent
This principle states that given an invariant (which we will specify below)
we can prove a property
@{text "P"} involving @{const Solve}. For this we have to discharge the following
proof obligations: first the
initial equational system satisfies the invariant; second the iteration
step @{text "Iter"} preserves the invariant as long as the condition @{term Cond} holds;
third @{text "Iter"} decreases the termination order, and fourth that
once the condition does not hold anymore then the property @{text P} must hold.
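This principle corresponds to the generic rule @{text while_rule} of Isabelle/HOL's
while-combinator library, which has roughly the following form (we state it only
approximately here):
\begin{center}
\begin{tabular}{l}
@{text "\<lbrakk>P s; \<And>s. \<lbrakk>P s; b s\<rbrakk> \<Longrightarrow> P (c s); \<And>s. \<lbrakk>P s; \<not> b s\<rbrakk> \<Longrightarrow> Q s;"}\\
@{text "  wf r; \<And>s. \<lbrakk>P s; b s\<rbrakk> \<Longrightarrow> (c s, s) \<in> r\<rbrakk> \<Longrightarrow> Q (while b c s)"}
\end{tabular}
\end{center}
\noindent
instantiated with the invariant for @{text P}, @{text Cond} for @{text b}, @{term "Iter X"}
for @{text c}, and the relation induced by the decreasing cardinality of the equational
system for the well-founded relation @{text r}.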
The property @{term P} in our proof will state that @{term "Solve X (Init (UNIV // \<approx>A))"}
returns with a single equation @{text "X = xrhs"} for some @{text "xrhs"}, and
that this equational system still satisfies the invariant. In order to get
the proof through, the invariant is composed of the following six properties:
\begin{center}
\begin{tabular}{@ {}rcl@ {\hspace{-13mm}}l @ {}}
@{text "invariant ES"} & @{text "\<equiv>"} &
@{term "finite ES"} & @{text "(finiteness)"}\\
& @{text "\<and>"} & @{thm (rhs) finite_rhs_def} & @{text "(finiteness rhs)"}\\
& @{text "\<and>"} & @{text "\<forall>(X, rhs)\<in>ES. X = \<Union>\<calL> ` rhs"} & @{text "(soundness)"}\\
& @{text "\<and>"} & @{thm (rhs) distinctness_def}\\
& & & @{text "(distinctness)"}\\
& @{text "\<and>"} & @{thm (rhs) ardenable_all_def} & @{text "(ardenable)"}\\
& @{text "\<and>"} & @{thm (rhs) validity_def} & @{text "(validity)"}\\
\end{tabular}
\end{center}
\noindent
The first two ensure that the equational system is always finite (number of equations
and number of terms in each equation); the third makes sure the `meaning' of the
equations is preserved under our transformations. The other properties are a bit more
technical, but are needed to get our proof through. Distinctness states that every
equation in the system is distinct. @{text Ardenable} ensures that we can always
apply the @{text Arden} operation.
The last property states that every @{text rhs} can only contain equivalence classes
for which there is an equation. Therefore @{text lhss} is just the set containing
the first components of an equational system,
while @{text "rhss"} collects all equivalence classes @{text X} in the terms of the
form @{term "Trn X r"}. That means formally @{thm (lhs) lhss_def}~@{text "\<equiv> {X | (X, rhs) \<in> ES}"}
and @{thm (lhs) rhss_def}~@{text "\<equiv> {X | (X, r) \<in> rhs}"}.
It is straightforward to prove that the initial equational system satisfies the
invariant.
\begin{lmm}\label{invzero}
@{thm[mode=IfThen] Init_ES_satisfies_invariant}
\end{lmm}
\begin{proof}
Finiteness is given by the assumption and the way we set up the
initial equational system. Soundness is proved in Lem.~\ref{inv}. Distinctness
follows from the fact that the equivalence classes are disjoint. The @{text ardenable}
property also follows from the setup of the initial equational system, as does
validity.
\end{proof}
\noindent
Next we show that @{text Iter} preserves the invariant.
\begin{lmm}\label{iterone}
@{thm[mode=IfThen] iteration_step_invariant[where xrhs="rhs"]}
\end{lmm}
\begin{proof}
The argument boils down to choosing an equation @{text "Y = yrhs"} to be eliminated
and to show that @{term "Subst_all (ES - {(Y, yrhs)}) Y (Arden Y yrhs)"}
preserves the invariant.
We prove this as follows:
\begin{center}
\begin{tabular}{@ {}l@ {}}
@{text "\<forall> ES."}\\ \mbox{}\hspace{5mm}@{thm (prem 1) Subst_all_satisfies_invariant} implies
@{thm (concl) Subst_all_satisfies_invariant}
\end{tabular}
\end{center}
\noindent
Finiteness is straightforward, as the @{const Subst} and @{const Arden} operations
keep the equational system finite. These operations also preserve soundness
and distinctness (we proved soundness for @{const Arden} in Lem.~\ref{ardenable}).
The property @{text ardenable} is clearly preserved because the append-operation
cannot make a regular expression match the empty string. Validity is
given because @{const Arden} removes an equivalence class from @{text yrhs}
and then @{const Subst_all} removes @{text Y} from the equational system.
Having proved the implication above, we can instantiate @{text "ES"} with @{text "ES - {(Y, yrhs)}"},
which matches our proof obligation for @{const "Subst_all"}. Since
\mbox{@{term "ES = ES - {(Y, yrhs)} \<union> {(Y, yrhs)}"}}, we can use the assumption
to complete the proof.
\end{proof}
\noindent
We also need the fact that @{text Iter} decreases the termination measure.
\begin{lmm}\label{itertwo}
@{thm[mode=IfThen] iteration_step_measure[simplified (no_asm), where xrhs="rhs"]}
\end{lmm}
\begin{proof}
By assumption we know that @{text "ES"} is finite and has more than one element.
Therefore there must be an element @{term "(Y, yrhs) \<in> ES"} with
@{term "(Y, yrhs) \<noteq> (X, rhs)"}. Using the distinctness property we can infer
that @{term "Y \<noteq> X"}. We further know that @{text "Remove ES Y yrhs"}
removes the equation @{text "Y = yrhs"} from the system, and therefore
the cardinality of @{const Iter} strictly decreases.
\end{proof}
\noindent
This brings us to the property we want to establish for @{text Solve}.
\begin{lmm}
If @{thm (prem 1) Solve} and @{thm (prem 2) Solve} then there exists
a @{text rhs} such that @{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"}
and @{term "invariant {(X, rhs)}"}.
\end{lmm}
\begin{proof}
In order to prove this lemma using \eqref{whileprinciple}, we have to use a slightly
stronger invariant since Lem.~\ref{iterone} and \ref{itertwo} have the precondition
that @{term "(X, rhs) \<in> ES"} for some @{text rhs}. This precondition is needed
in order to choose in the @{const Iter}-step an equation that is not \mbox{@{term "X = rhs"}}.
Therefore our invariant cannot be just @{term "invariant ES"}, but must be
@{term "invariant ES \<and> (\<exists>rhs. (X, rhs) \<in> ES)"}. By assumption
@{thm (prem 2) Solve} and Lem.~\ref{invzero}, the more general invariant holds for
the initial equational system. This is premise 1 of~\eqref{whileprinciple}.
Premise 2 is given by Lem.~\ref{iterone} and the fact that @{const Iter} might
modify the @{text rhs} in the equation @{term "X = rhs"}, but does not remove it.
Premise 3 of~\eqref{whileprinciple} is by Lem.~\ref{itertwo}. Now in premise 4
we would like to show that there exists an @{text rhs} such that @{term "ES = {(X, rhs)}"}
and that @{text "invariant {(X, rhs)}"} holds, provided the condition @{text "Cond"}
does not hold. By the stronger invariant we know there exists such an @{text "rhs"}
with @{term "(X, rhs) \<in> ES"}. Because @{text Cond} is not true, we know the cardinality
of @{text ES} is @{text 1}. This means @{text "ES"} must actually be the set @{text "{(X, rhs)}"},
for which the invariant holds. This allows us to conclude that
@{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"} hold,
as needed.
\end{proof}
\noindent
With this lemma in place we can show that for every equivalence class in @{term "UNIV // \<approx>A"}
there exists a regular expression.
\begin{lmm}\label{every_eqcl_has_reg}
@{thm[mode=IfThen] every_eqcl_has_reg}
\end{lmm}
\begin{proof}
By the preceding lemma, we know that there exists a @{text "rhs"} such
that @{term "Solve X (Init (UNIV // \<approx>A))"} returns the equation @{text "X = rhs"},
and that the invariant holds for this equation. That means we
know @{text "X = \<Union>\<calL> ` rhs"}. We further know that
this is equal to \mbox{@{text "\<Union>\<calL> ` (Arden X rhs)"}} using the properties of the
invariant and Lem.~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"},
we can infer that @{term "rhss rhs \<subseteq> {X}"}, and because the @{text Arden} operation
removes this @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}.
This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}.
So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = lang (\<Uplus>rs)"}.
With this we can conclude the proof.
\end{proof}
\noindent
Lem.~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction
of the Myhill-Nerode theorem.
\begin{proof}[of Thm.~\ref{myhillnerodeone}]
By Lem.~\ref{every_eqcl_has_reg} we know that there exists a regular expression for
every equivalence class in @{term "UNIV // \<approx>A"}. Since @{text "finals A"} is
a subset of @{term "UNIV // \<approx>A"}, we also know that for every equivalence class
in @{term "finals A"} there exists a regular expression. Moreover by assumption
we know that @{term "finals A"} must be finite, and therefore there must be a finite
set of regular expressions @{text "rs"} such that
@{term "\<Union>(finals A) = lang (\<Uplus>rs)"}.
Since the left-hand side is equal to @{text A}, we can use @{term "\<Uplus>rs"}
as the regular expression that is needed in the theorem.
\end{proof}
*}
section {* Myhill-Nerode, Second Part *}
text {*
\noindent
In this section we will give a proof establishing the second
part of the Myhill-Nerode theorem. It can be formulated in our setting as follows:
\begin{thrm}\label{myhillnerodetwo}
Given @{text "r"} is a regular expression, then @{thm Myhill_Nerode2}.
\end{thrm}
\noindent
The proof will be by induction on the structure of @{text r}. It turns out
the base cases are straightforward.
\begin{proof}[Base Cases]
The cases for @{const ZERO}, @{const ONE} and @{const ATOM} are routine, because
we can easily establish that
\begin{center}
\begin{tabular}{l}
@{thm quot_zero_eq}\\
@{thm quot_one_subset}\\
@{thm quot_atom_subset}
\end{tabular}
\end{center}
\noindent
hold, which shows that @{term "UNIV // \<approx>(lang r)"} must be finite.
\end{proof}
\noindent
Much more interesting, however, are the inductive cases. They seem hard to solve
directly. The reader is invited to try.
In order to see how our proof proceeds, consider the following suggestive picture
taken from Constable et al.~\cite{Constable00}:
\begin{equation}\label{pics}
\mbox{\begin{tabular}{c@ {\hspace{10mm}}c@ {\hspace{10mm}}c}
\begin{tikzpicture}[scale=1]
%Circle
\draw[thick] (0,0) circle (1.1);
\end{tikzpicture}
&
\begin{tikzpicture}[scale=1]
%Circle
\draw[thick] (0,0) circle (1.1);
%Main rays
\foreach \a in {0, 90,...,359}
\draw[very thick] (0, 0) -- (\a:1.1);
\foreach \a / \l in {45/1, 135/2, 225/3, 315/4}
\draw (\a: 0.65) node {$a_\l$};
\end{tikzpicture}
&
\begin{tikzpicture}[scale=1]
%Circle
\draw[thick] (0,0) circle (1.1);
%Main rays
\foreach \a in {0, 45,...,359}
\draw[very thick] (0, 0) -- (\a:1.1);
\foreach \a / \l in {22.5/1.1, 67.5/1.2, 112.5/2.1, 157.5/2.2, 202.4/3.1, 247.5/3.2, 292.5/4.1, 337.5/4.2}
\draw (\a: 0.77) node {$a_{\l}$};
\end{tikzpicture}\\
@{term UNIV} & @{term "UNIV // (\<approx>(lang r))"} & @{term "UNIV // R"}
\end{tabular}}
\end{equation}
\noindent
The relation @{term "\<approx>(lang r)"} partitions the set of all strings, @{term UNIV}, into some
equivalence classes. To show that there are only finitely many of them, it
suffices to show in each induction step that another relation, say @{text
R}, has finitely many equivalence classes and refines @{term "\<approx>(lang r)"}.
\begin{dfntn}
A relation @{text "R\<^isub>1"} is said to \emph{refine} @{text "R\<^isub>2"}
provided @{text "R\<^isub>1 \<subseteq> R\<^isub>2"}.
\end{dfntn}
\noindent
For constructing @{text R} we will
rely on some \emph{tagging-functions} defined over strings. Given the
inductive hypothesis, it will be easy to prove that the \emph{range} of
these tagging-functions is finite. The range of a function @{text f} is
defined as
\begin{center}
@{text "range f \<equiv> f ` UNIV"}
\end{center}
\noindent
that means we take the image of @{text f} w.r.t.~all elements in the
domain. With this we will be able to infer that the tagging-functions, seen
as relations, give rise to finitely many equivalence classes.
Finally we will show that the tagging-relations are more refined than
@{term "\<approx>(lang r)"}, which implies that @{term "UNIV // \<approx>(lang r)"} must
also be finite. We formally define the notion of a \emph{tagging-relation}
as follows.
\begin{dfntn}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x}
and @{text y} are \emph{tag-related} provided
\begin{center}
@{text "x \<^raw:$\threesim$>\<^bsub>tag\<^esub> y \<equiv> tag x = tag y"}\;.
\end{center}
\end{dfntn}
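\noindent
Spelled out as a set of pairs, the tagging-relation is (a sketch of the definition):
\begin{center}
@{text "=tag= \<equiv> {(x, y). tag x = tag y}"}
\end{center}
\noindent
which, being the kernel of the function @{text tag}, is obviously an equivalence relation.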
In order to establish finiteness of a set @{text A}, we shall use the following powerful
principle from Isabelle/HOL's library.
%
\begin{equation}\label{finiteimageD}
@{thm[mode=IfThen] finite_imageD}
\end{equation}
\noindent
It states that if an image of a set under an injective function @{text f} (injective over this set)
is finite, then the set @{text A} itself must be finite. We can use it to establish the following
two lemmas.
\begin{lmm}\label{finone}
@{thm[mode=IfThen] finite_eq_tag_rel}
\end{lmm}
\begin{proof}
In \eqref{finiteimageD} we set @{text f} to be @{text "X \<mapsto> tag ` X"}. We have
@{text "range f"} to be a subset of @{term "Pow (range tag)"}, which we know must be
finite by assumption. Now @{term "f ` (UNIV // =tag=)"} is a subset of @{text "range f"},
and so also finite. Injectivity amounts to showing that @{text "X = Y"} under the
assumptions that @{text "X, Y \<in> "}~@{term "UNIV // =tag="} and @{text "f X = f Y"}.
From the assumptions we can obtain @{text "x \<in> X"} and @{text "y \<in> Y"} with
@{text "tag x = tag y"}. Since @{text x} and @{text y} are tag-related, this in
turn means that the equivalence classes @{text X}
and @{text Y} must be equal.
\end{proof}
\begin{lmm}\label{fintwo}
Given two equivalence relations @{text "R\<^isub>1"} and @{text "R\<^isub>2"}, whereby
@{text "R\<^isub>1"} refines @{text "R\<^isub>2"}.
If @{thm (prem 1) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}
then @{thm (concl) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}.
\end{lmm}
\begin{proof}
We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to
be @{text "X \<mapsto>"}~@{term "{R\<^isub>1 `` {x} | x. x \<in> X}"}. It is easy to see that
@{term "finite (f ` (UNIV // R\<^isub>2))"} because it is a subset of @{term "Pow (UNIV // R\<^isub>1)"},
which must be finite by assumption. What remains to be shown is that @{text f} is injective
on @{term "UNIV // R\<^isub>2"}. This is equivalent to showing that two equivalence
classes, say @{text "X"} and @{text Y}, in @{term "UNIV // R\<^isub>2"} are equal, provided
@{text "f X = f Y"}. For @{text "X = Y"} to be equal, we have to find two elements
@{text "x \<in> X"} and @{text "y \<in> Y"} such that they are @{text R\<^isub>2} related.
We know there exists a @{text "x \<in> X"} with \mbox{@{term "X = R\<^isub>2 `` {x}"}}.
From the latter fact we can infer that @{term "R\<^isub>1 ``{x} \<in> f X"}
and further @{term "R\<^isub>1 ``{x} \<in> f Y"}. This means we can obtain a @{text y}
such that @{term "R\<^isub>1 `` {x} = R\<^isub>1 `` {y}"} holds. Consequently @{text x} and @{text y}
are @{text "R\<^isub>1"}-related. Since by assumption @{text "R\<^isub>1"} refines @{text "R\<^isub>2"},
they must also be @{text "R\<^isub>2"}-related, as we need to show.
\end{proof}
\noindent
Chaining Lem.~\ref{finone} and \ref{fintwo} together means that, in order to show
that @{term "UNIV // \<approx>(lang r)"} is finite, we have to construct a tagging-function whose
range can be shown to be finite and whose tagging-relation refines @{term "\<approx>(lang r)"}.
Let us attempt the @{const PLUS}-case first. We take as tagging-function
\begin{center}
@{thm tag_Plus_def[where A="A" and B="B", THEN meta_eq_app]}
\end{center}
\noindent
where @{text "A"} and @{text "B"} are some arbitrary languages. The reason for this choice
is that we need to establish that @{term "=(tag_Plus A B)="} refines @{term "\<approx>(A \<union> B)"}.
This amounts to showing @{term "x \<approx>A y"} or @{term "x \<approx>B y"} under the assumption
@{term "x"}~@{term "=(tag_Plus A B)="}~@{term y}. As we shall see, this definition will
provide us with just the right assumptions in order to get the proof through.
\begin{proof}[@{const "PLUS"}-Case]
We can show in general that if @{term "finite (UNIV // \<approx>A)"} and @{term "finite
(UNIV // \<approx>B)"} then @{term "finite ((UNIV // \<approx>A) \<times> (UNIV // \<approx>B))"}
holds. The range of @{term "tag_Plus A B"} is a subset of this product
set---so finite. For the refinement proof-obligation, we know that @{term
"(\<approx>A `` {x}, \<approx>B `` {x}) = (\<approx>A `` {y}, \<approx>B `` {y})"} holds by assumption. Then
clearly either @{term "x \<approx>A y"} or @{term "x \<approx>B y"}, as we needed to
show. Finally we can discharge this case by setting @{text A} to @{term
"lang r\<^isub>1"} and @{text B} to @{term "lang r\<^isub>2"}.
\end{proof}
\noindent
The @{const TIMES}-case is slightly more complicated. We first prove the
following lemma, which will aid the proof about refinement.
\begin{lmm}\label{refinement}
The relation @{text "\<^raw:$\threesim$>\<^bsub>tag\<^esub>"} refines @{term "\<approx>A"}, provided for
all strings @{text x}, @{text y} and @{text z} we have that \mbox{@{text "x \<^raw:$\threesim$>\<^bsub>tag\<^esub> y"}}
and @{term "x @ z \<in> A"} imply @{text "y @ z \<in> A"}.
\end{lmm}
\noindent
We can therefore analyse how the strings @{text "x @ z"} can be in the language
@{text A} and then construct an appropriate tagging-function so that we can infer
that the strings @{term "y @ z"} are also in @{text A}. For this we will use the notion of
the set of all possible \emph{partitions} of a string:
\begin{equation}
@{thm Partitions_def}
\end{equation}
\noindent
If we know that @{text "(x\<^isub>p, x\<^isub>s) \<in> Partitions x"}, we will
refer to @{text "x\<^isub>p"} as the \emph{prefix} of the string @{text x},
and respectively to @{text "x\<^isub>s"} as the \emph{suffix}.
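Although executability plays no role in our formalisation, the notion of
partitions is easily computable. The following Haskell sketch (the function
and its name are ours, not part of the formalisation) enumerates all
partitions of a string:

\begin{verbatim}
import Data.List (inits, tails)

-- All ways of splitting a string into a prefix and a suffix,
-- mirroring the definition of Partitions above.
partitions :: String -> [(String, String)]
partitions x = zip (inits x) (tails x)

-- ghci> partitions "ab"
-- [("","ab"),("a","b"),("ab","")]
\end{verbatim}

\noindent
Note that a string of length $n$ has exactly $n+1$ partitions, so this set is
always finite.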
Now assuming @{term "x @ z \<in> A \<cdot> B"}, there are only two possible ways in which
this string can be `split' so as to be in @{term "A \<cdot> B"}:
%
\begin{center}
\begin{tabular}{c}
\scalebox{1}{
\begin{tikzpicture}
\node[draw,minimum height=3.8ex] (x) { $\hspace{4.8em}@{text x}\hspace{4.8em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of x] (za) { $\hspace{0.6em}@{text "z\<^isub>p"}\hspace{0.6em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of za] (zza) { $\hspace{2.6em}@{text "z\<^isub>s"}\hspace{2.6em}$ };
\draw[decoration={brace,transform={yscale=3}},decorate]
(x.north west) -- ($(za.north west)+(0em,0em)$)
node[midway, above=0.5em]{@{text x}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(za.north west)+(0em,0ex)$) -- ($(zza.north east)+(0em,0ex)$)
node[midway, above=0.5em]{@{text z}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(x.north west)+(0em,3ex)$) -- ($(zza.north east)+(0em,3ex)$)
node[midway, above=0.8em]{@{term "x @ z \<in> A \<cdot> B"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(za.south east)+(0em,0ex)$) -- ($(x.south west)+(0em,0ex)$)
node[midway, below=0.5em]{@{text "x @ z\<^isub>p \<in> A"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(zza.south east)+(0em,0ex)$) -- ($(za.south east)+(0em,0ex)$)
node[midway, below=0.5em]{@{text "z\<^isub>s \<in> B"}};
\end{tikzpicture}}
\\[2mm]
\scalebox{1}{
\begin{tikzpicture}
\node[draw,minimum height=3.8ex] (xa) { $\hspace{3em}@{text "x\<^isub>p"}\hspace{3em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.2em}@{text "x\<^isub>s"}\hspace{0.2em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of xxa] (z) { $\hspace{5em}@{text z}\hspace{5em}$ };
\draw[decoration={brace,transform={yscale=3}},decorate]
(xa.north west) -- ($(xxa.north east)+(0em,0em)$)
node[midway, above=0.5em]{@{text x}};
\draw[decoration={brace,transform={yscale=3}},decorate]
(z.north west) -- ($(z.north east)+(0em,0em)$)
node[midway, above=0.5em]{@{text z}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(xa.north west)+(0em,3ex)$) -- ($(z.north east)+(0em,3ex)$)
node[midway, above=0.8em]{@{term "x @ z \<in> A \<cdot> B"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(z.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$)
node[midway, below=0.5em]{@{term "x\<^isub>s @ z \<in> B"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$)
node[midway, below=0.5em]{@{term "x\<^isub>p \<in> A"}};
\end{tikzpicture}}
\end{tabular}
\end{center}
%
\noindent
Either @{text x} together with a prefix of @{text "z"} is in @{text A} and the rest of @{text z} is in @{text B}
(first picture), or a prefix of @{text x} is in @{text A} and the rest of @{text x} together with @{text z} is in @{text B}
(second picture). In both cases we have to show that @{term "y @ z \<in> A \<cdot> B"}. We can only get
through the first case if we know that @{term "x \<approx>A y"} holds @{text "(*)"}, because then
we can infer from @{term "x @ z\<^isub>p \<in> A"} that @{term "y @ z\<^isub>p \<in> A"} holds for all @{text "z\<^isub>p"}.
In the second case we only know that @{text "x\<^isub>p"} and @{text "x\<^isub>s"} are one possible partition
of the string @{text x}. We have to know that @{text "x\<^isub>p"} and the
corresponding prefix @{text "y\<^isub>p"} are both in @{text "A"}, and that @{text "x\<^isub>s"} is `@{text B}-related'
to @{text "y\<^isub>s"} @{text "(**)"}. From the latter fact we can infer that @{text "y\<^isub>s @ z \<in> B"},
which solves the second case.
Taking the two requirements, @{text "(*)"} and @{text "(**)"}, together we define the
tagging-function in the @{const Times}-case as:
\begin{center}
@{thm (lhs) tag_Times_def[where ?A="A" and ?B="B"]}~@{text "\<equiv>"}~
@{text "(\<lbrakk>x\<rbrakk>\<^bsub>\<approx>A\<^esub>, {\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> | x\<^isub>p \<in> A \<and> (x\<^isub>p, x\<^isub>s) \<in> Partitions x})"}
\end{center}
\noindent
We have to make the assumption for all suffixes @{text "x\<^isub>s"}, since we do
not know anything about how the string @{term x} is partitioned.
With this definition in place, let us prove the @{const "Times"}-case.
\begin{proof}[@{const TIMES}-Case]
If @{term "finite (UNIV // \<approx>A)"} and @{term "finite (UNIV // \<approx>B)"}
then @{term "finite ((UNIV // \<approx>A) \<times> (Pow (UNIV // \<approx>B)))"} holds. The range of
@{term "tag_Times A B"} is a subset of this product set, and therefore finite.
For the refinement of @{term "\<approx>(A \<cdot> B)"} and @{text "\<^raw:$\threesim$>\<^bsub>\<times>tag A B\<^esub>"},
we have by Lemma \ref{refinement}
\begin{center}
@{term "tag_Times A B x = tag_Times A B y"}
\end{center}
\noindent
and @{term "x @ z \<in> A \<cdot> B"}, and have to establish @{term "y @ z \<in> A \<cdot>
B"}. As shown in the pictures above, there are two cases to be
considered. First, there exists a @{text "z\<^isub>p"} and @{text
"z\<^isub>s"} such that @{term "x @ z\<^isub>p \<in> A"} and @{text "z\<^isub>s
\<in> B"}. By the assumption about @{term "tag_Times A B"} we have @{term "\<approx>A
`` {x} = \<approx>A `` {y}"} and thus @{term "x \<approx>A y"}. Hence by the Myhill-Nerode
relation @{term "y @ z\<^isub>p \<in> A"} holds. Using @{text "z\<^isub>s \<in> B"},
we can conclude in this case with @{term "y @ z \<in> A \<cdot> B"} (recall @{text
"z\<^isub>p @ z\<^isub>s = z"}).
Second, there exists a partition @{text "x\<^isub>p"} and @{text "x\<^isub>s"} with
@{text "x\<^isub>p \<in> A"} and @{text "x\<^isub>s @ z \<in> B"}. We therefore have
\begin{center}
@{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> \<in> {\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> | x\<^isub>p \<in> A \<and> (x\<^isub>p, x\<^isub>s) \<in> Partitions x}"}
\end{center}
\noindent
and by the assumption about @{term "tag_Times A B"} also
\begin{center}
@{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> \<in> {\<lbrakk>y\<^isub>s\<rbrakk>\<^bsub>\<approx>B\<^esub> | y\<^isub>p \<in> A \<and> (y\<^isub>p, y\<^isub>s) \<in> Partitions y}"}
\end{center}
\noindent
This means there must be a partition @{text "y\<^isub>p"} and @{text "y\<^isub>s"}
such that @{term "y\<^isub>p \<in> A"} and @{term "\<approx>B `` {x\<^isub>s} = \<approx>B ``
{y\<^isub>s}"}. Unfolding the Myhill-Nerode relation and together with the
facts that @{text "x\<^isub>p \<in> A"} and \mbox{@{text "x\<^isub>s @ z \<in> B"}}, we
obtain @{term "y\<^isub>p \<in> A"} and @{text "y\<^isub>s @ z \<in> B"}, as needed in
this case. We again can complete the @{const TIMES}-case by setting @{text
A} to @{term "lang r\<^isub>1"} and @{text B} to @{term "lang r\<^isub>2"}.
\end{proof}
\noindent
The case for @{const Star} is similar to @{const TIMES}, but poses a few
extra challenges. To deal with them, we first define the notion of a \emph{string
prefix} and a \emph{strict string prefix}:
\begin{center}
\begin{tabular}{l}
@{text "x \<le> y \<equiv> \<exists>z. y = x @ z"}\\
@{text "x < y \<equiv> x \<le> y \<and> x \<noteq> y"}
\end{tabular}
\end{center}
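Both notions have an obvious computational reading; as a small illustration
(again outside the formalisation, with names of our choosing):

\begin{verbatim}
import Data.List (isPrefixOf)

-- x <= y  and  x < y  from the definitions above
prefixOf, strictPrefixOf :: String -> String -> Bool
prefixOf       x y = x `isPrefixOf` y
strictPrefixOf x y = x `isPrefixOf` y && x /= y
\end{verbatim}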
When analysing the case of @{text "x @ z"} being an element of @{term "A\<star>"}
where @{text x} is not the empty string, we have the following picture:
\begin{center}
\scalebox{1}{
\begin{tikzpicture}
\node[draw,minimum height=3.8ex] (xa) { $\hspace{4em}@{text "x\<^bsub>pmax\<^esub>"}\hspace{4em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.5em}@{text "x\<^bsub>s\<^esub>"}\hspace{0.5em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of xxa] (za) { $\hspace{2em}@{text "z\<^isub>a"}\hspace{2em}$ };
\node[draw,minimum height=3.8ex, right=-0.03em of za] (zb) { $\hspace{7em}@{text "z\<^isub>b"}\hspace{7em}$ };
\draw[decoration={brace,transform={yscale=3}},decorate]
(xa.north west) -- ($(xxa.north east)+(0em,0em)$)
node[midway, above=0.5em]{@{text x}};
\draw[decoration={brace,transform={yscale=3}},decorate]
(za.north west) -- ($(zb.north east)+(0em,0em)$)
node[midway, above=0.5em]{@{text z}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(xa.north west)+(0em,3ex)$) -- ($(zb.north east)+(0em,3ex)$)
node[midway, above=0.8em]{@{term "x @ z \<in> A\<star>"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(za.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$)
node[midway, below=0.5em]{@{term "x\<^isub>s @ z\<^isub>a \<in> A"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$)
node[midway, below=0.5em]{@{text "x\<^bsub>pmax\<^esub> \<in> A\<^isup>\<star>"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(zb.south east)+(0em,0ex)$) -- ($(zb.south west)+(0em,0ex)$)
node[midway, below=0.5em]{@{term "z\<^isub>b \<in> A\<star>"}};
\draw[decoration={brace,transform={yscale=3}},decorate]
($(zb.south east)+(0em,-4ex)$) -- ($(xxa.south west)+(0em,-4ex)$)
node[midway, below=0.5em]{@{term "x\<^isub>s @ z \<in> A\<star>"}};
\end{tikzpicture}}
\end{center}
%
\noindent
We can find a strict prefix @{text "x\<^isub>p"} of @{text x} such that @{term "x\<^isub>p \<in> A\<star>"},
@{text "x\<^isub>p < x"} and the rest @{term "x\<^isub>s @ z \<in> A\<star>"}. For example the empty string
@{text "[]"} would do (recall @{term "x \<noteq> []"}).
There are potentially many such prefixes, but there can only be finitely many of them (the
string @{text x} is finite). Let us therefore choose the longest one and call it
@{text "x\<^bsub>pmax\<^esub>"}. Now for the rest of the string @{text "x\<^isub>s @ z"} we
know it is in @{term "A\<star>"} and cannot be the empty string. By Prop.~\ref{langprops}@{text "(iv)"},
we can separate
this string into two parts, say @{text "a"} and @{text "b"}, such that @{text "a \<noteq> []"}, @{text "a \<in> A"}
and @{term "b \<in> A\<star>"}. Now @{text a} must be strictly longer than @{text "x\<^isub>s"},
otherwise @{text "x\<^bsub>pmax\<^esub>"} would not be the longest such prefix. That means @{text a}
`overlaps' with @{text z}, splitting it into two components @{text "z\<^isub>a"} and
@{text "z\<^isub>b"}. For this we know that @{text "x\<^isub>s @ z\<^isub>a \<in> A"} and
@{term "z\<^isub>b \<in> A\<star>"}. To cut a story short, we have divided @{term "x @ z \<in> A\<star>"}
such that we have a string @{text a} with @{text "a \<in> A"} that lies just on the
`border' of @{text x} and @{text z}. This string is @{text "x\<^isub>s @ z\<^isub>a"}.
In order to show that @{term "x @ z \<in> A\<star>"} implies @{term "y @ z \<in> A\<star>"}, we use
the following tagging-function:
%
\begin{center}
@{thm (lhs) tag_Star_def[where ?A="A", THEN meta_eq_app]}~@{text "\<equiv>"}~
@{text "{\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> | x\<^isub>p < x \<and> x\<^isub>p \<in> A\<^isup>\<star> \<and> (x\<^isub>s, x\<^isub>p) \<in> Partitions x}"}
\end{center}
\begin{proof}[@{const Star}-Case]
If @{term "finite (UNIV // \<approx>A)"}
then @{term "finite (Pow (UNIV // \<approx>A))"} holds. The range of
@{term "tag_Star A"} is a subset of this set, and therefore finite.
Again we have to show under the assumption @{term "x"}~@{term "=(tag_Star A)="}~@{term y}
that @{term "x @ z \<in> A\<star>"} implies @{term "y @ z \<in> A\<star>"}.
We first need to consider the case that @{text x} is the empty string.
From the assumption about strict prefixes in @{text "\<^raw:$\threesim$>\<^bsub>\<star>tag A\<^esub>"}, we
can infer @{text y} is the empty string and
then clearly have @{term "y @ z \<in> A\<star>"}. In case @{text x} is not the empty
string, we can divide the string @{text "x @ z"} as shown in the picture
above. By the tagging-function and the facts @{text "x\<^bsub>pmax\<^esub> \<in> A\<^isup>\<star>"} and @{text "x\<^bsub>pmax\<^esub> < x"},
we have
\begin{center}
@{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> \<in> {\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> | x\<^bsub>pmax\<^esub> < x \<and> x\<^bsub>pmax\<^esub> \<in> A\<^isup>\<star> \<and> (x\<^bsub>pmax\<^esub>, x\<^isub>s) \<in> Partitions x}"}
\end{center}
\noindent
which by assumption is equal to
\begin{center}
@{text "\<lbrakk>x\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> \<in> {\<lbrakk>y\<^isub>s\<rbrakk>\<^bsub>\<approx>A\<^esub> | y\<^bsub>p\<^esub> < y \<and> y\<^bsub>p\<^esub> \<in> A\<^isup>\<star> \<and> (y\<^bsub>p\<^esub>, y\<^isub>s) \<in> Partitions y}"}
\end{center}
\noindent
From this we know there exists a partition @{text "y\<^isub>p"} and @{text
"y\<^isub>s"} with @{term "y\<^isub>p \<in> A\<star>"} and also @{term "x\<^isub>s \<approx>A
y\<^isub>s"}. Unfolding the Myhill-Nerode relation we know @{term
"y\<^isub>s @ z\<^isub>a \<in> A"}. We also know that @{term "z\<^isub>b \<in> A\<star>"}.
Therefore @{term "y\<^isub>p @ (y\<^isub>s @ z\<^isub>a) @ z\<^isub>b \<in>
A\<star>"}, which means @{term "y @ z \<in> A\<star>"}. The last step is to set
@{text "A"} to @{term "lang r"} and thus complete the proof.
\end{proof}
*}
section {* Second Part proved using Partial Derivatives\label{derivatives} *}
text {*
\noindent
As we have seen in the previous section, in order to establish
the second direction of the Myhill-Nerode theorem, we need to find
a more refined relation than @{term "\<approx>(lang r)"} for which we can
show that there are only finitely many equivalence classes. So far we
have shown this directly by induction on @{text "r"} using tagging-functions.
However, there is also an indirect method to come up with such a refined
relation by using derivatives of regular expressions \cite{Brzozowski64}.
Assume the following two definitions for the \emph{left-quotient} of a language,
which we write as @{term "Der c A"} and @{term "Ders s A"} where @{text c}
is a character and @{text s} a string, respectively:
\begin{center}
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{2mm}}l}
@{thm (lhs) Der_def} & @{text "\<equiv>"} & @{thm (rhs) Der_def}\\
@{thm (lhs) Ders_def} & @{text "\<equiv>"} & @{thm (rhs) Ders_def}\\
\end{tabular}
\end{center}
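Left-quotients of arbitrary languages are of course not computable, but for a
language given as a finite set of strings they can be transcribed directly.
A Haskell sketch (the names derLang and dersLang are ours) might read:

\begin{verbatim}
import qualified Data.Set as Set
import Data.List (stripPrefix)
import Data.Maybe (mapMaybe)

type Lang = Set.Set String

-- Der c A = { s | c:s in A }
derLang :: Char -> Lang -> Lang
derLang c a = Set.fromList [ s | (c':s) <- Set.toList a, c' == c ]

-- Ders s A = { s' | s ++ s' in A }
dersLang :: String -> Lang -> Lang
dersLang s a = Set.fromList (mapMaybe (stripPrefix s) (Set.toList a))

-- ghci> dersLang "ab" (Set.fromList ["abc", "abd", "b"])
-- fromList ["c","d"]
\end{verbatim}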
\noindent
In order to aid readability, we shall make use of the following abbreviation
\begin{center}
@{abbrev "Derss s As"}
\end{center}
\noindent
where we apply the left-quotient to a set of languages and then combine the results.
Clearly we have the following equivalence between the Myhill-Nerode relation
(Definition~\ref{myhillneroderel}) and left-quotients
\begin{equation}\label{mhders}
@{term "x \<approx>A y"} \hspace{4mm}\text{if and only if}\hspace{4mm} @{term "Ders x A = Ders y A"}
\end{equation}
\noindent
It is also straightforward to establish the following properties of left-quotients
\begin{equation}
\mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{2mm}}l}
@{thm (lhs) Der_simps(1)} & $=$ & @{thm (rhs) Der_simps(1)}\\
@{thm (lhs) Der_simps(2)} & $=$ & @{thm (rhs) Der_simps(2)}\\
@{thm (lhs) Der_simps(3)} & $=$ & @{thm (rhs) Der_simps(3)}\\
@{thm (lhs) Der_simps(4)} & $=$ & @{thm (rhs) Der_simps(4)}\\
@{thm (lhs) Der_conc} & $=$ & @{thm (rhs) Der_conc}\\
@{thm (lhs) Der_star} & $=$ & @{thm (rhs) Der_star}\\
@{thm (lhs) Ders_simps(1)} & $=$ & @{thm (rhs) Ders_simps(1)}\\
@{thm (lhs) Ders_simps(2)} & $=$ & @{thm (rhs) Ders_simps(2)}\\
%@{thm (lhs) Ders_simps(3)[where ?s1.0="s\<^isub>1" and ?s2.0="s\<^isub>2"]} & $=$
% & @{thm (rhs) Ders_simps(3)[where ?s1.0="s\<^isub>1" and ?s2.0="s\<^isub>2"]}\\
\end{tabular}}
\end{equation}
\noindent
where @{text "\<Delta>"} in the fifth line is a function that tests whether the empty string
is in the language and returns @{term "{[]}"} or @{term "{}"}, accordingly.
The only interesting case above is the last one where we use Prop.~\ref{langprops}@{text "(i)"}
in order to infer that @{term "Der c (A\<star>) = Der c (A \<cdot> A\<star>)"}. We can
then complete the proof by noting that @{term "Delta A \<cdot> Der c (A\<star>) \<subseteq> (Der c A) \<cdot> A\<star>"}.
Brzozowski observed that the left-quotients for languages of regular
expressions can be calculated directly using the notion of \emph{derivatives
of a regular expression} \cite{Brzozowski64}. We define this notion in
Isabelle/HOL as follows:
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
@{thm (lhs) der.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) der.simps(1)}\\
@{thm (lhs) der.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) der.simps(2)}\\
@{thm (lhs) der.simps(3)[where c'="d"]} & @{text "\<equiv>"} & @{thm (rhs) der.simps(3)[where c'="d"]}\\
@{thm (lhs) der.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}
& @{text "\<equiv>"} & @{thm (rhs) der.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm (lhs) der.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}
& @{text "\<equiv>"} & @{text "if"}~@{term "nullable r\<^isub>1"}~@{text "then"}~%
@{term "Plus (Times (der c r\<^isub>1) r\<^isub>2) (der c r\<^isub>2)"}\\
& & \phantom{@{text "if"}~@{term "nullable r\<^isub>1"}~}@{text "else"}~%
@{term "Times (der c r\<^isub>1) r\<^isub>2"}\\
@{thm (lhs) der.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) der.simps(6)}\smallskip\\
@{thm (lhs) ders.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) ders.simps(1)}\\
@{thm (lhs) ders.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) ders.simps(2)}\\
\end{tabular}
\end{center}
\noindent
The last two clauses extend derivatives from characters to strings---i.e.~lists of
characters. The list-cons operator is written \mbox{@{text "_ :: _"}}. The
boolean function @{term "nullable r"} needed in the @{const Times}-case tests
whether a regular expression can recognise the empty string:
\begin{center}
\begin{tabular}{c@ {\hspace{10mm}}c}
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
@{thm (lhs) nullable.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(1)}\\
@{thm (lhs) nullable.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(2)}\\
@{thm (lhs) nullable.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(3)}\\
\end{tabular} &
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
@{thm (lhs) nullable.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}
& @{text "\<equiv>"} & @{thm (rhs) nullable.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm (lhs) nullable.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}
& @{text "\<equiv>"} & @{thm (rhs) nullable.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm (lhs) nullable.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) nullable.simps(6)}\\
\end{tabular}
\end{tabular}
\end{center}
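While our formalisation states these functions inside Isabelle/HOL, they are
directly executable. The following Haskell sketch (the datatype and all names
are ours; Ord is derived only so that regular expressions can later be stored
in sets) transcribes @{text der}, @{text ders} and the nullability test:

\begin{verbatim}
data Rexp = Zero | One | Atom Char
          | Plus Rexp Rexp | Times Rexp Rexp | Star Rexp
          deriving (Eq, Ord, Show)

nullable :: Rexp -> Bool
nullable Zero          = False
nullable One           = True
nullable (Atom _)      = False
nullable (Plus r1 r2)  = nullable r1 || nullable r2
nullable (Times r1 r2) = nullable r1 && nullable r2
nullable (Star _)      = True

der :: Char -> Rexp -> Rexp
der _ Zero         = Zero
der _ One          = Zero
der c (Atom d)     = if c == d then One else Zero
der c (Plus r1 r2) = Plus (der c r1) (der c r2)
der c (Times r1 r2)
  | nullable r1    = Plus (Times (der c r1) r2) (der c r2)
  | otherwise      = Times (der c r1) r2
der c (Star r)     = Times (der c r) (Star r)

-- derivatives w.r.t. a string, one character at a time
ders :: String -> Rexp -> Rexp
ders []     r = r
ders (c:cs) r = ders cs (der c r)
\end{verbatim}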
\noindent
By induction on the regular expression @{text r}, respectively on the string @{text s},
one can easily show that left-quotients and derivatives relate as follows
\cite{Sakarovitch09}:
\begin{equation}\label{Dersders}
\mbox{\begin{tabular}{c}
@{thm Der_der}\\
@{thm Ders_ders}
\end{tabular}}
\end{equation}
\noindent
The importance of derivatives in the context of the Myhill-Nerode theorem is that
we can use \eqref{mhders} and \eqref{Dersders} in order to
establish that @{term "x \<approx>(lang r) y"} is equivalent to
@{term "lang (ders x r) = lang (ders y r)"}. Hence
\begin{equation}
@{term "x \<approx>(lang r) y"}\hspace{4mm}\mbox{provided}\hspace{4mm}@{term "ders x r = ders y r"}
\end{equation}
\noindent
which means the right-hand side (seen as relation) refines the
Myhill-Nerode relation. Consequently, we can use
@{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. ders x r)\<^esub>"} as a tagging-relation
for the regular expression @{text r}. However, in
order to be useful for the second part of the Myhill-Nerode theorem, we also have to show that
for the corresponding language there are only finitely many derivatives---thus ensuring
that there are only finitely many equivalence classes. Unfortunately, this
is not true in general. Sakarovitch gives an example where a regular
expression has infinitely many derivatives w.r.t.~the language
\mbox{@{term "({a} \<cdot> {b})\<star> \<union> ({a} \<cdot> {b})\<star> \<cdot> {a}"}}
\cite[Page~141]{Sakarovitch09}.
What Brzozowski \cite{Brzozowski64} established is that for every language there
\emph{are} only finitely many `dissimilar' derivatives of a regular
expression. Two regular expressions are said to be \emph{similar} provided
they can be identified using the @{text "ACI"}-identities:
\begin{equation}\label{ACI}
\mbox{\begin{tabular}{cl}
(@{text A}) & @{term "Plus (Plus r\<^isub>1 r\<^isub>2) r\<^isub>3"} $\equiv$ @{term "Plus r\<^isub>1 (Plus r\<^isub>2 r\<^isub>3)"}\\
(@{text C}) & @{term "Plus r\<^isub>1 r\<^isub>2"} $\equiv$ @{term "Plus r\<^isub>2 r\<^isub>1"}\\
(@{text I}) & @{term "Plus r r"} $\equiv$ @{term "r"}\\
\end{tabular}}
\end{equation}
\noindent
Carrying this idea through, we must consider not the set of all derivatives,
but the set of derivatives modulo @{text "ACI"}. In principle, this can be done formally,
but it is very painful in a theorem prover (since there is no
direct characterisation of the set of dissimilar derivatives).
Fortunately, there is a much simpler approach using \emph{partial
derivatives}. They were introduced by Antimirov \cite{Antimirov95} and can be defined
in Isabelle/HOL as follows:
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
@{thm (lhs) pder.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) pder.simps(1)}\\
@{thm (lhs) pder.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) pder.simps(2)}\\
@{thm (lhs) pder.simps(3)[where c'="d"]} & @{text "\<equiv>"} & @{thm (rhs) pder.simps(3)[where c'="d"]}\\
@{thm (lhs) pder.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}
& @{text "\<equiv>"} & @{thm (rhs) pder.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm (lhs) pder.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}
& @{text "\<equiv>"} & @{text "if"}~@{term "nullable r\<^isub>1"}~@{text "then"}~%
@{term "(Timess (pder c r\<^isub>1) r\<^isub>2) \<union> (pder c r\<^isub>2)"}\\
& & \phantom{@{text "if"}~@{term "nullable r\<^isub>1"}~}@{text "else"}~%
@{term "Timess (pder c r\<^isub>1) r\<^isub>2"}\\
@{thm (lhs) pder.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) pder.simps(6)}\smallskip\\
@{thm (lhs) pders.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) pders.simps(1)}\\
@{thm (lhs) pders.simps(2)} & @{text "\<equiv>"} & @{text "\<Union> (pders s) ` (pder c r)"}\\
\end{tabular}
\end{center}
\noindent
Again the last two clauses extend partial derivatives from characters to strings.
Unlike `simple' derivatives, the functions for partial derivatives return sets of regular
expressions. In the @{const Times} and @{const Star} cases we therefore use the
auxiliary definition
\begin{center}
@{text "TIMESS rs r \<equiv> {TIMES r' r | r' \<in> rs}"}
\end{center}
\noindent
in order to `sequence' a regular expression with a set of regular
expressions. Note that in the last clause we first build the set of partial
derivatives w.r.t.~the character @{text c}, then build the image of this set under the
function @{term "pders s"} and finally `union up' all resulting sets. It will be
convenient to introduce for this the following abbreviation
\begin{center}
@{abbrev "pderss s rs"}
\end{center}
\noindent
which simplifies the last clause of @{const "pders"} to
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}}
@{thm (lhs) pders.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) pders.simps(2)}\\
\end{tabular}
\end{center}
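For comparison with the `simple' derivatives, the partial derivative functions
can be transcribed analogously; the following sketch (again ours, not part of
the formalisation) reuses the Rexp datatype and nullable from the earlier
Haskell fragment:

\begin{verbatim}
import qualified Data.Set as Set

-- TIMESS rs r = { TIMES r' r | r' in rs }
timess :: Set.Set Rexp -> Rexp -> Set.Set Rexp
timess rs r = Set.map (\r' -> Times r' r) rs

pder :: Char -> Rexp -> Set.Set Rexp
pder _ Zero         = Set.empty
pder _ One          = Set.empty
pder c (Atom d)     = if c == d then Set.singleton One else Set.empty
pder c (Plus r1 r2) = pder c r1 `Set.union` pder c r2
pder c (Times r1 r2)
  | nullable r1     = timess (pder c r1) r2 `Set.union` pder c r2
  | otherwise       = timess (pder c r1) r2
pder c (Star r)     = timess (pder c r) (Star r)

-- partial derivatives w.r.t. a string, `unioning up' the results
pders :: String -> Rexp -> Set.Set Rexp
pders []     r = Set.singleton r
pders (c:cs) r = Set.unions [ pders cs r' | r' <- Set.toList (pder c r) ]
\end{verbatim}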
Partial derivatives can be seen as having the @{text "ACI"}-identities already built in:
taking the partial derivatives of the
regular expressions in \eqref{ACI} gives us in each case
equal sets. Antimirov \cite{Antimirov95} showed a similar result to
\eqref{Dersders} for partial derivatives:
\begin{equation}\label{Derspders}
\mbox{\begin{tabular}{lc}
@{text "(i)"} & @{thm Der_pder}\\
@{text "(ii)"} & @{thm Ders_pders}
\end{tabular}}
\end{equation}
\begin{proof}
The first fact is by a simple induction on @{text r}. For the second we slightly
modify Antimirov's proof by performing an induction on @{text s} where we
generalise over all @{text r}. That means in the @{text "cons"}-case the
induction hypothesis is
\begin{center}
@{text "(IH)"}\hspace{3mm}@{term "\<forall>r. Ders s (lang r) = \<Union> lang ` (pders s r)"}
\end{center}
\noindent
With this we can establish
\begin{center}
\begin{tabular}{r@ {\hspace{1.5mm}}c@ {\hspace{1.5mm}}ll}
@{term "Ders (c # s) (lang r)"}
& @{text "="} & @{term "Ders s (Der c (lang r))"} & by def.\\
& @{text "="} & @{term "Ders s (\<Union> lang ` (pder c r))"} & by @{text "("}\ref{Derspders}@{text ".i)"}\\
& @{text "="} & @{term "\<Union> (Ders s) ` (lang ` (pder c r))"} & by def.~of @{text "Ders"}\\
& @{text "="} & @{term "\<Union> lang ` (\<Union> pders s ` (pder c r))"} & by IH\\
& @{text "="} & @{term "\<Union> lang ` (pders (c # s) r)"} & by def.\\
\end{tabular}
\end{center}
\noindent
Note that in order to apply the induction hypothesis in the fourth equation, we
need the generalisation over all regular expressions @{text r}. The case for
the empty string is routine and omitted.
\end{proof}
\noindent
Taking \eqref{Dersders} and \eqref{Derspders} together gives the relationship
between languages of derivatives and partial derivatives
\begin{equation}
\mbox{\begin{tabular}{lc}
@{text "(i)"} & @{thm der_pder[symmetric]}\\
@{text "(ii)"} & @{thm ders_pders[symmetric]}
\end{tabular}}
\end{equation}
\noindent
These two properties confirm the observation made earlier
that by using sets, partial derivatives have the @{text "ACI"}-identities
of derivatives already built in.
Antimirov also proved that for every language and every regular expression
there are only finitely many partial derivatives, where the set of partial
derivatives of @{text r} w.r.t.~a language @{text A} is defined as
\begin{equation}\label{Pdersdef}
@{thm pders_lang_def}
\end{equation}
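For an @{text A} given as a finite set of strings this definition is again
executable; a Haskell rendering (name ours) on top of the pders sketch above is:

\begin{verbatim}
-- pderl A r for a finite language A, given as a list of strings
pdersLang :: [String] -> Rexp -> Set.Set Rexp
pdersLang as r = Set.unions [ pders s r | s <- as ]
\end{verbatim}

\noindent
Antimirov's theorem below is of course about \emph{arbitrary} languages
@{text A}, including infinite ones, which is what makes it non-trivial.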
\begin{thrm}[Antimirov \cite{Antimirov95}]\label{antimirov}
For every language @{text A} and every regular expression @{text r},
\mbox{@{thm finite_pders_lang}}.
\end{thrm}
\noindent
Antimirov's argument first shows this theorem for the language @{term UNIV1},
which is the set of all non-empty strings. For this he proves:
\begin{equation}
\mbox{\begin{tabular}{l}
@{thm pders_lang_Zero}\\
@{thm pders_lang_One}\\
@{thm pders_lang_Atom}\\
@{thm pders_lang_Plus[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm pders_lang_Times[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm pders_lang_Star}\\
\end{tabular}}
\end{equation}
\noindent
from which one can deduce by induction on @{text r} that
\begin{center}
@{thm finite_pders_lang_UNIV1}
\end{center}
\noindent
holds. Now Antimirov's theorem follows because
\begin{center}
@{thm pders_lang_UNIV}\\
\end{center}
\noindent
and for all languages @{text "A"}, @{thm pders_lang_subset[where B="UNIV",
simplified]} holds. Since we follow Antimirov's proof quite closely in our
formalisation, we omit the details.
Let us return to our proof of the second direction in the Myhill-Nerode
theorem. The point of the above calculations is to use
@{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. ders x r)\<^esub>"} as tagging-relation.
\begin{proof}[Proof of Theorem~\ref{myhillnerodetwo}]
Using \eqref{mhders}
and \eqref{Derspders} we can easily infer that
\begin{center}
@{term "x \<approx>(lang r) y"}\hspace{4mm}\mbox{provided}\hspace{4mm}@{term "pders x r = pders y r"}
\end{center}
\noindent
which means the tagging-relation @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. pders x r)\<^esub>"} refines @{term "\<approx>(lang r)"}.
So we know by Lemma~\ref{fintwo}, \mbox{@{term "finite (UNIV // (\<approx>(lang r)))"}}
holds if @{term "finite (UNIV // (=(\<lambda>x. pders x r)=))"}. In order to establish
the latter, we can use Lemma~\ref{finone} and show that the range of the
tagging-function \mbox{@{term "\<lambda>x. pders x r"}} is finite. For this recall the definition
in \eqref{Pdersdef}, which gives us that
\begin{center}
@{thm pders_lang_def[where A="UNIV", simplified]}
\end{center}
\noindent
Now the range of @{term "\<lambda>x. pders x r"} is a subset of @{term "Pow (pders_lang UNIV r)"},
which we know is finite by Theorem~\ref{antimirov}. This means there
are only finitely many equivalence classes of @{text "\<^raw:$\threesim$>\<^bsub>(\<lambda>x. pders x r)\<^esub>"},
and we can again conclude the second part of the Myhill-Nerode theorem.
\end{proof}
*}
section {* Closure Properties *}
text {*
\noindent
The real beauty of regular languages is that they are closed
under almost all set operations. Closure under union, concatenation and Kleene-star
is trivial to establish given our definition of regularity (Definition~\ref{regular}).
More interesting is the closure under complement, because
it seems difficult to construct a regular expression for the complement
language by direct means. However, the existence of such a regular expression
can now be easily proved using the Myhill-Nerode theorem since
\begin{center}
@{term "s\<^isub>1 \<approx>A s\<^isub>2"} if and only if @{term "s\<^isub>1 \<approx>(-A) s\<^isub>2"}
\end{center}
\noindent
holds for any strings @{text "s\<^isub>1"} and @{text
"s\<^isub>2"}. Therefore @{text A} and the complement language @{term "-A"}
give rise to the same partitions. Consequently, if one induces finitely many
equivalence classes, so does the other.
Once closure under complement is established, closure under intersection
and set difference is also easy, because
\begin{center}
\begin{tabular}{c}
@{term "A \<inter> B = - (- A \<union> - B)"}\\
@{term "A - B = - (- A \<union> B)"}
\end{tabular}
\end{center}
\noindent
Closure of regular languages under reversal, which means
\begin{center}
@{text "A\<^bsup>-1\<^esup> \<equiv> {s\<^bsup>-1\<^esup> | s \<in> A}"}
\end{center}
\noindent
can be shown with the help of the following operation defined on regular
expressions
\begin{center}
\begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
@{thm (lhs) Rev.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(1)}\\
@{thm (lhs) Rev.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(2)}\\
@{thm (lhs) Rev.simps(3)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(3)}\\
@{thm (lhs) Rev.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} &
@{thm (rhs) Rev.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm (lhs) Rev.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} &
@{thm (rhs) Rev.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
@{thm (lhs) Rev.simps(6)} & @{text "\<equiv>"} & @{thm (rhs) Rev.simps(6)}\\
\end{tabular}
\end{center}
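This operation is a straightforward structural recursion; transcribed into the
Haskell sketch from above (function name ours) it reads:

\begin{verbatim}
-- reversal on regular expressions; only Times swaps its components
rev :: Rexp -> Rexp
rev Zero          = Zero
rev One           = One
rev (Atom c)      = Atom c
rev (Plus r1 r2)  = Plus (rev r1) (rev r2)
rev (Times r1 r2) = Times (rev r2) (rev r1)
rev (Star r)      = Star (rev r)
\end{verbatim}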
\noindent
For this operation we can show that
\begin{center}
@{text "(\<calL>(r))\<^bsup>-1\<^esup>"}~@{text "="}~@{thm (rhs) rev_lang}
\end{center}
\noindent
from which closure under reversal follows.
The perhaps the most surprising fact is that regular languages are closed under any
left-quotient. Define
\begin{center}
@{abbrev "Ders_lang B A"}
\end{center}
\noindent
and assume @{text A} is regular. From this we know there exists a regular
expression @{text r} such that @{term "A = lang r"}. We also know that
@{term "pders_lang B r"} is finite. By definition and Lemma~\ref{Derspders}
\begin{equation}\label{eqq}
@{term "Ders_lang B (lang r) = (\<Union> lang ` (pders_lang B r))"}
\end{equation}
\noindent
Since by Theorem~\ref{antimirov} there are only finitely many regular expressions
in @{term "pders_lang B r"}, we know by \eqref{uplus} that the right-hand side of \eqref{eqq} is
equal to @{term "lang (\<Uplus>(pders_lang B r))"}. Hence
the regular expression @{term "\<Uplus>(pders_lang B r)"} verifies that @{term "Ders_lang B A"}
is regular.
*}
section {* Conclusion and Related Work *}
text {*
\noindent
In this paper we took the view that a regular language is one where there
exists a regular expression that matches all of its strings. Regular
expressions can conveniently be defined as a datatype in HOL-based theorem
provers. For us it was therefore interesting to find out how far we can push
this point of view. We have established in Isabelle/HOL both directions
of the Myhill-Nerode theorem.
%
\begin{thrm}[The Myhill-Nerode Theorem]\mbox{}\\
A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}.
\end{thrm}
\noindent
Having formalised this theorem means we pushed our point of view quite
far. Using this theorem we can obviously prove when a language is \emph{not}
regular---by establishing that it has infinitely many equivalence classes
generated by the Myhill-Nerode relation (this is usually the purpose of the
pumping lemma \cite{Kozen97}). We can also use it to establish the standard
textbook results about closure properties of regular languages. An interesting
case is closure under complement, because it seems difficult to
construct a regular expression for the complement language by direct
means. However, the existence of such a regular expression can be easily
proved using the Myhill-Nerode theorem. Proving the existence of such a
regular expression via automata using the standard method would be quite
involved. It includes the steps: regular expression @{text "\<Rightarrow>"}
non-deterministic automaton @{text "\<Rightarrow>"} deterministic automaton @{text "\<Rightarrow>"}
complement automaton @{text "\<Rightarrow>"} regular expression.
While regular expressions are convenient in formalisations, they have some
limitations. One is that there seems to be no method of calculating a
minimal regular expression (for example in terms of length) for a regular
language, like there is
for automata. On the other hand, efficient regular expression matching,
without using automata, poses no problem \cite{OwensReppyTuron09}.
For an implementation of a simple regular expression matcher,
whose correctness has been formally established, we refer the reader to
Owens and Slind \cite{OwensSlind08}.
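To illustrate the idea of derivative-based matching mentioned above, a complete
matcher on top of our earlier Haskell sketch is a one-liner (this is the
textbook construction, not the verified matcher of Owens and Slind):

\begin{verbatim}
-- a string matches r iff the iterated derivative is nullable
match :: Rexp -> String -> Bool
match r s = nullable (ders s r)

-- ghci> match (Star (Times (Atom 'a') (Atom 'b'))) "abab"
-- True
\end{verbatim}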
Our formalisation consists of 780 lines of Isabelle/Isar code for the first
direction and 460 for the second, plus around 300 lines of standard material
about regular languages. The formalisation about derivatives and partial
derivatives shown in Section~\ref{derivatives} consists of 390 lines of code.
While this might be seen as large, it should be viewed
in the context of the work done by Constable et al.~\cite{Constable00} who
formalised the Myhill-Nerode theorem in Nuprl using automata. They write
that their four-member team needed something on the magnitude of 18 months
for their formalisation. The estimate for our formalisation is that we
needed approximately 3 months and this included the time to find our proof
arguments. Unlike Constable et al., who were able to follow the proofs from
\cite{HopcroftUllman69}, we had to find our own arguments. So for us the
formalisation was not the bottleneck. It is hard to gauge the size of a
formalisation in Nuprl, but from what is shown in the Nuprl Math Library
about their development it seems substantially larger than ours. Our code
can be found in the Mercurial Repository at
\mbox{\url{http://www4.in.tum.de/~urbanc/regexp.html}}.
Our proof of the first direction is very much inspired by \emph{Brzozowski's
algebraic method} used to convert a finite automaton to a regular
expression \cite{Brzozowski64}. The close connection can be seen by considering the equivalence
classes as the states of the minimal automaton for the regular language.
However there are some subtle differences. Since we identify equivalence
classes with the states of the automaton, the most natural choice is to
characterise each state by the set of strings starting from the initial
state and leading up to that state. Usually, however, the states are characterised by the
strings starting from that state and leading to the terminal states. The first
choice has consequences for how the initial equational system is set up. We have
the $\lambda$-term on our `initial state', while Brzozowski has it on the
terminal states. This means we also need to reverse the direction of Arden's
Lemma.
This is also where our method shines, because we can completely
side-step the standard argument \cite{Kozen97} where automata need
to be composed, which as stated in the Introduction is not so easy
to formalise in a HOL-based theorem prover. However, it is also the
direction where we had to spend most of the `conceptual' time, as
our proof-argument based on tagging-functions is new for
establishing the Myhill-Nerode theorem. All standard proofs of this
direction proceed by arguments over automata.\medskip
We expect that the development of Krauss \& Nipkow would become simpler by
using partial derivatives.\medskip
\noindent
{\bf Acknowledgements:}
We are grateful for the comments we received from Larry
Paulson.
*}
(*<*)
end
(*>*)