Paper/Paper.thy
author urbanc
Sun, 20 Feb 2011 08:12:13 +0000
changeset 125 62925473bf6b
parent 124 8233510cab6c
child 126 c7fdc28b8a76
permissions -rw-r--r--
added pictures for seq-case

(*<*)
theory Paper
imports "../Myhill" "LaTeXsugar" 
begin

declare [[show_question_marks = false]]

consts
 REL :: "(string \<times> string) \<Rightarrow> bool"
 UPLUS :: "'a set \<Rightarrow> 'a set \<Rightarrow> (nat \<times> 'a) set"

abbreviation
  "EClass x R \<equiv> R `` {x}"

abbreviation 
  "append_rexp2 r_itm r \<equiv> append_rexp r r_itm"


notation (latex output)
  str_eq_rel ("\<approx>\<^bsub>_\<^esub>") and
  str_eq ("_ \<approx>\<^bsub>_\<^esub> _") and
  Seq (infixr "\<cdot>" 100) and
  Star ("_\<^bsup>\<star>\<^esup>") and
  pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and
  Suc ("_+1" [100] 100) and
  quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and
  REL ("\<approx>") and
  UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
  L ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
  Lam ("\<lambda>'(_')" [100] 100) and 
  Trn ("'(_, _')" [100, 100] 100) and 
  EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and
  transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and
  Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and
  append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and
  append_rhs_rexp ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and
  uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and
  tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and
  tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and
  tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and
  tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and
  tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and
  tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100)
lemma meta_eq_app:
  shows "f \<equiv> \<lambda>x. g x \<Longrightarrow> f x \<equiv> g x"
  by auto

(*>*)


section {* Introduction *}

text {*
  Regular languages are an important and well-understood subject in Computer
  Science, with many beautiful theorems and many useful algorithms. There is a
  wide range of textbooks on this subject, many of which are aimed at students
  and contain very detailed `pencil-and-paper' proofs
  (e.g.~\cite{Kozen97}). It seems natural to exercise theorem provers by
  formalising the theorems and by verifying formally the algorithms.

  There is however a problem: the typical approach to regular languages is to
  introduce finite automata and then define everything in terms of them.  For
  example, a regular language is normally defined as one whose strings are
  recognised by a finite deterministic automaton. This approach has many
  benefits. Among them is the fact that it is easy to convince oneself that
  regular languages are closed under complementation: one just has to exchange
  the accepting and non-accepting states in the corresponding automaton to
  obtain an automaton for the complement language.  The problem, however, lies with
  formalising such reasoning in a HOL-based theorem prover, in our case
  Isabelle/HOL. Automata are built up from states and transitions that 
  need to be represented as graphs, matrices or functions, none
  of which can be defined as inductive datatype. 

  In case of graphs and matrices, this means we have to build our own
  reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor
  HOLlight support them with libraries. Even worse, reasoning about graphs and
  matrices can be a real hassle in HOL-based theorem provers.  Consider for
  example the operation of sequencing two automata, say $A_1$ and $A_2$, by
  connecting the accepting states of $A_1$ to the initial state of $A_2$:  
  
  \begin{center}
  \begin{tabular}{ccc}
  \begin{tikzpicture}[scale=0.8]
  %\draw[step=2mm] (-1,-1) grid (1,1);
  
  \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3);
  \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3);

  \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  
  \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};

  \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};

  \draw (-0.6,0.0) node {\footnotesize$A_1$};
  \draw ( 0.6,0.0) node {\footnotesize$A_2$};
  \end{tikzpicture}

  & 

  \raisebox{1.1mm}{\bf\Large$\;\;\;\Rightarrow\,\;\;$}

  &

  \begin{tikzpicture}[scale=0.8]
  %\draw[step=2mm] (-1,-1) grid (1,1);
  
  \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3);
  \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3);

  \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  
  \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};

  \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {};
  
  \draw (C) to [very thick, bend left=45] (B);
  \draw (D) to [very thick, bend right=45] (B);

  \draw (-0.6,0.0) node {\footnotesize$A_1$};
  \draw ( 0.6,0.0) node {\footnotesize$A_2$};
  \end{tikzpicture}

  \end{tabular}
  \end{center}

  \noindent
  On `paper' we can define the corresponding graph in terms of the disjoint 
  union of the state nodes. Unfortunately in HOL, the standard definition for disjoint 
  union, namely 
  %
  \begin{equation}\label{disjointunion}
  @{term "UPLUS A\<^isub>1 A\<^isub>2 \<equiv> {(1, x) | x. x \<in> A\<^isub>1} \<union> {(2, y) | y. y \<in> A\<^isub>2}"}
  \end{equation}

  \noindent
  changes the type---the disjoint union is not a set, but a set of pairs. 
  Using this definition for disjoint unions means we do not have a single type for automata
  and hence will not be able to state certain properties about \emph{all}
  automata, since there is no type quantification available in HOL. An
  alternative, which provides us with a single type for automata, is to give every 
  state node an identity, for example a natural
  number, and then be careful to rename these identities apart whenever
  connecting two automata. This results in clunky proofs
  establishing that properties are invariant under renaming. Similarly,
  connecting two automata represented as matrices results in very adhoc
  constructions, which are not pleasant to reason about.

  Functions are much better supported in Isabelle/HOL, but they still lead to similar
  problems as with graphs.  Composing, for example, two non-deterministic automata in parallel
  requires also the formalisation of disjoint unions. Nipkow \cite{Nipkow98} 
  dismisses for this the option of using identities, because it leads according to 
  him to ``messy proofs''. He
  opts for a variant of \eqref{disjointunion} using bit lists, but writes 

  \begin{quote}
  \it%
  \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
  `` & All lemmas appear obvious given a picture of the composition of automata\ldots
       Yet their proofs require a painful amount of detail.''
  \end{tabular}
  \end{quote}

  \noindent
  and
  
  \begin{quote}
  \it%
  \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
  `` & If the reader finds the above treatment in terms of bit lists revoltingly
       concrete, I cannot disagree. A more abstract approach is clearly desirable.''
  \end{tabular}
  \end{quote}


  \noindent
  Moreover, it is not so clear how to conveniently impose a finiteness condition 
  upon functions in order to represent \emph{finite} automata. The best is
  probably to resort to more advanced reasoning frameworks, such as \emph{locales}
  or \emph{type classes},
  which are \emph{not} avaiable in all HOL-based theorem provers.

  Because of these problems to do with representing automata, there seems
  to be no substantial formalisation of automata theory and regular languages 
  carried out in HOL-based theorem provers. Nipkow  \cite{Nipkow98} establishes
  the link between regular expressions and automata in
  the context of lexing. Berghofer and Reiter \cite{BerghoferReiter09} 
  formalise automata working over 
  bit strings in the context of Presburger arithmetic.
  The only larger formalisations of automata theory 
  are carried out in Nuprl \cite{Constable00} and in Coq \cite{Filliatre97}.
  
  In this paper, we will not attempt to formalise automata theory in
  Isabelle/HOL, but take a completely different approach to regular
  languages. Instead of defining a regular language as one where there exists
  an automaton that recognises all strings of the language, we define a
  regular language as:

  \begin{definition}
  A language @{text A} is \emph{regular}, provided there is a regular expression that matches all
  strings of @{text "A"}.
  \end{definition}
  
  \noindent
  The reason is that regular expressions, unlike graphs, matrices and functions, can
  be easily defined as inductive datatype. Consequently a corresponding reasoning 
  infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation
  of regular expression matching based on derivatives \cite{OwensSlind08} and
  with an equivalence checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}.  
  The purpose of this paper is to
  show that a central result about regular languages---the Myhill-Nerode theorem---can 
  be recreated by only using regular expressions. This theorem gives necessary
  and sufficient conditions for when a language is regular. As a corollary of this
  theorem we can easily establish the usual closure properties, including 
  complementation, for regular languages.\smallskip
  
  \noindent
  {\bf Contributions:} 
  There is an extensive literature on regular languages.
  To our knowledge, our proof of the Myhill-Nerode theorem is the
  first that is based on regular expressions, only. We prove the part of this theorem 
  stating that a regular expression has only finitely many partitions using certain 
  tagging-functions. Again to our best knowledge, these tagging functions have
  not been used before to establish the Myhill-Nerode theorem.
*}

section {* Preliminaries *}

text {*
  Strings in Isabelle/HOL are lists of characters with the \emph{empty string}
  being represented by the empty list, written @{term "[]"}.  \emph{Languages}
  are sets of strings. The language containing all strings is written in
  Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages 
  is written @{term "A ;; B"} and a language raised to the power @{text n} is written 
  @{term "A \<up> n"}. They are defined as usual

  \begin{center}
  @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]}
  \hspace{7mm}
  @{thm pow.simps(1)[THEN eq_reflection, where A1="A"]}
  \hspace{7mm}
  @{thm pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
  \end{center}

  \noindent
  where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A}
  is defined as the union over all powers, namely @{thm Star_def}. In the paper
  we will make use of the following properties of these constructions.
  
  \begin{proposition}\label{langprops}\mbox{}\\
  \begin{tabular}{@ {}ll}
  (i)   & @{thm star_cases}     \\ 
  (ii)  & @{thm[mode=IfThen] pow_length}\\
  (iii) & @{thm seq_Union_left} \\ 
  \end{tabular}
  \end{proposition}

  \noindent
  In @{text "(ii)"} we use the notation @{term "length s"} for the length of a
  string; this property states that if @{term "[] \<notin> A"} then the lengths of
  the strings in @{term "A \<up> (Suc n)"} must be longer than @{text n}.  We omit
  the proofs for these properties, but invite the reader to consult our
  formalisation.\footnote{Available at \url{http://www4.in.tum.de/~urbanc/regexp.html}}

  The notation in Isabelle/HOL for the quotient of a language @{text A} according to an 
  equivalence relation @{term REL} is @{term "A // REL"}. We will write 
  @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined 
  as @{text "{y | y \<approx> x}"}.


  Central to our proof will be the solution of equational systems
  involving equivalence classes of languages. For this we will use Arden's lemma \cite{Brzozowski64},
  which solves equations of the form @{term "X = A ;; X \<union> B"} provided
  @{term "[] \<notin> A"}. However we will need the following `reverse' 
  version of Arden's lemma (`reverse' in the sense of changing the order of @{term "A ;; X"} to
  \mbox{@{term "X ;; A"}}).

  \begin{lemma}[Reverse Arden's Lemma]\label{arden}\mbox{}\\
  If @{thm (prem 1) arden} then
  @{thm (lhs) arden} if and only if
  @{thm (rhs) arden}.
  \end{lemma}

  \begin{proof}
  For the right-to-left direction we assume @{thm (rhs) arden} and show
  that @{thm (lhs) arden} holds. From Prop.~\ref{langprops}@{text "(i)"} 
  we have @{term "A\<star> = {[]} \<union> A ;; A\<star>"},
  which is equal to @{term "A\<star> = {[]} \<union> A\<star> ;; A"}. Adding @{text B} to both 
  sides gives @{term "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"}, whose right-hand side
  is equal to @{term "(B ;; A\<star>) ;; A \<union> B"}. This completes this direction. 

  For the other direction we assume @{thm (lhs) arden}. By a simple induction
  on @{text n}, we can establish the property

  \begin{center}
  @{text "(*)"}\hspace{5mm} @{thm (concl) arden_helper}
  \end{center}
  
  \noindent
  Using this property we can show that @{term "B ;; (A \<up> n) \<subseteq> X"} holds for
  all @{text n}. From this we can infer @{term "B ;; A\<star> \<subseteq> X"} using the definition
  of @{text "\<star>"}.
  For the inclusion in the other direction we assume a string @{text s}
  with length @{text k} is element in @{text X}. Since @{thm (prem 1) arden}
  we know by Prop.~\ref{langprops}@{text "(ii)"} that 
  @{term "s \<notin> X ;; (A \<up> Suc k)"} since its length is only @{text k}
  (the strings in @{term "X ;; (A \<up> Suc k)"} are all longer). 
  From @{text "(*)"} it follows then that
  @{term s} must be element in @{term "(\<Union>m\<in>{0..k}. B ;; (A \<up> m))"}. This in turn
  implies that @{term s} is in @{term "(\<Union>n. B ;; (A \<up> n))"}. Using Prop.~\ref{langprops}@{text "(iii)"} 
  this is equal to @{term "B ;; A\<star>"}, as we needed to show.\qed
  \end{proof}

  \noindent
  Regular expressions are defined as the inductive datatype

  \begin{center}
  @{text r} @{text "::="}
  @{term NULL}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term EMPTY}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "CHAR c"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "SEQ r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "ALT r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "STAR r"}
  \end{center}

  \noindent
  and the language matched by a regular expression is defined as

  \begin{center}
  \begin{tabular}{c@ {\hspace{10mm}}c}
  \begin{tabular}{rcl}
  @{thm (lhs) L_rexp.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) L_rexp.simps(1)}\\
  @{thm (lhs) L_rexp.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) L_rexp.simps(2)}\\
  @{thm (lhs) L_rexp.simps(3)[where c="c"]} & @{text "\<equiv>"} & @{thm (rhs) L_rexp.simps(3)[where c="c"]}\\
  \end{tabular}
  &
  \begin{tabular}{rcl}
  @{thm (lhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} &
       @{thm (rhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
  @{thm (lhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} &
       @{thm (rhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
  @{thm (lhs) L_rexp.simps(6)[where r="r"]} & @{text "\<equiv>"} &
      @{thm (rhs) L_rexp.simps(6)[where r="r"]}\\
  \end{tabular}
  \end{tabular}
  \end{center}

  Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating 
  a regular expression that matches all languages of @{text rs}. We only need to know the existence
  of such a regular expression and therefore we use Isabelle/HOL's @{const "fold_graph"} and Hilbert's
  @{text "\<epsilon>"} to define @{term "\<Uplus>rs"}. This operation, roughly speaking, folds @{const ALT} over the 
  set @{text rs} with @{const NULL} for the empty set. We can prove that for a finite set @{text rs}
  %
  \begin{equation}\label{uplus}
  \mbox{@{thm (lhs) folds_alt_simp} @{text "= \<Union> (\<calL> ` rs)"}}
  \end{equation}

  \noindent
  holds, whereby @{text "\<calL> ` rs"} stands for the 
  image of the set @{text rs} under function @{text "\<calL>"}.
*}

section {* The Myhill-Nerode Theorem, First Part *}

text {*
  The key definition in the Myhill-Nerode theorem is the
  \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two 
  strings are related, provided there is no distinguishing extension in this
  language. This can be defined as tertiary relation.

  \begin{definition}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and
  @{text y} are Myhill-Nerode related provided
  \begin{center}
  @{thm str_eq_def[simplified str_eq_rel_def Pair_Collect]}
  \end{center}
  \end{definition}

  \noindent
  It is easy to see that @{term "\<approx>A"} is an equivalence relation, which
  partitions the set of all strings, @{text "UNIV"}, into a set of disjoint
  equivalence classes. To illustrate this quotient construction, let us give a simple 
  example: consider the regular language containing just
  the string @{text "[c]"}. The relation @{term "\<approx>({[c]})"} partitions @{text UNIV}
  into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and  @{text "X\<^isub>3"}
  as follows
  
  \begin{center}
  @{text "X\<^isub>1 = {[]}"}\hspace{5mm}
  @{text "X\<^isub>2 = {[c]}"}\hspace{5mm}
  @{text "X\<^isub>3 = UNIV - {[], [c]}"}
  \end{center}

  One direction of the Myhill-Nerode theorem establishes 
  that if there are finitely many equivalence classes, like in the example above, then 
  the language is regular. In our setting we therefore have to show:
  
  \begin{theorem}\label{myhillnerodeone}
  @{thm[mode=IfThen] Myhill_Nerode1}
  \end{theorem}

  \noindent
  To prove this theorem, we first define the set @{term "finals A"} as those equivalence
  classes from @{term "UNIV // \<approx>A"} that contain strings of @{text A}, namely
  %
  \begin{equation} 
  @{thm finals_def}
  \end{equation}

  \noindent
  In our running example, @{text "X\<^isub>2"} is the only equivalence class in @{term "finals {[c]}"}.
  It is straightforward to show that in general @{thm lang_is_union_of_finals} and 
  @{thm finals_in_partitions} hold. 
  Therefore if we know that there exists a regular expression for every
  equivalence class in \mbox{@{term "finals A"}} (which by assumption must be
  a finite set), then we can use @{text "\<bigplus>"} to obtain a regular expression 
  that matches every string in @{text A}.


  Our proof of Thm.~\ref{myhillnerodeone} relies on a method that can calculate a
  regular expression for \emph{every} equivalence class, not just the ones 
  in @{term "finals A"}. We
  first define the notion of \emph{one-character-transition} between 
  two equivalence classes
  %
  \begin{equation} 
  @{thm transition_def}
  \end{equation}

  \noindent
  which means that if we concatenate the character @{text c} to the end of all 
  strings in the equivalence class @{text Y}, we obtain a subset of 
  @{text X}. Note that we do not define an automaton here, we merely relate two sets
  (with the help of a character). In our concrete example we have 
  @{term "X\<^isub>1 \<Turnstile>c\<Rightarrow> X\<^isub>2"}, @{term "X\<^isub>1 \<Turnstile>d\<Rightarrow> X\<^isub>3"} with @{text d} being any 
  other character than @{text c}, and @{term "X\<^isub>3 \<Turnstile>d\<Rightarrow> X\<^isub>3"} for any @{text d}.
  
  Next we build an \emph{initial equational system} that
  contains an equation for each equivalence class. Suppose we have 
  the equivalence classes @{text "X\<^isub>1,\<dots>,X\<^isub>n"}, there must be one and only one that
  contains the empty string @{text "[]"} (since equivalence classes are disjoint).
  Let us assume @{text "[] \<in> X\<^isub>1"}. We build the following equational system
  
  \begin{center}
  \begin{tabular}{rcl}
  @{text "X\<^isub>1"} & @{text "="} & @{text "(Y\<^isub>1\<^isub>1, CHAR c\<^isub>1\<^isub>1) + \<dots> + (Y\<^isub>1\<^isub>p, CHAR c\<^isub>1\<^isub>p) + \<lambda>(EMPTY)"} \\
  @{text "X\<^isub>2"} & @{text "="} & @{text "(Y\<^isub>2\<^isub>1, CHAR c\<^isub>2\<^isub>1) + \<dots> + (Y\<^isub>2\<^isub>o, CHAR c\<^isub>2\<^isub>o)"} \\
  & $\vdots$ \\
  @{text "X\<^isub>n"} & @{text "="} & @{text "(Y\<^isub>n\<^isub>1, CHAR c\<^isub>n\<^isub>1) + \<dots> + (Y\<^isub>n\<^isub>q, CHAR c\<^isub>n\<^isub>q)"}\\
  \end{tabular}
  \end{center}

  \noindent
  where the terms @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"}
  stand for all transitions @{term "Y\<^isub>i\<^isub>j \<Turnstile>c\<^isub>i\<^isub>j\<Rightarrow>
  X\<^isub>i"}.   There can only be
  finitely many such terms in a right-hand side since by assumption there are only finitely many
  equivalence classes and only finitely many characters.  The term @{text
  "\<lambda>(EMPTY)"} in the first equation acts as a marker for the equivalence class
  containing @{text "[]"}.\footnote{Note that we mark, roughly speaking, the
  single `initial' state in the equational system, which is different from
  the method by Brzozowski \cite{Brzozowski64}, where he marks the
  `terminal' states. We are forced to set up the equational system in our
  way, because the Myhill-Nerode relation determines the `direction' of the
  transitions---the successor `state' of an equivalence class @{text Y} can
  be reached by adding a character to the end of @{text Y}. This is also the
  reason why we have to use our reverse version of Arden's lemma.}
  Overloading the function @{text \<calL>} for the two kinds of terms in the
  equational system, we have
  
  \begin{center}
  @{text "\<calL>(Y, r) \<equiv>"} %
  @{thm (rhs) L_rhs_item.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm}
  @{thm L_rhs_item.simps(1)[where r="r", THEN eq_reflection]}
  \end{center}

  \noindent
  and we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations
  %
  \begin{equation}\label{inv1}
  @{text "X\<^isub>i = \<calL>(Y\<^isub>i\<^isub>1, CHAR c\<^isub>i\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>i\<^isub>q, CHAR c\<^isub>i\<^isub>q)"}.
  \end{equation}

  \noindent
  hold. Similarly for @{text "X\<^isub>1"} we can show the following equation
  %
  \begin{equation}\label{inv2}
  @{text "X\<^isub>1 = \<calL>(Y\<^isub>i\<^isub>1, CHAR c\<^isub>i\<^isub>1) \<union> \<dots> \<union> \<calL>(Y\<^isub>i\<^isub>p, CHAR c\<^isub>i\<^isub>p) \<union> \<calL>(\<lambda>(EMPTY))"}.
  \end{equation}

  \noindent
  The reason for adding the @{text \<lambda>}-marker to our initial equational system is 
  to obtain this equation: it only holds with the marker, since none of 
  the other terms contain the empty string. The point of the initial equational system is
  that solving it means we will be able to extract a regular expression for every equivalence class. 

  Our representation for the equations in Isabelle/HOL are pairs,
  where the first component is an equivalence class (a set of strings)
  and the second component
  is a set of terms. Given a set of equivalence
  classes @{text CS}, our initial equational system @{term "Init CS"} is thus 
  formally defined as
  %
  \begin{equation}\label{initcs}
  \mbox{\begin{tabular}{rcl}     
  @{thm (lhs) Init_rhs_def} & @{text "\<equiv>"} & 
  @{text "if"}~@{term "[] \<in> X"}\\
  & & @{text "then"}~@{term "{Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X} \<union> {Lam EMPTY}"}\\
  & & @{text "else"}~@{term "{Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}"}\\
  @{thm (lhs) Init_def}     & @{text "\<equiv>"} & @{thm (rhs) Init_def}
  \end{tabular}}
  \end{equation}

  

  \noindent
  Because we use sets of terms 
  for representing the right-hand sides of equations, we can 
  prove \eqref{inv1} and \eqref{inv2} more concisely as
  %
  \begin{lemma}\label{inv}
  If @{thm (prem 1) test} then @{text "X = \<Union> \<calL> ` rhs"}.
  \end{lemma}

  \noindent
  Our proof of Thm.~\ref{myhillnerodeone} will proceed by transforming the
  initial equational system into one in \emph{solved form} maintaining the invariant
  in Lem.~\ref{inv}. From the solved form we will be able to read
  off the regular expressions. 

  In order to transform an equational system into solved form, we have two 
  operations: one that takes an equation of the form @{text "X = rhs"} and removes
  any recursive occurrences of @{text X} in the @{text rhs} using our variant of Arden's 
  Lemma. The other operation takes an equation @{text "X = rhs"}
  and substitutes @{text X} throughout the rest of the equational system
  adjusting the remaining regular expressions appropriately. To define this adjustment 
  we define the \emph{append-operation} taking a term and a regular expression as argument

  \begin{center}
  @{thm append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\hspace{10mm}
  @{thm append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}
  \end{center}

  \noindent
  We lift this operation to entire right-hand sides of equations, written as
  @{thm (lhs) append_rhs_rexp_def[where rexp="r"]}. With this we can define
  the \emph{arden-operation} for an equation of the form @{text "X = rhs"} as:
  %
  \begin{equation}\label{arden_def}
  \mbox{\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l}
  @{thm (lhs) Arden_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ 
   & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \<in> rhs}"} \\
   & & @{text "r' ="}   & @{term "STAR (\<Uplus> {r. Trn X r \<in> rhs})"}\\
   & &  \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "append_rhs_rexp rhs' r'"}}\\ 
  \end{tabular}}
  \end{equation}

  \noindent
  In this definition, we first delete all terms of the form @{text "(X, r)"} from @{text rhs};
  then we calculate the combined regular expressions for all @{text r} coming 
  from the deleted @{text "(X, r)"}, and take the @{const STAR} of it;
  finally we append this regular expression to @{text rhs'}. It can be easily seen 
  that this operation mimics Arden's lemma on the level of equations. To ensure
  the non-emptiness condition of Arden's lemma we say that a right-hand side is
  \emph{ardenable} provided

  \begin{center}
  @{thm ardenable_def}
  \end{center}

  \noindent
  This allows us to prove

  \begin{lemma}\label{ardenable}
  Given an equation @{text "X = rhs"}.
  If @{text "X = \<Union>\<calL> ` rhs"},
  @{thm (prem 2) Arden_keeps_eq}, and
  @{thm (prem 3) Arden_keeps_eq}, then
  @{text "X = \<Union>\<calL> ` (Arden X rhs)"}
  \end{lemma}
  
  \noindent
  The \emph{substituion-operation} takes an equation
  of the form @{text "X = xrhs"} and substitutes it into the right-hand side @{text rhs}.

  \begin{center}
  \begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l}
  @{thm (lhs) Subst_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ 
   & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \<in> rhs}"} \\
   & & @{text "r' ="}   & @{term "\<Uplus> {r. Trn X r \<in> rhs}"}\\
   & &  \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \<union> append_rhs_rexp xrhs r'"}}\\ 
  \end{tabular}
  \end{center}

  \noindent
  We again delete first all occurrence of @{text "(X, r)"} in @{text rhs}; we then calculate
  the regular expression corresponding to the deleted terms; finally we append this
  regular expression to @{text "xrhs"} and union it up with @{text rhs'}. When we use
  the substitution operation we will arrange it so that @{text "xrhs"} does not contain
  any occurrence of @{text X}.

  With these two operation in place, we can define the operation that removes one equation
  from an equational systems @{text ES}. The operation @{const Subst_all}
  substitutes an equation @{text "X = xrhs"} throughout an equational system @{text ES}; 
  @{const Remove} then completely removes such an equation from @{text ES} by substituting 
  it to the rest of the equational system, but first eliminating all recursive occurrences
  of @{text X} by applying @{const Arden} to @{text "xrhs"}.

  \begin{center}
  \begin{tabular}{rcl}
  @{thm (lhs) Subst_all_def} & @{text "\<equiv>"} & @{thm (rhs) Subst_all_def}\\
  @{thm (lhs) Remove_def}    & @{text "\<equiv>"} & @{thm (rhs) Remove_def}
  \end{tabular}
  \end{center}

  \noindent
  Finally, we can define how an equational system should be solved. For this 
  we will need to iterate the process of eliminating equations until only one equation
  will be left in the system. However, we not just want to have any equation
  as being the last one, but the one involving the equivalence class for 
  which we want to calculate the regular 
  expression. Let us suppose this equivalence class is @{text X}. 
  Since @{text X} is the one to be solved, in every iteration step we have to pick an
  equation to be eliminated that is different from @{text X}. In this way 
  @{text X} is kept to the final step. The choice is implemented using Hilbert's choice 
  operator, written @{text SOME} in the definition below.
  
  \begin{center}
  \begin{tabular}{rc@ {\hspace{4mm}}r@ {\hspace{1mm}}l}
  @{thm (lhs) Iter_def} & @{text "\<equiv>"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "let"}}\\ 
   & & @{text "(Y, yrhs) ="} & @{term "SOME (Y, yrhs). (Y, yrhs) \<in> ES \<and> X \<noteq> Y"} \\
   & &  \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "in"}~~@{term "Remove ES Y yrhs"}}\\ 
  \end{tabular}
  \end{center}

  \noindent
  The last definition we need applies @{term Iter} over and over until a condition 
  @{text Cond} is \emph{not} satisfied anymore. The condition states that there
  are more than one equation left in the equational system @{text ES}. To solve
  an equational system we use Isabelle/HOL's @{text while}-operator as follows:
  
  \begin{center}
  @{thm Solve_def}
  \end{center}

  \noindent
  We are not concerned here with the definition of this operator
  (see Berghofer and Nipkow \cite{BerghoferNipkow00}), but note that we eliminate
  in each @{const Iter}-step a single equation, and therefore 
  have a well-founded termination order by taking the cardinality 
  of the equational system @{text ES}. This enables us to prove
  properties about our definition of @{const Solve} when we `call' it with
  the equivalence class @{text X} and the initial equational system 
  @{term "Init (UNIV // \<approx>A)"} from
  \eqref{initcs} using the principle:
  %
  \begin{equation}\label{whileprinciple}
  \mbox{\begin{tabular}{l}
  @{term "invariant (Init (UNIV // \<approx>A))"} \\
  @{term "\<forall>ES. invariant ES \<and> Cond ES \<longrightarrow> invariant (Iter X ES)"}\\
  @{term "\<forall>ES. invariant ES \<and> Cond ES \<longrightarrow> card (Iter X ES) < card ES"}\\
  @{term "\<forall>ES. invariant ES \<and> \<not> Cond ES \<longrightarrow> P ES"}\\
  \hline
  \multicolumn{1}{c}{@{term "P (Solve X (Init (UNIV // \<approx>A)))"}}
  \end{tabular}}
  \end{equation}

  \noindent
  This principle states that given an invariant (which we will specify below) 
  we can prove a property
  @{text "P"} involving @{const Solve}. For this we have to discharge the following
  proof obligations: first the
  initial equational system satisfies the invariant; second the iteration
  step @{text "Iter"} preserves the the invariant as long as the condition @{term Cond} holds;
  third @{text "Iter"} decreases the termination order, and fourth that
  once the condition does not hold anymore then the property @{text P} must hold.

  The property @{term P} in our proof will state that @{term "Solve X (Init (UNIV // \<approx>A))"}
  returns with a single equation @{text "X = xrhs"} for some @{text "xrhs"}, and
  that this equational system still satisfies the invariant. In order to get
  the proof through, the invariant is composed of the following six properties:

  \begin{center}
  \begin{tabular}{@ {}rcl@ {\hspace{-13mm}}l @ {}}
  @{text "invariant ES"} & @{text "\<equiv>"} &
      @{term "finite ES"} & @{text "(finiteness)"}\\
  & @{text "\<and>"} & @{thm (rhs) finite_rhs_def} & @{text "(finiteness rhs)"}\\
  & @{text "\<and>"} & @{text "\<forall>(X, rhs)\<in>ES. X = \<Union>\<calL> ` rhs"} & @{text "(soundness)"}\\
  & @{text "\<and>"} & @{thm (rhs) distinct_equas_def}\\
  &             &  & @{text "(distinctness)"}\\
  & @{text "\<and>"} & @{thm (rhs) ardenable_all_def} & @{text "(ardenable)"}\\   
  & @{text "\<and>"} & @{thm (rhs) valid_eqs_def} & @{text "(validity)"}\\
  \end{tabular}
  \end{center}
 
  \noindent
  The first two ensure that the equational system is always finite (number of equations
  and number of terms in each equation); the second makes sure the `meaning' of the 
  equations is preserved under our transformations. The other properties are a bit more
  technical, but are needed to get our proof through. Distinctness states that every
  equation in the system is distinct. Ardenable ensures that we can always
  apply the arden operation. 
  The last property states that every @{text rhs} can only contain equivalence classes
  for which there is an equation. Therefore @{text lhss} is just the set containing 
  the first components of an equational system,
  while @{text "rhss"} collects all equivalence classes @{text X} in the terms of the 
  form @{term "Trn X r"}. That means formally @{thm (lhs) lhss_def}~@{text "\<equiv> {X | (X, rhs) \<in> ES}"} 
  and @{thm (lhs) rhss_def}~@{text "\<equiv> {X | (X, r) \<in> rhs}"}.
  

  It is straightforward to prove that the initial equational system satisfies the
  invariant.

  \begin{lemma}\label{invzero}
  @{thm[mode=IfThen] Init_ES_satisfies_invariant}
  \end{lemma}

  \begin{proof}
  Finiteness is given by the assumption and the way how we set up the 
  initial equational system. Soundness is proved in Lem.~\ref{inv}. Distinctness
  follows from the fact that the equivalence classes are disjoint. The ardenable
  property also follows from the setup of the initial equational system, as does 
  validity.\qed
  \end{proof}

  \noindent
  Next we show that @{text Iter} preserves the invariant.

  \begin{lemma}\label{iterone}
  @{thm[mode=IfThen] iteration_step_invariant[where xrhs="rhs"]}
  \end{lemma}

  \begin{proof} 
  This boils down to choosing an equation @{text "Y = yrhs"} to be eliminated
  and to show that @{term "Subst_all (ES - {(Y, yrhs)}) Y (Arden Y yrhs)"} 
  preserves the invariant.
  We prove this as follows:

  \begin{center}
  @{text "\<forall> ES."} @{thm (prem 1) Subst_all_satisfies_invariant} implies
  @{thm (concl) Subst_all_satisfies_invariant}
  \end{center}

  \noindent
  Finiteness is straightforward, as @{const Subst} and @{const Arden} operations 
  keep the equational system finite. These operations also preserve soundness 
  and distinctness (we proved soundness for @{const Arden} in Lem.~\ref{ardenable}).
  The property ardenable is clearly preserved because the append-operation
  cannot make a regular expression to match the empty string. Validity is
  given because @{const Arden} removes an equivalence class from @{text yrhs}
  and then @{const Subst_all} removes @{text Y} from the equational system.
  Having proved the implication above, we can replace @{text "ES"} with @{text "ES - {(Y, yrhs)}"}
  which matches with our proof-obligation of @{const "Subst_all"}. Since
  \mbox{@{term "ES = ES - {(Y, yrhs)} \<union> {(Y, yrhs)}"}}, we can use our assumption 
  to complete the proof.\qed 
  \end{proof}

  \noindent
  We also need the fact that @{text Iter} decreases the termination measure.

  \begin{lemma}\label{itertwo}
  @{thm[mode=IfThen] iteration_step_measure[simplified (no_asm), where xrhs="rhs"]}
  \end{lemma}

  \begin{proof}
  By assumption we know that @{text "ES"} is finite and has more than one element.
  Therefore there must be an element @{term "(Y, yrhs) \<in> ES"} with 
  @{term "(Y, yrhs) \<noteq> (X, rhs)"}. Using the distinctness property we can infer
  that @{term "Y \<noteq> X"}. We further know that @{text "Remove ES Y yrhs"}
  removes the equation @{text "Y = yrhs"} from the system, and therefore 
  the cardinality of @{const Iter} strictly decreases.\qed
  \end{proof}

  \noindent
  This brings us to our property we want establish for @{text Solve}.


  \begin{lemma}
  If @{thm (prem 1) Solve} and @{thm (prem 2) Solve} then there exists
  a @{text rhs} such that  @{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"}
  and @{term "invariant {(X, rhs)}"}.
  \end{lemma}

  \begin{proof} 
  In order to prove this lemma using \eqref{whileprinciple}, we have to use a slightly
  stronger invariant since Lem.~\ref{iterone} and \ref{itertwo} have the precondition 
  that @{term "(X, rhs) \<in> ES"} for some @{text rhs}. This precondition is needed
  in order to choose in the @{const Iter}-step an equation that is not \mbox{@{term "X = rhs"}}.
  Therefore our invariant cannot be just @{term "invariant ES"}, but must be 
  @{term "invariant ES \<and> (\<exists>rhs. (X, rhs) \<in> ES)"}. By assumption 
  @{thm (prem 2) Solve} and Lem.~\ref{invzero}, the more general invariant holds for
  the initial equational system. This is premise 1 of~\eqref{whileprinciple}.
  Premise 2 is given by Lem.~\ref{iterone} and the fact that @{const Iter} might
  modify the @{text rhs} in the equation @{term "X = rhs"}, but does not remove it.
  Premise 3 of~\eqref{whileprinciple} is by Lem.~\ref{itertwo}. Now in premise 4
  we like to show that there exists a @{text rhs} such that @{term "ES = {(X, rhs)}"}
  and that @{text "invariant {(X, rhs)}"} holds, provided the condition @{text "Cond"}
  does not holds. By the stronger invariant we know there exists such a @{text "rhs"}
  with @{term "(X, rhs) \<in> ES"}. Because @{text Cond} is not true, we know the cardinality
  of @{text ES} is @{text 1}. This means @{text "ES"} must actually be the set @{text "{(X, rhs)}"},
  for which the invariant holds. This allows us to conclude that 
  @{term "Solve X (Init (UNIV // \<approx>A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"} hold,
  as needed.\qed
  \end{proof}

  \noindent
  With this lemma in place we can show that for every equivalence class in @{term "UNIV // \<approx>A"}
  there exists a regular expression.

  \begin{lemma}\label{every_eqcl_has_reg}
  @{thm[mode=IfThen] every_eqcl_has_reg}
  \end{lemma}

  \begin{proof}
  By the preceeding Lemma, we know that there exists a @{text "rhs"} such
  that @{term "Solve X (Init (UNIV // \<approx>A))"} returns the equation @{text "X = rhs"},
  and that the invariant holds for this equation. That means we 
  know @{text "X = \<Union>\<calL> ` rhs"}. We further know that
  this is equal to \mbox{@{text "\<Union>\<calL> ` (Arden X rhs)"}} using the properties of the 
  invariant and Lem.~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"},
  we can infer that @{term "rhss rhs \<subseteq> {X}"} and because the arden operation
  removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}.
  This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}.
  So we can collect those (finitely many) regular expressions and have @{term "X = L (\<Uplus>rs)"}.
  With this we can conclude the proof.\qed
  \end{proof}

  \noindent
  Lem.~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction
  of the Myhill-Nerode theorem.

  \begin{proof}[of Thm.~\ref{myhillnerodeone}]
  By Lem.~\ref{every_eqcl_has_reg} we know that there exists a regular expression for
  every equivalence class in @{term "UNIV // \<approx>A"}. Since @{text "finals A"} is
  a subset of  @{term "UNIV // \<approx>A"}, we also know that for every equivalence class
  in @{term "finals A"} there exists a regular expression. Moreover by assumption 
  we know that @{term "finals A"} must be finite, and therefore there must be a finite
  set of regular expressions @{text "rs"} such that

  \begin{center}
  @{term "\<Union>(finals A) = L (\<Uplus>rs)"}
  \end{center}

  \noindent
  Since the left-hand side is equal to @{text A}, we can use @{term "\<Uplus>rs"} 
  as the regular expression that is needed in the theorem.\qed
  \end{proof}
*}




section {* Myhill-Nerode, Second Part *}

text {*
  We will prove in this section the second part of the Myhill-Nerode
  theorem. It can be formulated in our setting as follows.

  \begin{theorem}
  Given @{text "r"} is a regular expressions, then @{thm Myhill_Nerode2}.
  \end{theorem}  

  \noindent
  The proof will be by induction on the structure of @{text r}. It turns out
  the base cases are straightforward.


  \begin{proof}[Base Cases]
  The cases for @{const NULL}, @{const EMPTY} and @{const CHAR} are routine, because 
  we can easily establish

  \begin{center}
  \begin{tabular}{l}
  @{thm quot_null_eq}\\
  @{thm quot_empty_subset}\\
  @{thm quot_char_subset}
  \end{tabular}
  \end{center}

  \noindent
  hold, which shows that @{term "UNIV // \<approx>(L r)"} must be finite.\qed
  \end{proof}

  \noindent
  Much more interesting, however, are the inductive cases. They seem hard to be solved 
  directly. The reader is invited to try. 

  Our method will rely on some
  \emph{tagging functions} defined over strings. Given the inductive hypothesis, it will 
  be easy to prove that the range of these tagging functions is finite
  (the range of a function @{text f} is defined as @{text "range f \<equiv> f ` UNIV"}).
  With this we will be able to infer that the tagging functions, seen as relations,
  give rise to finitely many equivalence classes of @{const UNIV}. Finally we 
  will show that the tagging relation is more refined than @{term "\<approx>(L r)"}, which
  implies that @{term "UNIV // \<approx>(L r)"} must also be finite (a relation @{text "R\<^isub>1"} 
  is said to \emph{refine} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \<subseteq> R\<^isub>2"}).
  We formally define the notion of a \emph{tagging-relation} as follows.

  \begin{definition}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x}
  and @{text y} are \emph{tag-related} provided
  \begin{center}
  @{text "x =tag= y \<equiv> tag x = tag y"}
  \end{center}
  \end{definition}

  \noindent
  In order to establish finiteness of a set @{text A}, we shall use the following powerful
  principle from Isabelle/HOL's library.
  %
  \begin{equation}\label{finiteimageD}
  @{thm[mode=IfThen] finite_imageD}
  \end{equation}

  \noindent
  It states that if an image of a set under an injective function @{text f} (injective over this set) 
  is finite, then @{text A} itself must be finite. We can use it to establish the following 
  two lemmas.

  \begin{lemma}\label{finone}
  @{thm[mode=IfThen] finite_eq_tag_rel}
  \end{lemma}

  \begin{proof}
  We set in \eqref{finiteimageD}, @{text f} to be @{text "X \<mapsto> tag ` X"}. We have
  @{text "range f"} to be a subset of @{term "Pow (range tag)"}, which we know must be
  finite by assumption. Now @{term "f (UNIV // =tag=)"} is a subset of @{text "range f"},
  and so also finite. Injectivity amounts to showing that @{text "X = Y"} under the
  assumptions that @{text "X, Y \<in> "}~@{term "UNIV // =tag="} and @{text "f X = f Y"}.
  From the assumption we can obtain @{text "x \<in> X"} and @{text "y \<in> Y"} with 
  @{text "tag x = tag y"}. Since @{text x} and @{text y} are tag-related, this in 
  turn means that the equivalence classes @{text X}
  and @{text Y} must be equal.\qed
  \end{proof}

  \begin{lemma}\label{fintwo} 
  Given two equivalence relations @{text "R\<^isub>1"} and @{text "R\<^isub>2"}, whereby
  @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}.
  If @{thm (prem 1) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}
  then @{thm (concl) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}.
  \end{lemma}

  \begin{proof}
  We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to
  be @{text "X \<mapsto>"}~@{term "{R\<^isub>1 `` {x} | x. x \<in> X}"}. It is easy to see that 
  @{text "finite (f ` (UNIV // R\<^isub>2))"} because it is a subset of @{text "Pow (UNIV // R\<^isub>1)"},
  which is finite by assumption. What remains to be shown is that @{text f} is injective
  on @{term "UNIV // R\<^isub>2"}. This is equivalent to showing that two equivalence 
  classes, say @{text "X"} and @{text Y}, in @{term "UNIV // R\<^isub>2"} are equal, provided
  @{text "f X = f Y"}. For @{text "X = Y"} to be equal, we have to find two elements
  @{text "x \<in> X"} and @{text "y \<in> Y"} such that they are @{text R\<^isub>2} related.
  We know there exists a @{text x} with @{term "X = R\<^isub>2 `` {x}"}
  and @{text "x \<in> X"}. From the latter fact we can infer that @{term "R\<^isub>1 ``{x} \<in> f X"}
  and further @{term "R\<^isub>1 ``{x} \<in> f Y"}. This means we can obtain a @{text y}
  such that @{term "R\<^isub>1 `` {x} = R\<^isub>1 `` {y}"} holds. Consequently @{text x} and @{text y}
  are @{text "R\<^isub>1"}-related. Since by assumption @{text "R\<^isub>1"} refines @{text "R\<^isub>2"},
  they must also be @{text "R\<^isub>2"}-related, as we need to show.\qed
  \end{proof}

  \noindent
  Chaining Lem.~\ref{finone} and \ref{fintwo} together, means in order to show
  that @{term "UNIV // \<approx>(L r)"} is finite, we have to find a tagging function whose
  range can be shown to be finite and whose tagging-relation refines @{term "\<approx>(L r)"}.
  Let us attempt the @{const ALT}-case first.

  \begin{proof}[@{const "ALT"}-Case] 
  We take as tagging function 

  \begin{center}
  @{thm tag_str_ALT_def[where A="A" and B="B", THEN meta_eq_app]}
  \end{center}

  \noindent
  where @{text "A"} and @{text "B"} are some arbitrary languages.
  We can show in general, if @{term "finite (UNIV // \<approx>A)"} and @{term "finite (UNIV // \<approx>B)"}
  then @{term "finite ((UNIV // \<approx>A) \<times> (UNIV // \<approx>B))"} holds. The range of
  @{term "tag_str_ALT A B"} is a subset of this product set, so finite. It remains to be shown
  that @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} refines @{term "\<approx>(A \<union> B)"}. This amounts to 
  showing
  %
  \begin{center}
  @{term "tag\<^isub>A\<^isub>L\<^isub>T A B x = tag\<^isub>A\<^isub>L\<^isub>T A B y \<longrightarrow> x \<approx>(A \<union> B) y"}
  \end{center}

  \noindent
  which by unfolding the Myhill-Nerode relation is identical to
  %
  \begin{equation}\label{pattern}
  @{text "\<forall>z. tag\<^isub>A\<^isub>L\<^isub>T A B x = tag\<^isub>A\<^isub>L\<^isub>T A B y \<and> x @ z \<in> A \<union> B \<longrightarrow> y @ z \<in> A \<union> B"}
  \end{equation}

  \noindent
  since both @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} and @{term "\<approx>(A \<union> B)"} are symmetric. To solve
  \eqref{pattern} we just have to unfold the definition of the tagging-relation and analyse 
  in which set, @{text A} or @{text B}, the string @{term "x @ z"} is. 
  The definition of the tagging-function will give us in each case the
  information to infer that @{text "y @ z \<in> A \<union> B"}.
  Finally we
  can discharge this case by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed
  \end{proof}


  \noindent
  The pattern in \eqref{pattern} is repeated for the other two cases. Unfortunately,
  they are slightly more complicated. In the @{const SEQ}-case we essentially have
  to be able to infer that 

  \begin{center}
  @{term "x @ z \<in> A ;; B \<longrightarrow> y @ z \<in> A ;; B"}
  \end{center}

  \noindent
  using the information given by the appropriate tagging function. The complication 
  is to find out what the possible splits of @{text "x @ z"} are to be in @{term "A ;; B"}
  (this was easy in case of @{term "A \<union> B"}). For this we define the
  notions of \emph{string prefixes} 

  \begin{center}
  @{text "x \<le> y \<equiv> \<exists>z. y = x @ z"}\hspace{10mm}
  @{text "x < y \<equiv> x \<le> y \<and> x \<noteq> y"}
  \end{center}

  \noindent
  and \emph{string subtraction}:

  \begin{center}
  \begin{tabular}{r@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
  @{text "[] - y"} & @{text "\<equiv>"} & @{text "[]"}\\
  @{text "x - []"} & @{text "\<equiv>"} & @{text x}\\
  @{text "cx - dy"} & @{text "\<equiv>"} & @{text "if c = d then x - y else cx"}\\
  \end{tabular}
  \end{center}

  \noindent
  where @{text c} and @{text d} are characters, and @{text x} and @{text y} are strings.
  Now assuming  @{term "x @ z \<in> A ;; B"} there are only two possible ways how this string
  is `split'  over @{term "A ;; B"}:

  \begin{center}
  \scalebox{0.7}{
  \begin{tikzpicture}
    \node[draw,minimum height=3.8ex] (xa) { $\hspace{4em}x'\hspace{4em}$ };
    \node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.5em}x - x'\hspace{0.5em}$ };
    \node[draw,minimum height=3.8ex, right=-0.03em of xxa] (z) { $\hspace{10.1em}z\hspace{10.1em}$ };

    \draw[decoration={brace,transform={yscale=3}},decorate]
           (xa.north west) -- ($(xxa.north east)+(0em,0em)$)
               node[midway, above=0.5em]{$x$};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           (z.north west) -- ($(z.north east)+(0em,0em)$)
               node[midway, above=0.5em]{$z$};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(xa.north west)+(0em,3ex)$) -- ($(z.north east)+(0em,3ex)$)
               node[midway, above=0.8em]{@{term "x @ z \<in> A ;; B"}};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(z.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$)
               node[midway, below=0.5em]{@{term "(x - x') @ z \<in> B"}};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$)
               node[midway, below=0.5em]{@{term "x' \<in> A"}};
  \end{tikzpicture}}

  \scalebox{0.7}{
  \begin{tikzpicture}
    \node[draw,minimum height=3.8ex] (x) { $\hspace{6.5em}x\hspace{6.5em}$ };
    \node[draw,minimum height=3.8ex, right=-0.03em of x] (za) { $\hspace{2em}z'\hspace{2em}$ };
    \node[draw,minimum height=3.8ex, right=-0.03em of za] (zza) { $\hspace{6.1em}z - z'\hspace{6.1em}$  };

    \draw[decoration={brace,transform={yscale=3}},decorate]
           (x.north west) -- ($(za.north west)+(0em,0em)$)
               node[midway, above=0.5em]{$x$};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(za.north west)+(0em,0ex)$) -- ($(zza.north east)+(0em,0ex)$)
               node[midway, above=0.8em]{$z$};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(x.north west)+(0em,3ex)$) -- ($(zza.north east)+(0em,3ex)$)
               node[midway, above=0.8em]{@{term "x @ z \<in> A ;; B"}};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(za.south east)+(0em,0ex)$) -- ($(x.south west)+(0em,0ex)$)
               node[midway, below=0.5em]{@{text "x @ z' \<in> A"}};

    \draw[decoration={brace,transform={yscale=3}},decorate]
           ($(zza.south east)+(0em,0ex)$) -- ($(za.south east)+(0em,0ex)$)
               node[midway, below=0.5em]{@{text "(z - z') \<in> B"}};
  \end{tikzpicture}}
  \end{center}

  \noindent
  Either there is a prefix of @{text x} in @{text A} and the rest in @{text B},
  or @{text x} and a prefix of @{text "z"} is in @{text A} and the rest in @{text B}.
  In bot cases we have to show that @{term "y @ z \<in> A ;; B"}. For this we use the 
  following tagging-function

  \begin{center}
  @{thm tag_str_SEQ_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]}
  \end{center}

  \noindent
  with the idea ???

  \begin{proof}[@{const SEQ}-Case]


  \end{proof}

  \begin{proof}[@{const STAR}-Case]

  \begin{center}
  @{thm tag_str_STAR_def[where ?L1.0="A", THEN meta_eq_app]}
  \end{center}
  \end{proof}
*}



section {* Conclusion and Related Work *}

text {*
  In this paper we took the view that a regular language is one where there
  exists a regular expression that matches all of its strings. Regular
  expressions can conveniently be defined as a datatype in a HOL-based theorem
  prover. For us it was therefore interesting to find out how far we can push
  this point of view. 

  Having formalised the Myhill-Nerode theorem means we
  pushed quite far. Using this theorem we can obviously prove when a language
  is \emph{not} regular---by establishing that it has infinitely many
  equivalence classes generated by the Myhill-Nerode relation (this is usually
  the purpose of the pumping lemma \cite{Kozen97}).  We can also use it to
  establish the standard textbook results about closure properties of regular
  languages. Interesting is the case of closure under complement, because
  it seems difficult to construct a regular expression for the complement
  language by direct means. However the existence of such a regular expression
  can be easily proved using the Myhill-Nerode theorem since 

  \begin{center}
  @{term "s\<^isub>1 \<approx>A s\<^isub>2"} if and only if @{term "s\<^isub>1 \<approx>(-A) s\<^isub>2"}
  \end{center}
 
  \noindent
  holds for any strings @{text "s\<^isub>1"} and @{text
  "s\<^isub>2"}. Therefore @{text A} and the complement language @{term "-A"} give rise to the same
  partitions.  Proving the existence of such a regular expression via automata would 
  be quite involved. It includes the
  steps: regular expression @{text "\<Rightarrow>"} non-deterministic automaton @{text
  "\<Rightarrow>"} deterministic automaton @{text "\<Rightarrow>"} complement automaton @{text "\<Rightarrow>"}
  regular expression.

  While regular expressions are convenient in formalisations, they have some
  limitations. One is that there seems to be no method of calculating a
  minimal regular expression (for example in terms of length) for a regular
  language, like there is
  for automata. On the other hand, efficient regular expression matching,
  without using automata, poses no problem \cite{OwensReppyTuron09}.
  For an implementation of a simple regular expression matcher,
  whose correctness has been formally established, we refer the reader to
  Owens and Slind \cite{OwensSlind08}.


  Our formalisation consists of 790 lines of Isabelle/Isar code for the first
  direction and 475 for the second, plus around 300 lines of standard material about
  regular languages. While this might be seen as too large to count as a
  concise proof pearl, this should be seen in the context of the work done by
  Constable at al \cite{Constable00} who formalised the Myhill-Nerode theorem
  in Nuprl using automata. They write that their four-member team needed
  something on the magnitute of 18 months for their formalisation. The
  estimate for our formalisation is that we needed approximately 3 months and
  this included the time to find our proof arguments. Unlike Constable et al,
  who were able to follow the proofs from \cite{HopcroftUllman69}, we had to
  find our own arguments.  So for us the formalisation was not the
  bottleneck. It is hard to gauge the size of a formalisation in Nurpl, but
  from what is shown in the Nuprl Math Library about their development it
  seems substantially larger than ours. The code of ours can be found in the
  Mercurial Repository at

  \begin{center}
  \url{http://www4.in.tum.de/~urbanc/regexp.html}
  \end{center}


  Our proof of the first direction is very much inspired by \emph{Brzozowski's
  algebraic mehod} used to convert a finite automaton to a regular
  expression \cite{Brzozowski64}. The close connection can be seen by considering the equivalence
  classes as the states of the minimal automaton for the regular language.
  However there are some subtle differences. Since we identify equivalence
  classes with the states of the automaton, then the most natural choice is to
  characterise each state with the set of strings starting from the initial
  state leading up to that state. Usually, however, the states are characterised as the
  strings starting from that state leading to the terminal states.  The first
  choice has consequences about how the initial equational system is set up. We have
  the $\lambda$-term on our `initial state', while Brzozowski has it on the
  terminal states. This means we also need to reverse the direction of Arden's
  lemma.

  We briefly considered using the method Brzozowski presented in the Appendix
  of~\cite{Brzozowski64} in order to prove the second direction of the
  Myhill-Nerode theorem. There he calculates the derivatives for regular
  expressions and shows that there can be only finitely many of them. We could
  have used as the tag of a string @{text s} the derivative of a regular expression
  generated with respect to @{text s}.  Using the fact that two strings are
  Myhill-Nerode related whenever their derivative is the same, together with
  the fact that there are only finitely many derivatives for a regular
  expression would give us a similar argument as ours. However it seems not so easy to
  calculate the derivatives and then to count them. Therefore we preferred our
  direct method of using tagging-functions. This
  is also where our method shines, because we can completely side-step the
  standard argument \cite{Kozen97} where automata need to be composed, which
  as stated in the Introduction is not so convenient to formalise in a 
  HOL-based theorem prover. However, it is also the direction where we had to 
  spend most of the `conceptual' time, as our proof-argument based on tagging-functions
  is new for establishing the Myhill-Nerode theorem. All standard proofs 
  of this direction proceed by arguments over automata.

*}


(*<*)
end
(*>*)