diff -r a8a442ba0dbf -r e93760534354 Journal/Paper.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Journal/Paper.thy Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,1403 @@ +(*<*) +theory Paper +imports "../Derivs" "~~/src/HOL/Library/LaTeXsugar" +begin + +declare [[show_question_marks = false]] + +consts + REL :: "(string \ string) \ bool" + UPLUS :: "'a set \ 'a set \ (nat \ 'a) set" + +abbreviation + "EClass x R \ R `` {x}" + +abbreviation + "Append_rexp2 r_itm r \ Append_rexp r r_itm" + + +notation (latex output) + str_eq_rel ("\\<^bsub>_\<^esub>") and + str_eq ("_ \\<^bsub>_\<^esub> _") and + Seq (infixr "\" 100) and + Star ("_\<^bsup>\\<^esup>") and + pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and + Suc ("_+1" [100] 100) and + quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and + REL ("\") and + UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and + L ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and + Lam ("\'(_')" [100] 100) and + Trn ("'(_, _')" [100, 100] 100) and + EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and + transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and + Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and + Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and + Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and + uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and + tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and + tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and + tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and + tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and + tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and + tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100) +lemma meta_eq_app: + shows "f \ \x. g x \ f x \ g x" + by auto + +(*>*) + + +section {* Introduction *} + +text {* + Regular languages are an important and well-understood subject in Computer + Science, with many beautiful theorems and many useful algorithms. There is a + wide range of textbooks on this subject, many of which are aimed at students + and contain very detailed `pencil-and-paper' proofs + (e.g.~\cite{Kozen97}). It seems natural to exercise theorem provers by + formalising the theorems and by verifying formally the algorithms. + + There is however a problem: the typical approach to regular languages is to + introduce finite automata and then define everything in terms of them. For + example, a regular language is normally defined as one whose strings are + recognised by a finite deterministic automaton. This approach has many + benefits. Among them is the fact that it is easy to convince oneself that + regular languages are closed under complementation: one just has to exchange + the accepting and non-accepting states in the corresponding automaton to + obtain an automaton for the complement language. The problem, however, lies with + formalising such reasoning in a HOL-based theorem prover, in our case + Isabelle/HOL. Automata are built up from states and transitions that + need to be represented as graphs, matrices or functions, none + of which can be defined as an inductive datatype. + + In case of graphs and matrices, this means we have to build our own + reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor + HOLlight support them with libraries. Even worse, reasoning about graphs and + matrices can be a real hassle in HOL-based theorem provers. Consider for + example the operation of sequencing two automata, say $A_1$ and $A_2$, by + connecting the accepting states of $A_1$ to the initial state of $A_2$:\\[-5.5mm] + % + \begin{center} + \begin{tabular}{ccc} + \begin{tikzpicture}[scale=0.8] + %\draw[step=2mm] (-1,-1) grid (1,1); + + \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); + \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3); + + \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \draw (-0.6,0.0) node {\footnotesize$A_1$}; + \draw ( 0.6,0.0) node {\footnotesize$A_2$}; + \end{tikzpicture} + + & + + \raisebox{1.1mm}{\bf\Large$\;\;\;\Rightarrow\,\;\;$} + + & + + \begin{tikzpicture}[scale=0.8] + %\draw[step=2mm] (-1,-1) grid (1,1); + + \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); + \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3); + + \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \draw (C) to [very thick, bend left=45] (B); + \draw (D) to [very thick, bend right=45] (B); + + \draw (-0.6,0.0) node {\footnotesize$A_1$}; + \draw ( 0.6,0.0) node {\footnotesize$A_2$}; + \end{tikzpicture} + + \end{tabular} + \end{center} + + \noindent + On `paper' we can define the corresponding graph in terms of the disjoint + union of the state nodes. Unfortunately in HOL, the standard definition for disjoint + union, namely + % + \begin{equation}\label{disjointunion} + @{term "UPLUS A\<^isub>1 A\<^isub>2 \ {(1, x) | x. x \ A\<^isub>1} \ {(2, y) | y. y \ A\<^isub>2}"} + \end{equation} + + \noindent + changes the type---the disjoint union is not a set, but a set of pairs. + Using this definition for disjoint union means we do not have a single type for automata + and hence will not be able to state certain properties about \emph{all} + automata, since there is no type quantification available in HOL (unlike in Coq, for example). An + alternative, which provides us with a single type for automata, is to give every + state node an identity, for example a natural + number, and then be careful to rename these identities apart whenever + connecting two automata. This results in clunky proofs + establishing that properties are invariant under renaming. Similarly, + connecting two automata represented as matrices results in very adhoc + constructions, which are not pleasant to reason about. + + Functions are much better supported in Isabelle/HOL, but they still lead to similar + problems as with graphs. Composing, for example, two non-deterministic automata in parallel + requires also the formalisation of disjoint unions. Nipkow \cite{Nipkow98} + dismisses for this the option of using identities, because it leads according to + him to ``messy proofs''. He + opts for a variant of \eqref{disjointunion} using bit lists, but writes + + \begin{quote} + \it% + \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}} + `` & All lemmas appear obvious given a picture of the composition of automata\ldots + Yet their proofs require a painful amount of detail.'' + \end{tabular} + \end{quote} + + \noindent + and + + \begin{quote} + \it% + \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}} + `` & If the reader finds the above treatment in terms of bit lists revoltingly + concrete, I cannot disagree. A more abstract approach is clearly desirable.'' + \end{tabular} + \end{quote} + + + \noindent + Moreover, it is not so clear how to conveniently impose a finiteness condition + upon functions in order to represent \emph{finite} automata. The best is + probably to resort to more advanced reasoning frameworks, such as \emph{locales} + or \emph{type classes}, + which are \emph{not} available in all HOL-based theorem provers. + + {\bf add commnets from Brozowski} + + Because of these problems to do with representing automata, there seems + to be no substantial formalisation of automata theory and regular languages + carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes + the link between regular expressions and automata in + the context of lexing. Berghofer and Reiter \cite{BerghoferReiter09} + formalise automata working over + bit strings in the context of Presburger arithmetic. + The only larger formalisations of automata theory + are carried out in Nuprl \cite{Constable00} and in Coq \cite{Filliatre97}. + + In this paper, we will not attempt to formalise automata theory in + Isabelle/HOL, but take a different approach to regular + languages. Instead of defining a regular language as one where there exists + an automaton that recognises all strings of the language, we define a + regular language as: + + \begin{definition} + A language @{text A} is \emph{regular}, provided there is a regular expression that matches all + strings of @{text "A"}. + \end{definition} + + \noindent + The reason is that regular expressions, unlike graphs, matrices and functions, can + be easily defined as inductive datatype. Consequently a corresponding reasoning + infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation + of regular expression matching based on derivatives \cite{OwensSlind08} and + with an equivalence checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}. + The purpose of this paper is to + show that a central result about regular languages---the Myhill-Nerode theorem---can + be recreated by only using regular expressions. This theorem gives necessary + and sufficient conditions for when a language is regular. As a corollary of this + theorem we can easily establish the usual closure properties, including + complementation, for regular languages.\smallskip + + \noindent + {\bf Contributions:} + There is an extensive literature on regular languages. + To our best knowledge, our proof of the Myhill-Nerode theorem is the + first that is based on regular expressions, only. We prove the part of this theorem + stating that a regular expression has only finitely many partitions using certain + tagging-functions. Again to our best knowledge, these tagging-functions have + not been used before to establish the Myhill-Nerode theorem. +*} + +section {* Preliminaries *} + +text {* + Strings in Isabelle/HOL are lists of characters with the \emph{empty string} + being represented by the empty list, written @{term "[]"}. \emph{Languages} + are sets of strings. The language containing all strings is written in + Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages + is written @{term "A ;; B"} and a language raised to the power @{text n} is written + @{term "A \ n"}. They are defined as usual + + \begin{center} + @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]} + \hspace{7mm} + @{thm pow.simps(1)[THEN eq_reflection, where A1="A"]} + \hspace{7mm} + @{thm pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} + \end{center} + + \noindent + where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A} + is defined as the union over all powers, namely @{thm Star_def}. In the paper + we will make use of the following properties of these constructions. + + \begin{proposition}\label{langprops}\mbox{}\\ + \begin{tabular}{@ {}ll} + (i) & @{thm star_cases} \\ + (ii) & @{thm[mode=IfThen] pow_length}\\ + (iii) & @{thm seq_Union_left} \\ + \end{tabular} + \end{proposition} + + \noindent + In @{text "(ii)"} we use the notation @{term "length s"} for the length of a + string; this property states that if \mbox{@{term "[] \ A"}} then the lengths of + the strings in @{term "A \ (Suc n)"} must be longer than @{text n}. We omit + the proofs for these properties, but invite the reader to consult our + formalisation.\footnote{Available at \url{http://www4.in.tum.de/~urbanc/regexp.html}} + + The notation in Isabelle/HOL for the quotient of a language @{text A} according to an + equivalence relation @{term REL} is @{term "A // REL"}. We will write + @{text "\x\\<^isub>\"} for the equivalence class defined + as \mbox{@{text "{y | y \ x}"}}. + + + Central to our proof will be the solution of equational systems + involving equivalence classes of languages. For this we will use Arden's Lemma \cite{Brzozowski64}, + which solves equations of the form @{term "X = A ;; X \ B"} provided + @{term "[] \ A"}. However we will need the following `reverse' + version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A ;; X"} to + \mbox{@{term "X ;; A"}}). + + \begin{lemma}[Reverse Arden's Lemma]\label{arden}\mbox{}\\ + If @{thm (prem 1) arden} then + @{thm (lhs) arden} if and only if + @{thm (rhs) arden}. + \end{lemma} + + \begin{proof} + For the right-to-left direction we assume @{thm (rhs) arden} and show + that @{thm (lhs) arden} holds. From Prop.~\ref{langprops}@{text "(i)"} + we have @{term "A\ = {[]} \ A ;; A\"}, + which is equal to @{term "A\ = {[]} \ A\ ;; A"}. Adding @{text B} to both + sides gives @{term "B ;; A\ = B ;; ({[]} \ A\ ;; A)"}, whose right-hand side + is equal to @{term "(B ;; A\) ;; A \ B"}. This completes this direction. + + For the other direction we assume @{thm (lhs) arden}. By a simple induction + on @{text n}, we can establish the property + + \begin{center} + @{text "(*)"}\hspace{5mm} @{thm (concl) arden_helper} + \end{center} + + \noindent + Using this property we can show that @{term "B ;; (A \ n) \ X"} holds for + all @{text n}. From this we can infer @{term "B ;; A\ \ X"} using the definition + of @{text "\"}. + For the inclusion in the other direction we assume a string @{text s} + with length @{text k} is an element in @{text X}. Since @{thm (prem 1) arden} + we know by Prop.~\ref{langprops}@{text "(ii)"} that + @{term "s \ X ;; (A \ Suc k)"} since its length is only @{text k} + (the strings in @{term "X ;; (A \ Suc k)"} are all longer). + From @{text "(*)"} it follows then that + @{term s} must be an element in @{term "(\m\{0..k}. B ;; (A \ m))"}. This in turn + implies that @{term s} is in @{term "(\n. B ;; (A \ n))"}. Using Prop.~\ref{langprops}@{text "(iii)"} + this is equal to @{term "B ;; A\"}, as we needed to show.\qed + \end{proof} + + \noindent + Regular expressions are defined as the inductive datatype + + \begin{center} + @{text r} @{text "::="} + @{term NULL}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term EMPTY}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "CHAR c"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "SEQ r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "ALT r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "STAR r"} + \end{center} + + \noindent + and the language matched by a regular expression is defined as + + \begin{center} + \begin{tabular}{c@ {\hspace{10mm}}c} + \begin{tabular}{rcl} + @{thm (lhs) L_rexp.simps(1)} & @{text "\"} & @{thm (rhs) L_rexp.simps(1)}\\ + @{thm (lhs) L_rexp.simps(2)} & @{text "\"} & @{thm (rhs) L_rexp.simps(2)}\\ + @{thm (lhs) L_rexp.simps(3)[where c="c"]} & @{text "\"} & @{thm (rhs) L_rexp.simps(3)[where c="c"]}\\ + \end{tabular} + & + \begin{tabular}{rcl} + @{thm (lhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & + @{thm (rhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ + @{thm (lhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & + @{thm (rhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ + @{thm (lhs) L_rexp.simps(6)[where r="r"]} & @{text "\"} & + @{thm (rhs) L_rexp.simps(6)[where r="r"]}\\ + \end{tabular} + \end{tabular} + \end{center} + + Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating + a regular expression that matches the union of all languages of @{text rs}. We only need to know the + existence + of such a regular expression and therefore we use Isabelle/HOL's @{const "fold_graph"} and Hilbert's + @{text "\"} to define @{term "\rs"}. This operation, roughly speaking, folds @{const ALT} over the + set @{text rs} with @{const NULL} for the empty set. We can prove that for a finite set @{text rs} + % + \begin{equation}\label{uplus} + \mbox{@{thm (lhs) folds_alt_simp} @{text "= \ (\ ` rs)"}} + \end{equation} + + \noindent + holds, whereby @{text "\ ` rs"} stands for the + image of the set @{text rs} under function @{text "\"}. +*} + + +section {* The Myhill-Nerode Theorem, First Part *} + +text {* + The key definition in the Myhill-Nerode theorem is the + \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two + strings are related, provided there is no distinguishing extension in this + language. This can be defined as a tertiary relation. + + \begin{definition}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and + @{text y} are Myhill-Nerode related provided + \begin{center} + @{thm str_eq_def[simplified str_eq_rel_def Pair_Collect]} + \end{center} + \end{definition} + + \noindent + It is easy to see that @{term "\A"} is an equivalence relation, which + partitions the set of all strings, @{text "UNIV"}, into a set of disjoint + equivalence classes. To illustrate this quotient construction, let us give a simple + example: consider the regular language containing just + the string @{text "[c]"}. The relation @{term "\({[c]})"} partitions @{text UNIV} + into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and @{text "X\<^isub>3"} + as follows + + \begin{center} + @{text "X\<^isub>1 = {[]}"}\hspace{5mm} + @{text "X\<^isub>2 = {[c]}"}\hspace{5mm} + @{text "X\<^isub>3 = UNIV - {[], [c]}"} + \end{center} + + One direction of the Myhill-Nerode theorem establishes + that if there are finitely many equivalence classes, like in the example above, then + the language is regular. In our setting we therefore have to show: + + \begin{theorem}\label{myhillnerodeone} + @{thm[mode=IfThen] Myhill_Nerode1} + \end{theorem} + + \noindent + To prove this theorem, we first define the set @{term "finals A"} as those equivalence + classes from @{term "UNIV // \A"} that contain strings of @{text A}, namely + % + \begin{equation} + @{thm finals_def} + \end{equation} + + \noindent + In our running example, @{text "X\<^isub>2"} is the only + equivalence class in @{term "finals {[c]}"}. + It is straightforward to show that in general @{thm lang_is_union_of_finals} and + @{thm finals_in_partitions} hold. + Therefore if we know that there exists a regular expression for every + equivalence class in \mbox{@{term "finals A"}} (which by assumption must be + a finite set), then we can use @{text "\"} to obtain a regular expression + that matches every string in @{text A}. + + + Our proof of Thm.~\ref{myhillnerodeone} relies on a method that can calculate a + regular expression for \emph{every} equivalence class, not just the ones + in @{term "finals A"}. We + first define the notion of \emph{one-character-transition} between + two equivalence classes + % + \begin{equation} + @{thm transition_def} + \end{equation} + + \noindent + which means that if we concatenate the character @{text c} to the end of all + strings in the equivalence class @{text Y}, we obtain a subset of + @{text X}. Note that we do not define an automaton here, we merely relate two sets + (with the help of a character). In our concrete example we have + @{term "X\<^isub>1 \c\ X\<^isub>2"}, @{term "X\<^isub>1 \d\ X\<^isub>3"} with @{text d} being any + other character than @{text c}, and @{term "X\<^isub>3 \d\ X\<^isub>3"} for any @{text d}. + + Next we construct an \emph{initial equational system} that + contains an equation for each equivalence class. We first give + an informal description of this construction. Suppose we have + the equivalence classes @{text "X\<^isub>1,\,X\<^isub>n"}, there must be one and only one that + contains the empty string @{text "[]"} (since equivalence classes are disjoint). + Let us assume @{text "[] \ X\<^isub>1"}. We build the following equational system + + \begin{center} + \begin{tabular}{rcl} + @{text "X\<^isub>1"} & @{text "="} & @{text "(Y\<^isub>1\<^isub>1, CHAR c\<^isub>1\<^isub>1) + \ + (Y\<^isub>1\<^isub>p, CHAR c\<^isub>1\<^isub>p) + \(EMPTY)"} \\ + @{text "X\<^isub>2"} & @{text "="} & @{text "(Y\<^isub>2\<^isub>1, CHAR c\<^isub>2\<^isub>1) + \ + (Y\<^isub>2\<^isub>o, CHAR c\<^isub>2\<^isub>o)"} \\ + & $\vdots$ \\ + @{text "X\<^isub>n"} & @{text "="} & @{text "(Y\<^isub>n\<^isub>1, CHAR c\<^isub>n\<^isub>1) + \ + (Y\<^isub>n\<^isub>q, CHAR c\<^isub>n\<^isub>q)"}\\ + \end{tabular} + \end{center} + + \noindent + where the terms @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} + stand for all transitions @{term "Y\<^isub>i\<^isub>j \c\<^isub>i\<^isub>j\ + X\<^isub>i"}. + %The intuition behind the equational system is that every + %equation @{text "X\<^isub>i = rhs\<^isub>i"} in this system + %corresponds roughly to a state of an automaton whose name is @{text X\<^isub>i} and its predecessor states + %are the @{text "Y\<^isub>i\<^isub>j"}; the @{text "c\<^isub>i\<^isub>j"} are the labels of the transitions from these + %predecessor states to @{text X\<^isub>i}. + There can only be + finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} in a right-hand side + since by assumption there are only finitely many + equivalence classes and only finitely many characters. + The term @{text "\(EMPTY)"} in the first equation acts as a marker for the initial state, that + is the equivalence class + containing @{text "[]"}.\footnote{Note that we mark, roughly speaking, the + single `initial' state in the equational system, which is different from + the method by Brzozowski \cite{Brzozowski64}, where he marks the + `terminal' states. We are forced to set up the equational system in our + way, because the Myhill-Nerode relation determines the `direction' of the + transitions---the successor `state' of an equivalence class @{text Y} can + be reached by adding a character to the end of @{text Y}. This is also the + reason why we have to use our reverse version of Arden's Lemma.} + %In our initial equation system there can only be + %finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} in a right-hand side + %since by assumption there are only finitely many + %equivalence classes and only finitely many characters. + Overloading the function @{text \} for the two kinds of terms in the + equational system, we have + + \begin{center} + @{text "\(Y, r) \"} % + @{thm (rhs) L_rhs_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} + @{thm L_rhs_trm.simps(1)[where r="r", THEN eq_reflection]} + \end{center} + + \noindent + and we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations + % + \begin{equation}\label{inv1} + @{text "X\<^isub>i = \(Y\<^isub>i\<^isub>1, CHAR c\<^isub>i\<^isub>1) \ \ \ \(Y\<^isub>i\<^isub>q, CHAR c\<^isub>i\<^isub>q)"}. + \end{equation} + + \noindent + hold. Similarly for @{text "X\<^isub>1"} we can show the following equation + % + \begin{equation}\label{inv2} + @{text "X\<^isub>1 = \(Y\<^isub>1\<^isub>1, CHAR c\<^isub>1\<^isub>1) \ \ \ \(Y\<^isub>1\<^isub>p, CHAR c\<^isub>1\<^isub>p) \ \(\(EMPTY))"}. + \end{equation} + + \noindent + holds. The reason for adding the @{text \}-marker to our initial equational system is + to obtain this equation: it only holds with the marker, since none of + the other terms contain the empty string. The point of the initial equational system is + that solving it means we will be able to extract a regular expression for every equivalence class. + + Our representation for the equations in Isabelle/HOL are pairs, + where the first component is an equivalence class (a set of strings) + and the second component + is a set of terms. Given a set of equivalence + classes @{text CS}, our initial equational system @{term "Init CS"} is thus + formally defined as + % + \begin{equation}\label{initcs} + \mbox{\begin{tabular}{rcl} + @{thm (lhs) Init_rhs_def} & @{text "\"} & + @{text "if"}~@{term "[] \ X"}\\ + & & @{text "then"}~@{term "{Trn Y (CHAR c) | Y c. Y \ CS \ Y \c\ X} \ {Lam EMPTY}"}\\ + & & @{text "else"}~@{term "{Trn Y (CHAR c)| Y c. Y \ CS \ Y \c\ X}"}\\ + @{thm (lhs) Init_def} & @{text "\"} & @{thm (rhs) Init_def} + \end{tabular}} + \end{equation} + + + + \noindent + Because we use sets of terms + for representing the right-hand sides of equations, we can + prove \eqref{inv1} and \eqref{inv2} more concisely as + % + \begin{lemma}\label{inv} + If @{thm (prem 1) test} then @{text "X = \ \ ` rhs"}. + \end{lemma} + + \noindent + Our proof of Thm.~\ref{myhillnerodeone} will proceed by transforming the + initial equational system into one in \emph{solved form} maintaining the invariant + in Lem.~\ref{inv}. From the solved form we will be able to read + off the regular expressions. + + In order to transform an equational system into solved form, we have two + operations: one that takes an equation of the form @{text "X = rhs"} and removes + any recursive occurrences of @{text X} in the @{text rhs} using our variant of Arden's + Lemma. The other operation takes an equation @{text "X = rhs"} + and substitutes @{text X} throughout the rest of the equational system + adjusting the remaining regular expressions appropriately. To define this adjustment + we define the \emph{append-operation} taking a term and a regular expression as argument + + \begin{center} + @{thm Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\hspace{10mm} + @{thm Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + \end{center} + + \noindent + We lift this operation to entire right-hand sides of equations, written as + @{thm (lhs) Append_rexp_rhs_def[where rexp="r"]}. With this we can define + the \emph{arden-operation} for an equation of the form @{text "X = rhs"} as: + % + \begin{equation}\label{arden_def} + \mbox{\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} + @{thm (lhs) Arden_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ + & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \ rhs}"} \\ + & & @{text "r' ="} & @{term "STAR (\ {r. Trn X r \ rhs})"}\\ + & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "append_rhs_rexp rhs' r'"}}\\ + \end{tabular}} + \end{equation} + + \noindent + In this definition, we first delete all terms of the form @{text "(X, r)"} from @{text rhs}; + then we calculate the combined regular expressions for all @{text r} coming + from the deleted @{text "(X, r)"}, and take the @{const STAR} of it; + finally we append this regular expression to @{text rhs'}. It can be easily seen + that this operation mimics Arden's Lemma on the level of equations. To ensure + the non-emptiness condition of Arden's Lemma we say that a right-hand side is + @{text ardenable} provided + + \begin{center} + @{thm ardenable_def} + \end{center} + + \noindent + This allows us to prove a version of Arden's Lemma on the level of equations. + + \begin{lemma}\label{ardenable} + Given an equation @{text "X = rhs"}. + If @{text "X = \\ ` rhs"}, + @{thm (prem 2) Arden_keeps_eq}, and + @{thm (prem 3) Arden_keeps_eq}, then + @{text "X = \\ ` (Arden X rhs)"}. + \end{lemma} + + \noindent + Our @{text ardenable} condition is slightly stronger than needed for applying Arden's Lemma, + but we can still ensure that it holds troughout our algorithm of transforming equations + into solved form. The \emph{substitution-operation} takes an equation + of the form @{text "X = xrhs"} and substitutes it into the right-hand side @{text rhs}. + + \begin{center} + \begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} + @{thm (lhs) Subst_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ + & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \ rhs}"} \\ + & & @{text "r' ="} & @{term "\ {r. Trn X r \ rhs}"}\\ + & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \ append_rhs_rexp xrhs r'"}}\\ + \end{tabular} + \end{center} + + \noindent + We again delete first all occurrences of @{text "(X, r)"} in @{text rhs}; we then calculate + the regular expression corresponding to the deleted terms; finally we append this + regular expression to @{text "xrhs"} and union it up with @{text rhs'}. When we use + the substitution operation we will arrange it so that @{text "xrhs"} does not contain + any occurrence of @{text X}. + + With these two operations in place, we can define the operation that removes one equation + from an equational systems @{text ES}. The operation @{const Subst_all} + substitutes an equation @{text "X = xrhs"} throughout an equational system @{text ES}; + @{const Remove} then completely removes such an equation from @{text ES} by substituting + it to the rest of the equational system, but first eliminating all recursive occurrences + of @{text X} by applying @{const Arden} to @{text "xrhs"}. + + \begin{center} + \begin{tabular}{rcl} + @{thm (lhs) Subst_all_def} & @{text "\"} & @{thm (rhs) Subst_all_def}\\ + @{thm (lhs) Remove_def} & @{text "\"} & @{thm (rhs) Remove_def} + \end{tabular} + \end{center} + + \noindent + Finally, we can define how an equational system should be solved. For this + we will need to iterate the process of eliminating equations until only one equation + will be left in the system. However, we do not just want to have any equation + as being the last one, but the one involving the equivalence class for + which we want to calculate the regular + expression. Let us suppose this equivalence class is @{text X}. + Since @{text X} is the one to be solved, in every iteration step we have to pick an + equation to be eliminated that is different from @{text X}. In this way + @{text X} is kept to the final step. The choice is implemented using Hilbert's choice + operator, written @{text SOME} in the definition below. + + \begin{center} + \begin{tabular}{rc@ {\hspace{4mm}}r@ {\hspace{1mm}}l} + @{thm (lhs) Iter_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "let"}}\\ + & & @{text "(Y, yrhs) ="} & @{term "SOME (Y, yrhs). (Y, yrhs) \ ES \ X \ Y"} \\ + & & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "in"}~~@{term "Remove ES Y yrhs"}}\\ + \end{tabular} + \end{center} + + \noindent + The last definition we need applies @{term Iter} over and over until a condition + @{text Cond} is \emph{not} satisfied anymore. This condition states that there + are more than one equation left in the equational system @{text ES}. To solve + an equational system we use Isabelle/HOL's @{text while}-operator as follows: + + \begin{center} + @{thm Solve_def} + \end{center} + + \noindent + We are not concerned here with the definition of this operator + (see Berghofer and Nipkow \cite{BerghoferNipkow00}), but note that we eliminate + in each @{const Iter}-step a single equation, and therefore + have a well-founded termination order by taking the cardinality + of the equational system @{text ES}. This enables us to prove + properties about our definition of @{const Solve} when we `call' it with + the equivalence class @{text X} and the initial equational system + @{term "Init (UNIV // \A)"} from + \eqref{initcs} using the principle: + % + \begin{equation}\label{whileprinciple} + \mbox{\begin{tabular}{l} + @{term "invariant (Init (UNIV // \A))"} \\ + @{term "\ES. invariant ES \ Cond ES \ invariant (Iter X ES)"}\\ + @{term "\ES. invariant ES \ Cond ES \ card (Iter X ES) < card ES"}\\ + @{term "\ES. invariant ES \ \ Cond ES \ P ES"}\\ + \hline + \multicolumn{1}{c}{@{term "P (Solve X (Init (UNIV // \A)))"}} + \end{tabular}} + \end{equation} + + \noindent + This principle states that given an invariant (which we will specify below) + we can prove a property + @{text "P"} involving @{const Solve}. For this we have to discharge the following + proof obligations: first the + initial equational system satisfies the invariant; second the iteration + step @{text "Iter"} preserves the invariant as long as the condition @{term Cond} holds; + third @{text "Iter"} decreases the termination order, and fourth that + once the condition does not hold anymore then the property @{text P} must hold. + + The property @{term P} in our proof will state that @{term "Solve X (Init (UNIV // \A))"} + returns with a single equation @{text "X = xrhs"} for some @{text "xrhs"}, and + that this equational system still satisfies the invariant. In order to get + the proof through, the invariant is composed of the following six properties: + + \begin{center} + \begin{tabular}{@ {}rcl@ {\hspace{-13mm}}l @ {}} + @{text "invariant ES"} & @{text "\"} & + @{term "finite ES"} & @{text "(finiteness)"}\\ + & @{text "\"} & @{thm (rhs) finite_rhs_def} & @{text "(finiteness rhs)"}\\ + & @{text "\"} & @{text "\(X, rhs)\ES. X = \\ ` rhs"} & @{text "(soundness)"}\\ + & @{text "\"} & @{thm (rhs) distinctness_def}\\ + & & & @{text "(distinctness)"}\\ + & @{text "\"} & @{thm (rhs) ardenable_all_def} & @{text "(ardenable)"}\\ + & @{text "\"} & @{thm (rhs) validity_def} & @{text "(validity)"}\\ + \end{tabular} + \end{center} + + \noindent + The first two ensure that the equational system is always finite (number of equations + and number of terms in each equation); the third makes sure the `meaning' of the + equations is preserved under our transformations. The other properties are a bit more + technical, but are needed to get our proof through. Distinctness states that every + equation in the system is distinct. @{text Ardenable} ensures that we can always + apply the @{text Arden} operation. + The last property states that every @{text rhs} can only contain equivalence classes + for which there is an equation. Therefore @{text lhss} is just the set containing + the first components of an equational system, + while @{text "rhss"} collects all equivalence classes @{text X} in the terms of the + form @{term "Trn X r"}. That means formally @{thm (lhs) lhss_def}~@{text "\ {X | (X, rhs) \ ES}"} + and @{thm (lhs) rhss_def}~@{text "\ {X | (X, r) \ rhs}"}. + + + It is straightforward to prove that the initial equational system satisfies the + invariant. + + \begin{lemma}\label{invzero} + @{thm[mode=IfThen] Init_ES_satisfies_invariant} + \end{lemma} + + \begin{proof} + Finiteness is given by the assumption and the way how we set up the + initial equational system. Soundness is proved in Lem.~\ref{inv}. Distinctness + follows from the fact that the equivalence classes are disjoint. The @{text ardenable} + property also follows from the setup of the initial equational system, as does + validity.\qed + \end{proof} + + \noindent + Next we show that @{text Iter} preserves the invariant. + + \begin{lemma}\label{iterone} + @{thm[mode=IfThen] iteration_step_invariant[where xrhs="rhs"]} + \end{lemma} + + \begin{proof} + The argument boils down to choosing an equation @{text "Y = yrhs"} to be eliminated + and to show that @{term "Subst_all (ES - {(Y, yrhs)}) Y (Arden Y yrhs)"} + preserves the invariant. + We prove this as follows: + + \begin{center} + @{text "\ ES."} @{thm (prem 1) Subst_all_satisfies_invariant} implies + @{thm (concl) Subst_all_satisfies_invariant} + \end{center} + + \noindent + Finiteness is straightforward, as the @{const Subst} and @{const Arden} operations + keep the equational system finite. These operations also preserve soundness + and distinctness (we proved soundness for @{const Arden} in Lem.~\ref{ardenable}). + The property @{text ardenable} is clearly preserved because the append-operation + cannot make a regular expression to match the empty string. Validity is + given because @{const Arden} removes an equivalence class from @{text yrhs} + and then @{const Subst_all} removes @{text Y} from the equational system. + Having proved the implication above, we can instantiate @{text "ES"} with @{text "ES - {(Y, yrhs)}"} + which matches with our proof-obligation of @{const "Subst_all"}. Since + \mbox{@{term "ES = ES - {(Y, yrhs)} \ {(Y, yrhs)}"}}, we can use the assumption + to complete the proof.\qed + \end{proof} + + \noindent + We also need the fact that @{text Iter} decreases the termination measure. + + \begin{lemma}\label{itertwo} + @{thm[mode=IfThen] iteration_step_measure[simplified (no_asm), where xrhs="rhs"]} + \end{lemma} + + \begin{proof} + By assumption we know that @{text "ES"} is finite and has more than one element. + Therefore there must be an element @{term "(Y, yrhs) \ ES"} with + @{term "(Y, yrhs) \ (X, rhs)"}. Using the distinctness property we can infer + that @{term "Y \ X"}. We further know that @{text "Remove ES Y yrhs"} + removes the equation @{text "Y = yrhs"} from the system, and therefore + the cardinality of @{const Iter} strictly decreases.\qed + \end{proof} + + \noindent + This brings us to our property we want to establish for @{text Solve}. + + + \begin{lemma} + If @{thm (prem 1) Solve} and @{thm (prem 2) Solve} then there exists + a @{text rhs} such that @{term "Solve X (Init (UNIV // \A)) = {(X, rhs)}"} + and @{term "invariant {(X, rhs)}"}. + \end{lemma} + + \begin{proof} + In order to prove this lemma using \eqref{whileprinciple}, we have to use a slightly + stronger invariant since Lem.~\ref{iterone} and \ref{itertwo} have the precondition + that @{term "(X, rhs) \ ES"} for some @{text rhs}. This precondition is needed + in order to choose in the @{const Iter}-step an equation that is not \mbox{@{term "X = rhs"}}. + Therefore our invariant cannot be just @{term "invariant ES"}, but must be + @{term "invariant ES \ (\rhs. (X, rhs) \ ES)"}. By assumption + @{thm (prem 2) Solve} and Lem.~\ref{invzero}, the more general invariant holds for + the initial equational system. This is premise 1 of~\eqref{whileprinciple}. + Premise 2 is given by Lem.~\ref{iterone} and the fact that @{const Iter} might + modify the @{text rhs} in the equation @{term "X = rhs"}, but does not remove it. + Premise 3 of~\eqref{whileprinciple} is by Lem.~\ref{itertwo}. Now in premise 4 + we like to show that there exists a @{text rhs} such that @{term "ES = {(X, rhs)}"} + and that @{text "invariant {(X, rhs)}"} holds, provided the condition @{text "Cond"} + does not holds. By the stronger invariant we know there exists such a @{text "rhs"} + with @{term "(X, rhs) \ ES"}. Because @{text Cond} is not true, we know the cardinality + of @{text ES} is @{text 1}. This means @{text "ES"} must actually be the set @{text "{(X, rhs)}"}, + for which the invariant holds. This allows us to conclude that + @{term "Solve X (Init (UNIV // \A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"} hold, + as needed.\qed + \end{proof} + + \noindent + With this lemma in place we can show that for every equivalence class in @{term "UNIV // \A"} + there exists a regular expression. + + \begin{lemma}\label{every_eqcl_has_reg} + @{thm[mode=IfThen] every_eqcl_has_reg} + \end{lemma} + + \begin{proof} + By the preceding lemma, we know that there exists a @{text "rhs"} such + that @{term "Solve X (Init (UNIV // \A))"} returns the equation @{text "X = rhs"}, + and that the invariant holds for this equation. That means we + know @{text "X = \\ ` rhs"}. We further know that + this is equal to \mbox{@{text "\\ ` (Arden X rhs)"}} using the properties of the + invariant and Lem.~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"}, + we can infer that @{term "rhss rhs \ {X}"} and because the @{text Arden} operation + removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}. + This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}. + So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = L (\rs)"}. + With this we can conclude the proof.\qed + \end{proof} + + \noindent + Lem.~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction + of the Myhill-Nerode theorem. + + \begin{proof}[of Thm.~\ref{myhillnerodeone}] + By Lem.~\ref{every_eqcl_has_reg} we know that there exists a regular expression for + every equivalence class in @{term "UNIV // \A"}. Since @{text "finals A"} is + a subset of @{term "UNIV // \A"}, we also know that for every equivalence class + in @{term "finals A"} there exists a regular expression. Moreover by assumption + we know that @{term "finals A"} must be finite, and therefore there must be a finite + set of regular expressions @{text "rs"} such that + @{term "\(finals A) = L (\rs)"}. + Since the left-hand side is equal to @{text A}, we can use @{term "\rs"} + as the regular expression that is needed in the theorem.\qed + \end{proof} +*} + + + + +section {* Myhill-Nerode, Second Part *} + +text {* + We will prove in this section the second part of the Myhill-Nerode + theorem. It can be formulated in our setting as follows: + + \begin{theorem} + Given @{text "r"} is a regular expression, then @{thm Myhill_Nerode2}. + \end{theorem} + + \noindent + The proof will be by induction on the structure of @{text r}. It turns out + the base cases are straightforward. + + + \begin{proof}[Base Cases] + The cases for @{const NULL}, @{const EMPTY} and @{const CHAR} are routine, because + we can easily establish that + + \begin{center} + \begin{tabular}{l} + @{thm quot_null_eq}\\ + @{thm quot_empty_subset}\\ + @{thm quot_char_subset} + \end{tabular} + \end{center} + + \noindent + hold, which shows that @{term "UNIV // \(L r)"} must be finite.\qed + \end{proof} + + \noindent + Much more interesting, however, are the inductive cases. They seem hard to solve + directly. The reader is invited to try. + + Our proof will rely on some + \emph{tagging-functions} defined over strings. Given the inductive hypothesis, it will + be easy to prove that the \emph{range} of these tagging-functions is finite + (the range of a function @{text f} is defined as @{text "range f \ f ` UNIV"}). + With this we will be able to infer that the tagging-functions, seen as relations, + give rise to finitely many equivalence classes of @{const UNIV}. Finally we + will show that the tagging-relations are more refined than @{term "\(L r)"}, which + implies that @{term "UNIV // \(L r)"} must also be finite (a relation @{text "R\<^isub>1"} + is said to \emph{refine} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \ R\<^isub>2"}). + We formally define the notion of a \emph{tagging-relation} as follows. + + \begin{definition}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x} + and @{text y} are \emph{tag-related} provided + \begin{center} + @{text "x =tag= y \ tag x = tag y"}\;. + \end{center} + \end{definition} + + + In order to establish finiteness of a set @{text A}, we shall use the following powerful + principle from Isabelle/HOL's library. + % + \begin{equation}\label{finiteimageD} + @{thm[mode=IfThen] finite_imageD} + \end{equation} + + \noindent + It states that if an image of a set under an injective function @{text f} (injective over this set) + is finite, then the set @{text A} itself must be finite. We can use it to establish the following + two lemmas. + + \begin{lemma}\label{finone} + @{thm[mode=IfThen] finite_eq_tag_rel} + \end{lemma} + + \begin{proof} + We set in \eqref{finiteimageD}, @{text f} to be @{text "X \ tag ` X"}. We have + @{text "range f"} to be a subset of @{term "Pow (range tag)"}, which we know must be + finite by assumption. Now @{term "f (UNIV // =tag=)"} is a subset of @{text "range f"}, + and so also finite. Injectivity amounts to showing that @{text "X = Y"} under the + assumptions that @{text "X, Y \ "}~@{term "UNIV // =tag="} and @{text "f X = f Y"}. + From the assumptions we can obtain @{text "x \ X"} and @{text "y \ Y"} with + @{text "tag x = tag y"}. Since @{text x} and @{text y} are tag-related, this in + turn means that the equivalence classes @{text X} + and @{text Y} must be equal.\qed + \end{proof} + + \begin{lemma}\label{fintwo} + Given two equivalence relations @{text "R\<^isub>1"} and @{text "R\<^isub>2"}, whereby + @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}. + If @{thm (prem 1) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]} + then @{thm (concl) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}. + \end{lemma} + + \begin{proof} + We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to + be @{text "X \"}~@{term "{R\<^isub>1 `` {x} | x. x \ X}"}. It is easy to see that + @{term "finite (f ` (UNIV // R\<^isub>2))"} because it is a subset of @{term "Pow (UNIV // R\<^isub>1)"}, + which is finite by assumption. What remains to be shown is that @{text f} is injective + on @{term "UNIV // R\<^isub>2"}. This is equivalent to showing that two equivalence + classes, say @{text "X"} and @{text Y}, in @{term "UNIV // R\<^isub>2"} are equal, provided + @{text "f X = f Y"}. For @{text "X = Y"} to be equal, we have to find two elements + @{text "x \ X"} and @{text "y \ Y"} such that they are @{text R\<^isub>2} related. + We know there exists a @{text "x \ X"} with \mbox{@{term "X = R\<^isub>2 `` {x}"}}. + From the latter fact we can infer that @{term "R\<^isub>1 ``{x} \ f X"} + and further @{term "R\<^isub>1 ``{x} \ f Y"}. This means we can obtain a @{text y} + such that @{term "R\<^isub>1 `` {x} = R\<^isub>1 `` {y}"} holds. Consequently @{text x} and @{text y} + are @{text "R\<^isub>1"}-related. Since by assumption @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}, + they must also be @{text "R\<^isub>2"}-related, as we need to show.\qed + \end{proof} + + \noindent + Chaining Lem.~\ref{finone} and \ref{fintwo} together, means in order to show + that @{term "UNIV // \(L r)"} is finite, we have to find a tagging-function whose + range can be shown to be finite and whose tagging-relation refines @{term "\(L r)"}. + Let us attempt the @{const ALT}-case first. + + \begin{proof}[@{const "ALT"}-Case] + We take as tagging-function + % + \begin{center} + @{thm tag_str_ALT_def[where A="A" and B="B", THEN meta_eq_app]} + \end{center} + + \noindent + where @{text "A"} and @{text "B"} are some arbitrary languages. + We can show in general, if @{term "finite (UNIV // \A)"} and @{term "finite (UNIV // \B)"} + then @{term "finite ((UNIV // \A) \ (UNIV // \B))"} holds. The range of + @{term "tag_str_ALT A B"} is a subset of this product set---so finite. It remains to be shown + that @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} refines @{term "\(A \ B)"}. This amounts to + showing + % + \begin{center} + @{term "tag\<^isub>A\<^isub>L\<^isub>T A B x = tag\<^isub>A\<^isub>L\<^isub>T A B y \ x \(A \ B) y"} + \end{center} + % + \noindent + which by unfolding the Myhill-Nerode relation is identical to + % + \begin{equation}\label{pattern} + @{text "\z. tag\<^isub>A\<^isub>L\<^isub>T A B x = tag\<^isub>A\<^isub>L\<^isub>T A B y \ x @ z \ A \ B \ y @ z \ A \ B"} + \end{equation} + % + \noindent + since both @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} and @{term "\(A \ B)"} are symmetric. To solve + \eqref{pattern} we just have to unfold the definition of the tagging-function and analyse + in which set, @{text A} or @{text B}, the string @{term "x @ z"} is. + The definition of the tagging-function will give us in each case the + information to infer that @{text "y @ z \ A \ B"}. + Finally we + can discharge this case by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed + \end{proof} + + + \noindent + The pattern in \eqref{pattern} is repeated for the other two cases. Unfortunately, + they are slightly more complicated. In the @{const SEQ}-case we essentially have + to be able to infer that + % + \begin{center} + @{text "\"}@{term "x @ z \ A ;; B \ y @ z \ A ;; B"} + \end{center} + % + \noindent + using the information given by the appropriate tagging-function. The complication + is to find out what the possible splits of @{text "x @ z"} are to be in @{term "A ;; B"} + (this was easy in case of @{term "A \ B"}). To deal with this complication we define the + notions of \emph{string prefixes} + % + \begin{center} + @{text "x \ y \ \z. y = x @ z"}\hspace{10mm} + @{text "x < y \ x \ y \ x \ y"} + \end{center} + % + \noindent + and \emph{string subtraction}: + % + \begin{center} + @{text "[] - y \ []"}\hspace{10mm} + @{text "x - [] \ x"}\hspace{10mm} + @{text "cx - dy \ if c = d then x - y else cx"} + \end{center} + % + \noindent + where @{text c} and @{text d} are characters, and @{text x} and @{text y} are strings. + + Now assuming @{term "x @ z \ A ;; B"} there are only two possible ways of how to `split' + this string to be in @{term "A ;; B"}: + % + \begin{center} + \begin{tabular}{@ {}c@ {\hspace{10mm}}c@ {}} + \scalebox{0.7}{ + \begin{tikzpicture} + \node[draw,minimum height=3.8ex] (xa) { $\hspace{3em}@{text "x'"}\hspace{3em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.2em}@{text "x - x'"}\hspace{0.2em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xxa] (z) { $\hspace{5em}@{text z}\hspace{5em}$ }; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (xa.north west) -- ($(xxa.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text x}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (z.north west) -- ($(z.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text z}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.north west)+(0em,3ex)$) -- ($(z.north east)+(0em,3ex)$) + node[midway, above=0.8em]{@{term "x @ z \ A ;; B"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(z.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "(x - x') @ z \ B"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "x' \ A"}}; + \end{tikzpicture}} + & + \scalebox{0.7}{ + \begin{tikzpicture} + \node[draw,minimum height=3.8ex] (x) { $\hspace{4.8em}@{text x}\hspace{4.8em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of x] (za) { $\hspace{0.6em}@{text "z'"}\hspace{0.6em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of za] (zza) { $\hspace{2.6em}@{text "z - z'"}\hspace{2.6em}$ }; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (x.north west) -- ($(za.north west)+(0em,0em)$) + node[midway, above=0.5em]{@{text x}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(za.north west)+(0em,0ex)$) -- ($(zza.north east)+(0em,0ex)$) + node[midway, above=0.5em]{@{text z}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(x.north west)+(0em,3ex)$) -- ($(zza.north east)+(0em,3ex)$) + node[midway, above=0.8em]{@{term "x @ z \ A ;; B"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(za.south east)+(0em,0ex)$) -- ($(x.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{text "x @ z' \ A"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(zza.south east)+(0em,0ex)$) -- ($(za.south east)+(0em,0ex)$) + node[midway, below=0.5em]{@{text "(z - z') \ B"}}; + \end{tikzpicture}} + \end{tabular} + \end{center} + % + \noindent + Either there is a prefix of @{text x} in @{text A} and the rest is in @{text B} (first picture), + or @{text x} and a prefix of @{text "z"} is in @{text A} and the rest in @{text B} (second picture). + In both cases we have to show that @{term "y @ z \ A ;; B"}. For this we use the + following tagging-function + % + \begin{center} + @{thm tag_str_SEQ_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]} + \end{center} + + \noindent + with the idea that in the first split we have to make sure that @{text "(x - x') @ z"} + is in the language @{text B}. + + \begin{proof}[@{const SEQ}-Case] + If @{term "finite (UNIV // \A)"} and @{term "finite (UNIV // \B)"} + then @{term "finite ((UNIV // \A) \ (Pow (UNIV // \B)))"} holds. The range of + @{term "tag_str_SEQ A B"} is a subset of this product set, and therefore finite. + We have to show injectivity of this tagging-function as + % + \begin{center} + @{term "\z. tag_str_SEQ A B x = tag_str_SEQ A B y \ x @ z \ A ;; B \ y @ z \ A ;; B"} + \end{center} + % + \noindent + There are two cases to be considered (see pictures above). First, there exists + a @{text "x'"} such that + @{text "x' \ A"}, @{text "x' \ x"} and @{text "(x - x') @ z \ B"} hold. We therefore have + % + \begin{center} + @{term "(\B `` {x - x'}) \ ({\B `` {x - x'} |x'. x' \ x \ x' \ A})"} + \end{center} + % + \noindent + and by the assumption about @{term "tag_str_SEQ A B"} also + % + \begin{center} + @{term "(\B `` {x - x'}) \ ({\B `` {y - y'} |y'. y' \ y \ y' \ A})"} + \end{center} + % + \noindent + That means there must be a @{text "y'"} such that @{text "y' \ A"} and + @{term "\B `` {x - x'} = \B `` {y - y'}"}. This equality means that + @{term "(x - x') \B (y - y')"} holds. Unfolding the Myhill-Nerode + relation and together with the fact that @{text "(x - x') @ z \ B"}, we + have @{text "(y - y') @ z \ B"}. We already know @{text "y' \ A"}, therefore + @{term "y @ z \ A ;; B"}, as needed in this case. + + Second, there exists a @{text "z'"} such that @{term "x @ z' \ A"} and @{text "z - z' \ B"}. + By the assumption about @{term "tag_str_SEQ A B"} we have + @{term "\A `` {x} = \A `` {y}"} and thus @{term "x \A y"}. Which means by the Myhill-Nerode + relation that @{term "y @ z' \ A"} holds. Using @{text "z - z' \ B"}, we can conclude also in this case + with @{term "y @ z \ A ;; B"}. We again can complete the @{const SEQ}-case + by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed + \end{proof} + + \noindent + The case for @{const STAR} is similar to @{const SEQ}, but poses a few extra challenges. When + we analyse the case that @{text "x @ z"} is an element in @{term "A\"} and @{text x} is not the + empty string, we + have the following picture: + % + \begin{center} + \scalebox{0.7}{ + \begin{tikzpicture} + \node[draw,minimum height=3.8ex] (xa) { $\hspace{4em}@{text "x'\<^isub>m\<^isub>a\<^isub>x"}\hspace{4em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.5em}@{text "x - x'\<^isub>m\<^isub>a\<^isub>x"}\hspace{0.5em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xxa] (za) { $\hspace{2em}@{text "z\<^isub>a"}\hspace{2em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of za] (zb) { $\hspace{7em}@{text "z\<^isub>b"}\hspace{7em}$ }; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (xa.north west) -- ($(xxa.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text x}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (za.north west) -- ($(zb.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text z}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.north west)+(0em,3ex)$) -- ($(zb.north east)+(0em,3ex)$) + node[midway, above=0.8em]{@{term "x @ z \ A\"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(za.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z\<^isub>a \ A"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "x'\<^isub>m\<^isub>a\<^isub>x \ A\"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(zb.south east)+(0em,0ex)$) -- ($(zb.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "z\<^isub>b \ A\"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(zb.south east)+(0em,-4ex)$) -- ($(xxa.south west)+(0em,-4ex)$) + node[midway, below=0.5em]{@{term "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z \ A\"}}; + \end{tikzpicture}} + \end{center} + % + \noindent + We can find a strict prefix @{text "x'"} of @{text x} such that @{term "x' \ A\"}, + @{text "x' < x"} and the rest @{term "(x - x') @ z \ A\"}. For example the empty string + @{text "[]"} would do. + There are potentially many such prefixes, but there can only be finitely many of them (the + string @{text x} is finite). Let us therefore choose the longest one and call it + @{text "x'\<^isub>m\<^isub>a\<^isub>x"}. Now for the rest of the string @{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z"} we + know it is in @{term "A\"}. By definition of @{term "A\"}, we can separate + this string into two parts, say @{text "a"} and @{text "b"}, such that @{text "a \ A"} + and @{term "b \ A\"}. Now @{text a} must be strictly longer than @{text "x - x'\<^isub>m\<^isub>a\<^isub>x"}, + otherwise @{text "x'\<^isub>m\<^isub>a\<^isub>x"} is not the longest prefix. That means @{text a} + `overlaps' with @{text z}, splitting it into two components @{text "z\<^isub>a"} and + @{text "z\<^isub>b"}. For this we know that @{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z\<^isub>a \ A"} and + @{term "z\<^isub>b \ A\"}. To cut a story short, we have divided @{term "x @ z \ A\"} + such that we have a string @{text a} with @{text "a \ A"} that lies just on the + `border' of @{text x} and @{text z}. This string is @{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z\<^isub>a"}. + + In order to show that @{term "x @ z \ A\"} implies @{term "y @ z \ A\"}, we use + the following tagging-function: + % + \begin{center} + @{thm tag_str_STAR_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip + \end{center} + + \begin{proof}[@{const STAR}-Case] + If @{term "finite (UNIV // \A)"} + then @{term "finite (Pow (UNIV // \A))"} holds. The range of + @{term "tag_str_STAR A"} is a subset of this set, and therefore finite. + Again we have to show injectivity of this tagging-function as + % + \begin{center} + @{term "\z. tag_str_STAR A x = tag_str_STAR A y \ x @ z \ A\ \ y @ z \ A\"} + \end{center} + % + \noindent + We first need to consider the case that @{text x} is the empty string. + From the assumption we can infer @{text y} is the empty string and + clearly have @{term "y @ z \ A\"}. In case @{text x} is not the empty + string, we can divide the string @{text "x @ z"} as shown in the picture + above. By the tagging-function we have + % + \begin{center} + @{term "\A `` {(x - x'\<^isub>m\<^isub>a\<^isub>x)} \ ({\A `` {x - x'} |x'. x' < x \ x' \ A\})"} + \end{center} + % + \noindent + which by assumption is equal to + % + \begin{center} + @{term "\A `` {(x - x'\<^isub>m\<^isub>a\<^isub>x)} \ ({\A `` {y - y'} |y'. y' < y \ y' \ A\})"} + \end{center} + % + \noindent + and we know that we have a @{term "y' \ A\"} and @{text "y' < y"} + and also know @{term "(x - x'\<^isub>m\<^isub>a\<^isub>x) \A (y - y')"}. Unfolding the Myhill-Nerode + relation we know @{term "(y - y') @ z\<^isub>a \ A"}. We also know that @{term "z\<^isub>b \ A\"}. + Therefore @{term "y' @ ((y - y') @ z\<^isub>a) @ z\<^isub>b \ A\"}, which means + @{term "y @ z \ A\"}. As the last step we have to set @{text "A"} to @{term "L r"} and + complete the proof.\qed + \end{proof} +*} + +section {* Second Part based on Partial Derivatives *} + +text {* + We briefly considered using the method Brzozowski presented in the + Appendix of~\cite{Brzozowski64} in order to prove the second + direction of the Myhill-Nerode theorem. There he calculates the + derivatives for regular expressions and shows that for every + language there can be only finitely many of them %derivations (if + regarded equal modulo ACI). We could have used as tagging-function + the set of derivatives of a regular expression with respect to a + language. Using the fact that two strings are Myhill-Nerode related + whenever their derivative is the same, together with the fact that + there are only finitely such derivatives would give us a similar + argument as ours. However it seems not so easy to calculate the set + of derivatives modulo ACI. Therefore we preferred our direct method + of using tagging-functions. + +*} + +section {* Closure Properties *} + + +section {* Conclusion and Related Work *} + +text {* + In this paper we took the view that a regular language is one where there + exists a regular expression that matches all of its strings. Regular + expressions can conveniently be defined as a datatype in HOL-based theorem + provers. For us it was therefore interesting to find out how far we can push + this point of view. We have established in Isabelle/HOL both directions + of the Myhill-Nerode theorem. + % + \begin{theorem}[The Myhill-Nerode Theorem]\mbox{}\\ + A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}. + \end{theorem} + % + \noindent + Having formalised this theorem means we + pushed our point of view quite far. Using this theorem we can obviously prove when a language + is \emph{not} regular---by establishing that it has infinitely many + equivalence classes generated by the Myhill-Nerode relation (this is usually + the purpose of the pumping lemma \cite{Kozen97}). We can also use it to + establish the standard textbook results about closure properties of regular + languages. Interesting is the case of closure under complement, because + it seems difficult to construct a regular expression for the complement + language by direct means. However the existence of such a regular expression + can be easily proved using the Myhill-Nerode theorem since + % + \begin{center} + @{term "s\<^isub>1 \A s\<^isub>2"} if and only if @{term "s\<^isub>1 \(-A) s\<^isub>2"} + \end{center} + % + \noindent + holds for any strings @{text "s\<^isub>1"} and @{text + "s\<^isub>2"}. Therefore @{text A} and the complement language @{term "-A"} give rise to the same + partitions. Proving the existence of such a regular expression via automata + using the standard method would + be quite involved. It includes the + steps: regular expression @{text "\"} non-deterministic automaton @{text + "\"} deterministic automaton @{text "\"} complement automaton @{text "\"} + regular expression. + + While regular expressions are convenient in formalisations, they have some + limitations. One is that there seems to be no method of calculating a + minimal regular expression (for example in terms of length) for a regular + language, like there is + for automata. On the other hand, efficient regular expression matching, + without using automata, poses no problem \cite{OwensReppyTuron09}. + For an implementation of a simple regular expression matcher, + whose correctness has been formally established, we refer the reader to + Owens and Slind \cite{OwensSlind08}. + + + Our formalisation consists of 780 lines of Isabelle/Isar code for the first + direction and 460 for the second, plus around 300 lines of standard material about + regular languages. While this might be seen as too large to count as a + concise proof pearl, this should be seen in the context of the work done by + Constable at al \cite{Constable00} who formalised the Myhill-Nerode theorem + in Nuprl using automata. They write that their four-member team needed + something on the magnitude of 18 months for their formalisation. The + estimate for our formalisation is that we needed approximately 3 months and + this included the time to find our proof arguments. Unlike Constable et al, + who were able to follow the proofs from \cite{HopcroftUllman69}, we had to + find our own arguments. So for us the formalisation was not the + bottleneck. It is hard to gauge the size of a formalisation in Nurpl, but + from what is shown in the Nuprl Math Library about their development it + seems substantially larger than ours. The code of ours can be found in the + Mercurial Repository at + \mbox{\url{http://www4.in.tum.de/~urbanc/regexp.html}}. + + + Our proof of the first direction is very much inspired by \emph{Brzozowski's + algebraic method} used to convert a finite automaton to a regular + expression \cite{Brzozowski64}. The close connection can be seen by considering the equivalence + classes as the states of the minimal automaton for the regular language. + However there are some subtle differences. Since we identify equivalence + classes with the states of the automaton, then the most natural choice is to + characterise each state with the set of strings starting from the initial + state leading up to that state. Usually, however, the states are characterised as the + strings starting from that state leading to the terminal states. The first + choice has consequences about how the initial equational system is set up. We have + the $\lambda$-term on our `initial state', while Brzozowski has it on the + terminal states. This means we also need to reverse the direction of Arden's + Lemma. + + This is also where our method shines, because we can completely + side-step the standard argument \cite{Kozen97} where automata need + to be composed, which as stated in the Introduction is not so easy + to formalise in a HOL-based theorem prover. However, it is also the + direction where we had to spend most of the `conceptual' time, as + our proof-argument based on tagging-functions is new for + establishing the Myhill-Nerode theorem. All standard proofs of this + direction proceed by arguments over automata.\medskip + + \noindent + {\bf Acknowledgements:} We are grateful for the comments we received from Larry + Paulson. + +*} + + +(*<*) +end +(*>*) \ No newline at end of file