Journal/Paper.thy
changeset 172 21ee3a852a02
parent 170 b1258b7d2789
child 173 d371536861bc
--- a/Journal/Paper.thy	Mon Jul 25 15:40:12 2011 +0000
+++ b/Journal/Paper.thy	Mon Jul 25 18:00:52 2011 +0000
@@ -1,6 +1,6 @@
 (*<*)
 theory Paper
-imports "../Closure" 
+imports "../Closures" 
 begin
 
 declare [[show_question_marks = false]]
@@ -16,17 +16,40 @@
   "Append_rexp2 r_itm r \<equiv> Append_rexp r r_itm"
 
 
+abbreviation
+  "pow" (infixl "\<up>" 100)
+where
+  "A \<up> n \<equiv> A ^^ n"
+
+syntax (latex output)
+  "_Collect" :: "pttrn => bool => 'a set"              ("(1{_ | _})")
+  "_CollectIn" :: "pttrn => 'a set => bool => 'a set"   ("(1{_ \<in> _ | _})")
+translations
+  "_Collect p P"      <= "{p. P}"
+  "_Collect p P"      <= "{p|xs. P}"
+  "_CollectIn p A P"  <= "{p : A. P}"
+
+abbreviation "NULL \<equiv> Zero"
+abbreviation "EMPTY \<equiv> One"
+abbreviation "CHAR \<equiv> Atom"
+abbreviation "ALT \<equiv> Plus"
+abbreviation "SEQ \<equiv> Times"
+abbreviation "STAR \<equiv> Star"
+
+
+ML {* @{term "op ^^"} *}
+
 notation (latex output)
   str_eq_rel ("\<approx>\<^bsub>_\<^esub>") and
   str_eq ("_ \<approx>\<^bsub>_\<^esub> _") and
-  Seq (infixr "\<cdot>" 100) and
-  Star ("_\<^bsup>\<star>\<^esup>") and
+  conc (infixr "\<cdot>" 100) and
+  star ("_\<^bsup>\<star>\<^esup>") and
   pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and
   Suc ("_+1" [100] 100) and
   quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and
   REL ("\<approx>") and
   UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
-  L_rexp ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
+  lang ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and
   Lam ("\<lambda>'(_')" [100] 100) and 
   Trn ("'(_, _')" [100, 100] 100) and 
   EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and
@@ -34,18 +57,33 @@
   Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and
   Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and
   Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and
+  
   uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and
-  tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and
-  tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and
-  tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and
-  tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and
-  tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and
-  tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100)
+  tag_str_Plus ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and
+  tag_str_Plus ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and
+  tag_str_Times ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and
+  tag_str_Times ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and
+  tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and
+  tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100)
 
 lemma meta_eq_app:
   shows "f \<equiv> \<lambda>x. g x \<Longrightarrow> f x \<equiv> g x"
   by auto
 
+lemma conc_def':
+  "A \<cdot> B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"
+unfolding conc_def by simp
+
+lemma conc_Union_left: 
+  shows "B \<cdot> (\<Union>n. A \<up> n) = (\<Union>n. B \<cdot> (A \<up> n))"
+unfolding conc_def by auto
+
+lemma test:
+  assumes X_in_eqs: "(X, rhs) \<in> Init (UNIV // \<approx>A)"
+  shows "X = \<Union> (lang_trm `  rhs)"
+using assms l_eq_r_in_eqs by (simp)
+
+
 (* THEOREMS *)
 
 notation (Rule output)
@@ -91,33 +129,40 @@
   Science, with many beautiful theorems and many useful algorithms. There is a
   wide range of textbooks on this subject, many of which are aimed at students
   and contain very detailed `pencil-and-paper' proofs
-  (e.g.~\cite{Kozen97}). It seems natural to exercise theorem provers by
-  formalising the theorems and by verifying formally the algorithms.
-  Some of the popular theorem provers are based on Higher-Order Logic (HOL).
+  (e.g.~\cite{Kozen97, HopcroftUllman69}). It seems natural to exercise theorem provers by
+  formalising the theorems and by verifying formally the algorithms.  A
+  popular choice for a theorem prover would be one based on Higher-Order Logic
+  (HOL), such as HOL4, HOLlight and Isabelle/HOL. For our development we will
+  use the latter theorem prover. One distinguishing feature of HOL is it's
+  type system based on Church's Simple Theory of Types \cite{Church40}.  The
+  limitations of this type system are one of the main motivations
+  behind the work presented in this paper.
 
-  There is however a problem: the typical approach to regular languages is to
+  The typical approach to regular languages is to
   introduce finite automata and then define everything in terms of them.  For
   example, a regular language is normally defined as one whose strings are
   recognised by a finite deterministic automaton. This approach has many
   benefits. Among them is the fact that it is easy to convince oneself that
   regular languages are closed under complementation: one just has to exchange
   the accepting and non-accepting states in the corresponding automaton to
-  obtain an automaton for the complement language.  The problem, however, lies with
-  formalising such reasoning in a HOL-based theorem prover, in our case
-  Isabelle/HOL. Automata are built up from states and transitions that 
-  need to be represented as graphs, matrices or functions, none
-  of which can be defined as an inductive datatype. 
+  obtain an automaton for the complement language.  The problem, however, lies
+  with formalising such reasoning in a HOL-based theorem prover. Automata are
+  built up from states and transitions that need to be represented as graphs,
+  matrices or functions, none of which can be defined as an inductive
+  datatype.
 
   In case of graphs and matrices, this means we have to build our own
   reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor
   HOLlight support them with libraries. Even worse, reasoning about graphs and
-  matrices can be a real hassle in HOL-based theorem provers.  Consider for
+  matrices can be a real hassle in HOL-based theorem provers, because
+  we have to be able to combine automata.  Consider for
   example the operation of sequencing two automata, say $A_1$ and $A_2$, by
   connecting the accepting states of $A_1$ to the initial state of $A_2$:
   %  
+
   \begin{center}
   \begin{tabular}{ccc}
-  \begin{tikzpicture}[scale=0.8]
+  \begin{tikzpicture}[scale=0.9]
   %\draw[step=2mm] (-1,-1) grid (1,1);
   
   \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3);
@@ -143,7 +188,7 @@
 
   &
 
-  \begin{tikzpicture}[scale=0.8]
+  \begin{tikzpicture}[scale=0.9]
   %\draw[step=2mm] (-1,-1) grid (1,1);
   
   \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3);
@@ -170,20 +215,24 @@
   \end{center}
 
   \noindent
-  On `paper' we can define the corresponding graph in terms of the disjoint 
+  On `paper' or a theorem prover based on set-theory, we can define the corresponding 
+  graph in terms of the disjoint 
   union of the state nodes. Unfortunately in HOL, the standard definition for disjoint 
   union, namely 
   %
   \begin{equation}\label{disjointunion}
-  @{term "UPLUS A\<^isub>1 A\<^isub>2 \<equiv> {(1, x) | x. x \<in> A\<^isub>1} \<union> {(2, y) | y. y \<in> A\<^isub>2}"}
+  @{text "A\<^isub>1 \<uplus> A\<^isub>2 \<equiv> {(1, x) | x \<in> A\<^isub>1} \<union> {(2, y) | y \<in> A\<^isub>2}"}
   \end{equation}
 
   \noindent
   changes the type---the disjoint union is not a set, but a set of pairs. 
   Using this definition for disjoint union means we do not have a single type for automata
   and hence will not be able to state certain properties about \emph{all}
-  automata, since there is no type quantification available in HOL (unlike in Coq, for example). An
-  alternative, which provides us with a single type for automata, is to give every 
+  automata, since there is no type quantification available in HOL (unlike in Coq, for example). 
+  As a result, we would not be able to define a language being regular
+  in terms of the existence of an automata.
+
+  An alternative, which provides us with a single type for automata, is to give every 
   state node an identity, for example a natural
   number, and then be careful to rename these identities apart whenever
   connecting two automata. This results in clunky proofs
@@ -219,28 +268,42 @@
 
 
   \noindent
-  Moreover, it is not so clear how to conveniently impose a finiteness condition 
-  upon functions in order to represent \emph{finite} automata. The best is
-  probably to resort to more advanced reasoning frameworks, such as \emph{locales}
-  or \emph{type classes},
-  which are \emph{not} available in all HOL-based theorem provers.
+  Moreover, it is not so clear how to conveniently impose a finiteness
+  condition upon functions in order to represent \emph{finite} automata. The
+  best is probably to resort to more advanced reasoning frameworks, such as
+  \emph{locales} or \emph{type classes}, which are \emph{not} available in all
+  HOL-based theorem provers.
 
-  {\bf add commnets from Brozowski}
+  Because of these problems to do with representing automata, there seems to
+  be no substantial formalisation of automata theory and regular languages
+  carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes
+  the link between regular expressions and automata in the context of
+  lexing. Berghofer and Reiter \cite{BerghoferReiter09} formalise automata
+  working over bit strings in the context of Presburger arithmetic.  The only
+  larger formalisations of automata theory are carried out in Nuprl
+  \cite{Constable00} and in Coq \cite{Filliatre97}.
 
-  Because of these problems to do with representing automata, there seems
-  to be no substantial formalisation of automata theory and regular languages 
-  carried out in HOL-based theorem provers. Nipkow  \cite{Nipkow98} establishes
-  the link between regular expressions and automata in
-  the context of lexing. Berghofer and Reiter \cite{BerghoferReiter09} 
-  formalise automata working over 
-  bit strings in the context of Presburger arithmetic.
-  The only larger formalisations of automata theory 
-  are carried out in Nuprl \cite{Constable00} and in Coq \cite{Filliatre97}.
+  Also one might consider the Myhill-Nerode theorem as well-worn stock 
+  material for a computer scientists, but paper proofs of this theorem contain some 
+  subtle side-conditions which are easily overlooked and which make formal reasoning 
+  painful. For example in Kozen's proof \cite{Kozen97} it is not sufficient to
+  have just any automata recognising a regular language, but one which does
+  not have inaccessible states. This means if we define a regular language
+  for which \emph{any} finite automaton exists, then one has to ensure that
+  another equivalent can be found satisfying the side-conditions. Similarly
+  Brozowski mentiones in \cite{Brozowski10} side-conditions of finite automata
+  in connection of state-complexity: there it is required that automata
+  must be complete in the sense of having a total transition function. 
+  Furthermore, if a `sink' state is present which accepts no word, then there
+  must be only one such state. . . .
+  Such 'little' lemmas make formalisation of these results in a theroem
+  prover hair-pulling experiences.
   
   In this paper, we will not attempt to formalise automata theory in
-  Isabelle/HOL, but take a different approach to regular
-  languages. Instead of defining a regular language as one where there exists
-  an automaton that recognises all strings of the language, we define a
+  Isabelle/HOL nor attempt to formalise any automata proof from the
+  literature, but take a different approach to regular languages than is
+  usually taken. Instead of defining a regular language as one where there
+  exists an automaton that recognises all strings of the language, we define a
   regular language as:
 
   \begin{dfntn}
@@ -249,31 +312,37 @@
   \end{dfntn}
   
   \noindent
-  The reason is that regular expressions, unlike graphs, matrices and functions, can
-  be easily defined as inductive datatype. Consequently a corresponding reasoning 
-  infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation
-  of regular expression matching based on derivatives \cite{OwensSlind08} and
-  with an equivalence checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}.  
-  The purpose of this paper is to
-  show that a central result about regular languages---the Myhill-Nerode theorem---can 
-  be recreated by only using regular expressions. This theorem gives necessary
-  and sufficient conditions for when a language is regular. As a corollary of this
-  theorem we can easily establish the usual closure properties, including 
-  complementation, for regular languages.\smallskip
+  The reason is that regular expressions, unlike graphs, matrices and
+  functions, can be easily defined as inductive datatype. Consequently a
+  corresponding reasoning infrastructure (like induction or recursion) comes
+  for free. This has recently been exploited in HOL4 with a formalisation of
+  regular expression matching based on derivatives \cite{OwensSlind08} and
+  with an equivalence checker for regular expressions in Isabelle/HOL
+  \cite{KraussNipkow11}.  The purpose of this paper is to show that a central
+  result about regular languages---the Myhill-Nerode theorem---can be
+  recreated by only using regular expressions. This theorem gives necessary
+  and sufficient conditions for when a language is regular. As a corollary of
+  this theorem we can easily establish the usual closure properties, including
+  complementation, for regular languages.\medskip
   
   \noindent
-  {\bf Contributions:} 
-  There is an extensive literature on regular languages.
-  To our best knowledge, our proof of the Myhill-Nerode theorem is the
-  first that is based on regular expressions, only. We prove the part of this theorem 
-  stating that a regular expression has only finitely many partitions using certain 
-  tagging-functions. Again to our best knowledge, these tagging-functions have
-  not been used before to establish the Myhill-Nerode theorem.
+  {\bf Contributions:} There is an extensive literature on regular languages.
+  To our best knowledge, our proof of the Myhill-Nerode theorem is the first
+  that is based on regular expressions, only. The part of this theorem stating
+  that finitely many partitions of a language w.r.t.~to the Myhill-Nerode
+  relation imply that the language is regular is proved by a folklore argument
+  of solving equational sytems.  For the other part we give two proofs: a
+  direct proof using certain tagging-functions, and an indirect proof using
+  Antimirov's partial derivatives \cite{Antimirov95} (also earlier russion work). 
+  Again to our best knowledge, the tagging-functions have not been used before to
+  establish the Myhill-Nerode theorem.
+
 *}
 
 section {* Preliminaries *}
 
 text {*
+  \noindent
   Strings in Isabelle/HOL are lists of characters with the \emph{empty string}
   being represented by the empty list, written @{term "[]"}.  \emph{Languages}
   are sets of strings. The language containing all strings is written in
@@ -282,23 +351,23 @@
   @{term "A \<up> n"}. They are defined as usual
 
   \begin{center}
-  @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]}
+  @{thm conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}
   \hspace{7mm}
-  @{thm pow.simps(1)[THEN eq_reflection, where A1="A"]}
+  @{thm lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}
   \hspace{7mm}
-  @{thm pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
+  @{thm lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
   \end{center}
 
   \noindent
   where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A}
-  is defined as the union over all powers, namely @{thm Star_def}. In the paper
+  is defined as the union over all powers, namely @{thm star_def}. In the paper
   we will make use of the following properties of these constructions.
   
   \begin{prpstn}\label{langprops}\mbox{}\\
   \begin{tabular}{@ {}ll}
   (i)   & @{thm star_cases}     \\ 
   (ii)  & @{thm[mode=IfThen] pow_length}\\
-  (iii) & @{thm seq_Union_left} \\ 
+  (iii) & @{thm conc_Union_left} \\ 
   \end{tabular}
   \end{prpstn}
 
@@ -377,18 +446,18 @@
   \begin{center}
   \begin{tabular}{c@ {\hspace{10mm}}c}
   \begin{tabular}{rcl}
-  @{thm (lhs) L_rexp.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) L_rexp.simps(1)}\\
-  @{thm (lhs) L_rexp.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) L_rexp.simps(2)}\\
-  @{thm (lhs) L_rexp.simps(3)[where c="c"]} & @{text "\<equiv>"} & @{thm (rhs) L_rexp.simps(3)[where c="c"]}\\
+  @{thm (lhs) lang.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(1)}\\
+  @{thm (lhs) lang.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(2)}\\
+  @{thm (lhs) lang.simps(3)[where a="c"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(3)[where a="c"]}\\
   \end{tabular}
   &
   \begin{tabular}{rcl}
-  @{thm (lhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} &
-       @{thm (rhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
-  @{thm (lhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\<equiv>"} &
-       @{thm (rhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\
-  @{thm (lhs) L_rexp.simps(6)[where r="r"]} & @{text "\<equiv>"} &
-      @{thm (rhs) L_rexp.simps(6)[where r="r"]}\\
+  @{thm (lhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
+       @{thm (rhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
+  @{thm (lhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
+       @{thm (rhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
+  @{thm (lhs) lang.simps(6)[where r="r"]} & @{text "\<equiv>"} &
+      @{thm (rhs) lang.simps(6)[where r="r"]}\\
   \end{tabular}
   \end{tabular}
   \end{center}
@@ -413,6 +482,10 @@
 section {* The Myhill-Nerode Theorem, First Part *}
 
 text {*
+  Folklore: Henzinger (arden-DFA-regexp.pdf)
+
+
+  \noindent
   The key definition in the Myhill-Nerode theorem is the
   \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two 
   strings are related, provided there is no distinguishing extension in this
@@ -533,8 +606,8 @@
   
   \begin{center}
   @{text "\<calL>(Y, r) \<equiv>"} %
-  @{thm (rhs) L_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm}
-  @{thm L_trm.simps(1)[where r="r", THEN eq_reflection]}
+  @{thm (rhs) lang_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm}
+  @{thm lang_trm.simps(1)[where r="r", THEN eq_reflection]}
   \end{center}
 
   \noindent
@@ -931,9 +1004,9 @@
 
   \begin{center}
   \begin{tabular}{l}
-  @{thm quot_null_eq}\\
-  @{thm quot_empty_subset}\\
-  @{thm quot_char_subset}
+  @{thm quot_zero_eq}\\
+  @{thm quot_one_subset}\\
+  @{thm quot_atom_subset}
   \end{tabular}
   \end{center}
 
@@ -1026,7 +1099,7 @@
   We take as tagging-function 
   %
   \begin{center}
-  @{thm tag_str_ALT_def[where A="A" and B="B", THEN meta_eq_app]}
+  @{thm tag_str_Plus_def[where A="A" and B="B", THEN meta_eq_app]}
   \end{center}
 
   \noindent
@@ -1159,7 +1232,7 @@
   following tagging-function
   %
   \begin{center}
-  @{thm tag_str_SEQ_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]}
+  @{thm tag_str_Times_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]}
   \end{center}
 
   \noindent
@@ -1273,7 +1346,7 @@
   the following tagging-function:
   %
   \begin{center}
-  @{thm tag_str_STAR_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip
+  @{thm tag_str_Star_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip
   \end{center}
 
   \begin{proof}[@{const STAR}-Case]
@@ -1427,6 +1500,9 @@
   our proof-argument based on tagging-functions is new for
   establishing the Myhill-Nerode theorem. All standard proofs of this
   direction proceed by arguments over automata.\medskip
+
+  We expect that the development of Krauss \& Nipkoe gets easier by
+  using partial derivatives.
   
   \noindent
   {\bf Acknowledgements:} We are grateful for the comments we received from Larry