diff -r feb7b31d6bf1 -r 21ee3a852a02 Journal/Paper.thy --- a/Journal/Paper.thy Mon Jul 25 15:40:12 2011 +0000 +++ b/Journal/Paper.thy Mon Jul 25 18:00:52 2011 +0000 @@ -1,6 +1,6 @@ (*<*) theory Paper -imports "../Closure" +imports "../Closures" begin declare [[show_question_marks = false]] @@ -16,17 +16,40 @@ "Append_rexp2 r_itm r \ Append_rexp r r_itm" +abbreviation + "pow" (infixl "\" 100) +where + "A \ n \ A ^^ n" + +syntax (latex output) + "_Collect" :: "pttrn => bool => 'a set" ("(1{_ | _})") + "_CollectIn" :: "pttrn => 'a set => bool => 'a set" ("(1{_ \ _ | _})") +translations + "_Collect p P" <= "{p. P}" + "_Collect p P" <= "{p|xs. P}" + "_CollectIn p A P" <= "{p : A. P}" + +abbreviation "NULL \ Zero" +abbreviation "EMPTY \ One" +abbreviation "CHAR \ Atom" +abbreviation "ALT \ Plus" +abbreviation "SEQ \ Times" +abbreviation "STAR \ Star" + + +ML {* @{term "op ^^"} *} + notation (latex output) str_eq_rel ("\\<^bsub>_\<^esub>") and str_eq ("_ \\<^bsub>_\<^esub> _") and - Seq (infixr "\" 100) and - Star ("_\<^bsup>\\<^esup>") and + conc (infixr "\" 100) and + star ("_\<^bsup>\\<^esup>") and pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and Suc ("_+1" [100] 100) and quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and REL ("\") and UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and - L_rexp ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and + lang ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and Lam ("\'(_')" [100] 100) and Trn ("'(_, _')" [100, 100] 100) and EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and @@ -34,18 +57,33 @@ Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and + uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and - tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and - tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and - tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and - tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and - tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and - tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100) + tag_str_Plus ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and + tag_str_Plus ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and + tag_str_Times ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and + tag_str_Times ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and + tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and + tag_str_Star ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100) lemma meta_eq_app: shows "f \ \x. g x \ f x \ g x" by auto +lemma conc_def': + "A \ B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \ A \ s\<^isub>2 \ B}" +unfolding conc_def by simp + +lemma conc_Union_left: + shows "B \ (\n. A \ n) = (\n. B \ (A \ n))" +unfolding conc_def by auto + +lemma test: + assumes X_in_eqs: "(X, rhs) \ Init (UNIV // \A)" + shows "X = \ (lang_trm ` rhs)" +using assms l_eq_r_in_eqs by (simp) + + (* THEOREMS *) notation (Rule output) @@ -91,33 +129,40 @@ Science, with many beautiful theorems and many useful algorithms. There is a wide range of textbooks on this subject, many of which are aimed at students and contain very detailed `pencil-and-paper' proofs - (e.g.~\cite{Kozen97}). It seems natural to exercise theorem provers by - formalising the theorems and by verifying formally the algorithms. - Some of the popular theorem provers are based on Higher-Order Logic (HOL). + (e.g.~\cite{Kozen97, HopcroftUllman69}). It seems natural to exercise theorem provers by + formalising the theorems and by verifying formally the algorithms. A + popular choice for a theorem prover would be one based on Higher-Order Logic + (HOL), such as HOL4, HOLlight and Isabelle/HOL. For our development we will + use the latter theorem prover. One distinguishing feature of HOL is it's + type system based on Church's Simple Theory of Types \cite{Church40}. The + limitations of this type system are one of the main motivations + behind the work presented in this paper. - There is however a problem: the typical approach to regular languages is to + The typical approach to regular languages is to introduce finite automata and then define everything in terms of them. For example, a regular language is normally defined as one whose strings are recognised by a finite deterministic automaton. This approach has many benefits. Among them is the fact that it is easy to convince oneself that regular languages are closed under complementation: one just has to exchange the accepting and non-accepting states in the corresponding automaton to - obtain an automaton for the complement language. The problem, however, lies with - formalising such reasoning in a HOL-based theorem prover, in our case - Isabelle/HOL. Automata are built up from states and transitions that - need to be represented as graphs, matrices or functions, none - of which can be defined as an inductive datatype. + obtain an automaton for the complement language. The problem, however, lies + with formalising such reasoning in a HOL-based theorem prover. Automata are + built up from states and transitions that need to be represented as graphs, + matrices or functions, none of which can be defined as an inductive + datatype. In case of graphs and matrices, this means we have to build our own reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor HOLlight support them with libraries. Even worse, reasoning about graphs and - matrices can be a real hassle in HOL-based theorem provers. Consider for + matrices can be a real hassle in HOL-based theorem provers, because + we have to be able to combine automata. Consider for example the operation of sequencing two automata, say $A_1$ and $A_2$, by connecting the accepting states of $A_1$ to the initial state of $A_2$: % + \begin{center} \begin{tabular}{ccc} - \begin{tikzpicture}[scale=0.8] + \begin{tikzpicture}[scale=0.9] %\draw[step=2mm] (-1,-1) grid (1,1); \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); @@ -143,7 +188,7 @@ & - \begin{tikzpicture}[scale=0.8] + \begin{tikzpicture}[scale=0.9] %\draw[step=2mm] (-1,-1) grid (1,1); \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); @@ -170,20 +215,24 @@ \end{center} \noindent - On `paper' we can define the corresponding graph in terms of the disjoint + On `paper' or a theorem prover based on set-theory, we can define the corresponding + graph in terms of the disjoint union of the state nodes. Unfortunately in HOL, the standard definition for disjoint union, namely % \begin{equation}\label{disjointunion} - @{term "UPLUS A\<^isub>1 A\<^isub>2 \ {(1, x) | x. x \ A\<^isub>1} \ {(2, y) | y. y \ A\<^isub>2}"} + @{text "A\<^isub>1 \ A\<^isub>2 \ {(1, x) | x \ A\<^isub>1} \ {(2, y) | y \ A\<^isub>2}"} \end{equation} \noindent changes the type---the disjoint union is not a set, but a set of pairs. Using this definition for disjoint union means we do not have a single type for automata and hence will not be able to state certain properties about \emph{all} - automata, since there is no type quantification available in HOL (unlike in Coq, for example). An - alternative, which provides us with a single type for automata, is to give every + automata, since there is no type quantification available in HOL (unlike in Coq, for example). + As a result, we would not be able to define a language being regular + in terms of the existence of an automata. + + An alternative, which provides us with a single type for automata, is to give every state node an identity, for example a natural number, and then be careful to rename these identities apart whenever connecting two automata. This results in clunky proofs @@ -219,28 +268,42 @@ \noindent - Moreover, it is not so clear how to conveniently impose a finiteness condition - upon functions in order to represent \emph{finite} automata. The best is - probably to resort to more advanced reasoning frameworks, such as \emph{locales} - or \emph{type classes}, - which are \emph{not} available in all HOL-based theorem provers. + Moreover, it is not so clear how to conveniently impose a finiteness + condition upon functions in order to represent \emph{finite} automata. The + best is probably to resort to more advanced reasoning frameworks, such as + \emph{locales} or \emph{type classes}, which are \emph{not} available in all + HOL-based theorem provers. - {\bf add commnets from Brozowski} + Because of these problems to do with representing automata, there seems to + be no substantial formalisation of automata theory and regular languages + carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes + the link between regular expressions and automata in the context of + lexing. Berghofer and Reiter \cite{BerghoferReiter09} formalise automata + working over bit strings in the context of Presburger arithmetic. The only + larger formalisations of automata theory are carried out in Nuprl + \cite{Constable00} and in Coq \cite{Filliatre97}. - Because of these problems to do with representing automata, there seems - to be no substantial formalisation of automata theory and regular languages - carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes - the link between regular expressions and automata in - the context of lexing. Berghofer and Reiter \cite{BerghoferReiter09} - formalise automata working over - bit strings in the context of Presburger arithmetic. - The only larger formalisations of automata theory - are carried out in Nuprl \cite{Constable00} and in Coq \cite{Filliatre97}. + Also one might consider the Myhill-Nerode theorem as well-worn stock + material for a computer scientists, but paper proofs of this theorem contain some + subtle side-conditions which are easily overlooked and which make formal reasoning + painful. For example in Kozen's proof \cite{Kozen97} it is not sufficient to + have just any automata recognising a regular language, but one which does + not have inaccessible states. This means if we define a regular language + for which \emph{any} finite automaton exists, then one has to ensure that + another equivalent can be found satisfying the side-conditions. Similarly + Brozowski mentiones in \cite{Brozowski10} side-conditions of finite automata + in connection of state-complexity: there it is required that automata + must be complete in the sense of having a total transition function. + Furthermore, if a `sink' state is present which accepts no word, then there + must be only one such state. . . . + Such 'little' lemmas make formalisation of these results in a theroem + prover hair-pulling experiences. In this paper, we will not attempt to formalise automata theory in - Isabelle/HOL, but take a different approach to regular - languages. Instead of defining a regular language as one where there exists - an automaton that recognises all strings of the language, we define a + Isabelle/HOL nor attempt to formalise any automata proof from the + literature, but take a different approach to regular languages than is + usually taken. Instead of defining a regular language as one where there + exists an automaton that recognises all strings of the language, we define a regular language as: \begin{dfntn} @@ -249,31 +312,37 @@ \end{dfntn} \noindent - The reason is that regular expressions, unlike graphs, matrices and functions, can - be easily defined as inductive datatype. Consequently a corresponding reasoning - infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation - of regular expression matching based on derivatives \cite{OwensSlind08} and - with an equivalence checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}. - The purpose of this paper is to - show that a central result about regular languages---the Myhill-Nerode theorem---can - be recreated by only using regular expressions. This theorem gives necessary - and sufficient conditions for when a language is regular. As a corollary of this - theorem we can easily establish the usual closure properties, including - complementation, for regular languages.\smallskip + The reason is that regular expressions, unlike graphs, matrices and + functions, can be easily defined as inductive datatype. Consequently a + corresponding reasoning infrastructure (like induction or recursion) comes + for free. This has recently been exploited in HOL4 with a formalisation of + regular expression matching based on derivatives \cite{OwensSlind08} and + with an equivalence checker for regular expressions in Isabelle/HOL + \cite{KraussNipkow11}. The purpose of this paper is to show that a central + result about regular languages---the Myhill-Nerode theorem---can be + recreated by only using regular expressions. This theorem gives necessary + and sufficient conditions for when a language is regular. As a corollary of + this theorem we can easily establish the usual closure properties, including + complementation, for regular languages.\medskip \noindent - {\bf Contributions:} - There is an extensive literature on regular languages. - To our best knowledge, our proof of the Myhill-Nerode theorem is the - first that is based on regular expressions, only. We prove the part of this theorem - stating that a regular expression has only finitely many partitions using certain - tagging-functions. Again to our best knowledge, these tagging-functions have - not been used before to establish the Myhill-Nerode theorem. + {\bf Contributions:} There is an extensive literature on regular languages. + To our best knowledge, our proof of the Myhill-Nerode theorem is the first + that is based on regular expressions, only. The part of this theorem stating + that finitely many partitions of a language w.r.t.~to the Myhill-Nerode + relation imply that the language is regular is proved by a folklore argument + of solving equational sytems. For the other part we give two proofs: a + direct proof using certain tagging-functions, and an indirect proof using + Antimirov's partial derivatives \cite{Antimirov95} (also earlier russion work). + Again to our best knowledge, the tagging-functions have not been used before to + establish the Myhill-Nerode theorem. + *} section {* Preliminaries *} text {* + \noindent Strings in Isabelle/HOL are lists of characters with the \emph{empty string} being represented by the empty list, written @{term "[]"}. \emph{Languages} are sets of strings. The language containing all strings is written in @@ -282,23 +351,23 @@ @{term "A \ n"}. They are defined as usual \begin{center} - @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]} + @{thm conc_def'[THEN eq_reflection, where A1="A" and B1="B"]} \hspace{7mm} - @{thm pow.simps(1)[THEN eq_reflection, where A1="A"]} + @{thm lang_pow.simps(1)[THEN eq_reflection, where A1="A"]} \hspace{7mm} - @{thm pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} + @{thm lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} \end{center} \noindent where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A} - is defined as the union over all powers, namely @{thm Star_def}. In the paper + is defined as the union over all powers, namely @{thm star_def}. In the paper we will make use of the following properties of these constructions. \begin{prpstn}\label{langprops}\mbox{}\\ \begin{tabular}{@ {}ll} (i) & @{thm star_cases} \\ (ii) & @{thm[mode=IfThen] pow_length}\\ - (iii) & @{thm seq_Union_left} \\ + (iii) & @{thm conc_Union_left} \\ \end{tabular} \end{prpstn} @@ -377,18 +446,18 @@ \begin{center} \begin{tabular}{c@ {\hspace{10mm}}c} \begin{tabular}{rcl} - @{thm (lhs) L_rexp.simps(1)} & @{text "\"} & @{thm (rhs) L_rexp.simps(1)}\\ - @{thm (lhs) L_rexp.simps(2)} & @{text "\"} & @{thm (rhs) L_rexp.simps(2)}\\ - @{thm (lhs) L_rexp.simps(3)[where c="c"]} & @{text "\"} & @{thm (rhs) L_rexp.simps(3)[where c="c"]}\\ + @{thm (lhs) lang.simps(1)} & @{text "\"} & @{thm (rhs) lang.simps(1)}\\ + @{thm (lhs) lang.simps(2)} & @{text "\"} & @{thm (rhs) lang.simps(2)}\\ + @{thm (lhs) lang.simps(3)[where a="c"]} & @{text "\"} & @{thm (rhs) lang.simps(3)[where a="c"]}\\ \end{tabular} & \begin{tabular}{rcl} - @{thm (lhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & - @{thm (rhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ - @{thm (lhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & - @{thm (rhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ - @{thm (lhs) L_rexp.simps(6)[where r="r"]} & @{text "\"} & - @{thm (rhs) L_rexp.simps(6)[where r="r"]}\\ + @{thm (lhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\"} & + @{thm (rhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\ + @{thm (lhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\"} & + @{thm (rhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\ + @{thm (lhs) lang.simps(6)[where r="r"]} & @{text "\"} & + @{thm (rhs) lang.simps(6)[where r="r"]}\\ \end{tabular} \end{tabular} \end{center} @@ -413,6 +482,10 @@ section {* The Myhill-Nerode Theorem, First Part *} text {* + Folklore: Henzinger (arden-DFA-regexp.pdf) + + + \noindent The key definition in the Myhill-Nerode theorem is the \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two strings are related, provided there is no distinguishing extension in this @@ -533,8 +606,8 @@ \begin{center} @{text "\(Y, r) \"} % - @{thm (rhs) L_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} - @{thm L_trm.simps(1)[where r="r", THEN eq_reflection]} + @{thm (rhs) lang_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} + @{thm lang_trm.simps(1)[where r="r", THEN eq_reflection]} \end{center} \noindent @@ -931,9 +1004,9 @@ \begin{center} \begin{tabular}{l} - @{thm quot_null_eq}\\ - @{thm quot_empty_subset}\\ - @{thm quot_char_subset} + @{thm quot_zero_eq}\\ + @{thm quot_one_subset}\\ + @{thm quot_atom_subset} \end{tabular} \end{center} @@ -1026,7 +1099,7 @@ We take as tagging-function % \begin{center} - @{thm tag_str_ALT_def[where A="A" and B="B", THEN meta_eq_app]} + @{thm tag_str_Plus_def[where A="A" and B="B", THEN meta_eq_app]} \end{center} \noindent @@ -1159,7 +1232,7 @@ following tagging-function % \begin{center} - @{thm tag_str_SEQ_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]} + @{thm tag_str_Times_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]} \end{center} \noindent @@ -1273,7 +1346,7 @@ the following tagging-function: % \begin{center} - @{thm tag_str_STAR_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip + @{thm tag_str_Star_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip \end{center} \begin{proof}[@{const STAR}-Case] @@ -1427,6 +1500,9 @@ our proof-argument based on tagging-functions is new for establishing the Myhill-Nerode theorem. All standard proofs of this direction proceed by arguments over automata.\medskip + + We expect that the development of Krauss \& Nipkoe gets easier by + using partial derivatives. \noindent {\bf Acknowledgements:} We are grateful for the comments we received from Larry