# HG changeset patch # User urbanc # Date 1297074203 0 # Node ID 426070e68b213203fb87ec77fb5a27ff9cd9b3d4 # Parent 8ab3a06577cfde9ae3366ec0fc12137326b3e7fb more on the paper diff -r 8ab3a06577cf -r 426070e68b21 Myhill_1.thy --- a/Myhill_1.thy Sun Feb 06 11:21:12 2011 +0000 +++ b/Myhill_1.thy Mon Feb 07 10:23:23 2011 +0000 @@ -133,27 +133,29 @@ shows "x @ y \ A\" using a b by (blast intro: star_intro1 star_intro2) +lemma star_cases: + shows "A\ = {[]} \ A ;; A\" +proof + { fix x + have "x \ A\ \ x \ {[]} \ A ;; A\" + unfolding Seq_def + by (induct rule: star_induct) (auto) + } + then show "A\ \ {[]} \ A ;; A\" by auto +next + show "{[]} \ A ;; A\ \ A\" + unfolding Seq_def by auto +qed + lemma star_decom: - "\x \ A\; x \ []\ \(\ a b. x = a @ b \ a \ [] \ a \ A \ b \ A\)" + assumes a: "x \ A\" "x \ []" + shows "\a b. x = a @ b \ a \ [] \ a \ A \ b \ A\" +using a apply(induct rule: star_induct) apply(simp) apply(blast) done -lemma lang_star_cases: - shows "L\ = {[]} \ L ;; L\" -proof - { fix x - have "x \ L\ \ x \ {[]} \ L ;; L\" - unfolding Seq_def - by (induct rule: star_induct) (auto) - } - then show "L\ \ {[]} \ L ;; L\" by auto -next - show "{[]} \ L ;; L\ \ L\" - unfolding Seq_def by auto -qed - lemma shows seq_Union_left: "B ;; (\n. A \ n) = (\n. B ;; (A \ n))" and seq_Union_right: "(\n. A \ n) ;; B = (\n. (A \ n) ;; B)" @@ -237,7 +239,7 @@ assume eq: "X = B ;; A\" have "A\ = {[]} \ A\ ;; A" unfolding seq_star_comm[symmetric] - by (rule lang_star_cases) + by (rule star_cases) then have "B ;; A\ = B ;; ({[]} \ A\ ;; A)" by (rule seq_add_left) also have "\ = B \ B ;; (A\ ;; A)" @@ -351,7 +353,7 @@ @{text "\A"} is an equivalence class defined by language @{text "A"}. *} definition - str_eq_rel ("\_" [100] 100) + str_eq_rel :: "lang \ (string \ string) set" ("\_" [100] 100) where "\A \ {(x, y). (\z. x @ z \ A \ y @ z \ A)}" @@ -361,7 +363,9 @@ *} definition - "finals A \ {\A `` {x} | x . x \ A}" + finals :: "lang \ lang set" +where + "finals A \ {\A `` {x} | x . x \ A}" text {* The following lemma establishes the relationshipt between @@ -464,14 +468,19 @@ @{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"} is: - *} +*} + +definition + transition :: "lang \ char \ lang \ bool" ("_ \_\_" [100,100,100] 100) +where + "Y \c\ X \ Y ;; {[c]} \ X" definition "init_rhs CS X \ if ([] \ X) then - {Lam(EMPTY)} \ {Trn Y (CHAR c) | Y c. Y \ CS \ Y ;; {[c]} \ X} + {Lam EMPTY} \ {Trn Y (CHAR c) | Y c. Y \ CS \ Y \c\ X} else - {Trn Y (CHAR c)| Y c. Y \ CS \ Y ;; {[c]} \ X}" + {Trn Y (CHAR c)| Y c. Y \ CS \ Y \c\ X}" text {* In the definition of @{text "init_rhs"}, the term @@ -483,7 +492,7 @@ With the help of @{text "init_rhs"}, the equitional system descrbing the formation of every equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}. - *} +*} definition "eqs CS \ {(X, init_rhs CS X) | X. X \ CS}" @@ -546,10 +555,11 @@ With the help of the two functions immediately above, Ardens' transformation on right hand side @{text "rhs"} is implemented by the following function @{text "arden_variate X rhs"}. - After this transformation, the recursive occurent of @{text "X"} - in @{text "rhs"} will be eliminated, while the - string set defined by @{text "rhs"} is kept unchanged. - *} + After this transformation, the recursive occurence of @{text "X"} + in @{text "rhs"} will be eliminated, while the string set defined + by @{text "rhs"} is kept unchanged. +*} + definition "arden_variate X rhs \ append_rhs_rexp (rhs - items_of rhs X) (STAR (rexp_of rhs X))" @@ -580,9 +590,9 @@ "eqs_subst ES X xrhs \ {(Y, rhs_subst yrhs X xrhs) | Y yrhs. (Y, yrhs) \ ES}" text {* - The computation of regular expressions for equivalent classes is accomplished + The computation of regular expressions for equivalence classes is accomplished using a iteration principle given by the following lemma. - *} +*} lemma wf_iter [rule_format]: fixes f @@ -773,7 +783,7 @@ text {* The following several lemmas until @{text "init_ES_satisfy_Inv"} shows that the initial equational system satisfies invariant @{text "Inv"}. - *} +*} lemma defined_by_str: "\s \ X; X \ UNIV // (\Lang)\ \ X = (\Lang) `` {s}" @@ -824,16 +834,17 @@ and "clist \ Y" using decom "(1)" every_eqclass_has_transition by blast hence - "x \ L {Trn Y (CHAR c)| Y c. Y \ UNIV // (\Lang) \ Y ;; {[c]} \ X}" - using "(1)" decom + "x \ L {Trn Y (CHAR c)| Y c. Y \ UNIV // (\Lang) \ Y \c\ X}" + unfolding transition_def + using "(1)" decom by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def) - thus ?thesis using X_in_eqs "(1)" - by (simp add:eqs_def init_rhs_def) + thus ?thesis using X_in_eqs "(1)" + by (simp add: eqs_def init_rhs_def) qed qed next show "L xrhs \ X" using X_in_eqs - by (auto simp:eqs_def init_rhs_def) + by (auto simp:eqs_def init_rhs_def transition_def) qed lemma finite_init_rhs: @@ -851,7 +862,7 @@ ultimately show ?thesis by auto qed - thus ?thesis by (simp add:init_rhs_def) + thus ?thesis by (simp add:init_rhs_def transition_def) qed lemma init_ES_satisfy_Inv: @@ -884,7 +895,8 @@ From this point until @{text "iteration_step"}, it is proved that there exists iteration steps which keep @{text "Inv(ES)"} while decreasing the size of @{text "ES"}. - *} +*} + lemma arden_variate_keeps_eq: assumes l_eq_r: "X = L rhs" and not_empty: "[] \ L (rexp_of rhs X)" diff -r 8ab3a06577cf -r 426070e68b21 Paper/Paper.thy --- a/Paper/Paper.thy Sun Feb 06 11:21:12 2011 +0000 +++ b/Paper/Paper.thy Mon Feb 07 10:23:23 2011 +0000 @@ -22,7 +22,8 @@ REL ("\") and UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and L ("L '(_')" [0] 101) and - EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) + EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and + transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longrightarrow}}> _" [100, 100, 100] 100) (*>*) @@ -40,7 +41,7 @@ introduce finite automata and then define everything in terms of them. For example, a regular language is normally defined as one whose strings are recognised by a finite deterministic automaton. This approach has many - benefits. Among them is that it is easy to convince oneself from the fact that + benefits. Among them is the fact that it is easy to convince oneself that regular languages are closed under complementation: one just has to exchange the accepting and non-accepting states in the corresponding automaton to obtain an automaton for the complement language. The problem, however, lies with @@ -139,7 +140,7 @@ to be no substantial formalisation of automata theory and regular languages carried out in a HOL-based theorem prover. We are only aware of the large formalisation of automata theory in Nuprl \cite{Constable00} and - some smaller formalisations in Coq, for example \cite{Filliatre97}. + some smaller formalisations in Coq (for example \cite{Filliatre97}). In this paper, we will not attempt to formalise automata theory, but take a completely different approach to regular languages. Instead of defining a regular language as one @@ -153,12 +154,12 @@ \noindent The reason is that regular expressions, unlike graphs and matrices, can - be easily defined as inductive datatype. Therefore a corresponding reasoning - infrastructure comes for free. This has recently been used in HOL4 for formalising regular - expression matching based on derivatives \cite{OwensSlind08}. The purpose of this paper is to - show that a central result about regular languages, the Myhill-Nerode theorem, - can be recreated by only using regular expressions. This theorem gives a necessary - and sufficient condition for when a language is regular. As a corollary of this + be easily defined as inductive datatype. Consequently a corresponding reasoning + infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation + of regular expression matching based on derivatives \cite{OwensSlind08}. The purpose of this paper is to + show that a central result about regular languages---the Myhill-Nerode theorem---can + be recreated by only using regular expressions. This theorem gives necessary + and sufficient conditions for when a language is regular. As a corollary of this theorem we can easily establish the usual closure properties, including complementation, for regular languages.\smallskip @@ -176,11 +177,9 @@ Strings in Isabelle/HOL are lists of characters with the \emph{empty string} being represented by the empty list, written @{term "[]"}. \emph{Languages} are sets of strings. The language containing all strings is written in - Isabelle/HOL as @{term "UNIV::string set"}. The notation for the quotient - of a language @{text A} according to a relation @{term REL} is @{term "A // - REL"}. The concatenation of two languages is written @{term "A ;; B"}; a - language raised to the power $n$ is written @{term "A \ n"}. Both concepts - are defined as + Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages + is written @{term "A ;; B"} and a language raised to the power $n$ is written + @{term "A \ n"}. Their definitions are \begin{center} @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]} @@ -192,10 +191,31 @@ \noindent where @{text "@"} is the usual list-append operation. The Kleene-star of a language @{text A} - is defined as the union over all powers, namely @{thm Star_def}. + is defined as the union over all powers, namely @{thm Star_def}. In the paper + we will often make use of the following properties. + \begin{proposition}\label{langprops}\mbox{}\\ + \begin{tabular}{@ {}ll@ {\hspace{10mm}}ll} + (i) & @{thm star_cases} & (ii) & @{thm[mode=IfThen] pow_length}\\ + (iii) & @{thm seq_Union_left} & + \end{tabular} + \end{proposition} + + \noindent + We omit the proofs of these properties, but invite the reader to consult + our formalisation.\footnote{Available at ???} + + + The notation for the quotient of a language @{text A} according to an + equivalence relation @{term REL} is @{term "A // REL"}. We will write + @{text "\x\\<^isub>\"} for the equivalence class defined + as @{text "{y | y \ x}"}. + + Central to our proof will be the solution of equational systems - involving regular expressions. For this we will use the following ``reverse'' + involving regular expressions. For this we will use Arden's lemma \cite{} + which solves equations of the form @{term "X = A ;; X \ B"} provided + @{term "[] \ A"}. However we will need the following ``reverse'' version of Arden's lemma. \begin{lemma}[Reverse Arden's Lemma]\mbox{}\\ @@ -206,7 +226,8 @@ \begin{proof} For the right-to-left direction we assume @{thm (rhs) ardens_revised} and show - that @{thm (lhs) ardens_revised} holds. From Lemma ??? we have @{term "A\ = {[]} \ A ;; A\"}, + that @{thm (lhs) ardens_revised} holds. From Prop.~\ref{langprops}$(i)$ + we have @{term "A\ = {[]} \ A ;; A\"}, which is equal to @{term "A\ = {[]} \ A\ ;; A"}. Adding @{text B} to both sides gives @{term "B ;; A\ = B ;; ({[]} \ A\ ;; A)"}, whose right-hand side is equal to @{term "(B ;; A\) ;; A \ B"}. This completes this direction. @@ -220,15 +241,17 @@ \noindent Using this property we can show that @{term "B ;; (A \ n) \ X"} holds for - all @{text n}. From this we can infer @{term "B ;; A\ \ X"} using Lemma ???. + all @{text n}. From this we can infer @{term "B ;; A\ \ X"} using the definition + of @{text "\"}. For the inclusion in the other direction we assume a string @{text s} with length @{text k} is element in @{text X}. Since @{thm (prem 1) ardens_revised} - we know that @{term "s \ X ;; (A \ Suc k)"} since its length is only @{text k} + we know by Prop.~\ref{langprops}$(ii)$ that + @{term "s \ X ;; (A \ Suc k)"} since its length is only @{text k} (the strings in @{term "X ;; (A \ Suc k)"} are all longer). From @{text "(*)"} it follows then that @{term s} must be element in @{term "(\m\{0..k}. B ;; (A \ m))"}. This in turn - implies that @{term s} is in @{term "(\n. B ;; (A \ n))"}. Using Lemma ??? this - is equal to @{term "B ;; A\"}, as we needed to show.\qed + implies that @{term s} is in @{term "(\n. B ;; (A \ n))"}. Using Prop.~\ref{langprops}$(iii)$ + this is equal to @{term "B ;; A\"}, as we needed to show.\qed \end{proof} \noindent @@ -275,13 +298,27 @@ @{thm str_eq_rel_def[simplified]} \end{definition} - \begin{definition} @{text "finals A"} are the equivalence classes that contain - strings from @{text A}\\ + \noindent + It is easy to see that @{term "\A"} is an equivalence relation, which partitions + the set of all string, @{text "UNIV"}, into a set of equivalence classed. We define + the set @{term "finals A"} as those equivalence classes that contain strings of + @{text A}, namely + + \begin{equation} @{thm finals_def} - \end{definition} + \end{equation} + + \noindent + It is easy to show that @{thm lang_is_union_of_finals} holds. We can also define + a notion of \emph{transition} between equivalence classes as - @{thm lang_is_union_of_finals} + \begin{equation} + @{thm transition_def} + \end{equation} + \noindent + which means if we add the character @{text c} to all strings in the equivalence + class @{text Y} HERE \begin{theorem} Given a language @{text A}. diff -r 8ab3a06577cf -r 426070e68b21 tphols-2011/myhill.pdf Binary file tphols-2011/myhill.pdf has changed