diff -r 7743d2ad71d1 -r 61d0a412a3ae Journal/Paper.thy --- a/Journal/Paper.thy Thu Jun 02 16:44:35 2011 +0000 +++ b/Journal/Paper.thy Thu Jun 02 20:02:16 2011 +0000 @@ -1,6 +1,6 @@ (*<*) theory Paper -imports "../Derivs" "~~/src/HOL/Library/LaTeXsugar" +imports "../Closure" begin declare [[show_question_marks = false]] @@ -26,7 +26,7 @@ quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and REL ("\") and UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and - L ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and + L_rexp ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and Lam ("\'(_')" [100] 100) and Trn ("'(_, _')" [100, 100] 100) and EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and @@ -41,16 +41,52 @@ tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100) + lemma meta_eq_app: shows "f \ \x. g x \ f x \ g x" by auto +(* THEOREMS *) + +notation (Rule output) + "==>" ("\<^raw:\mbox{}\inferrule{\mbox{>_\<^raw:}}>\<^raw:{\mbox{>_\<^raw:}}>") + +syntax (Rule output) + "_bigimpl" :: "asms \ prop \ prop" + ("\<^raw:\mbox{}\inferrule{>_\<^raw:}>\<^raw:{\mbox{>_\<^raw:}}>") + + "_asms" :: "prop \ asms \ asms" + ("\<^raw:\mbox{>_\<^raw:}\\>/ _") + + "_asm" :: "prop \ asms" ("\<^raw:\mbox{>_\<^raw:}>") + +notation (Axiom output) + "Trueprop" ("\<^raw:\mbox{}\inferrule{\mbox{}}{\mbox{>_\<^raw:}}>") + +notation (IfThen output) + "==>" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _/ \<^raw:{\normalsize \,>then\<^raw:\,}>/ _.") +syntax (IfThen output) + "_bigimpl" :: "asms \ prop \ prop" + ("\<^raw:{\normalsize{}>If\<^raw:\,}> _ /\<^raw:{\normalsize \,>then\<^raw:\,}>/ _.") + "_asms" :: "prop \ asms \ asms" ("\<^raw:\mbox{>_\<^raw:}> /\<^raw:{\normalsize \,>and\<^raw:\,}>/ _") + "_asm" :: "prop \ asms" ("\<^raw:\mbox{>_\<^raw:}>") + +notation (IfThenNoBox output) + "==>" ("\<^raw:{\normalsize{}>If\<^raw:\,}> _/ \<^raw:{\normalsize \,>then\<^raw:\,}>/ _.") +syntax (IfThenNoBox output) + "_bigimpl" :: "asms \ prop \ prop" + ("\<^raw:{\normalsize{}>If\<^raw:\,}> _ /\<^raw:{\normalsize \,>then\<^raw:\,}>/ _.") + "_asms" :: "prop \ asms \ asms" ("_ /\<^raw:{\normalsize \,>and\<^raw:\,}>/ _") + "_asm" :: "prop \ asms" ("_") + + (*>*) section {* Introduction *} text {* + \noindent Regular languages are an important and well-understood subject in Computer Science, with many beautiful theorems and many useful algorithms. There is a wide range of textbooks on this subject, many of which are aimed at students @@ -76,7 +112,7 @@ HOLlight support them with libraries. Even worse, reasoning about graphs and matrices can be a real hassle in HOL-based theorem provers. Consider for example the operation of sequencing two automata, say $A_1$ and $A_2$, by - connecting the accepting states of $A_1$ to the initial state of $A_2$:\\[-5.5mm] + connecting the accepting states of $A_1$ to the initial state of $A_2$: % \begin{center} \begin{tabular}{ccc} @@ -206,10 +242,10 @@ an automaton that recognises all strings of the language, we define a regular language as: - \begin{definition} + \begin{dfntn} A language @{text A} is \emph{regular}, provided there is a regular expression that matches all strings of @{text "A"}. - \end{definition} + \end{dfntn} \noindent The reason is that regular expressions, unlike graphs, matrices and functions, can @@ -241,7 +277,7 @@ being represented by the empty list, written @{term "[]"}. \emph{Languages} are sets of strings. The language containing all strings is written in Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages - is written @{term "A ;; B"} and a language raised to the power @{text n} is written + is written @{term "A \ B"} and a language raised to the power @{text n} is written @{term "A \ n"}. They are defined as usual \begin{center} @@ -257,13 +293,13 @@ is defined as the union over all powers, namely @{thm Star_def}. In the paper we will make use of the following properties of these constructions. - \begin{proposition}\label{langprops}\mbox{}\\ + \begin{prpstn}\label{langprops}\mbox{}\\ \begin{tabular}{@ {}ll} (i) & @{thm star_cases} \\ (ii) & @{thm[mode=IfThen] pow_length}\\ (iii) & @{thm seq_Union_left} \\ \end{tabular} - \end{proposition} + \end{prpstn} \noindent In @{text "(ii)"} we use the notation @{term "length s"} for the length of a @@ -280,24 +316,24 @@ Central to our proof will be the solution of equational systems involving equivalence classes of languages. For this we will use Arden's Lemma \cite{Brzozowski64}, - which solves equations of the form @{term "X = A ;; X \ B"} provided + which solves equations of the form @{term "X = A \ X \ B"} provided @{term "[] \ A"}. However we will need the following `reverse' - version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A ;; X"} to - \mbox{@{term "X ;; A"}}). + version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A \ X"} to + \mbox{@{term "X \ A"}}). - \begin{lemma}[Reverse Arden's Lemma]\label{arden}\mbox{}\\ + \begin{lmm}[Reverse Arden's Lemma]\label{arden}\mbox{}\\ If @{thm (prem 1) arden} then @{thm (lhs) arden} if and only if @{thm (rhs) arden}. - \end{lemma} + \end{lmm} \begin{proof} For the right-to-left direction we assume @{thm (rhs) arden} and show that @{thm (lhs) arden} holds. From Prop.~\ref{langprops}@{text "(i)"} - we have @{term "A\ = {[]} \ A ;; A\"}, - which is equal to @{term "A\ = {[]} \ A\ ;; A"}. Adding @{text B} to both - sides gives @{term "B ;; A\ = B ;; ({[]} \ A\ ;; A)"}, whose right-hand side - is equal to @{term "(B ;; A\) ;; A \ B"}. This completes this direction. + we have @{term "A\ = {[]} \ A \ A\"}, + which is equal to @{term "A\ = {[]} \ A\ \ A"}. Adding @{text B} to both + sides gives @{term "B \ A\ = B \ ({[]} \ A\ \ A)"}, whose right-hand side + is equal to @{term "(B \ A\) \ A \ B"}. This completes this direction. For the other direction we assume @{thm (lhs) arden}. By a simple induction on @{text n}, we can establish the property @@ -307,18 +343,18 @@ \end{center} \noindent - Using this property we can show that @{term "B ;; (A \ n) \ X"} holds for - all @{text n}. From this we can infer @{term "B ;; A\ \ X"} using the definition + Using this property we can show that @{term "B \ (A \ n) \ X"} holds for + all @{text n}. From this we can infer @{term "B \ A\ \ X"} using the definition of @{text "\"}. For the inclusion in the other direction we assume a string @{text s} with length @{text k} is an element in @{text X}. Since @{thm (prem 1) arden} we know by Prop.~\ref{langprops}@{text "(ii)"} that - @{term "s \ X ;; (A \ Suc k)"} since its length is only @{text k} - (the strings in @{term "X ;; (A \ Suc k)"} are all longer). + @{term "s \ X \ (A \ Suc k)"} since its length is only @{text k} + (the strings in @{term "X \ (A \ Suc k)"} are all longer). From @{text "(*)"} it follows then that - @{term s} must be an element in @{term "(\m\{0..k}. B ;; (A \ m))"}. This in turn - implies that @{term s} is in @{term "(\n. B ;; (A \ n))"}. Using Prop.~\ref{langprops}@{text "(iii)"} - this is equal to @{term "B ;; A\"}, as we needed to show.\qed + @{term s} must be an element in @{term "(\m\{0..k}. B \ (A \ m))"}. This in turn + implies that @{term s} is in @{term "(\n. B \ (A \ n))"}. Using Prop.~\ref{langprops}@{text "(iii)"} + this is equal to @{term "B \ A\"}, as we needed to show.\qed \end{proof} \noindent @@ -381,12 +417,12 @@ strings are related, provided there is no distinguishing extension in this language. This can be defined as a tertiary relation. - \begin{definition}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and + \begin{dfntn}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and @{text y} are Myhill-Nerode related provided \begin{center} @{thm str_eq_def[simplified str_eq_rel_def Pair_Collect]} \end{center} - \end{definition} + \end{dfntn} \noindent It is easy to see that @{term "\A"} is an equivalence relation, which @@ -407,9 +443,9 @@ that if there are finitely many equivalence classes, like in the example above, then the language is regular. In our setting we therefore have to show: - \begin{theorem}\label{myhillnerodeone} + \begin{thrm}\label{myhillnerodeone} @{thm[mode=IfThen] Myhill_Nerode1} - \end{theorem} + \end{thrm} \noindent To prove this theorem, we first define the set @{term "finals A"} as those equivalence @@ -496,8 +532,8 @@ \begin{center} @{text "\(Y, r) \"} % - @{thm (rhs) L_rhs_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} - @{thm L_rhs_trm.simps(1)[where r="r", THEN eq_reflection]} + @{thm (rhs) L_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} + @{thm L_trm.simps(1)[where r="r", THEN eq_reflection]} \end{center} \noindent @@ -544,9 +580,9 @@ for representing the right-hand sides of equations, we can prove \eqref{inv1} and \eqref{inv2} more concisely as % - \begin{lemma}\label{inv} + \begin{lmm}\label{inv} If @{thm (prem 1) test} then @{text "X = \ \ ` rhs"}. - \end{lemma} + \end{lmm} \noindent Our proof of Thm.~\ref{myhillnerodeone} will proceed by transforming the @@ -597,13 +633,13 @@ \noindent This allows us to prove a version of Arden's Lemma on the level of equations. - \begin{lemma}\label{ardenable} + \begin{lmm}\label{ardenable} Given an equation @{text "X = rhs"}. If @{text "X = \\ ` rhs"}, @{thm (prem 2) Arden_keeps_eq}, and @{thm (prem 3) Arden_keeps_eq}, then @{text "X = \\ ` (Arden X rhs)"}. - \end{lemma} + \end{lmm} \noindent Our @{text ardenable} condition is slightly stronger than needed for applying Arden's Lemma, @@ -739,9 +775,9 @@ It is straightforward to prove that the initial equational system satisfies the invariant. - \begin{lemma}\label{invzero} + \begin{lmm}\label{invzero} @{thm[mode=IfThen] Init_ES_satisfies_invariant} - \end{lemma} + \end{lmm} \begin{proof} Finiteness is given by the assumption and the way how we set up the @@ -754,9 +790,9 @@ \noindent Next we show that @{text Iter} preserves the invariant. - \begin{lemma}\label{iterone} + \begin{lmm}\label{iterone} @{thm[mode=IfThen] iteration_step_invariant[where xrhs="rhs"]} - \end{lemma} + \end{lmm} \begin{proof} The argument boils down to choosing an equation @{text "Y = yrhs"} to be eliminated @@ -786,9 +822,9 @@ \noindent We also need the fact that @{text Iter} decreases the termination measure. - \begin{lemma}\label{itertwo} + \begin{lmm}\label{itertwo} @{thm[mode=IfThen] iteration_step_measure[simplified (no_asm), where xrhs="rhs"]} - \end{lemma} + \end{lmm} \begin{proof} By assumption we know that @{text "ES"} is finite and has more than one element. @@ -803,11 +839,11 @@ This brings us to our property we want to establish for @{text Solve}. - \begin{lemma} + \begin{lmm} If @{thm (prem 1) Solve} and @{thm (prem 2) Solve} then there exists a @{text rhs} such that @{term "Solve X (Init (UNIV // \A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"}. - \end{lemma} + \end{lmm} \begin{proof} In order to prove this lemma using \eqref{whileprinciple}, we have to use a slightly @@ -835,9 +871,9 @@ With this lemma in place we can show that for every equivalence class in @{term "UNIV // \A"} there exists a regular expression. - \begin{lemma}\label{every_eqcl_has_reg} + \begin{lmm}\label{every_eqcl_has_reg} @{thm[mode=IfThen] every_eqcl_has_reg} - \end{lemma} + \end{lmm} \begin{proof} By the preceding lemma, we know that there exists a @{text "rhs"} such @@ -879,9 +915,9 @@ We will prove in this section the second part of the Myhill-Nerode theorem. It can be formulated in our setting as follows: - \begin{theorem} + \begin{thrm} Given @{text "r"} is a regular expression, then @{thm Myhill_Nerode2}. - \end{theorem} + \end{thrm} \noindent The proof will be by induction on the structure of @{text r}. It turns out @@ -919,12 +955,12 @@ is said to \emph{refine} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \ R\<^isub>2"}). We formally define the notion of a \emph{tagging-relation} as follows. - \begin{definition}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x} + \begin{dfntn}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x} and @{text y} are \emph{tag-related} provided \begin{center} @{text "x =tag= y \ tag x = tag y"}\;. \end{center} - \end{definition} + \end{dfntn} In order to establish finiteness of a set @{text A}, we shall use the following powerful @@ -939,9 +975,9 @@ is finite, then the set @{text A} itself must be finite. We can use it to establish the following two lemmas. - \begin{lemma}\label{finone} + \begin{lmm}\label{finone} @{thm[mode=IfThen] finite_eq_tag_rel} - \end{lemma} + \end{lmm} \begin{proof} We set in \eqref{finiteimageD}, @{text f} to be @{text "X \ tag ` X"}. We have @@ -955,12 +991,12 @@ and @{text Y} must be equal.\qed \end{proof} - \begin{lemma}\label{fintwo} + \begin{lmm}\label{fintwo} Given two equivalence relations @{text "R\<^isub>1"} and @{text "R\<^isub>2"}, whereby @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}. If @{thm (prem 1) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]} then @{thm (concl) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}. - \end{lemma} + \end{lmm} \begin{proof} We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to @@ -1028,12 +1064,12 @@ to be able to infer that % \begin{center} - @{text "\"}@{term "x @ z \ A ;; B \ y @ z \ A ;; B"} + @{text "\"}@{term "x @ z \ A \ B \ y @ z \ A \ B"} \end{center} % \noindent using the information given by the appropriate tagging-function. The complication - is to find out what the possible splits of @{text "x @ z"} are to be in @{term "A ;; B"} + is to find out what the possible splits of @{text "x @ z"} are to be in @{term "A \ B"} (this was easy in case of @{term "A \ B"}). To deal with this complication we define the notions of \emph{string prefixes} % @@ -1054,8 +1090,8 @@ \noindent where @{text c} and @{text d} are characters, and @{text x} and @{text y} are strings. - Now assuming @{term "x @ z \ A ;; B"} there are only two possible ways of how to `split' - this string to be in @{term "A ;; B"}: + Now assuming @{term "x @ z \ A \ B"} there are only two possible ways of how to `split' + this string to be in @{term "A \ B"}: % \begin{center} \begin{tabular}{@ {}c@ {\hspace{10mm}}c@ {}} @@ -1075,7 +1111,7 @@ \draw[decoration={brace,transform={yscale=3}},decorate] ($(xa.north west)+(0em,3ex)$) -- ($(z.north east)+(0em,3ex)$) - node[midway, above=0.8em]{@{term "x @ z \ A ;; B"}}; + node[midway, above=0.8em]{@{term "x @ z \ A \ B"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(z.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) @@ -1102,7 +1138,7 @@ \draw[decoration={brace,transform={yscale=3}},decorate] ($(x.north west)+(0em,3ex)$) -- ($(zza.north east)+(0em,3ex)$) - node[midway, above=0.8em]{@{term "x @ z \ A ;; B"}}; + node[midway, above=0.8em]{@{term "x @ z \ A \ B"}}; \draw[decoration={brace,transform={yscale=3}},decorate] ($(za.south east)+(0em,0ex)$) -- ($(x.south west)+(0em,0ex)$) @@ -1118,7 +1154,7 @@ \noindent Either there is a prefix of @{text x} in @{text A} and the rest is in @{text B} (first picture), or @{text x} and a prefix of @{text "z"} is in @{text A} and the rest in @{text B} (second picture). - In both cases we have to show that @{term "y @ z \ A ;; B"}. For this we use the + In both cases we have to show that @{term "y @ z \ A \ B"}. For this we use the following tagging-function % \begin{center} @@ -1136,7 +1172,7 @@ We have to show injectivity of this tagging-function as % \begin{center} - @{term "\z. tag_str_SEQ A B x = tag_str_SEQ A B y \ x @ z \ A ;; B \ y @ z \ A ;; B"} + @{term "\z. tag_str_SEQ A B x = tag_str_SEQ A B y \ x @ z \ A \ B \ y @ z \ A \ B"} \end{center} % \noindent @@ -1161,13 +1197,13 @@ @{term "(x - x') \B (y - y')"} holds. Unfolding the Myhill-Nerode relation and together with the fact that @{text "(x - x') @ z \ B"}, we have @{text "(y - y') @ z \ B"}. We already know @{text "y' \ A"}, therefore - @{term "y @ z \ A ;; B"}, as needed in this case. + @{term "y @ z \ A \ B"}, as needed in this case. Second, there exists a @{text "z'"} such that @{term "x @ z' \ A"} and @{text "z - z' \ B"}. By the assumption about @{term "tag_str_SEQ A B"} we have @{term "\A `` {x} = \A `` {y}"} and thus @{term "x \A y"}. Which means by the Myhill-Nerode relation that @{term "y @ z' \ A"} holds. Using @{text "z - z' \ B"}, we can conclude also in this case - with @{term "y @ z \ A ;; B"}. We again can complete the @{const SEQ}-case + with @{term "y @ z \ A \ B"}. We again can complete the @{const SEQ}-case by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed \end{proof} @@ -1309,9 +1345,9 @@ this point of view. We have established in Isabelle/HOL both directions of the Myhill-Nerode theorem. % - \begin{theorem}[The Myhill-Nerode Theorem]\mbox{}\\ + \begin{thrm}[The Myhill-Nerode Theorem]\mbox{}\\ A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}. - \end{theorem} + \end{thrm} % \noindent Having formalised this theorem means we