# HG changeset patch # User urbanc # Date 1311815522 0 # Node ID 50cc1a39c990bad40f34a908e0197b92ac7fe90f # Parent 6969de1eb96b337872af644f5da1becbb7ca797b more one the paper diff -r 6969de1eb96b -r 50cc1a39c990 Journal/Paper.thy --- a/Journal/Paper.thy Wed Jul 27 15:29:39 2011 +0000 +++ b/Journal/Paper.thy Thu Jul 28 01:12:02 2011 +0000 @@ -295,21 +295,21 @@ larger formalisations of automata theory are carried out in Nuprl \cite{Constable00} and in Coq \cite{Filliatre97}. - Also one might consider automata theory as well-worn stock material where + Also one might consider automata theory as a well-worn stock subject where everything is crystal clear. However, paper proofs about automata often involve subtle side-conditions which are easily overlooked, but which make formal reasoning rather painful. For example Kozen's proof of the Myhill-Nerode theorem requires that the automata do not have inaccessible states \cite{Kozen97}. Another subtle side-condition is completeness of - automata: automata need to have total transition functions and at most one + automata, that is automata need to have total transition functions and at most one `sink' state from which there is no connection to a final state (Brozowski - mentions this side-condition in connection with state complexity - \cite{Brozowski10}). Such side-conditions mean that if we define a regular + mentions this side-condition in the context of state complexity + of automata \cite{Brozowski10}). Such side-conditions mean that if we define a regular language as one for which there exists \emph{a} finite automaton that - recognises all its strings (Def.~\ref{baddef}), then we need a lemma which + recognises all its strings (see Def.~\ref{baddef}), then we need a lemma which ensures that another equivalent can be found satisfying the side-condition. Unfortunately, such `little' and `obvious' lemmas make - formalisations of automata theory hair-pulling experiences. + a formalisation of automata theory a hair-pulling experience. In this paper, we will not attempt to formalise automata theory in @@ -362,19 +362,24 @@ text {* \noindent Strings in Isabelle/HOL are lists of characters with the \emph{empty string} - being represented by the empty list, written @{term "[]"}. \emph{Languages} - are sets of strings. The language containing all strings is written in - Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages - is written @{term "A \ B"} and a language raised to the power @{text n} is written + being represented by the empty list, written @{term "[]"}. We assume there + are only finitely many different characters. \emph{Languages} are sets of + strings. The language containing all strings is written in Isabelle/HOL as + @{term "UNIV::string set"}. The concatenation of two languages is written + @{term "A \ B"} and a language raised to the power @{text n} is written @{term "A \ n"}. They are defined as usual \begin{center} - - @{thm conc_def'[THEN eq_reflection, where A1="A" and B1="B"]} - \hspace{7mm} - @{thm lang_pow.simps(1)[THEN eq_reflection, where A1="A"]} - \hspace{7mm} - @{thm lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} + \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} + @{thm (lhs) conc_def'[THEN eq_reflection, where A1="A" and B1="B"]} + & @{text "\"} & @{thm (rhs) conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}\\ + + @{thm (lhs) lang_pow.simps(1)[THEN eq_reflection, where A1="A"]} + & @{text "\"} & @{thm (rhs) lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}\\ + + @{thm (lhs) lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} + & @{text "\"} & @{thm (rhs) lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} + \end{tabular} \end{center} \noindent @@ -453,11 +458,11 @@ \begin{center} \begin{tabular}{rcl} @{text r} & @{text "::="} & @{term ZERO}\\ - & @{text"|"} & @{term ONE}\\ - & @{text"|"} & @{term "ATOM c"}\\ - & @{text"|"} & @{term "TIMES r r"}\\ - & @{text"|"} & @{term "PLUS r r"}\\ - & @{text"|"} & @{term "STAR r"} + & @{text"|"} & @{term One}\\ + & @{text"|"} & @{term "Atom c"}\\ + & @{text"|"} & @{term "Times r r"}\\ + & @{text"|"} & @{term "Plus r r"}\\ + & @{text"|"} & @{term "Star r"} \end{tabular} \end{center} @@ -498,9 +503,8 @@ section {* The Myhill-Nerode Theorem, First Part *} text {* + \noindent \footnote{Folklore: Henzinger (arden-DFA-regexp.pdf); Hofmann} - - \noindent The key definition in the Myhill-Nerode theorem is the \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two strings are related, provided there is no distinguishing extension in this @@ -552,10 +556,10 @@ equivalence class in @{term "finals {[c]}"}. It is straightforward to show that in general - \begin{center} + \begin{equation}\label{finalprops} @{thm lang_is_union_of_finals}\hspace{15mm} @{thm finals_in_partitions} - \end{center} + \end{equation} \noindent hold. @@ -622,10 +626,25 @@ transitions---the successor `state' of an equivalence class @{text Y} can be reached by adding a character to the end of @{text Y}. This is also the reason why we have to use our reverse version of Arden's Lemma.} - %In our initial equation system there can only be - %finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, ATOM c\<^isub>i\<^isub>j)"} in a right-hand side - %since by assumption there are only finitely many - %equivalence classes and only finitely many characters. + In our running example we have the initial equational system + + \begin{equation}\label{exmpcs} + \mbox{\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} + @{term "X\<^isub>1"} & @{text "="} & @{text "\(ONE)"}\\ + @{term "X\<^isub>2"} & @{text "="} & @{text "(X\<^isub>1, ATOM c)"}\\ + @{term "X\<^isub>3"} & @{text "="} & @{text "(X\<^isub>1, ATOM d\<^isub>1) + \ + (X\<^isub>1, ATOM d\<^isub>n)"}\\ + & & \mbox{}\hspace{3mm}@{text "+ (X\<^isub>3, ATOM c\<^isub>1) + \ + (X\<^isub>3, ATOM c\<^isub>m)"} + \end{tabular}} + \end{equation} + + \noindent + where @{text "d\<^isub>1\d\<^isub>n"} is the sequence of all characters + except @{text c}, and @{text "c\<^isub>1\c\<^isub>m"} is the sequence of all + characters. In our initial equation systems there can only be finitely many + terms of the form @{text "(Y\<^isub>i\<^isub>j, ATOM c\<^isub>i\<^isub>j)"}, + since by assumption there are only finitely many equivalence classes and + only finitely many characters. + Overloading the function @{text \} for the two kinds of terms in the equational system, we have @@ -698,8 +717,14 @@ we define the \emph{append-operation} taking a term and a regular expression as argument \begin{center} - @{thm Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\hspace{10mm} - @{thm Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} + @{thm (lhs) Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + & @{text "\"} & + @{thm (rhs) Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\\ + @{thm (lhs) Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + & @{text "\"} & + @{thm (rhs) Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + \end{tabular} \end{center} \noindent @@ -711,15 +736,15 @@ \mbox{\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} @{thm (lhs) Arden_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \ rhs}"} \\ - & & @{text "r' ="} & @{term "STAR (\ {r. Trn X r \ rhs})"}\\ - & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "append_rhs_rexp rhs' r'"}}\\ + & & @{text "r' ="} & @{term "Star (\ {r. Trn X r \ rhs})"}\\ + & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "Append_rexp_rhs rhs' r'"}}\\ \end{tabular}} \end{equation} \noindent In this definition, we first delete all terms of the form @{text "(X, r)"} from @{text rhs}; then we calculate the combined regular expressions for all @{text r} coming - from the deleted @{text "(X, r)"}, and take the @{const STAR} of it; + from the deleted @{text "(X, r)"}, and take the @{const Star} of it; finally we append this regular expression to @{text rhs'}. It can be easily seen that this operation mimics Arden's Lemma on the level of equations. To ensure the non-emptiness condition of Arden's Lemma we say that a right-hand side is @@ -751,7 +776,7 @@ @{thm (lhs) Subst_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \ rhs}"} \\ & & @{text "r' ="} & @{term "\ {r. Trn X r \ rhs}"}\\ - & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \ append_rhs_rexp xrhs r'"}}\\ + & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \ Append_rexp_rhs xrhs r'"}}\\ \end{tabular} \end{center} @@ -900,8 +925,10 @@ We prove this as follows: \begin{center} - @{text "\ ES."} @{thm (prem 1) Subst_all_satisfies_invariant} implies + \begin{tabular}{@ {}l@ {}} + @{text "\ ES."}\\ \mbox{}\hspace{5mm}@{thm (prem 1) Subst_all_satisfies_invariant} implies @{thm (concl) Subst_all_satisfies_invariant} + \end{tabular} \end{center} \noindent @@ -1315,7 +1342,7 @@ \end{proof} \noindent - The case for @{const STAR} is similar to @{const TIMES}, but poses a few extra challenges. When + The case for @{const Star} is similar to @{const TIMES}, but poses a few extra challenges. When we analyse the case that @{text "x @ z"} is an element in @{term "A\"} and @{text x} is not the empty string, we have the following picture: @@ -1382,14 +1409,14 @@ @{thm tag_str_Star_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip \end{center} - \begin{proof}[@{const STAR}-Case] + \begin{proof}[@{const Star}-Case] If @{term "finite (UNIV // \A)"} then @{term "finite (Pow (UNIV // \A))"} holds. The range of - @{term "tag_str_STAR A"} is a subset of this set, and therefore finite. + @{term "tag_str_Star A"} is a subset of this set, and therefore finite. Again we have to show injectivity of this tagging-function as % \begin{center} - @{term "\z. tag_str_STAR A x = tag_str_STAR A y \ x @ z \ A\ \ y @ z \ A\"} + @{term "\z. tag_str_Star A x = tag_str_Star A y \ x @ z \ A\ \ y @ z \ A\"} \end{center} % \noindent @@ -1450,7 +1477,7 @@ \end{equation} \noindent - It is realtively easy to establish the following identidies for left-quotients: + It is realtively easy to establish the following properties for left-quotients: \begin{center} \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{2mm}}l} @@ -1483,8 +1510,10 @@ @{thm (lhs) der.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & @{thm (rhs) der.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) der.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} - & @{text "\"}\\ - \multicolumn{3}{@ {\hspace{5mm}}l@ {}}{@{thm (rhs) der.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}}\\ + & @{text "\"} & @{text "if"}~@{term "nullable r\<^isub>1"}~@{text "then"}~% + @{term "Plus (Times (der c r\<^isub>1) r\<^isub>2) (der c r\<^isub>2)"}\\ + & & \phantom{@{text "if"}~@{term "nullable r\<^isub>1"}~}@{text "else"}~% + @{term "Times (der c r\<^isub>1) r\<^isub>2"}\\ @{thm (lhs) der.simps(6)} & @{text "\"} & @{thm (rhs) der.simps(6)}\smallskip\\ @{thm (lhs) ders.simps(1)} & @{text "\"} & @{thm (rhs) ders.simps(1)}\\ @{thm (lhs) ders.simps(2)} & @{text "\"} & @{thm (rhs) ders.simps(2)}\\ @@ -1496,7 +1525,7 @@ can recognise the empty string: \begin{center} - \begin{tabular}{cc} + \begin{tabular}{c@ {\hspace{10mm}}c} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1.5mm}}l@ {}} @{thm (lhs) nullable.simps(1)} & @{text "\"} & @{thm (rhs) nullable.simps(1)}\\ @{thm (lhs) nullable.simps(2)} & @{text "\"} & @{thm (rhs) nullable.simps(2)}\\ @@ -1568,8 +1597,10 @@ @{thm (lhs) pder.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & @{thm (rhs) pder.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ @{thm (lhs) pder.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} - & @{text "\"}\\ - \multicolumn{3}{@ {\hspace{20mm}}l@ {}}{@{thm (rhs) pder.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}}\\ + & @{text "\"} & @{text "if"}~@{term "nullable r\<^isub>1"}~@{text "then"}~% + @{term "(Times_set (pder c r\<^isub>1) r\<^isub>2) \ (pder c r\<^isub>2)"}\\ + & & \phantom{@{text "if"}~@{term "nullable r\<^isub>1"}~}@{text "else"}~% + @{term "Times_set (pder c r\<^isub>1) r\<^isub>2"}\\ @{thm (lhs) pder.simps(6)} & @{text "\"} & @{thm (rhs) pder.simps(6)}\smallskip\\ @{thm (lhs) pders.simps(1)} & @{text "\"} & @{thm (rhs) pders.simps(1)}\\ @{thm (lhs) pders.simps(2)} & @{text "\"} & @{thm (rhs) pders.simps(2)}\\