diff -r 01d223421ba0 -r 44c4450152e3 Journal/Paper.thy --- a/Journal/Paper.thy Wed Dec 12 11:45:04 2012 +0000 +++ b/Journal/Paper.thy Fri Mar 01 17:13:32 2013 +0000 @@ -611,7 +611,7 @@ strings are related, provided there is no distinguishing extension in this language. This can be defined as a ternary relation. - \begin{dfntn}[(Myhill-Nerode Relation)]\label{myhillneroderel} + \begin{dfntn}[Myhill-Nerode Relation]\label{myhillneroderel} Given a language @{text A}, two strings @{text x} and @{text y} are Myhill-Nerode related provided \begin{center} @@ -1190,7 +1190,7 @@ the base cases are straightforward. - \begin{proof}[(Base Cases)] + \begin{proof}[Base Cases] The cases for @{const ZERO}, @{const ONE} and @{const ATOM} are routine, because we can easily establish that @@ -1275,7 +1275,7 @@ as follows. - \begin{dfntn}[(Tagging-Relation)] Given a tagging-function @{text tag}, then two strings @{text x} + \begin{dfntn}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x} and @{text y} are \emph{tag-related} provided \begin{center} @{text "x \<^raw:$\threesim$>\<^bsub>tag\<^esub> y \ tag x = tag y"}\;. @@ -1353,7 +1353,7 @@ @{term "x"}~@{term "=(tag_Plus A B)="}~@{term y}. As we shall see, this definition will provide us with just the right assumptions in order to get the proof through. - \begin{proof}[(@{const "PLUS"}-Case)] + \begin{proof}[@{const "PLUS"}-Case] We can show in general, if @{term "finite (UNIV // \A)"} and @{term "finite (UNIV // \B)"} then @{term "finite ((UNIV // \A) \ (UNIV // \B))"} holds. The range of @{term "tag_Plus A B"} is a subset of this product @@ -1477,7 +1477,7 @@ With this definition in place, let us prove the @{const "Times"}-case. - \begin{proof}[(@{const TIMES}-Case)] + \begin{proof}[@{const TIMES}-Case] If @{term "finite (UNIV // \A)"} and @{term "finite (UNIV // \B)"} then @{term "finite ((UNIV // \A) \ (Pow (UNIV // \B)))"} holds. The range of @{term "tag_Times A B"} is a subset of this product set, and therefore finite. @@ -1602,7 +1602,7 @@ @{text "{\x\<^isub>s\\<^bsub>\A\<^esub> | x\<^isub>p < x \ x\<^isub>p \ A\<^isup>\ \ (x\<^isub>p, x\<^isub>s) \ Partitions x}"} \end{center} - \begin{proof}[(@{const Star}-Case)] + \begin{proof}[@{const Star}-Case] If @{term "finite (UNIV // \A)"} then @{term "finite (Pow (UNIV // \A))"} holds. The range of @{term "tag_Star A"} is a subset of this set, and therefore finite. @@ -2275,7 +2275,7 @@ the non-regularity of languages. For this we use the following version of the Continuation Lemma (see for example~\cite{Rosenberg06}). - \begin{lmm}[(Continuation Lemma)] + \begin{lmm}[Continuation Lemma] If a language @{text A} is regular and a set of strings @{text B} is infinite, then there exist two distinct strings @{text x} and @{text y} in @{text B} such that @{term "x \A y"}. @@ -2300,7 +2300,7 @@ \begin{proof} After unfolding the definition of @{text "B"}, we need to establish that given @{term "i \ j"}, the strings @{text "a\<^sup>i"} and @{text "a\<^sup>j"} are not Myhill-Nerode related by @{text "A"}. - That means we have to show that \mbox{@{text "\z. a\<^sup>i @ z \ A = a\<^sup>j @ z \ A"}} leads to + That means we have to show that @{text "\z. a\<^sup>i @ z \ A = a\<^sup>j @ z \ A"} leads to a contradiction. Let us take @{text "b\<^sup>i"} for @{text "z"}. Then we know @{text "a\<^sup>i @ b\<^sup>i \ A"}. But since @{term "i \ j"}, @{text "a\<^sup>j @ b\<^sup>i \ A"}. Therefore @{text "a\<^sup>i"} and @{text "a\<^sup>j"} cannot be Myhill-Nerode related by @{text "A"}, and we are done. @@ -2323,10 +2323,28 @@ exists a regular expression that matches all of its strings. Regular expressions can conveniently be defined as a datatype in theorem provers. For us it was therefore interesting to find out how far we can push - this point of view. We have established in Isabelle/HOL both directions + this point of view. But this question whether formal language theory can + be done without automata crops up also in non-theorem prover contexts. For + example, Gasarch asked in the Computational Complexity blog by \citeN{GasarchBlog} + whether the complementation of + regular-expression languages can be proved without using automata. He concluded + + \begin{quote} + {\it \ldots it can't be done} + \end{quote} + + \noindent + and even pondered + + \begin{quote} + {\it \ldots [b]ut is there a rigorous way to even state that?} + \end{quote} + + \noindent + We have established in Isabelle/HOL both directions of the Myhill-Nerode Theorem. % - \begin{thrm}[(Myhill-Nerode Theorem)]\mbox{}\\ + \begin{thrm}[Myhill-Nerode Theorem]\mbox{}\\ A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}. \end{thrm} @@ -2336,11 +2354,9 @@ regular---by establishing that it has infinitely many equivalence classes generated by the Myhill-Nerode Relation (this is usually the purpose of the Pumping Lemma). We can also use it to establish the standard - textbook results about closure properties of regular languages. Interesting - is the case of closure under complement, because it seems difficult to - construct a regular expression for the complement language by direct - means. However the existence of such a regular expression can be easily - proved using the Myhill-Nerode Theorem. + textbook results about closure properties of regular languages. The case of + closure under complement follows easily from the Myhill-Nerode Theorem. + So our answer to Gasarch is ``{\it it can be done!''} %Our insistence on regular expressions for proving the Myhill-Nerode Theorem %arose from the problem of defining formally the regularity of a language. @@ -2377,6 +2393,10 @@ %language theory---the Myhill-Nerode Theorem. While regular expressions are convenient, they have some limitations. One is + that there are some regular expressions for which the smallest regular + expression for the complement language has a doubly-exponential blowup in size + as shown by \citeN{Gelade12}. + Another is that there seems to be no method of calculating a minimal regular expression (for example in terms of length) for a regular language, like there is for automata. On the other hand, efficient regular expression matching, without