Journal/Paper.thy
changeset 247 087e6c255e33
parent 245 40b8d485ce8d
child 248 47446f111550
--- a/Journal/Paper.thy	Wed Sep 07 18:16:49 2011 +0000
+++ b/Journal/Paper.thy	Wed Sep 07 18:17:56 2011 +0000
@@ -413,9 +413,10 @@
   \end{dfntn}
   
   \noindent
+  And then `forget' automata.
   The reason is that regular expressions, unlike graphs, matrices and
   functions, can be easily defined as an inductive datatype. A reasoning
-  infrastructure (like induction and recursion) comes then for free in
+  infrastructure (like induction and recursion) comes for free in
   HOL. Moreover, no side-conditions will be needed for regular expressions,
   like we need for automata. This convenience of regular expressions has
   recently been exploited in HOL4 with a formalisation of regular expression
@@ -2113,7 +2114,8 @@
   @{term "Deriv_lang B A"} is regular.
 
   Even more surprising is the fact that for \emph{every} language @{text A}, the language
-  consisting of all substrings of @{text A} is regular \cite{Shallit08, Gasarch09}. 
+  consisting of all substrings of @{text A} is regular \cite{Haines69} (see also 
+  \cite{Shallit08, Gasarch09}). 
   A \emph{substring} can be obtained
   by striking out zero or more characters from a string. This can be defined 
   inductively in Isabelle/HOL by the following three rules:
@@ -2139,15 +2141,19 @@
   \noindent
   We like to establish
 
-  \begin{lmm}\label{subseqreg}
+  \begin{thrm}[Haines \cite{Haines69}]\label{subseqreg}
   For every language @{text A}, the languages @{text "(i)"} @{term "SUBSEQ A"} and 
   @{text "(ii)"} @{term "SUPSEQ A"}
   are regular.
-  \end{lmm}
+  \end{thrm}
 
   \noindent
   Our proof follows the one given in \cite[Pages 92--95]{Shallit08}, except that we use
-  Higman's Lemma, which is already proved in the Isabelle/HOL library \cite{Berghofer03}. 
+  Higman's Lemma, which is already proved in the Isabelle/HOL library 
+  \cite{Berghofer03}.\footnote{Unfortunately, Berghofer's formalisation of Higman's lemma 
+  is restricted to 2-letter alphabets,
+  which means also our formalisation of Theorem~\ref{subseqreg} is `tainted' with 
+  this constraint. However our methodology is applicable to any alphabet of finite size.} 
   Higman's Lemma allows us to infer that every set @{text A} of antichains, satisfying
 
   \begin{equation}\label{higman}
@@ -2157,7 +2163,7 @@
   \noindent
   is finite.
 
-  The first step in our proof of Lemma~\ref{subseqreg} is to establish the 
+  The first step in our proof of Theorem~\ref{subseqreg} is to establish the 
   following simple properties for @{term SUPSEQ}
 
   \begin{equation}\label{supseqprops}
@@ -2203,7 +2209,7 @@
   \end{proof}
 
   \noindent
-  Now we can prove our main lemma w.r.t.~@{const "SUPSEQ"}, namely
+  Now we can prove the main lemma w.r.t.~@{const "SUPSEQ"}, namely
 
   \begin{lmm}\label{mset}
   For every language @{text A}, there exists a finite language @{text M} such that
@@ -2238,16 +2244,16 @@
   \end{proof}
 
   \noindent
-  This lemma allows us to establish the second part of Lemma~\ref{subseqreg}.
+  This lemma allows us to establish the second part of Theorem~\ref{subseqreg}.
 
-  \begin{proof}[Proof of the Second Part of Lemma~\ref{subseqreg}]
+  \begin{proof}[Proof of the Second Part of Theorem~\ref{subseqreg}]
   Given any language @{text A}, by Lemma~\ref{mset} we know there exists
   a finite, and thus regular, language @{text M}. We further have @{term "SUPSEQ M = SUPSEQ A"},
   which establishes the second part.    
   \end{proof}
 
   \noindent
-  In order to establish the first part of Lemma~\ref{subseqreg}, we use the
+  In order to establish the first part of this theorem, we use the
   property proved in \cite{Shallit08}, namely that
 
   \begin{equation}\label{compl}
@@ -2255,10 +2261,10 @@
   \end{equation}
 
   \noindent
-  holds. Now the first part of~\ref{subseqreg} is a simple consequence of the second part.
+  holds. Now the first part of Theorem~\ref{subseqreg} is a simple consequence of the second part.
 
-  \begin{proof}[Proof of the First Part of Lemma~\ref{subseqreg}]
-  By the second part of Lemma~\ref{subseqreg}, we know the right-hand side of \eqref{compl}
+  \begin{proof}[Proof of the First Part of Theorem~\ref{subseqreg}]
+  By the second part, we know the right-hand side of \eqref{compl}
   is regular, which means @{term "- SUBSEQ A"} is regular. But since
   we established already that regularity is preserved under complement, also @{term "SUBSEQ A"}
   must be regular. 
@@ -2284,10 +2290,10 @@
   Using this lemma, it is straightforward to establish that the language 
   \mbox{@{text "A \<equiv> \<Union>\<^isub>n a\<^sup>n @ b\<^sup>n"}} is not regular (@{text "a\<^sup>n"} stands
   for the strings consisting of @{text n} times the character a; similarly for
-  @{text "b\<^isub>n"}). For this consider the infinite set @{text "B \<equiv> \<Union>\<^isub>n a\<^sup>n"}.
+  @{text "b\<^isup>n"}). For this consider the infinite set @{text "B \<equiv> \<Union>\<^isub>n a\<^sup>n"}.
 
   \begin{lmm}
-  No two distinct strings in @{text "B"} are Myhill-Nerode related by @{text A}.
+  No two distinct strings in set @{text "B"} are Myhill-Nerode related by language @{text A}.
   \end{lmm} 
 
   \begin{proof}
@@ -2428,8 +2434,11 @@
   based on tagging-functions), plus around 300 lines of standard material
   about regular languages. The formalisation of derivatives and partial
   derivatives shown in Section~\ref{derivatives} consists of 390 lines of
-  code.  The closure properties in Section~\ref{closure} can be established in
-  190 lines of code.  The algorithm for solving equational systems, which we
+  code.  The closure properties in Section~\ref{closure} (except Theorem~\ref{subseqreg}) 
+  can be established in
+  190 lines of code. The Continuation Lemma and the non-regularity of @{text "a\<^sup>n b\<^sup>n"} 
+  require 80 lines of code.
+  The algorithm for solving equational systems, which we
   used in the first direction, is conceptually relatively simple. Still the
   use of sets over which the algorithm operates means it is not as easy to
   formalise as one might hope. However, it seems sets cannot be avoided since
@@ -2467,7 +2476,7 @@
   \noindent
   {\bf Acknowledgements:}
   We are grateful for the comments we received from Larry Paulson.  Tobias
-  Nipkow made us aware of the properties in Lemma~\ref{subseqreg} and Tjark
+  Nipkow made us aware of the properties in Theorem~\ref{subseqreg} and Tjark
   Weber helped us with proving them.
 *}