regexp: changeset 71:426070e68b21

--- a/Myhill_1.thy	Sun Feb 06 11:21:12 2011 +0000
+++ b/Myhill_1.thy	Mon Feb 07 10:23:23 2011 +0000
@@ -133,27 +133,29 @@
   shows "x @ y \<in> A\<star>"
 using a b by (blast intro: star_intro1 star_intro2)
 
+lemma star_cases:
+  shows "A\<star> =  {[]} \<union> A ;; A\<star>"
+proof
+  { fix x
+    have "x \<in> A\<star> \<Longrightarrow> x \<in> {[]} \<union> A ;; A\<star>"
+      unfolding Seq_def
+    by (induct rule: star_induct) (auto)
+  }
+  then show "A\<star> \<subseteq> {[]} \<union> A ;; A\<star>" by auto
+next
+  show "{[]} \<union> A ;; A\<star> \<subseteq> A\<star>"
+    unfolding Seq_def by auto
+qed
+
 lemma star_decom: 
-  "\<lbrakk>x \<in> A\<star>; x \<noteq> []\<rbrakk> \<Longrightarrow>(\<exists> a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> A \<and> b \<in> A\<star>)"
+  assumes a: "x \<in> A\<star>" "x \<noteq> []"
+  shows "\<exists>a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> A \<and> b \<in> A\<star>"
+using a
 apply(induct rule: star_induct)
 apply(simp)
 apply(blast)
 done
 
-lemma lang_star_cases:
-  shows "L\<star> =  {[]} \<union> L ;; L\<star>"
-proof
-  { fix x
-    have "x \<in> L\<star> \<Longrightarrow> x \<in> {[]} \<union> L ;; L\<star>"
-      unfolding Seq_def
-    by (induct rule: star_induct) (auto)
-  }
-  then show "L\<star> \<subseteq> {[]} \<union> L ;; L\<star>" by auto
-next
-  show "{[]} \<union> L ;; L\<star> \<subseteq> L\<star>"
-    unfolding Seq_def by auto
-qed
-
 lemma
   shows seq_Union_left:  "B ;; (\<Union>n. A \<up> n) = (\<Union>n. B ;; (A \<up> n))"
   and   seq_Union_right: "(\<Union>n. A \<up> n) ;; B = (\<Union>n. (A \<up> n) ;; B)"
@@ -237,7 +239,7 @@
   assume eq: "X = B ;; A\<star>"
   have "A\<star> = {[]} \<union> A\<star> ;; A" 
     unfolding seq_star_comm[symmetric]
-    by (rule lang_star_cases)
+    by (rule star_cases)
   then have "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"
     by (rule seq_add_left)
   also have "\<dots> = B \<union> B ;; (A\<star> ;; A)"
@@ -351,7 +353,7 @@
   @{text "\<approx>A"} is an equivalence class defined by language @{text "A"}.
 *}
 definition
-  str_eq_rel ("\<approx>_" [100] 100)
+  str_eq_rel :: "lang \<Rightarrow> (string \<times> string) set" ("\<approx>_" [100] 100)
 where
   "\<approx>A \<equiv> {(x, y).  (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)}"
 
@@ -361,7 +363,9 @@
 *}
 
 definition 
-   "finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}"
+  finals :: "lang \<Rightarrow> lang set"
+where
+  "finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}"
 
 text {* 
   The following lemma establishes the relationshipt between 
@@ -464,14 +468,19 @@
   @{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of
   the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"}
   is:
-  *}
+*}
+
+definition 
+  transition :: "lang \<Rightarrow> char \<Rightarrow> lang \<Rightarrow> bool" ("_ \<Turnstile>_\<Rightarrow>_" [100,100,100] 100)
+where
+  "Y \<Turnstile>c\<Rightarrow> X \<equiv> Y ;; {[c]} \<subseteq> X"
 
 definition
   "init_rhs CS X \<equiv>  
       if ([] \<in> X) then 
-          {Lam(EMPTY)} \<union> {Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}
+          {Lam EMPTY} \<union> {Trn Y (CHAR c) | Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}
       else 
-          {Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y ;; {[c]} \<subseteq> X}"
+          {Trn Y (CHAR c)| Y c. Y \<in> CS \<and> Y \<Turnstile>c\<Rightarrow> X}"
 
 text {*
   In the definition of @{text "init_rhs"}, the term 
@@ -483,7 +492,7 @@
 
   With the help of @{text "init_rhs"}, the equitional system descrbing the formation of every
   equivalent class inside @{text "CS"} is given by the following @{text "eqs(CS)"}.
-  *}
+*}
 
 
 definition "eqs CS \<equiv> {(X, init_rhs CS X) | X.  X \<in> CS}"
@@ -546,10 +555,11 @@
   With the help of the two functions immediately above, Ardens'
   transformation on right hand side @{text "rhs"} is implemented
   by the following function @{text "arden_variate X rhs"}.
-  After this transformation, the recursive occurent of @{text "X"}
-  in @{text "rhs"} will be eliminated, while the 
-  string set defined by @{text "rhs"} is kept unchanged.
-  *}
+  After this transformation, the recursive occurence of @{text "X"}
+  in @{text "rhs"} will be eliminated, while the string set defined 
+  by @{text "rhs"} is kept unchanged.
+*}
+
 definition 
   "arden_variate X rhs \<equiv> 
         append_rhs_rexp (rhs - items_of rhs X) (STAR (rexp_of rhs X))"
@@ -580,9 +590,9 @@
   "eqs_subst ES X xrhs \<equiv> {(Y, rhs_subst yrhs X xrhs) | Y yrhs. (Y, yrhs) \<in> ES}"
 
 text {*
-  The computation of regular expressions for equivalent classes is accomplished
+  The computation of regular expressions for equivalence classes is accomplished
   using a iteration principle given by the following lemma.
-  *}
+*}
 
 lemma wf_iter [rule_format]: 
   fixes f
@@ -773,7 +783,7 @@
 text {*
   The following several lemmas until @{text "init_ES_satisfy_Inv"} shows that
   the initial equational system satisfies invariant @{text "Inv"}.
-  *}
+*}
 
 lemma defined_by_str:
   "\<lbrakk>s \<in> X; X \<in> UNIV // (\<approx>Lang)\<rbrakk> \<Longrightarrow> X = (\<approx>Lang) `` {s}"
@@ -824,16 +834,17 @@
         and "clist \<in> Y"
         using decom "(1)" every_eqclass_has_transition by blast
       hence 
-        "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y ;; {[c]} \<subseteq> X}"
-        using "(1)" decom
+        "x \<in> L {Trn Y (CHAR c)| Y c. Y \<in> UNIV // (\<approx>Lang) \<and> Y \<Turnstile>c\<Rightarrow> X}"
+        unfolding transition_def
+	using "(1)" decom
         by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def)
-      thus ?thesis using X_in_eqs "(1)"
-        by (simp add:eqs_def init_rhs_def)
+      thus ?thesis using X_in_eqs "(1)"	
+        by (simp add: eqs_def init_rhs_def)
     qed
   qed
 next
   show "L xrhs \<subseteq> X" using X_in_eqs
-    by (auto simp:eqs_def init_rhs_def) 
+    by (auto simp:eqs_def init_rhs_def transition_def) 
 qed
 
 lemma finite_init_rhs: 
@@ -851,7 +862,7 @@
     ultimately show ?thesis 
       by auto
   qed
-  thus ?thesis by (simp add:init_rhs_def)
+  thus ?thesis by (simp add:init_rhs_def transition_def)
 qed
 
 lemma init_ES_satisfy_Inv:
@@ -884,7 +895,8 @@
   From this point until @{text "iteration_step"}, it is proved
   that there exists iteration steps which keep @{text "Inv(ES)"} while
   decreasing the size of @{text "ES"}.
-  *}
+*}
+
 lemma arden_variate_keeps_eq:
   assumes l_eq_r: "X = L rhs"
   and not_empty: "[] \<notin> L (rexp_of rhs X)"

--- a/Paper/Paper.thy	Sun Feb 06 11:21:12 2011 +0000
+++ b/Paper/Paper.thy	Mon Feb 07 10:23:23 2011 +0000
@@ -22,7 +22,8 @@
   REL ("\<approx>") and
   UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and
   L ("L '(_')" [0] 101) and
-  EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) 
+  EClass ("\<lbrakk>_\<rbrakk>\<^bsub>_\<^esub>" [100, 100] 100) and
+  transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longrightarrow}}> _" [100, 100, 100] 100)
 (*>*)
 
 
@@ -40,7 +41,7 @@
   introduce finite automata and then define everything in terms of them.  For
   example, a regular language is normally defined as one whose strings are
   recognised by a finite deterministic automaton. This approach has many
-  benefits. Among them is that it is easy to convince oneself from the fact that
+  benefits. Among them is the fact that it is easy to convince oneself that
   regular languages are closed under complementation: one just has to exchange
   the accepting and non-accepting states in the corresponding automaton to
   obtain an automaton for the complement language.  The problem, however, lies with
@@ -139,7 +140,7 @@
   to be no substantial formalisation of automata theory and regular languages 
   carried out in a HOL-based theorem prover. We are only aware of the 
   large formalisation of automata theory in Nuprl \cite{Constable00} and 
-  some smaller formalisations in Coq, for example \cite{Filliatre97}.
+  some smaller formalisations in Coq (for example \cite{Filliatre97}).
   
   In this paper, we will not attempt to formalise automata theory, but take a completely 
   different approach to regular languages. Instead of defining a regular language as one 
@@ -153,12 +154,12 @@
   
   \noindent
   The reason is that regular expressions, unlike graphs and matrices, can
-  be easily defined as inductive datatype. Therefore a corresponding reasoning 
-  infrastructure comes for free. This has recently been used in HOL4 for formalising regular 
-  expression matching based on derivatives \cite{OwensSlind08}.  The purpose of this paper is to
-  show that a central result about regular languages, the Myhill-Nerode theorem,  
-  can be recreated by only using regular expressions. This theorem gives a necessary
-  and sufficient condition for when a language is regular. As a corollary of this
+  be easily defined as inductive datatype. Consequently a corresponding reasoning 
+  infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation
+  of regular expression matching based on derivatives \cite{OwensSlind08}.  The purpose of this paper is to
+  show that a central result about regular languages---the Myhill-Nerode theorem---can 
+  be recreated by only using regular expressions. This theorem gives necessary
+  and sufficient conditions for when a language is regular. As a corollary of this
   theorem we can easily establish the usual closure properties, including 
   complementation, for regular languages.\smallskip
   
@@ -176,11 +177,9 @@
   Strings in Isabelle/HOL are lists of characters with the \emph{empty string}
   being represented by the empty list, written @{term "[]"}. \emph{Languages}
   are sets of strings. The language containing all strings is written in
-  Isabelle/HOL as @{term "UNIV::string set"}.  The notation for the quotient
-  of a language @{text A} according to a relation @{term REL} is @{term "A //
-  REL"}. The concatenation of two languages is written @{term "A ;; B"}; a
-  language raised to the power $n$ is written @{term "A \<up> n"}. Both concepts
-  are defined as
+  Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages 
+  is written @{term "A ;; B"} and a language raised to the power $n$ is written 
+  @{term "A \<up> n"}. Their definitions are
 
   \begin{center}
   @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]}
@@ -192,10 +191,31 @@
 
   \noindent
   where @{text "@"} is the usual list-append operation. The Kleene-star of a language @{text A}
-  is defined as the union over all powers, namely @{thm Star_def}.
+  is defined as the union over all powers, namely @{thm Star_def}. In the paper
+  we will often make use of the following properties.
   
+  \begin{proposition}\label{langprops}\mbox{}\\
+  \begin{tabular}{@ {}ll@ {\hspace{10mm}}ll}
+  (i)   & @{thm star_cases}      & (ii)  & @{thm[mode=IfThen] pow_length}\\
+  (iii) & @{thm seq_Union_left}  & 
+  \end{tabular}
+  \end{proposition}
+
+  \noindent
+  We omit the proofs of these properties, but invite the reader to consult
+  our formalisation.\footnote{Available at ???}
+
+
+  The notation for the quotient of a language @{text A} according to an 
+  equivalence relation @{term REL} is @{term "A // REL"}. We will write 
+  @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined 
+  as @{text "{y | y \<approx> x}"}.
+
+
   Central to our proof will be the solution of equational systems
-  involving regular expressions. For this we will use the following ``reverse'' 
+  involving regular expressions. For this we will use Arden's lemma \cite{}
+  which solves equations of the form @{term "X = A ;; X \<union> B"} provided
+  @{term "[] \<notin> A"}. However we will need the following ``reverse'' 
   version of Arden's lemma.
 
   \begin{lemma}[Reverse Arden's Lemma]\mbox{}\\
@@ -206,7 +226,8 @@
 
   \begin{proof}
   For the right-to-left direction we assume @{thm (rhs) ardens_revised} and show
-  that @{thm (lhs) ardens_revised} holds. From Lemma ??? we have @{term "A\<star> = {[]} \<union> A ;; A\<star>"},
+  that @{thm (lhs) ardens_revised} holds. From Prop.~\ref{langprops}$(i)$ 
+  we have @{term "A\<star> = {[]} \<union> A ;; A\<star>"},
   which is equal to @{term "A\<star> = {[]} \<union> A\<star> ;; A"}. Adding @{text B} to both 
   sides gives @{term "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"}, whose right-hand side
   is equal to @{term "(B ;; A\<star>) ;; A \<union> B"}. This completes this direction. 
@@ -220,15 +241,17 @@
   
   \noindent
   Using this property we can show that @{term "B ;; (A \<up> n) \<subseteq> X"} holds for
-  all @{text n}. From this we can infer @{term "B ;; A\<star> \<subseteq> X"} using Lemma ???.
+  all @{text n}. From this we can infer @{term "B ;; A\<star> \<subseteq> X"} using the definition
+  of @{text "\<star>"}.
   For the inclusion in the other direction we assume a string @{text s}
   with length @{text k} is element in @{text X}. Since @{thm (prem 1) ardens_revised}
-  we know that @{term "s \<notin> X ;; (A \<up> Suc k)"} since its length is only @{text k}
+  we know by Prop.~\ref{langprops}$(ii)$ that 
+  @{term "s \<notin> X ;; (A \<up> Suc k)"} since its length is only @{text k}
   (the strings in @{term "X ;; (A \<up> Suc k)"} are all longer). 
   From @{text "(*)"} it follows then that
   @{term s} must be element in @{term "(\<Union>m\<in>{0..k}. B ;; (A \<up> m))"}. This in turn
-  implies that @{term s} is in @{term "(\<Union>n. B ;; (A \<up> n))"}. Using Lemma ??? this
-  is equal to @{term "B ;; A\<star>"}, as we needed to show.\qed
+  implies that @{term s} is in @{term "(\<Union>n. B ;; (A \<up> n))"}. Using Prop.~\ref{langprops}$(iii)$ 
+  this is equal to @{term "B ;; A\<star>"}, as we needed to show.\qed
   \end{proof}
 
   \noindent
@@ -275,13 +298,27 @@
   @{thm str_eq_rel_def[simplified]}
   \end{definition}
 
-  \begin{definition} @{text "finals A"} are the equivalence classes that contain
-  strings from @{text A}\\
+  \noindent
+  It is easy to see that @{term "\<approx>A"} is an equivalence relation, which partitions
+  the set of all string, @{text "UNIV"}, into a set of equivalence classed. We define
+  the set @{term "finals A"} as those equivalence classes that contain strings of 
+  @{text A}, namely
+
+  \begin{equation} 
   @{thm finals_def}
-  \end{definition}
+  \end{equation}
+
+  \noindent
+  It is easy to show that @{thm lang_is_union_of_finals} holds. We can also define 
+  a notion of \emph{transition} between equivalence classes as
 
-  @{thm lang_is_union_of_finals}
+  \begin{equation} 
+  @{thm transition_def}
+  \end{equation}
 
+  \noindent
+  which means if we add the character @{text c} to all strings in the equivalence
+  class @{text Y} HERE
 
   \begin{theorem}
   Given a language @{text A}.

Binary file tphols-2011/myhill.pdf has changed

author	urbanc
	Mon, 07 Feb 2011 10:23:23 +0000
changeset 71	426070e68b21
parent 70	8ab3a06577cf
child 72	e5116c2e6187

Myhill_1.thy		file \| annotate \| diff \| comparison \| revisions
Paper/Paper.thy		file \| annotate \| diff \| comparison \| revisions
tphols-2011/myhill.pdf		file \| annotate \| diff \| comparison \| revisions