regexp: Paper/Paper.thy@0d4d5bb321dc


(*<*)
theory Paper
imports "../Myhill" "LaTeXsugar"
begin

declare [[show_question_marks = false]]

consts
 REL :: "(string \<times> string) \<Rightarrow> bool"


notation (latex output)
  str_eq_rel ("\<approx>\<^bsub>_\<^esub>") and
  Seq (infixr "\<cdot>" 100) and
  Star ("_\<^bsup>\<star>\<^esup>") and
  pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and
  Suc ("_+1" [100] 100) and
  quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and
  REL ("\<approx>")

(*>*)

section {* Introduction *}

text {*
  Regular languages are an important and well-understood subject in Computer
  Science with many beautiful theorems and many useful algorithms. There is a
  wide range of textbooks about this subject. Many of these textbooks, such as
  \cite{Kozen97}, are aimed at students and contain very detailed
  ``pencil-and-paper'' proofs. It seems natural to exercise theorem provers by
  formalising these theorems and by verifying formally the algorithms. There
  is however a problem: the typical approach to regular languages is to start
  with finite automata.

  
  



  Therefore instead of defining a regular language as being one where there exists an
  automata that regognises all of its strings, we define 

  \begin{definition}[A Regular Language]
  A language @{text A} is regular, if there is a regular expression that matches all
  strings of @{text "A"}.
  \end{definition}
  
  \noindent
  {\bf Contributions:} A proof of the Myhil-Nerode Theorem based on regular expressions. The 
  finiteness part of this theorem is proved using tagging-functions (which to our knowledge
  are novel in this context).
  
*}

section {* Preliminaries *}

text {*
  Strings in Isabelle/HOL are lists of characters and the
  \emph{empty string} is the empty list, written @{term "[]"}. \emph{Languages} are sets of 
  strings. The language containing all strings is written in Isabelle/HOL as @{term "UNIV::string set"}.
  The notation for the quotient of a language @{text A} according to a relation @{term REL} is
  @{term "A // REL"}. The concatenation of two languages is written @{term "A ;; B"}; a language 
  raised  tow the power $n$ is written @{term "A \<up> n"}. Both concepts are defined as

  \begin{center}
  @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]}
  \hspace{7mm}
  @{thm pow.simps(1)[THEN eq_reflection, where A1="A"]}
  \hspace{7mm}
  @{thm pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
  \end{center}

  \noindent
  where @{text "@"} is the usual list-append operation. The Kleene-star of a language @{text A}
  is defined as the union over all powers, namely @{thm Star_def}.
  

  Regular expressions are defined as the following datatype

  \begin{center}
  @{text r} @{text "::="}
  @{term NULL}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term EMPTY}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "CHAR c"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "SEQ r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "ALT r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
  @{term "STAR r"}
  \end{center}

  Central to our proof will be the solution of equational systems
  involving regular expressions. For this we will use the following ``reverse'' 
  version of Arden's lemma.

  \begin{lemma}[Reverse Arden's Lemma]\mbox{}\\
  If @{thm (prem 1) ardens_revised} then
  @{thm (lhs) ardens_revised} has the unique solution
  @{thm (rhs) ardens_revised}.
  \end{lemma}

  \begin{proof}
  For the right-to-left direction we assume @{thm (rhs) ardens_revised} and show
  that @{thm (lhs) ardens_revised} holds. From Lemma ??? we have @{term "A\<star> = {[]} \<union> A ;; A\<star>"},
  which is equal to @{term "A\<star> = {[]} \<union> A\<star> ;; A"}. Adding @{text B} to both 
  sides gives @{term "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"}, whose right-hand side
  is equal to @{term "(B ;; A\<star>) ;; A \<union> B"}. This completes this direction. 

  For the other direction we assume @{thm (lhs) ardens_revised}. By a simple induction
  on @{text n}, we can establish the property

  \begin{center}
  @{text "(*)"}\hspace{5mm} @{thm (concl) ardens_helper}
  \end{center}
  
  \noindent
  Using this property we can show that @{term "B ;; (A \<up> n) \<subseteq> X"} holds for
  all @{text n}. From this we can infer @{term "B ;; A\<star> \<subseteq> X"} using Lemma ???.
  For the inclusion in the other direction we assume a string @{text s}
  with length @{text k} is element in @{text X}. Since @{thm (prem 1) ardens_revised}
  we know that @{term "s \<notin> X ;; (A \<up> Suc k)"} since its length is only @{text k}
  (the strings in @{term "X ;; (A \<up> Suc k)"} are all longer). 
  From @{text "(*)"} it follows then that
  @{term s} must be element in @{term "(\<Union>m\<in>{0..k}. B ;; (A \<up> m))"}. This in turn
  implies that @{term s} is in @{term "(\<Union>n. B ;; (A \<up> n))"}. Using Lemma ??? this
  is equal to @{term "B ;; A\<star>"}, as we needed to show.\qed
  \end{proof}
*}

section {* Finite Partitions Imply Regularity of a Language *}

text {*
  \begin{theorem}
  Given a language @{text A}.
  @{thm[mode=IfThen] hard_direction[where Lang="A"]}
  \end{theorem}
*}

section {* Regular Expressions Generate Finitely Many Partitions *}

text {*

  \begin{theorem}
  Given @{text "r"} is a regular expressions, then @{thm rexp_imp_finite}.
  \end{theorem}  

  \begin{proof}
  By induction on the structure of @{text r}. The cases for @{const NULL}, @{const EMPTY}
  and @{const CHAR} are straightforward, because we can easily establish

  \begin{center}
  \begin{tabular}{l}
  @{thm quot_null_eq}\\
  @{thm quot_empty_subset}\\
  @{thm quot_char_subset}
  \end{tabular}
  \end{center}

  \end{proof}
*}


section {* Conclusion and Related Work *}

(*<*)
end
(*>*)
author	urbanc
	Wed, 02 Feb 2011 13:54:07 +0000
changeset 58	0d4d5bb321dc
parent 54	c19d2fc2cc69
child 59	fc35eb54fdc9
permissions	-rw-r--r--