# HG changeset patch # User urbanc # Date 1297231786 0 # Node ID 1436fc451bb935c3c4f7ad3e4d3d31b050601ff7 # Parent 6a0efaabde1973c1a5f5e14d0bc0eee76510ef5d added something about Setalt and folds diff -r 6a0efaabde19 -r 1436fc451bb9 Myhill_1.thy --- a/Myhill_1.thy Wed Feb 09 04:54:23 2011 +0000 +++ b/Myhill_1.thy Wed Feb 09 06:09:46 2011 +0000 @@ -271,7 +271,7 @@ overloading L_rexp \ "L:: rexp \ lang" begin fun - L_rexp :: "rexp \ string set" + L_rexp :: "rexp \ lang" where "L_rexp (NULL) = {}" | "L_rexp (EMPTY) = {[]}" @@ -281,6 +281,7 @@ | "L_rexp (STAR r) = (L_rexp r)\" end + text {* ALT-combination of a set or regulare expressions *} abbreviation @@ -293,6 +294,7 @@ *} lemma folds_alt_simp [simp]: + fixes rs::"rexp set" assumes a: "finite rs" shows "L (\rs) = \ (L ` rs)" apply(rule set_eqI) diff -r 6a0efaabde19 -r 1436fc451bb9 Paper/Paper.thy --- a/Paper/Paper.thy Wed Feb 09 04:54:23 2011 +0000 +++ b/Paper/Paper.thy Wed Feb 09 06:09:46 2011 +0000 @@ -26,7 +26,8 @@ Lam ("\'(_')" [100] 100) and Trn ("_, _" [100, 100] 100) and EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and - transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) + transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and + Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) (*>*) @@ -116,7 +117,7 @@ \noindent On ``paper'' we can define the corresponding graph in terms of the disjoint - union of the state nodes. Unfortunately in HOL, the definition for disjoint + union of the state nodes. Unfortunately in HOL, the standard definition for disjoint union, namely % \begin{equation}\label{disjointunion} @@ -137,9 +138,9 @@ constructions, which are not pleasant to reason about. Functions are much better supported in Isabelle/HOL, but they still lead to similar - problems as with graphs. Composing two non-deterministic automata in parallel - poses still the problem of how to implement disjoint unions. Nipkow \cite{Nipkow98} - dismisses the option using identities, because it leads to messy proofs. He + problems as with graphs. Composing, for example, two non-deterministic automata in parallel + poses again the problem of how to implement disjoint unions. Nipkow \cite{Nipkow98} + dismisses the option of using identities, because it leads to ``messy proofs''. He opts for a variant of \eqref{disjointunion}, but writes \begin{quote} @@ -172,7 +173,7 @@ \end{definition} \noindent - The reason is that regular expressions, unlike graphs and matrices, can + The reason is that regular expressions, unlike graphs, matrices and functons, can be easily defined as inductive datatype. Consequently a corresponding reasoning infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation of regular expression matching based on derivatives \cite{OwensSlind08}. The purpose of this paper is to @@ -183,7 +184,9 @@ complementation, for regular languages.\smallskip \noindent - {\bf Contributions:} To our knowledge, our proof of the Myhill-Nerode theorem is the + {\bf Contributions:} + There is an extensive literature on regular languages. + To our knowledge, our proof of the Myhill-Nerode theorem is the first that is based on regular expressions, only. We prove the part of this theorem stating that a regular expression has only finitely many partitions using certain tagging-functions. Again to our best knowledge, these tagging functions have @@ -211,7 +214,7 @@ \noindent where @{text "@"} is the usual list-append operation. The Kleene-star of a language @{text A} is defined as the union over all powers, namely @{thm Star_def}. In the paper - we will often make use of the following properties. + we will make use of the following properties of these constructions. \begin{proposition}\label{langprops}\mbox{}\\ \begin{tabular}{@ {}ll@ {\hspace{10mm}}ll} @@ -221,12 +224,12 @@ \end{proposition} \noindent - We omit the proofs of these properties, but invite the reader to consult + We omit the proofs, but invite the reader to consult our formalisation.\footnote{Available at ???} The notation for the quotient of a language @{text A} according to an - equivalence relation @{term REL} is @{term "A // REL"}. We will write + equivalence relation @{term REL} is in Isabelle/HOL @{term "A // REL"}. We will write @{text "\x\\<^isub>\"} for the equivalence class defined as @{text "{y | y \ x}"}. @@ -274,7 +277,7 @@ \end{proof} \noindent - Regular expressions are defined as the following inductive datatype + Regular expressions are defined as the inductive datatype \begin{center} @{text r} @{text "::="} @@ -287,7 +290,7 @@ \end{center} \noindent - and the language matched by a regular expression is defined as: + and the language matched by a regular expression is defined as \begin{center} \begin{tabular}{c@ {\hspace{10mm}}c} @@ -308,7 +311,21 @@ \end{tabular} \end{center} + \noindent + Given a set or regular expressions @{text rs}, we will need the operation of generating + a regular expressions that matches all languages of @{text rs}. We only need the existence + of such an regular expressions therefore we use Isabelle's @{const "fold_graph"} and Hilbert's + @{text "\"} to define @{term "\rs"} which, roughly speaking, folds @{const ALT} over the + set @{text rs} with @{const NULL} for the empty set. We can prove that for finite sets @{text rs} + \begin{center} + @{thm (lhs) folds_alt_simp}@{text "= \ (\ ` rs)"} + \end{center} + + \noindent + holds. (whereby @{text "\ ` rs"} stands for the + image of the set @{text rs} under function @{text "\"}). + *} diff -r 6a0efaabde19 -r 1436fc451bb9 Paper/document/root.tex --- a/Paper/document/root.tex Wed Feb 09 04:54:23 2011 +0000 +++ b/Paper/document/root.tex Wed Feb 09 06:09:46 2011 +0000 @@ -10,6 +10,8 @@ \usepackage{times} \usepackage{proof} \usepackage{stmaryrd} +\usepackage{mathabx} + \urlstyle{rm} \isabellestyle{it} @@ -32,18 +34,18 @@ \maketitle \begin{abstract} -There are numerous textbooks on regular languages. Nearly all of them -introduce the subject by describing finite automata and -only mentioning on the side a connection with regular expressions. -Unfortunately, automata are a hassle for formalisations in HOL-based -theorem provers. The reason is that they need to be represented as graphs, -matrices or functions, none of which are inductive datatypes. Also -operations, such as disjoint unions of graphs, are not easily formalisiable -in HOL. In contrast, regular expressions can be defined conveniently -as datatype and a corresponding reasoning infrastructure comes for -free. We show in this paper that a central result from formal -language theory---the Myhill-Nerode theorem---can be recreated -using only regular expressions. +There are numerous textbooks on regular languages. Nearly all of them +introduce the subject by describing finite automata and only mentioning on the +side a connection with regular expressions. Unfortunately, automata are a +hassle for formalisations in HOL-based theorem provers. The reason is that +they need to be represented as graphs, matrices or functions, none of which +are inductive datatypes. Also convenient operations for disjoint unions of +graphs and functions are not easily formalisiable in HOL. In contrast, regular +expressions can be defined conveniently as datatype and a corresponding +reasoning infrastructure comes for free. We show in this paper that a central +result from formal language theory---the Myhill-Nerode theorem---can be +recreated using only regular expressions. + \end{abstract}