regexp: changeset 154:7c68b9ad4486

--- a/Matcher.thy	Wed Apr 06 08:18:23 2011 +0000
+++ b/Matcher.thy	Tue Apr 19 02:19:56 2011 +0000
@@ -2,30 +2,107 @@
   imports "Main" 
 begin
 
+term "TYPE (nat * int)"
+term "TYPE ('a)"
+
+definition
+  P:: "'a itself \<Rightarrow> bool"
+where
+  "P (TYPE ('a)) \<equiv> ((\<lambda>x. (x::'a)) = (\<lambda>x. x))"
+
 
 section {* Sequential Composition of Sets *}
 
-fun
-  lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
+definition
+  Seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
 where 
-  "L1 ;; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"
+  "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
+
+
+text {* Two Simple Properties about Sequential Composition *}
+
+lemma seq_empty [simp]:
+  shows "A ;; {[]} = A"
+  and   "{[]} ;; A = A"
+by (simp_all add: Seq_def)
+
+lemma seq_null [simp]:
+  shows "A ;; {} = {}"
+  and   "{} ;; A = {}"
+by (simp_all add: Seq_def)
 
 
 section {* Kleene Star for Sets *}
 
 inductive_set
   Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
-  for L :: "string set"
+  for A :: "string set"
 where
-  start[intro]: "[] \<in> L\<star>"
-| step[intro]:  "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"
+  start[intro]: "[] \<in> A\<star>"
+| step[intro]:  "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"
 
 
-text {* A standard property of Star *}
+text {* A Standard Property of Star *}
+
+lemma star_cases:
+  shows "A\<star> = {[]} \<union> A ;; A\<star>"
+unfolding Seq_def
+by (auto) (metis Star.simps)
+
+lemma star_decomp: 
+  assumes a: "c # x \<in> A\<star>" 
+  shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>"
+using a
+by (induct x\<equiv>"c # x" rule: Star.induct) 
+   (auto simp add: append_eq_Cons_conv)
+
+section {* Left-Quotient of a Set *}
+
+definition
+  Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
+where
+  "Der c A \<equiv> {s. [c] @ s \<in> A}"
+
+lemma Der_null [simp]:
+  shows "Der c {} = {}"
+unfolding Der_def
+by auto
+
+lemma Der_empty [simp]:
+  shows "Der c {[]} = {}"
+unfolding Der_def
+by auto
 
-lemma lang_star_cases:
-  shows "L\<star> =  {[]} \<union> L ;; L\<star>"
-by (auto) (metis Star.simps)
+lemma Der_char [simp]:
+  shows "Der c {[d]} = (if c = d then {[]} else {})"
+unfolding Der_def
+by auto
+
+lemma Der_union [simp]:
+  shows "Der c (A \<union> B) = Der c A \<union> Der c B"
+unfolding Der_def
+by auto
+
+lemma Der_seq [simp]:
+  shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
+unfolding Der_def Seq_def
+by (auto simp add: Cons_eq_append_conv)
+
+lemma Der_star [simp]:
+  shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
+proof -    
+  have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
+    by (simp only: star_cases[symmetric])
+  also have "... = Der c (A ;; A\<star>)"
+    by (simp only: Der_union Der_empty) (simp)
+  also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
+    by simp
+  also have "... =  (Der c A) ;; A\<star>"
+    unfolding Seq_def Der_def
+    by (auto dest: star_decomp)
+  finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
+qed
+
 
 section {* Regular Expressions *}
 
@@ -37,39 +114,6 @@
 | ALT rexp rexp
 | STAR rexp
 
-types lang = "string set" 
-
-definition
-  DERIV :: "string \<Rightarrow> lang \<Rightarrow> lang"
-where
-  "DERIV s A \<equiv> {s'. s @ s' \<in> A}"
-
-definition
-  delta :: "lang \<Rightarrow> lang"
-where
-  "delta A = (if [] \<in> A then {[]} else {})"
-
-lemma
-  "DERIV s (P ; Q) = \<Union>{(delta (DERIV s1 P)) ; (DERIV s2 Q) | s1 s2. s = s1 @ s2}"
-apply(auto)
-
-fun
-  D1 :: "string \<Rightarrow> lang \<Rightarrow> lang"
-where
-  "D1 [] A = A"
-| "D1 (c # s) A = DERIV [c] (D1 s A)"
-
-fun
-  D2 :: "string \<Rightarrow> lang \<Rightarrow> lang"
-where
-  "D2 [] A = A"
-| "D2 (c # s) A = DERIV [c] (D1 s A)"
-
-function
-  D
-where
-  "D s P Q = P ;; Q"
-| "D (c#s) = 
 
 section {* Semantics of Regular Expressions *}
  
@@ -79,11 +123,10 @@
   "L (NULL) = {}"
 | "L (EMPTY) = {[]}"
 | "L (CHAR c) = {[c]}"
-| "L (SEQ r1 r2) = (L r1) ; (L r2)"
+| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
 | "L (STAR r) = (L r)\<star>"
 
-
 section {* The Matcher *}
 
 fun
@@ -113,64 +156,41 @@
 | "derivative (c # s) r = derivative s (der c r)"
 
 fun
-  matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"
+  matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool"
 where
-  "matches r s = nullable (derivative s r)"
+  "matcher r s = nullable (derivative s r)"
 
 
 section {* Correctness Proof of the Matcher *}
 
 lemma nullable_correctness:
-  shows "nullable r \<longleftrightarrow> [] \<in> L r"
-by (induct r) (auto) 
+  shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
+by (induct r) (auto simp add: Seq_def) 
+
 
 lemma der_correctness:
-  shows "s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r"
-proof (induct r arbitrary: s)
-  case (SEQ r1 r2 s)
-  have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c # s \<in> L r1" by fact
-  have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c # s \<in> L r2" by fact
-  show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c # s \<in> L (SEQ r1 r2)" 
-    using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)
-next
-  case (STAR r s)
-  have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" by fact
-  show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c # s \<in> L (STAR r)"
-  proof
-    assume "s \<in> L (der c (STAR r))"
-    then have "s \<in> L (der c r) ; L r\<star>" by simp
-    then have "c # s \<in> L r ; (L r)\<star>" 
-      by (auto simp add: ih Cons_eq_append_conv)
-    then have "c # s \<in> (L r)\<star>" using lang_star_cases by auto
-    then show "c # s \<in> L (STAR r)" by simp
-  next
-    assume "c # s \<in> L (STAR r)"
-    then have "c # s \<in> (L r)\<star>" by simp
-    then have "s \<in> L (der c r); (L r)\<star>"
-      by (induct x\<equiv>"c # s" rule: Star.induct)
-         (auto simp add: ih append_eq_Cons_conv)
-    then show "s \<in> L (der c (STAR r))" by simp  
-  qed
-qed (simp_all)
+  shows "L (der c r) = Der c (L r)"
+by (induct r) 
+   (simp_all add: nullable_correctness)
 
-lemma matches_correctness:
-  shows "matches r s \<longleftrightarrow> s \<in> L r"
+lemma matcher_correctness:
+  shows "matcher r s \<longleftrightarrow> s \<in> L r"
 by (induct s arbitrary: r)
-   (simp_all add: nullable_correctness der_correctness)
+   (simp_all add: nullable_correctness der_correctness Der_def)
 
 section {* Examples *}
 
-value "matches NULL []"
-value "matches (CHAR (CHR ''a'')) [CHR ''a'']"
-value "matches (CHAR a) [a,a]"
-value "matches (STAR (CHAR a)) []"
-value "matches (STAR (CHAR a))  [a,a]"
-value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
-value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
+value "matcher NULL []"
+value "matcher (CHAR (CHR ''a'')) [CHR ''a'']"
+value "matcher (CHAR a) [a,a]"
+value "matcher (STAR (CHAR a)) []"
+value "matcher (STAR (CHAR a))  [a,a]"
+value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
+value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
 
 section {* Incorrect Matcher - fun-definition rejected *}
 
-function 
+fun
   match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
 where
   "match [] [] = True"
@@ -182,8 +202,6 @@
 | "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
 | "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
 | "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"
-apply(pat_completeness)
-apply(auto)
-done
+
 
 end    
\ No newline at end of file

--- a/Paper/Paper.thy	Wed Apr 06 08:18:23 2011 +0000
+++ b/Paper/Paper.thy	Tue Apr 19 02:19:56 2011 +0000
@@ -69,7 +69,7 @@
   formalising such reasoning in a HOL-based theorem prover, in our case
   Isabelle/HOL. Automata are built up from states and transitions that 
   need to be represented as graphs, matrices or functions, none
-  of which can be defined as inductive datatype. 
+  of which can be defined as an inductive datatype. 
 
   In case of graphs and matrices, this means we have to build our own
   reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor
@@ -145,7 +145,7 @@
   changes the type---the disjoint union is not a set, but a set of pairs. 
   Using this definition for disjoint union means we do not have a single type for automata
   and hence will not be able to state certain properties about \emph{all}
-  automata, since there is no type quantification available in HOL. An
+  automata, since there is no type quantification available in HOL (unlike in Coq, for example). An
   alternative, which provides us with a single type for automata, is to give every 
   state node an identity, for example a natural
   number, and then be careful to rename these identities apart whenever
@@ -377,7 +377,7 @@
   The key definition in the Myhill-Nerode theorem is the
   \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two 
   strings are related, provided there is no distinguishing extension in this
-  language. This can be defined as tertiary relation.
+  language. This can be defined as a tertiary relation.
 
   \begin{definition}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and
   @{text y} are Myhill-Nerode related provided
@@ -573,7 +573,7 @@
   finally we append this regular expression to @{text rhs'}. It can be easily seen 
   that this operation mimics Arden's lemma on the level of equations. To ensure
   the non-emptiness condition of Arden's lemma we say that a right-hand side is
-  \emph{ardenable} provided
+  @{text ardenable} provided
 
   \begin{center}
   @{thm ardenable_def}
@@ -627,7 +627,7 @@
   \noindent
   Finally, we can define how an equational system should be solved. For this 
   we will need to iterate the process of eliminating equations until only one equation
-  will be left in the system. However, we not just want to have any equation
+  will be left in the system. However, we do not just want to have any equation
   as being the last one, but the one involving the equivalence class for 
   which we want to calculate the regular 
   expression. Let us suppose this equivalence class is @{text X}. 
@@ -682,7 +682,7 @@
   @{text "P"} involving @{const Solve}. For this we have to discharge the following
   proof obligations: first the
   initial equational system satisfies the invariant; second the iteration
-  step @{text "Iter"} preserves the the invariant as long as the condition @{term Cond} holds;
+  step @{text "Iter"} preserves the invariant as long as the condition @{term Cond} holds;
   third @{text "Iter"} decreases the termination order, and fourth that
   once the condition does not hold anymore then the property @{text P} must hold.
 
@@ -709,7 +709,7 @@
   and number of terms in each equation); the second makes sure the `meaning' of the 
   equations is preserved under our transformations. The other properties are a bit more
   technical, but are needed to get our proof through. Distinctness states that every
-  equation in the system is distinct. Ardenable ensures that we can always
+  equation in the system is distinct. @{text Ardenable} ensures that we can always
   apply the arden operation. 
   The last property states that every @{text rhs} can only contain equivalence classes
   for which there is an equation. Therefore @{text lhss} is just the set containing 
@@ -729,7 +729,7 @@
   \begin{proof}
   Finiteness is given by the assumption and the way how we set up the 
   initial equational system. Soundness is proved in Lem.~\ref{inv}. Distinctness
-  follows from the fact that the equivalence classes are disjoint. The ardenable
+  follows from the fact that the equivalence classes are disjoint. The @{text ardenable}
   property also follows from the setup of the initial equational system, as does 
   validity.\qed
   \end{proof}
@@ -756,7 +756,7 @@
   Finiteness is straightforward, as @{const Subst} and @{const Arden} operations 
   keep the equational system finite. These operations also preserve soundness 
   and distinctness (we proved soundness for @{const Arden} in Lem.~\ref{ardenable}).
-  The property ardenable is clearly preserved because the append-operation
+  The property @{text ardenable} is clearly preserved because the append-operation
   cannot make a regular expression to match the empty string. Validity is
   given because @{const Arden} removes an equivalence class from @{text yrhs}
   and then @{const Subst_all} removes @{text Y} from the equational system.
@@ -832,7 +832,7 @@
   we can infer that @{term "rhss rhs \<subseteq> {X}"} and because the arden operation
   removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}.
   This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}.
-  So we can collect those (finitely many) regular expressions and have @{term "X = L (\<Uplus>rs)"}.
+  So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = L (\<Uplus>rs)"}.
   With this we can conclude the proof.\qed
   \end{proof}
 
@@ -893,7 +893,7 @@
   \end{proof}
 
   \noindent
-  Much more interesting, however, are the inductive cases. They seem hard to be solved 
+  Much more interesting, however, are the inductive cases. They seem hard to solve 
   directly. The reader is invited to try. 
 
   Our proof will rely on some
@@ -1274,8 +1274,8 @@
   exists a regular expression that matches all of its strings. Regular
   expressions can conveniently be defined as a datatype in HOL-based theorem
   provers. For us it was therefore interesting to find out how far we can push
-  this point of view. We have established both directions of the Myhill-Nerode
-  theorem.
+  this point of view. We have established in Isabelle/HOL both directions 
+  of the Myhill-Nerode theorem.
   %
   \begin{theorem}[The Myhill-Nerode Theorem]\mbox{}\\
   A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}.
@@ -1352,7 +1352,8 @@
   We briefly considered using the method Brzozowski presented in the Appendix
   of~\cite{Brzozowski64} in order to prove the second direction of the
   Myhill-Nerode theorem. There he calculates the derivatives for regular
-  expressions and shows that there can be only finitely many of them. We could
+  expressions and shows that there can be only finitely many of them (if regarded equal 
+  modulo ACI). We could
   have used as the tag of a string @{text s} the derivative of a regular expression
   generated with respect to @{text s}.  Using the fact that two strings are
   Myhill-Nerode related whenever their derivative is the same, together with

--- a/Paper/document/root.tex	Wed Apr 06 08:18:23 2011 +0000
+++ b/Paper/document/root.tex	Tue Apr 19 02:19:56 2011 +0000
@@ -50,7 +50,7 @@
 they need to be represented as graphs, matrices or functions, none of which
 are inductive datatypes. Also convenient operations for disjoint unions of
 graphs and functions are not easily formalisiable in HOL. In contrast, regular
-expressions can be defined conveniently as datatype and a corresponding
+expressions can be defined conveniently as a datatype and a corresponding
 reasoning infrastructure comes for free. We show in this paper that a central
 result from formal language theory---the Myhill-Nerode theorem---can be
 recreated using only regular expressions.
@@ -60,7 +60,7 @@
 
 \input{session}
 
-\mbox{}\\[-10mm]
+%%\mbox{}\\[-10mm]
 \bibliographystyle{plain}
 \bibliography{root}

author	urbanc
	Tue, 19 Apr 2011 02:19:56 +0000
changeset 154	7c68b9ad4486
parent 153	e7e4e490326b
child 155	d8d1e1f53d6e

Matcher.thy		file \| annotate \| diff \| comparison \| revisions
Paper/Paper.thy		file \| annotate \| diff \| comparison \| revisions
Paper/document/root.tex		file \| annotate \| diff \| comparison \| revisions