regexp: changeset 48:61d9684a557a

--- a/Myhill.thy	Fri Jan 28 19:17:40 2011 +0000
+++ b/Myhill.thy	Sat Jan 29 11:41:17 2011 +0000
@@ -2,9 +2,9 @@
   imports Myhill_1
 begin
 
-section {* Direction: @{text "regular language \<Rightarrow>finite partition"} *}
+section {* Direction @{text "regular language \<Rightarrow>finite partition"} *}
 
-subsection {* The scheme for this direction *}
+subsection {* The scheme*}
 
 text {* 
   The following convenient notation @{text "x \<approx>Lang y"} means:
@@ -243,9 +243,9 @@
   qed
 qed
 
-subsection {* Lemmas for basic cases *}
+subsection {* The proof*}
 
-subsection {* The case for @{const "NULL"} *}
+subsubsection {* The case for @{const "NULL"} *}
 
 lemma quot_null_eq:
   shows "(UNIV // \<approx>{}) = ({UNIV}::lang set)"
@@ -256,7 +256,7 @@
 unfolding quot_null_eq by simp
 
 
-subsection {* The case for @{const "EMPTY"} *}
+subsubsection {* The case for @{const "EMPTY"} *}
 
 
 lemma quot_empty_subset:
@@ -283,7 +283,7 @@
 by (rule finite_subset[OF quot_empty_subset]) (simp)
 
 
-subsection {* The case for @{const "CHAR"} *}
+subsubsection {* The case for @{const "CHAR"} *}
 
 lemma quot_char_subset:
   "UNIV // (\<approx>{[c]}) \<subseteq> {{[]},{[c]}, UNIV - {[], [c]}}"
@@ -316,12 +316,13 @@
 
 
 
-subsection {* The case for @{text "SEQ"}*}
+subsubsection {* The case for @{text "SEQ"}*}
 
 definition 
   tag_str_SEQ :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang set)"
 where
-  "tag_str_SEQ L1 L2 = (\<lambda>x. (\<approx>L1 `` {x}, {(\<approx>L2 `` {x - xa}) | xa.  xa \<le> x \<and> xa \<in> L1}))"
+  "tag_str_SEQ L1 L2 = 
+     (\<lambda>x. (\<approx>L1 `` {x}, {(\<approx>L2 `` {x - xa}) | xa.  xa \<le> x \<and> xa \<in> L1}))"
 
 
 lemma append_seq_elim:
@@ -410,7 +411,7 @@
     by auto
 qed
 
-subsection {* The case for @{const ALT} *}
+subsubsection {* The case for @{const ALT} *}
 
 definition 
   tag_str_ALT :: "lang \<Rightarrow> lang \<Rightarrow> string \<Rightarrow> (lang \<times> lang)"
@@ -440,14 +441,14 @@
     by auto
 qed
 
-subsection {* The case for @{const "STAR"} *}
+subsubsection {* The case for @{const "STAR"} *}
 
 text {* 
   This turned out to be the trickiest case. 
   Any string @{text "x"} in language @{text "L\<^isub>1\<star>"}, 
   can be splited into a prefix @{text "xa \<in> L\<^isub>1\<star>"} and a suffix @{text "x - xa \<in> L\<^isub>1"}.
   For one such @{text "x"}, there can be many such splits. The tagging of @{text "x"} is then 
-  defined by collecting the @{text "L\<^isub>1"}-state of the suffixes from every possible split.
+  defined by collecting the @{text "L\<^isub>1"}-state of the suffixe from every possible split.
 *} 
 
 definition 
@@ -496,6 +497,7 @@
   the tagging function for case @{text "STAR"}.
   *}
 
+
 lemma tag_str_STAR_injI:
   fixes v w
   assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w"
@@ -531,6 +533,173 @@
         \end{minipage}
         *}
       case False
+      -- {* 
+        \begin{minipage}{0.9\textwidth}
+        Since @{text "x @ z \<in> L\<^isub>1\<star>"}, @{text "x"} can always be splited
+        by a prefix @{text "xa"} together with its suffix @{text "x - xa"}, such
+        that both @{text "xa"} and @{text "(x - xa) @ z"} are in @{text "L\<^isub>1\<star>"},
+        and there could be many such splittings.Therefore, the following set @{text "?S"} 
+        is nonempty, and finite as well:
+        \end{minipage}
+        *}
+      let ?S = "{xa. xa < x \<and> xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>}"
+      have "finite ?S"
+        by (rule_tac B = "{xa. xa < x}" in finite_subset, 
+          auto simp:finite_strict_prefix_set)
+      moreover have "?S \<noteq> {}" using False xz_in_star
+        by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def)
+      -- {* Since @{text "?S"} is finite, we can always single out the longest
+          and name it @{text "xa_max"}:
+          *}
+      ultimately have "\<exists> xa_max \<in> ?S. \<forall> xa \<in> ?S. length xa \<le> length xa_max" 
+        using finite_set_has_max by blast
+      then obtain xa_max 
+        where h1: "xa_max < x" 
+        and h2: "xa_max \<in> L\<^isub>1\<star>" 
+        and h3: "(x - xa_max) @ z \<in> L\<^isub>1\<star>" 
+        and h4:"\<forall> xa < x. xa \<in> L\<^isub>1\<star> \<and> (x - xa) @ z \<in> L\<^isub>1\<star>  
+                                     \<longrightarrow> length xa \<le> length xa_max"
+        by blast
+      -- {*
+          \begin{minipage}{0.9\textwidth}
+          By the equality of tags, the counterpart of @{text "xa_max"} among 
+          @{text "y"}-prefixes, named @{text "ya"}, can be found:
+          \end{minipage}
+          *}
+      obtain ya 
+        where h5: "ya < y" and h6: "ya \<in> L\<^isub>1\<star>" 
+        and eq_xya: "(x - xa_max) \<approx>L\<^isub>1 (y - ya)"
+      proof-
+        from tag_xy have "{\<approx>L\<^isub>1 `` {x - xa} |xa. xa < x \<and> xa \<in> L\<^isub>1\<star>} = 
+          {\<approx>L\<^isub>1 `` {y - xa} |xa. xa < y \<and> xa \<in> L\<^isub>1\<star>}" (is "?left = ?right")
+          by (auto simp:tag_str_STAR_def)
+        moreover have "\<approx>L\<^isub>1 `` {x - xa_max} \<in> ?left" using h1 h2 by auto
+        ultimately have "\<approx>L\<^isub>1 `` {x - xa_max} \<in> ?right" by simp
+        with prems show ?thesis apply 
+          (simp add:Image_def str_eq_rel_def str_eq_def) by blast
+      qed 
+      -- {*
+          \begin{minipage}{0.9\textwidth}
+          If the following proposition can be proved, then the @{text "?thesis"}:
+          @{text "y @ z \<in> L\<^isub>1\<star>"} is just a simple consequence.
+          \end{minipage}
+          *}
+      have "(y - ya) @ z \<in> L\<^isub>1\<star>" 
+      proof-
+        -- {* The idea is to split the suffix @{text "z"} into @{text "za"} and @{text "zb"}, 
+          such that: *}
+        obtain za zb where eq_zab: "z = za @ zb" 
+          and l_za: "(y - ya)@za \<in> L\<^isub>1" and ls_zb: "zb \<in> L\<^isub>1\<star>"
+        proof -
+          -- {* 
+            \begin{minipage}{0.9\textwidth}
+            Since @{text  "(x - xa_max) @ z"} is in @{text "L\<^isub>1\<star>"}, it can be split into
+            @{text "a"} and @{text "b"} such that:
+            \end{minipage}
+            *}
+          from h1 have "(x - xa_max) @ z \<noteq> []" 
+            by (auto simp:strict_prefix_def elim:prefixE)
+          from star_decom [OF h3 this]
+          obtain a b where a_in: "a \<in> L\<^isub>1" 
+            and a_neq: "a \<noteq> []" and b_in: "b \<in> L\<^isub>1\<star>" 
+            and ab_max: "(x - xa_max) @ z = a @ b" by blast
+          -- {* Now the candiates for @{text "za"} and @{text "zb"} are found:*}
+          let ?za = "a - (x - xa_max)" and ?zb = "b"
+          have pfx: "(x - xa_max) \<le> a" (is "?P1") 
+            and eq_z: "z = ?za @ ?zb" (is "?P2")
+          proof -
+            -- {* 
+              \begin{minipage}{0.9\textwidth}
+              Since @{text "(x - xa_max) @ z = a @ b"}, the string @{text "(x - xa_max) @ z"}
+              could be splited in two ways:
+              \end{minipage}
+              *}
+            have "((x - xa_max) \<le> a \<and> (a - (x - xa_max)) @ b = z) \<or> 
+              (a < (x - xa_max) \<and> ((x - xa_max) - a) @ z = b)" 
+              using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def)
+            moreover {
+              -- {* However, the undsired way can be refuted by absurdity: *}
+              assume np: "a < (x - xa_max)" 
+                and b_eqs: "((x - xa_max) - a) @ z = b"
+              have "False"
+              proof -
+                let ?xa_max' = "xa_max @ a"
+                have "?xa_max' < x" 
+                  using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) 
+                moreover have "?xa_max' \<in> L\<^isub>1\<star>" 
+                  using a_in h2 by (simp add:star_intro3) 
+                moreover have "(x - ?xa_max') @ z \<in> L\<^isub>1\<star>" 
+                  using b_eqs b_in np h1 by (simp add:diff_diff_appd)
+                moreover have "\<not> (length ?xa_max' \<le> length xa_max)" 
+                  using a_neq by simp
+                ultimately show ?thesis using h4 by blast
+              qed }
+            -- {* Now it can be shown that the splitting goes the way we desired. *}
+            ultimately show ?P1 and ?P2 by auto
+          qed
+          hence "(x - xa_max)@?za \<in> L\<^isub>1" using a_in by (auto elim:prefixE)
+          -- {* Now candidates @{text "?za"} and @{text "?zb"} have all the requred properteis. *}
+          with eq_xya have "(y - ya) @ ?za \<in> L\<^isub>1" 
+            by (auto simp:str_eq_def str_eq_rel_def)
+           with eq_z and b_in prems
+          show ?thesis by blast
+        qed
+        -- {* 
+            \begin{minipage}{0.9\textwidth}
+            From the properties of @{text "za"} and @{text "zb"} such obtained,
+              @{text "?thesis"} can be shown easily. 
+            \end{minipage}
+            *}
+        from step [OF l_za ls_zb]
+        have "((y - ya) @ za) @ zb \<in> L\<^isub>1\<star>" .
+        with eq_zab show ?thesis by simp
+      qed
+      with h5 h6 show ?thesis 
+        by (drule_tac star_intro1, auto simp:strict_prefix_def elim:prefixE)
+    qed
+  } 
+  -- {* By instantiating the reasoning pattern just derived for both directions:*} 
+  from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]]
+  -- {* The thesis is proved as a trival consequence: *} 
+    show  ?thesis by (unfold str_eq_def str_eq_rel_def, blast)
+qed
+
+
+lemma -- {* The oringal version with a poor readability*}
+  fixes v w
+  assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w"
+  shows "(v::string) \<approx>(L\<^isub>1\<star>) w"
+proof-
+    -- {* 
+    \begin{minipage}{0.9\textwidth}
+    According to the definition of @{text "\<approx>Lang"}, 
+    proving @{text "v \<approx>(L\<^isub>1\<star>) w"} amounts to
+    showing: for any string @{text "u"},
+    if @{text "v @ u \<in> (L\<^isub>1\<star>)"} then @{text "w @ u \<in> (L\<^isub>1\<star>)"} and vice versa.
+    The reasoning pattern for both directions are the same, as derived
+    in the following:
+    \end{minipage}
+    *}
+  { fix x y z
+    assume xz_in_star: "x @ z \<in> L\<^isub>1\<star>" 
+      and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y"
+    have "y @ z \<in> L\<^isub>1\<star>"
+    proof(cases "x = []")
+      -- {* 
+        The degenerated case when @{text "x"} is a null string is easy to prove:
+        *}
+      case True
+      with tag_xy have "y = []" 
+        by (auto simp:tag_str_STAR_def strict_prefix_def)
+      thus ?thesis using xz_in_star True by simp
+    next
+        -- {*
+        \begin{minipage}{0.9\textwidth}
+        The case when @{text "x"} is not null, and
+        @{text "x @ z"} is in @{text "L\<^isub>1\<star>"}, 
+        \end{minipage}
+        *}
+      case False
       obtain x_max 
         where h1: "x_max < x" 
         and h2: "x_max \<in> L\<^isub>1\<star>" 
@@ -621,6 +790,7 @@
     by auto
 qed
 
+subsubsection{* The conclusion *}
 
 lemma rexp_imp_finite:
   fixes r::"rexp"

--- a/Myhill_1.thy	Fri Jan 28 19:17:40 2011 +0000
+++ b/Myhill_1.thy	Sat Jan 29 11:41:17 2011 +0000
@@ -82,6 +82,87 @@
 apply (simp, (erule exE| erule conjE)+)
 by (rule_tac x = "s1 @ a" in exI, rule_tac x = b in exI, simp add:step)
 
+
+text {* The syntax of regular expressions is defined by the datatype @{text "rexp"}. *}
+datatype rexp =
+  NULL
+| EMPTY
+| CHAR char
+| SEQ rexp rexp
+| ALT rexp rexp
+| STAR rexp
+
+
+text {* 
+  The following @{text "L"} is an overloaded operator, where @{text "L(x)"} evaluates to 
+  the language represented by the syntactic object @{text "x"}.
+*}
+consts L:: "'a \<Rightarrow> string set"
+
+
+text {* 
+  The @{text "L(rexp)"} for regular expression @{text "rexp"} is defined by the 
+  following overloading function @{text "L_rexp"}.
+*}
+overloading L_rexp \<equiv> "L::  rexp \<Rightarrow> string set"
+begin
+fun
+  L_rexp :: "rexp \<Rightarrow> string set"
+where
+    "L_rexp (NULL) = {}"
+  | "L_rexp (EMPTY) = {[]}"
+  | "L_rexp (CHAR c) = {[c]}"
+  | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)"
+  | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
+  | "L_rexp (STAR r) = (L_rexp r)\<star>"
+end
+
+(* Just a technical lemma. *)
+lemma [simp]:
+  shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y"
+by simp
+
+text {*
+  @{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}.
+*}
+definition
+  str_eq_rel ("\<approx>_" [100] 100)
+where
+  "\<approx>Lang \<equiv> {(x, y).  (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}"
+
+text {* 
+  Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out 
+  those which contains strings from @{text "Lang"}.
+*}
+
+definition 
+   "finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}"
+
+text {* 
+  The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}.
+*}
+lemma lang_is_union_of_finals: 
+  "Lang = \<Union> finals(Lang)"
+proof 
+  show "Lang \<subseteq> \<Union> (finals Lang)"
+  proof
+    fix x
+    assume "x \<in> Lang"   
+    thus "x \<in> \<Union> (finals Lang)"
+      apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI)
+      by (auto simp:Image_def str_eq_rel_def)    
+  qed
+next
+  show "\<Union> (finals Lang) \<subseteq> Lang"
+    apply (clarsimp simp:finals_def str_eq_rel_def)
+    by (drule_tac x = "[]" in spec, auto)
+qed
+
+section {* Direction @{text "finite partition \<Rightarrow> regular language"}*}
+
+subsection {*
+  Ardens lemma
+  *}
 text {* Ardens lemma expressed at the level of language, rather than the level of regular expression. *}
 
 theorem ardens_revised:
@@ -144,40 +225,9 @@
   qed
 qed
 
-
-text {* The syntax of regular expressions is defined by the datatype @{text "rexp"}. *}
-datatype rexp =
-  NULL
-| EMPTY
-| CHAR char
-| SEQ rexp rexp
-| ALT rexp rexp
-| STAR rexp
-
-
-text {* 
-  The following @{text "L"} is an overloaded operator, where @{text "L(x)"} evaluates to 
-  the language represented by the syntactic object @{text "x"}.
-*}
-consts L:: "'a \<Rightarrow> string set"
-
-
-text {* 
-  The @{text "L(rexp)"} for regular expression @{text "rexp"} is defined by the 
-  following overloading function @{text "L_rexp"}.
-*}
-overloading L_rexp \<equiv> "L::  rexp \<Rightarrow> string set"
-begin
-fun
-  L_rexp :: "rexp \<Rightarrow> string set"
-where
-    "L_rexp (NULL) = {}"
-  | "L_rexp (EMPTY) = {[]}"
-  | "L_rexp (CHAR c) = {[c]}"
-  | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)"
-  | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
-  | "L_rexp (STAR r) = (L_rexp r)\<star>"
-end
+subsection {*
+  Defintions peculiar to this direction
+  *}
 
 text {*
   To obtain equational system out of finite set of equivalent classes, a fold operation
@@ -202,49 +252,6 @@
 apply (rule someI2_ex, erule finite_imp_fold_graph)
 by (erule fold_graph.induct, auto)
 
-(* Just a technical lemma. *)
-lemma [simp]:
-  shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y"
-by simp
-
-text {*
-  @{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}.
-*}
-definition
-  str_eq_rel ("\<approx>_" [100] 100)
-where
-  "\<approx>Lang \<equiv> {(x, y).  (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}"
-
-text {* 
-  Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out 
-  those which contains strings from @{text "Lang"}.
-*}
-
-definition 
-   "finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}"
-
-text {* 
-  The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}.
-*}
-lemma lang_is_union_of_finals: 
-  "Lang = \<Union> finals(Lang)"
-proof 
-  show "Lang \<subseteq> \<Union> (finals Lang)"
-  proof
-    fix x
-    assume "x \<in> Lang"   
-    thus "x \<in> \<Union> (finals Lang)"
-      apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI)
-      by (auto simp:Image_def str_eq_rel_def)    
-  qed
-next
-  show "\<Union> (finals Lang) \<subseteq> Lang"
-    apply (clarsimp simp:finals_def str_eq_rel_def)
-    by (drule_tac x = "[]" in spec, auto)
-qed
-
-section {* Direction @{text "finite partition \<Rightarrow> regular language"}*}
-
 text {* 
   The relationship between equivalent classes can be described by an
   equational system.

Binary file tphols-2011/myhill.pdf has changed

author	zhang
	Sat, 29 Jan 2011 11:41:17 +0000
changeset 48	61d9684a557a
parent 47	bea2466a6084
child 49	59936c012add

Myhill.thy		file \| annotate \| diff \| comparison \| revisions
Myhill_1.thy		file \| annotate \| diff \| comparison \| revisions
tphols-2011/myhill.pdf		file \| annotate \| diff \| comparison \| revisions