regexp: comparison Regular.thy

equal deleted inserted replaced

-:24083ffa7611
+:b04cc5e4e84c
+(* Author: Christian Urban, Xingyuan Zhang, Chunhan Wu *)
+theory Regular
+imports Main Folds
+begin
+section {* Preliminary definitions *}
+type_synonym lang = "string set"
+text {*  Sequential composition of two languages *}
+(* Seq A B, written infix as A \<cdot> B (right-associative, priority 100), *)
+(* is the set of all concatenations s1 @ s2 with s1 taken from A and s2 from B. *)
+definition
+Seq :: "lang \<Rightarrow> lang \<Rightarrow> lang" (infixr "\<cdot>" 100)
+where
+"A \<cdot> B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"
+text {* Some properties of operator @{text "\<cdot>"}. *}
+(* Congruence of \<cdot> in its right argument: equal right operands yield equal products. *)
+(* Rewriting with the assumption turns the goal into a reflexive equation. *)
+lemma seq_add_left:
+assumes a: "A = B"
+shows "C \<cdot> A = C \<cdot> B"
+by (simp only: a)
+(* Sequential composition distributes over a union in its left argument. *)
+(* After unfolding Seq_def the statement is a plain set equality solved by auto. *)
+lemma seq_union_distrib_right:
+shows "(A \<union> B) \<cdot> C = (A \<cdot> C) \<union> (B \<cdot> C)"
+unfolding Seq_def by auto
+(* Sequential composition distributes over a union in its right argument. *)
+(* After unfolding Seq_def the statement is a plain set equality solved by auto. *)
+lemma seq_union_distrib_left:
+shows "C \<cdot> (A \<union> B) = (C \<cdot> A) \<union> (C \<cdot> B)"
+unfolding Seq_def by  auto
+(* Introduction rule for \<cdot>: concatenating a member of A with a member of B *)
+(* gives a member of A \<cdot> B.  The definition is unfolded first; auto then *)
+(* supplies x and y as the two witnesses of the set comprehension. *)
+lemma seq_intro:
+assumes a: "x \<in> A" "y \<in> B"
+shows "x @ y \<in> A \<cdot> B "
+using a unfolding Seq_def by auto
+(* Sequential composition is associative. *)
+lemma seq_assoc:
+shows "(A \<cdot> B) \<cdot> C = A \<cdot> (B \<cdot> C)"
+unfolding Seq_def
+(* The goals left over by auto are closed by blast and by metis, the latter *)
+(* using associativity of list append (append_assoc). *)
+apply(auto)
+apply(blast)
+by (metis append_assoc)
+(* The language {[]} containing only the empty string is a right and left unit of \<cdot>. *)
+(* Both equations are declared as simp rules. *)
+lemma seq_empty [simp]:
+shows "A \<cdot> {[]} = A"
+and   "{[]} \<cdot> A = A"
+by (simp_all add: Seq_def)
+(* The empty language annihilates \<cdot> on either side. *)
+(* Both equations are declared as simp rules. *)
+lemma seq_null [simp]:
+shows "A \<cdot> {} = {}"
+and   "{} \<cdot> A = {}"
+by (simp_all add: Seq_def)
+text {* Power and Star of a language *}
+(* pow A n, written A \<up> n (left-associative, priority 100), is the n-fold *)
+(* sequential composition of A with itself, defined by recursion on n. *)
+fun
+pow :: "lang \<Rightarrow> nat \<Rightarrow> lang" (infixl "\<up>" 100)
+where
+(* zero factors: only the empty string *)
+"A \<up> 0 = {[]}"
+(* one more factor is composed on the left *)
+| "A \<up> (Suc n) =  A \<cdot> (A \<up> n)"
+(* Star A, written postfix as A\<star>, is the union of all powers A \<up> n over n. *)
+definition
+Star :: "lang \<Rightarrow> lang" ("_\<star>" [101] 102)
+where
+"A\<star> \<equiv> (\<Union>n. A \<up> n)"
+(* The empty string belongs to every starred language.  Declared as an intro rule. *)
+lemma star_start[intro]:
+shows "[] \<in> A\<star>"
+proof -
+(* [] is in the zeroth power, which is one of the sets in the union defining A\<star> *)
+have "[] \<in> A \<up> 0" by auto
+then show "[] \<in> A\<star>" unfolding Star_def by blast
+qed
+(* Prefixing a member of A\<star> with a member of A stays inside A\<star>. *)
+(* Declared as an intro rule. *)
+lemma star_step [intro]:
+assumes a: "s1 \<in> A"
+and     b: "s2 \<in> A\<star>"
+shows "s1 @ s2 \<in> A\<star>"
+proof -
+(* pick the power n that contains s2 *)
+from b obtain n where "s2 \<in> A \<up> n" unfolding Star_def by auto
+(* then s1 @ s2 lies in the next power *)
+then have "s1 @ s2 \<in> A \<up> (Suc n)" using a by (auto simp add: Seq_def)
+then show "s1 @ s2 \<in> A\<star>" unfolding Star_def by blast
+qed
+(* Induction principle for membership in A\<star>.  To show P x for x in A\<star> it suffices *)
+(* to show P [] (case "start") and that P is preserved when a member of A is put *)
+(* in front of a member of A\<star> that already satisfies P (case "step"). *)
+(* "consumes 1" makes the membership fact a the premise consumed by the rule. *)
+lemma star_induct[consumes 1, case_names start step]:
+assumes a: "x \<in> A\<star>"
+and     b: "P []"
+and     c: "\<And>s1 s2. \<lbrakk>s1 \<in> A; s2 \<in> A\<star>; P s2\<rbrakk> \<Longrightarrow> P (s1 @ s2)"
+shows "P x"
+proof -
+(* pick the power n that contains x, then induct on n with x generalised *)
+from a obtain n where "x \<in> A \<up> n" unfolding Star_def by auto
+then show "P x"
+by (induct n arbitrary: x)
+(auto intro!: b c simp add: Seq_def Star_def)
+qed
+(* A\<star> is closed under concatenation. *)
+(* Proved by star_induct on the first chained fact, i.e. on x. *)
+lemma star_intro1:
+assumes a: "x \<in> A\<star>"
+and     b: "y \<in> A\<star>"
+shows "x @ y \<in> A\<star>"
+using a b
+by (induct rule: star_induct) (auto)
+(* Every member of A is a member of A\<star>. *)
+(* star_step applied to y and the empty string (star_start) gives y @ [] in A\<star>; *)
+(* simp removes the trailing [] and closes the goal. *)
+lemma star_intro2:
+assumes a: "y \<in> A"
+shows "y \<in> A\<star>"
+using star_step[OF a star_start] by simp
+(* Appending a member of A to a member of A\<star> stays inside A\<star>. *)
+(* Follows by lifting y into A\<star> (star_intro2) and using closure under *)
+(* concatenation (star_intro1). *)
+lemma star_intro3:
+assumes a: "x \<in> A\<star>"
+and     b: "y \<in> A"
+shows "x @ y \<in> A\<star>"
+using a b by (blast intro: star_intro1 star_intro2)
+(* Unfolding equation for star: a string in A\<star> is either empty or a member of A *)
+(* followed by a member of A\<star>.  Proved as two set inclusions. *)
+lemma star_cases:
+shows "A\<star> =  {[]} \<union> A \<cdot> A\<star>"
+proof
+(* left-to-right inclusion, pointwise by star_induct *)
+{ fix x
+have "x \<in> A\<star> \<Longrightarrow> x \<in> {[]} \<union> A \<cdot> A\<star>"
+unfolding Seq_def
+by (induct rule: star_induct) (auto)
+}
+then show "A\<star> \<subseteq> {[]} \<union> A \<cdot> A\<star>" by auto
+next
+(* right-to-left inclusion, by auto after unfolding Seq_def *)
+show "{[]} \<union> A \<cdot> A\<star> \<subseteq> A\<star>"
+unfolding Seq_def by auto
+qed
+(* A non-empty string in A\<star> splits into a non-empty prefix from A *)
+(* and a remainder that is again in A\<star>. *)
+(* Proved by star_induct; blast is applied repeatedly to the resulting cases. *)
+lemma star_decom:
+assumes a: "x \<in> A\<star>" "x \<noteq> []"
+shows "\<exists>a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> A \<and> b \<in> A\<star>"
+using a
+by (induct rule: star_induct) (blast)+
+(* Composition with B on the left can be moved inside the union of all powers of A. *)
+lemma seq_Union_left:
+shows "B \<cdot> (\<Union>n. A \<up> n) = (\<Union>n. B \<cdot> (A \<up> n))"
+unfolding Seq_def by auto
+(* Composition with B on the right can be moved inside the union of all powers of A. *)
+lemma seq_Union_right:
+shows "(\<Union>n. A \<up> n) \<cdot> B = (\<Union>n. (A \<up> n) \<cdot> B)"
+unfolding Seq_def by auto
+(* A commutes with each of its own powers under \<cdot>. *)
+(* Induction on n; associativity is used right-to-left (seq_assoc[symmetric]). *)
+lemma seq_pow_comm:
+shows "A \<cdot> (A \<up> n) = (A \<up> n) \<cdot> A"
+by (induct n) (simp_all add: seq_assoc[symmetric])
+(* A commutes with its own star under \<cdot>. *)
+lemma seq_star_comm:
+shows "A \<cdot> A\<star> = A\<star> \<cdot> A"
+(* push the composition inside the union on the left-hand side ... *)
+unfolding Star_def seq_Union_left
+(* ... commute A with each power, and push it inside the union on the right-hand side *)
+unfolding seq_pow_comm seq_Union_right
+by simp
+text {* Two lemmas about the length of strings in @{text "A \<up> n"} *}
+(* If A does not contain the empty string, then every string in the (n+1)-th *)
+(* power of A is longer than n. *)
+lemma pow_length:
+assumes a: "[] \<notin> A"
+and     b: "s \<in> A \<up> Suc n"
+shows "n < length s"
+using b
+(* induction on n, with the string s generalised *)
+proof (induct n arbitrary: s)
+case 0
+(* base case: s is in the first power, so by assumption a it is not empty *)
+have "s \<in> A \<up> Suc 0" by fact
+with a have "s \<noteq> []" by auto
+then show "0 < length s" by auto
+next
+case (Suc n)
+have ih: "\<And>s. s \<in> A \<up> Suc n \<Longrightarrow> n < length s" by fact
+have "s \<in> A \<up> Suc (Suc n)" by fact
+(* split s into a first factor s1 from A and a rest s2 from the (n+1)-th power *)
+then obtain s1 s2 where eq: "s = s1 @ s2" and *: "s1 \<in> A" and **: "s2 \<in> A \<up> Suc n"
+by (auto simp add: Seq_def)
+(* s2 is longer than n by the induction hypothesis; s1 contributes at least one character *)
+from ih ** have "n < length s2" by simp
+moreover have "0 < length s1" using * a by auto
+ultimately show "Suc n < length s" unfolding eq
+by (simp only: length_append)
+qed
+(* The length bound of pow_length also holds when an arbitrary language B *)
+(* is composed in front of the (n+1)-th power of A. *)
+lemma seq_pow_length:
+assumes a: "[] \<notin> A"
+and     b: "s \<in> B \<cdot> (A \<up> Suc n)"
+shows "n < length s"
+proof -
+(* only the suffix s2 from the power of A is needed for the bound *)
+from b obtain s1 s2 where eq: "s = s1 @ s2" and *: "s2 \<in> A \<up> Suc n"
+unfolding Seq_def by auto
+from * have " n < length s2" by (rule pow_length[OF a])
+then show "n < length s" using eq by simp
+qed
+section {* A modified version of Arden's lemma *}
+text {*  A helper lemma for Arden *}
+(* Unrolls the equation X = X \<cdot> A \<union> B: for every n, X equals X composed with the *)
+(* (n+1)-th power of A, united with B composed with each power of A from 0 to n. *)
+lemma arden_helper:
+assumes eq: "X = X \<cdot> A \<union> B"
+shows "X = X \<cdot> (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B \<cdot> (A \<up> m))"
+proof (induct n)
+case 0
+(* for n = 0 the statement simplifies to the assumed equation *)
+show "X = X \<cdot> (A \<up> Suc 0) \<union> (\<Union>(m::nat)\<in>{0..0}. B \<cdot> (A \<up> m))"
+using eq by simp
+next
+case (Suc n)
+have ih: "X = X \<cdot> (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B \<cdot> (A \<up> m))" by fact
+(* replace the leading X by X \<cdot> A \<union> B using the assumed equation *)
+also have "\<dots> = (X \<cdot> A \<union> B) \<cdot> (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B \<cdot> (A \<up> m))" using eq by simp
+(* distribute over the union and re-associate *)
+also have "\<dots> = X \<cdot> (A \<up> Suc (Suc n)) \<union> (B \<cdot> (A \<up> Suc n)) \<union> (\<Union>m\<in>{0..n}. B \<cdot> (A \<up> m))"
+by (simp add: seq_union_distrib_right seq_assoc)
+(* absorb the new term into the indexed union by extending the range to Suc n *)
+also have "\<dots> = X \<cdot> (A \<up> Suc (Suc n)) \<union> (\<Union>m\<in>{0..Suc n}. B \<cdot> (A \<up> m))"
+by (auto simp add: le_Suc_eq)
+finally show "X = X \<cdot> (A \<up> Suc (Suc n)) \<union> (\<Union>m\<in>{0..Suc n}. B \<cdot> (A \<up> m))" .
+qed
+(* Arden's lemma in the form used here: if A does not contain the empty string, *)
+(* then X satisfies X = X \<cdot> A \<union> B exactly when X = B \<cdot> A\<star>. *)
+theorem arden:
+assumes nemp: "[] \<notin> A"
+shows "X = X \<cdot> A \<union> B \<longleftrightarrow> X = B \<cdot> A\<star>"
+proof
+(* Right-to-left direction: B \<cdot> A\<star> satisfies the equation. *)
+assume eq: "X = B \<cdot> A\<star>"
+(* star_cases with the factor A moved to the right via seq_star_comm *)
+have "A\<star> = {[]} \<union> A\<star> \<cdot> A"
+unfolding seq_star_comm[symmetric]
+by (rule star_cases)
+then have "B \<cdot> A\<star> = B \<cdot> ({[]} \<union> A\<star> \<cdot> A)"
+by (rule seq_add_left)
+also have "\<dots> = B \<union> B \<cdot> (A\<star> \<cdot> A)"
+unfolding seq_union_distrib_left by simp
+also have "\<dots> = B \<union> (B \<cdot> A\<star>) \<cdot> A"
+by (simp only: seq_assoc)
+finally show "X = X \<cdot> A \<union> B"
+using eq by blast
+next
+(* Left-to-right direction: any solution X equals B \<cdot> A\<star>, shown as two inclusions. *)
+assume eq: "X = X \<cdot> A \<union> B"
+(* First inclusion: each B \<cdot> (A \<up> n) occurs in the unrolled equation, hence lies in X. *)
+{ fix n::nat
+have "B \<cdot> (A \<up> n) \<subseteq> X" using arden_helper[OF eq, of "n"] by auto }
+then have "B \<cdot> A\<star> \<subseteq> X"
+unfolding Seq_def Star_def UNION_def by auto
+moreover
+(* Second inclusion: unroll the equation as many times as s is long. *)
+{ fix s::string
+obtain k where "k = length s" by auto
+(* by seq_pow_length, members of X \<cdot> (A \<up> Suc k) are longer than k, so s is not among them *)
+then have not_in: "s \<notin> X \<cdot> (A \<up> Suc k)"
+using seq_pow_length[OF nemp] by blast
+assume "s \<in> X"
+then have "s \<in> X \<cdot> (A \<up> Suc k) \<union> (\<Union>m\<in>{0..k}. B \<cdot> (A \<up> m))"
+using arden_helper[OF eq, of "k"] by auto
+(* hence s must come from the bounded union *)
+then have "s \<in> (\<Union>m\<in>{0..k}. B \<cdot> (A \<up> m))" using not_in by auto
+moreover
+have "(\<Union>m\<in>{0..k}. B \<cdot> (A \<up> m)) \<subseteq> (\<Union>n. B \<cdot> (A \<up> n))" by auto
+ultimately
+have "s \<in> B \<cdot> A\<star>"
+unfolding seq_Union_left Star_def by auto }
+then have "X \<subseteq> B \<cdot> A\<star>" by auto
+ultimately
+show "X = B \<cdot> A\<star>" by simp
+qed
+section {* Regular Expressions *}
+(* Syntax of regular expressions.  The language denoted by each constructor *)
+(* is given by the function L_rexp. *)
+datatype rexp =
+NULL
+| EMPTY
+| CHAR char
+| SEQ rexp rexp
+| ALT rexp rexp
+| STAR rexp
+(* L_rexp r is the language denoted by the regular expression r, *)
+(* defined by structural recursion over r. *)
+fun
+L_rexp :: "rexp \<Rightarrow> lang"
+where
+(* NULL denotes the empty language, EMPTY the language containing only the empty string *)
+"L_rexp (NULL) = {}"
+| "L_rexp (EMPTY) = {[]}"
+(* CHAR c denotes the language containing only the one-character string [c] *)
+| "L_rexp (CHAR c) = {[c]}"
+(* SEQ, ALT and STAR map to sequential composition, union and Star of languages *)
+| "L_rexp (SEQ r1 r2) = (L_rexp r1) \<cdot> (L_rexp r2)"
+| "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
+| "L_rexp (STAR r) = (L_rexp r)\<star>"
+text {* ALT-combination for a set of regular expressions *}
+(* Setalt A, written as A prefixed by \<Uplus>, abbreviates folds ALT NULL A. *)
+(* NOTE(review): folds is not defined in this file; presumably it comes from the *)
+(* imported theory Folds.  Confirm its definition there. *)
+abbreviation
+Setalt  ("\<Uplus>_" [1000] 999)
+where
+"\<Uplus>A \<equiv> folds ALT NULL A"
+text {*
+For finite sets, @{term L_rexp} maps @{term Setalt} to the union of the languages of the members.
+*}
+(* For a finite set rs of regular expressions, the language of the ALT-combination *)
+(* of rs is the union of the languages of the members of rs.  Declared as a simp rule. *)
+(* NOTE(review): the proof unfolds folds_def, which is not defined in this file; *)
+(* the use of someI2_ex and fold_graph suggests folds is defined by a choice over *)
+(* fold_graph.  Confirm against theory Folds. *)
+lemma folds_alt_simp [simp]:
+fixes rs::"rexp set"
+assumes a: "finite rs"
+shows "L_rexp (\<Uplus>rs) = \<Union> (L_rexp ` rs)"
+unfolding folds_def
+apply(rule set_eqI)
+apply(rule someI2_ex)
+(* finiteness of rs (assumption a) is passed to finite_imp_fold_graph *)
+apply(rule_tac finite_imp_fold_graph[OF a])
+(* induction over the fold_graph derivation; auto closes the resulting cases *)
+apply(erule fold_graph.induct)
+apply(auto)
+done
+end