A formal presentation of automaton and Myhill-Nerrode theorem added to serve as a basis for further discussion
theory "RegSet" imports "Main" begintext {* Sequential composition of sets *}definition lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100)where "L1 ; L2 = {s1@s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"section {* Kleene star for sets *}inductive_set Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102) for L :: "string set"where start[intro]: "[] \<in> L\<star>"| step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"text {* A standard property of star *}lemma lang_star_cases: shows "L\<star> = {[]} \<union> L ; L\<star>"proof { fix x have "x \<in> L\<star> \<Longrightarrow> x \<in> {[]} \<union> L ; L\<star>" unfolding lang_seq_def by (induct rule: Star.induct) (auto) } then show "L\<star> \<subseteq> {[]} \<union> L ; L\<star>" by autonext show "{[]} \<union> L ; L\<star> \<subseteq> L\<star>" unfolding lang_seq_def by autoqedlemma lang_star_cases2: shows "[] \<notin> L \<Longrightarrow> L\<star> - {[]} = L ; L\<star>"by (subst lang_star_cases) (simp add: lang_seq_def)section {* Regular Expressions *}datatype rexp = NULL| EMPTY| CHAR char| SEQ rexp rexp| ALT rexp rexp| STAR rexpsection {* Semantics of Regular Expressions *}fun L :: "rexp \<Rightarrow> string set"where "L (NULL) = {}"| "L (EMPTY) = {[]}"| "L (CHAR c) = {[c]}"| "L (SEQ r1 r2) = (L r1) ; (L r2)"| "L (ALT r1 r2) = (L r1) \<union> (L r2)"| "L (STAR r) = (L r)\<star>"abbreviation CUNIV :: "string set"where "CUNIV \<equiv> (\<lambda>x. [x]) ` (UNIV::char set)"lemma CUNIV_star_minus: "(CUNIV\<star> - {[c]}) = {[]} \<union> (CUNIV - {[c]}; (CUNIV\<star>))"apply(subst lang_star_cases)apply(simp add: lang_seq_def)oopslemma string_in_CUNIV: shows "s \<in> CUNIV\<star>"proof (induct s) case Nil show "[] \<in> CUNIV\<star>" by (rule start)next case (Cons c s) have "[c] \<in> CUNIV" by simp moreover have "s \<in> CUNIV\<star>" by fact ultimately have "[c] @ s \<in> CUNIV\<star>" by (rule step) then show "c # s \<in> CUNIV\<star>" by simpqedlemma UNIV_CUNIV_star: shows "UNIV = CUNIV\<star>"using string_in_CUNIVby (auto)abbreviation reg :: "string set => bool"where "reg L1 \<equiv> (\<exists>r. L r = L1)"lemma reg_null [intro]: shows "reg {}"by (metis L.simps(1))lemma reg_empty [intro]: shows "reg {[]}"by (metis L.simps(2))lemma reg_star [intro]: shows "reg L1 \<Longrightarrow> reg (L1\<star>)"by (metis L.simps(6))lemma reg_seq [intro]: assumes a: "reg L1" "reg L2" shows "reg (L1 ; L2)"using aby (metis L.simps(4)) lemma reg_union [intro]: assumes a: "reg L1" "reg L2" shows "reg (L1 \<union> L2)"using aby (metis L.simps(5)) lemma reg_string [intro]: fixes s::string shows "reg {s}"proof (induct s) case Nil show "reg {[]}" by (rule reg_empty)next case (Cons c s) have "reg {s}" by fact then obtain r where "L r = {s}" by auto then have "L (SEQ (CHAR c) r) = {[c]} ; {s}" by simp also have "\<dots> = {c # s}" by (simp add: lang_seq_def) finally show "reg {c # s}" by blast qedlemma reg_finite [intro]: assumes a: "finite L1" shows "reg L1"using aproof(induct) case empty show "reg {}" by (rule reg_null)next case (insert s S) have "reg {s}" by (rule reg_string) moreover have "reg S" by fact ultimately have "reg ({s} \<union> S)" by (rule reg_union) then show "reg (insert s S)" by simpqedlemma reg_cuniv [intro]: shows "reg (CUNIV)"by (rule reg_finite) (auto)lemma reg_univ: shows "reg (UNIV::string set)"proof - have "reg CUNIV" by (rule reg_cuniv) then have "reg (CUNIV\<star>)" by (rule reg_star) then show "reg UNIV" by (simp add: UNIV_CUNIV_star)qedlemma reg_finite_subset: assumes a: "finite L1" and b: "reg L1" "L2 \<subseteq> L1" shows "reg L2"using a bapply(induct arbitrary: L2)apply(simp add: reg_empty)oopslemma reg_not: shows "reg (UNIV - L r)"proof (induct r) case NULL have "reg UNIV" by (rule reg_univ) then show "reg (UNIV - L NULL)" by simpnext case EMPTY have "[] \<notin> CUNIV" by auto moreover have "reg (CUNIV; CUNIV\<star>)" by auto ultimately have "reg (CUNIV\<star> - {[]})" using lang_star_cases2 by simp then show "reg (UNIV - L EMPTY)" by (simp add: UNIV_CUNIV_star)next case (CHAR c) then show "?case" apply(simp)using reg_UNIVapply(simp)apply(simp add: char_star2[symmetric])apply(rule reg_seq)apply(rule reg_cuniv)apply(rule reg_star)apply(rule reg_cuniv)oopsend