theory Matcher imports "Main" beginsection {* Sequential Composition of Sets *}fun lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100)where "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"section {* Kleene Star for Sets *}inductive_set Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102) for L :: "string set"where start[intro]: "[] \<in> L\<star>"| step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"text {* A standard property of Star *}lemma lang_star_cases: shows "L\<star> = {[]} \<union> L ; L\<star>"by (auto) (metis Star.simps)section {* Regular Expressions *}datatype rexp = NULL| EMPTY| CHAR char| SEQ rexp rexp| ALT rexp rexp| STAR rexpsection {* Semantics of Regular Expressions *}fun L :: "rexp \<Rightarrow> string set"where "L (NULL) = {}"| "L (EMPTY) = {[]}"| "L (CHAR c) = {[c]}"| "L (SEQ r1 r2) = (L r1) ; (L r2)"| "L (ALT r1 r2) = (L r1) \<union> (L r2)"| "L (STAR r) = (L r)\<star>"section {* The Matcher *}fun nullable :: "rexp \<Rightarrow> bool"where "nullable (NULL) = False"| "nullable (EMPTY) = True"| "nullable (CHAR c) = False"| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"| "nullable (STAR r) = True"fun der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"where "der c (NULL) = NULL"| "der c (EMPTY) = NULL"| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"| "der c (STAR r) = SEQ (der c r) (STAR r)"fun derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp"where "derivative [] r = r"| "derivative (c # s) r = derivative s (der c r)"fun matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"where "matches r s = nullable (derivative s r)"section {* Correctness Proof of the Matcher *}lemma nullable_correctness: shows "nullable r \<longleftrightarrow> [] \<in> L r"by (induct r) (auto) lemma der_correctness: shows "s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r"proof (induct r arbitrary: s) case (SEQ r1 r2 s) have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c # s \<in> L r1" by fact have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c # s \<in> L r2" by fact show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c # s \<in> L (SEQ r1 r2)" using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)next case (STAR r s) have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" by fact show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c # s \<in> L (STAR r)" proof assume "s \<in> L (der c (STAR r))" then have "s \<in> L (der c r) ; L r\<star>" by simp then have "c # s \<in> L r ; (L r)\<star>" by (auto simp add: ih Cons_eq_append_conv) then have "c # s \<in> (L r)\<star>" using lang_star_cases by auto then show "c # s \<in> L (STAR r)" by simp next assume "c # s \<in> L (STAR r)" then have "c # s \<in> (L r)\<star>" by simp then have "s \<in> L (der c r); (L r)\<star>" by (induct x\<equiv>"c # s" rule: Star.induct) (auto simp add: ih append_eq_Cons_conv) then show "s \<in> L (der c (STAR r))" by simp qedqed (simp_all)lemma matches_correctness: shows "matches r s \<longleftrightarrow> s \<in> L r"by (induct s arbitrary: r) (simp_all add: nullable_correctness der_correctness)section {* Examples *}value "matches NULL []"value "matches (CHAR (CHR ''a'')) [CHR ''a'']"value "matches (CHAR a) [a,a]"value "matches (STAR (CHAR a)) []"value "matches (STAR (CHAR a)) [a,a]"value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"section {* Incorrect Matcher - fun-definition rejected *}function match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"where "match [] [] = True"| "match [] (c # s) = False"| "match (NULL # rs) s = False" | "match (EMPTY # rs) s = match rs s"| "match (CHAR c # rs) [] = False"| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)" | "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" | "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"apply(pat_completeness)apply(auto)doneend