# HG changeset patch # User urbanc # Date 1286093568 0 # Node ID 8d657fa3ba2e1d4223e67f50fe5f47ac7f0f3c6a # Parent 0d63f1c1f67f9806b043cecadddd4e3d3ebe0b1d added simple regexp matcher from Slind et al diff -r 0d63f1c1f67f -r 8d657fa3ba2e Matcher.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matcher.thy Sun Oct 03 08:12:48 2010 +0000 @@ -0,0 +1,156 @@ +theory Matcher + imports "Main" +begin + + +section {* Sequential Composition of Sets *} + +fun + lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100) +where + "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}" + + +section {* Kleene Star for Sets *} + +inductive_set + Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102) + for L :: "string set" +where + start[intro]: "[] \<in> L\<star>" +| step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>" + + +text {* A standard property of star *} + +lemma lang_star_cases: + shows "L\<star> = {[]} \<union> L ; L\<star>" +by (auto) (metis Star.simps) + +section {* Regular Expressions *} + +datatype rexp = + NULL +| EMPTY +| CHAR char +| SEQ rexp rexp +| ALT rexp rexp +| STAR rexp + + +section {* Semantics of Regular Expressions *} + +fun + L :: "rexp \<Rightarrow> string set" +where + "L (NULL) = {}" +| "L (EMPTY) = {[]}" +| "L (CHAR c) = {[c]}" +| "L (SEQ r1 r2) = (L r1) ; (L r2)" +| "L (ALT r1 r2) = (L r1) \<union> (L r2)" +| "L (STAR r) = (L r)\<star>" + + +section {* The Matcher *} + +fun + nullable :: "rexp \<Rightarrow> bool" +where + "nullable (NULL) = False" +| "nullable (EMPTY) = True" +| "nullable (CHAR c) = False" +| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)" +| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)" +| "nullable (STAR r) = True" + +fun + der :: "char \<Rightarrow> rexp \<Rightarrow> rexp" +where + "der c (NULL) = NULL" +| "der c (EMPTY) = NULL" +| "der c (CHAR c') = (if c=c' then EMPTY else NULL)" +| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" +| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" +| "der c (STAR r) = SEQ (der c r) (STAR r)" + +fun + derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp" +where + "derivative [] r = r" +| "derivative (c#s) r = derivative s (der c r)" + +fun + matches :: "rexp \<Rightarrow> string \<Rightarrow> bool" +where + "matches r s = nullable (derivative s r)" + + +section {* Correctness Proof of the Matcher *} + +lemma nullable_correctness: + shows "nullable r \<longleftrightarrow> [] \<in> L r" +by (induct r) (auto) + +lemma der_correctness: + shows "s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r" +proof (induct r arbitrary: s) + case (SEQ r1 r2 s) + have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c#s \<in> L r1" by fact + have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c#s \<in> L r2" by fact + show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c#s \<in> L (SEQ r1 r2)" + using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv) +next + case (STAR r s) + have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r" by fact + show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c#s \<in> L (STAR r)" + proof + assume "s \<in> L (der c (STAR r))" + then have "s \<in> L (der c r) ; L r\<star>" by simp + then have "c#s \<in> L r ; (L r)\<star>" + by (auto simp add: ih Cons_eq_append_conv) + then have "c#s \<in> (L r)\<star>" using lang_star_cases by auto + then show "c#s \<in> L (STAR r)" by simp + next + assume "c#s \<in> L (STAR r)" + then have "c#s \<in> (L r)\<star>" by simp + then have "s \<in> L (der c r); (L r)\<star>" + by (induct x\<equiv>"c#s" rule: Star.induct) + (auto simp add: ih append_eq_Cons_conv) + then show "s \<in> L (der c (STAR r))" by simp + qed +qed (simp_all) + +lemma matches_correctness: + shows "matches r s \<longleftrightarrow> s \<in> L r" +by (induct s arbitrary: r) + (simp_all add: nullable_correctness der_correctness) + +section {* Examples *} + +value "matches NULL []" +value "matches (CHAR (CHR ''a'')) [CHR ''a'']" +value "matches (CHAR a) [a,a]" +value "matches (STAR (CHAR a)) []" +value "matches (STAR (CHAR a)) [a,a]" +value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''" +value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''" + +section {* Incorrect Matcher - fun-definition rejected *} + +function + match :: "rexp list \<Rightarrow> string \<Rightarrow> bool" +where + "match [] [] = True" +| "match [] (c#s) = False" +| "match (NULL#rs) s = False" +| "match (EMPTY#rs) s = match rs s" +| "match (CHAR c#rs) [] = False" +| "match (CHAR c#rs) (d#s) = (if c = d then match rs s else False)" +| "match (ALT r1 r2#rs) s = (match (r1#rs) s \<or> match (r2#rs) s)" +| "match (SEQ r1 r2#rs) s = match (r1#r2#rs) s" +| "match (STAR r#rs) s = (match rs s \<or> match (r#(STAR r)#rs) s)" +apply(pat_completeness) +apply(auto) +done + +end \ No newline at end of file