# HG changeset patch # User urbanc # Date 1286093568 0 # Node ID 8d657fa3ba2e1d4223e67f50fe5f47ac7f0f3c6a # Parent 0d63f1c1f67f9806b043cecadddd4e3d3ebe0b1d added simple regexp matcher from Slind et al diff -r 0d63f1c1f67f -r 8d657fa3ba2e Matcher.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matcher.thy Sun Oct 03 08:12:48 2010 +0000 @@ -0,0 +1,156 @@ +theory Matcher + imports "Main" +begin + + +section {* Sequential Composition of Sets *} + +fun + lang_seq :: "string set \ string set \ string set" ("_ ; _" [100,100] 100) +where + "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \ L1 \ s2 \ L2}" + + +section {* Kleene Star for Sets *} + +inductive_set + Star :: "string set \ string set" ("_\" [101] 102) + for L :: "string set" +where + start[intro]: "[] \ L\" +| step[intro]: "\s1 \ L; s2 \ L\\ \ s1 @ s2 \ L\" + + +text {* A standard property of star *} + +lemma lang_star_cases: + shows "L\ = {[]} \ L ; L\" +by (auto) (metis Star.simps) + +section {* Regular Expressions *} + +datatype rexp = + NULL +| EMPTY +| CHAR char +| SEQ rexp rexp +| ALT rexp rexp +| STAR rexp + + +section {* Semantics of Regular Expressions *} + +fun + L :: "rexp \ string set" +where + "L (NULL) = {}" +| "L (EMPTY) = {[]}" +| "L (CHAR c) = {[c]}" +| "L (SEQ r1 r2) = (L r1) ; (L r2)" +| "L (ALT r1 r2) = (L r1) \ (L r2)" +| "L (STAR r) = (L r)\" + + +section {* The Matcher *} + +fun + nullable :: "rexp \ bool" +where + "nullable (NULL) = False" +| "nullable (EMPTY) = True" +| "nullable (CHAR c) = False" +| "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" +| "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" +| "nullable (STAR r) = True" + +fun + der :: "char \ rexp \ rexp" +where + "der c (NULL) = NULL" +| "der c (EMPTY) = NULL" +| "der c (CHAR c') = (if c=c' then EMPTY else NULL)" +| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" +| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" +| "der c (STAR r) = SEQ (der c r) (STAR r)" + +fun + derivative :: "string \ rexp \ rexp" +where + "derivative [] r = r" +| "derivative (c#s) r = derivative s (der c r)" + +fun + matches :: "rexp \ string \ bool" +where + "matches r s = nullable (derivative s r)" + + +section {* Correctness Proof of the Matcher *} + +lemma nullable_correctness: + shows "nullable r \ [] \ L r" +by (induct r) (auto) + +lemma der_correctness: + shows "s \ L (der c r) \ c#s \ L r" +proof (induct r arbitrary: s) + case (SEQ r1 r2 s) + have ih1: "\s. s \ L (der c r1) \ c#s \ L r1" by fact + have ih2: "\s. s \ L (der c r2) \ c#s \ L r2" by fact + show "s \ L (der c (SEQ r1 r2)) \ c#s \ L (SEQ r1 r2)" + using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv) +next + case (STAR r s) + have ih: "\s. s \ L (der c r) \ c#s \ L r" by fact + show "s \ L (der c (STAR r)) \ c#s \ L (STAR r)" + proof + assume "s \ L (der c (STAR r))" + then have "s \ L (der c r) ; L r\" by simp + then have "c#s \ L r ; (L r)\" + by (auto simp add: ih Cons_eq_append_conv) + then have "c#s \ (L r)\" using lang_star_cases by auto + then show "c#s \ L (STAR r)" by simp + next + assume "c#s \ L (STAR r)" + then have "c#s \ (L r)\" by simp + then have "s \ L (der c r); (L r)\" + by (induct x\"c#s" rule: Star.induct) + (auto simp add: ih append_eq_Cons_conv) + then show "s \ L (der c (STAR r))" by simp + qed +qed (simp_all) + +lemma matches_correctness: + shows "matches r s \ s \ L r" +by (induct s arbitrary: r) + (simp_all add: nullable_correctness der_correctness) + +section {* Examples *} + +value "matches NULL []" +value "matches (CHAR (CHR ''a'')) [CHR ''a'']" +value "matches (CHAR a) [a,a]" +value "matches (STAR (CHAR a)) []" +value "matches (STAR (CHAR a)) [a,a]" +value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''" +value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''" + +section {* Incorrect Matcher - fun-definition rejected *} + +function + match :: "rexp list \ string \ bool" +where + "match [] [] = True" +| "match [] (c#s) = False" +| "match (NULL#rs) s = False" +| "match (EMPTY#rs) s = match rs s" +| "match (CHAR c#rs) [] = False" +| "match (CHAR c#rs) (d#s) = (if c = d then match rs s else False)" +| "match (ALT r1 r2#rs) s = (match (r1#rs) s \ match (r2#rs) s)" +| "match (SEQ r1 r2#rs) s = match (r1#r2#rs) s" +| "match (STAR r#rs) s = (match rs s \ match (r#(STAR r)#rs) s)" +apply(pat_completeness) +apply(auto) +done + +end \ No newline at end of file