added simple regexp matcher from Slind et al
authorurbanc
Sun, 03 Oct 2010 08:12:48 +0000
changeset 3 8d657fa3ba2e
parent 2 0d63f1c1f67f
child 4 f20f391b21fa
added simple regexp matcher from Slind et al
Matcher.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Matcher.thy	Sun Oct 03 08:12:48 2010 +0000
@@ -0,0 +1,156 @@
+theory Matcher
+  imports "Main" 
+begin
+
+
+section {* Sequential Composition of Sets *}
+
+fun
+  lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100)
+where 
+  "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"
+
+
+section {* Kleene Star for Sets *}
+
+inductive_set
+  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
+  for L :: "string set"
+where
+  start[intro]: "[] \<in> L\<star>"
+| step[intro]:  "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"
+
+
+text {* A standard property of star *}
+
+lemma lang_star_cases:
+  shows "L\<star> =  {[]} \<union> L ; L\<star>"
+by (auto) (metis Star.simps)
+
+section {* Regular Expressions *}
+
+datatype rexp =
+  NULL
+| EMPTY
+| CHAR char
+| SEQ rexp rexp
+| ALT rexp rexp
+| STAR rexp
+
+
+section {* Semantics of Regular Expressions *}
+ 
+fun
+  L :: "rexp \<Rightarrow> string set"
+where
+  "L (NULL) = {}"
+| "L (EMPTY) = {[]}"
+| "L (CHAR c) = {[c]}"
+| "L (SEQ r1 r2) = (L r1) ; (L r2)"
+| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
+| "L (STAR r) = (L r)\<star>"
+
+
+section {* The Matcher *}
+
+fun
+ nullable :: "rexp \<Rightarrow> bool"
+where
+  "nullable (NULL) = False"
+| "nullable (EMPTY) = True"
+| "nullable (CHAR c) = False"
+| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
+| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
+| "nullable (STAR r) = True"
+
+fun
+ der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+  "der c (NULL) = NULL"
+| "der c (EMPTY) = NULL"
+| "der c (CHAR c') = (if c=c' then EMPTY else NULL)"
+| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
+| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
+| "der c (STAR r) = SEQ (der c r) (STAR r)"
+
+fun 
+ derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+  "derivative [] r = r"
+| "derivative (c#s) r = derivative s (der c r)"
+
+fun
+  matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"
+where
+  "matches r s = nullable (derivative s r)"
+
+
+section {* Correctness Proof of the Matcher *}
+
+lemma nullable_correctness:
+  shows "nullable r \<longleftrightarrow> [] \<in> L r"
+by (induct r) (auto) 
+
+lemma der_correctness:
+  shows "s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r"
+proof (induct r arbitrary: s)
+  case (SEQ r1 r2 s)
+  have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c#s \<in> L r1" by fact
+  have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c#s \<in> L r2" by fact
+  show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c#s \<in> L (SEQ r1 r2)" 
+    using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)
+next
+  case (STAR r s)
+  have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r" by fact
+  show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c#s \<in> L (STAR r)"
+  proof
+    assume "s \<in> L (der c (STAR r))"
+    then have "s \<in> L (der c r) ; L r\<star>" by simp
+    then have "c#s \<in> L r ; (L r)\<star>" 
+      by (auto simp add: ih Cons_eq_append_conv)
+    then have "c#s \<in> (L r)\<star>" using lang_star_cases by auto
+    then show "c#s \<in> L (STAR r)" by simp
+  next
+    assume "c#s \<in> L (STAR r)"
+    then have "c#s \<in> (L r)\<star>" by simp
+    then have "s \<in> L (der c r); (L r)\<star>"
+      by (induct x\<equiv>"c#s" rule: Star.induct)
+         (auto simp add: ih append_eq_Cons_conv)
+    then show "s \<in> L (der c (STAR r))" by simp  
+  qed
+qed (simp_all)
+
+lemma matches_correctness:
+  shows "matches r s \<longleftrightarrow> s \<in> L r"
+by (induct s arbitrary: r)
+   (simp_all add: nullable_correctness der_correctness)
+
+section {* Examples *}
+
+value "matches NULL []"
+value "matches (CHAR (CHR ''a'')) [CHR ''a'']"
+value "matches (CHAR a) [a,a]"
+value "matches (STAR (CHAR a)) []"
+value "matches (STAR (CHAR a))  [a,a]"
+value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
+value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
+
+section {* Incorrect Matcher - fun-definition rejected *}
+
+function 
+  match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
+where
+  "match [] [] = True"
+| "match [] (c#s) = False"
+| "match (NULL#rs) s = False"  
+| "match (EMPTY#rs) s = match rs s"
+| "match (CHAR c#rs) [] = False"
+| "match (CHAR c#rs) (d#s) = (if c = d then match rs s else False)"         
+| "match (ALT r1 r2#rs) s = (match (r1#rs) s \<or> match (r2#rs) s)" 
+| "match (SEQ r1 r2#rs) s = match (r1#r2#rs) s"
+| "match (STAR r#rs) s = (match rs s \<or> match (r#(STAR r)#rs) s)"
+apply(pat_completeness)
+apply(auto)
+done
+
+end    
\ No newline at end of file