Matcher.thy
author urbanc
Sun, 03 Oct 2010 08:12:48 +0000
changeset 3 8d657fa3ba2e
child 5 074d9a4b2bc9
permissions -rw-r--r--
added simple regexp matcher from Slind et al

theory Matcher
  imports "Main" 
begin


section {* Sequential Composition of Sets *}

fun
  lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100)
where 
  "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"


section {* Kleene Star for Sets *}

inductive_set
  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
  for L :: "string set"
where
  start[intro]: "[] \<in> L\<star>"
| step[intro]:  "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"


text {* A standard property of star *}

lemma lang_star_cases:
  shows "L\<star> =  {[]} \<union> L ; L\<star>"
by (auto) (metis Star.simps)

section {* Regular Expressions *}

datatype rexp =
  NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp


section {* Semantics of Regular Expressions *}
 
fun
  L :: "rexp \<Rightarrow> string set"
where
  "L (NULL) = {}"
| "L (EMPTY) = {[]}"
| "L (CHAR c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"


section {* The Matcher *}

fun
 nullable :: "rexp \<Rightarrow> bool"
where
  "nullable (NULL) = False"
| "nullable (EMPTY) = True"
| "nullable (CHAR c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"

fun
 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "der c (NULL) = NULL"
| "der c (EMPTY) = NULL"
| "der c (CHAR c') = (if c=c' then EMPTY else NULL)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
| "der c (STAR r) = SEQ (der c r) (STAR r)"

fun 
 derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "derivative [] r = r"
| "derivative (c#s) r = derivative s (der c r)"

fun
  matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"
where
  "matches r s = nullable (derivative s r)"


section {* Correctness Proof of the Matcher *}

lemma nullable_correctness:
  shows "nullable r \<longleftrightarrow> [] \<in> L r"
by (induct r) (auto) 

lemma der_correctness:
  shows "s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r"
proof (induct r arbitrary: s)
  case (SEQ r1 r2 s)
  have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c#s \<in> L r1" by fact
  have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c#s \<in> L r2" by fact
  show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c#s \<in> L (SEQ r1 r2)" 
    using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)
next
  case (STAR r s)
  have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r" by fact
  show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c#s \<in> L (STAR r)"
  proof
    assume "s \<in> L (der c (STAR r))"
    then have "s \<in> L (der c r) ; L r\<star>" by simp
    then have "c#s \<in> L r ; (L r)\<star>" 
      by (auto simp add: ih Cons_eq_append_conv)
    then have "c#s \<in> (L r)\<star>" using lang_star_cases by auto
    then show "c#s \<in> L (STAR r)" by simp
  next
    assume "c#s \<in> L (STAR r)"
    then have "c#s \<in> (L r)\<star>" by simp
    then have "s \<in> L (der c r); (L r)\<star>"
      by (induct x\<equiv>"c#s" rule: Star.induct)
         (auto simp add: ih append_eq_Cons_conv)
    then show "s \<in> L (der c (STAR r))" by simp  
  qed
qed (simp_all)

lemma matches_correctness:
  shows "matches r s \<longleftrightarrow> s \<in> L r"
by (induct s arbitrary: r)
   (simp_all add: nullable_correctness der_correctness)

section {* Examples *}

value "matches NULL []"
value "matches (CHAR (CHR ''a'')) [CHR ''a'']"
value "matches (CHAR a) [a,a]"
value "matches (STAR (CHAR a)) []"
value "matches (STAR (CHAR a))  [a,a]"
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"

section {* Incorrect Matcher - fun-definition rejected *}

function 
  match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
where
  "match [] [] = True"
| "match [] (c#s) = False"
| "match (NULL#rs) s = False"  
| "match (EMPTY#rs) s = match rs s"
| "match (CHAR c#rs) [] = False"
| "match (CHAR c#rs) (d#s) = (if c = d then match rs s else False)"         
| "match (ALT r1 r2#rs) s = (match (r1#rs) s \<or> match (r2#rs) s)" 
| "match (SEQ r1 r2#rs) s = match (r1#r2#rs) s"
| "match (STAR r#rs) s = (match rs s \<or> match (r#(STAR r)#rs) s)"
apply(pat_completeness)
apply(auto)
done

end