Matcher.thy
author urbanc
Fri, 18 Feb 2011 15:06:06 +0000
changeset 115 c5f138b5fc88
parent 103 f460d5f75cb5
child 154 7c68b9ad4486
permissions -rw-r--r--
added comment from Larry

theory Matcher
  imports "Main" 
begin


section {* Sequential Composition of Sets *}

fun
  lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
where 
  "L1 ;; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"


section {* Kleene Star for Sets *}

inductive_set
  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
  for L :: "string set"
where
  start[intro]: "[] \<in> L\<star>"
| step[intro]:  "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"


text {* A standard property of Star *}

lemma lang_star_cases:
  shows "L\<star> =  {[]} \<union> L ;; L\<star>"
by (auto) (metis Star.simps)

section {* Regular Expressions *}

datatype rexp =
  NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp

types lang = "string set" 

definition
  DERIV :: "string \<Rightarrow> lang \<Rightarrow> lang"
where
  "DERIV s A \<equiv> {s'. s @ s' \<in> A}"

definition
  delta :: "lang \<Rightarrow> lang"
where
  "delta A = (if [] \<in> A then {[]} else {})"

lemma
  "DERIV s (P ; Q) = \<Union>{(delta (DERIV s1 P)) ; (DERIV s2 Q) | s1 s2. s = s1 @ s2}"
apply(auto)

fun
  D1 :: "string \<Rightarrow> lang \<Rightarrow> lang"
where
  "D1 [] A = A"
| "D1 (c # s) A = DERIV [c] (D1 s A)"

fun
  D2 :: "string \<Rightarrow> lang \<Rightarrow> lang"
where
  "D2 [] A = A"
| "D2 (c # s) A = DERIV [c] (D1 s A)"

function
  D
where
  "D s P Q = P ;; Q"
| "D (c#s) = 

section {* Semantics of Regular Expressions *}
 
fun
  L :: "rexp \<Rightarrow> string set"
where
  "L (NULL) = {}"
| "L (EMPTY) = {[]}"
| "L (CHAR c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"


section {* The Matcher *}

fun
 nullable :: "rexp \<Rightarrow> bool"
where
  "nullable (NULL) = False"
| "nullable (EMPTY) = True"
| "nullable (CHAR c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"

fun
 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "der c (NULL) = NULL"
| "der c (EMPTY) = NULL"
| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
| "der c (STAR r) = SEQ (der c r) (STAR r)"

fun 
 derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "derivative [] r = r"
| "derivative (c # s) r = derivative s (der c r)"

fun
  matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"
where
  "matches r s = nullable (derivative s r)"


section {* Correctness Proof of the Matcher *}

lemma nullable_correctness:
  shows "nullable r \<longleftrightarrow> [] \<in> L r"
by (induct r) (auto) 

lemma der_correctness:
  shows "s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r"
proof (induct r arbitrary: s)
  case (SEQ r1 r2 s)
  have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c # s \<in> L r1" by fact
  have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c # s \<in> L r2" by fact
  show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c # s \<in> L (SEQ r1 r2)" 
    using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)
next
  case (STAR r s)
  have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" by fact
  show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c # s \<in> L (STAR r)"
  proof
    assume "s \<in> L (der c (STAR r))"
    then have "s \<in> L (der c r) ; L r\<star>" by simp
    then have "c # s \<in> L r ; (L r)\<star>" 
      by (auto simp add: ih Cons_eq_append_conv)
    then have "c # s \<in> (L r)\<star>" using lang_star_cases by auto
    then show "c # s \<in> L (STAR r)" by simp
  next
    assume "c # s \<in> L (STAR r)"
    then have "c # s \<in> (L r)\<star>" by simp
    then have "s \<in> L (der c r); (L r)\<star>"
      by (induct x\<equiv>"c # s" rule: Star.induct)
         (auto simp add: ih append_eq_Cons_conv)
    then show "s \<in> L (der c (STAR r))" by simp  
  qed
qed (simp_all)

lemma matches_correctness:
  shows "matches r s \<longleftrightarrow> s \<in> L r"
by (induct s arbitrary: r)
   (simp_all add: nullable_correctness der_correctness)

section {* Examples *}

value "matches NULL []"
value "matches (CHAR (CHR ''a'')) [CHR ''a'']"
value "matches (CHAR a) [a,a]"
value "matches (STAR (CHAR a)) []"
value "matches (STAR (CHAR a))  [a,a]"
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"

section {* Incorrect Matcher - fun-definition rejected *}

function 
  match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
where
  "match [] [] = True"
| "match [] (c # s) = False"
| "match (NULL # rs) s = False"  
| "match (EMPTY # rs) s = match rs s"
| "match (CHAR c # rs) [] = False"
| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"         
| "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
| "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"
apply(pat_completeness)
apply(auto)
done

end