progs/Matcher.thy
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Mon, 28 Oct 2013 13:19:44 +0000
changeset 167 cfba674a8fdf
child 208 bd5a8a6b3871
permissions -rw-r--r--
added matcher

theory Matcher
  imports "Main" 
begin

section {* Regular Expressions *}

datatype rexp =
  NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp


section {* Sequential Composition of Sets *}

definition
  Seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
where 
  "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"

text {* Two Simple Properties about Sequential Composition *}

lemma seq_empty [simp]:
  shows "A ;; {[]} = A"
  and   "{[]} ;; A = A"
by (simp_all add: Seq_def)

lemma seq_null [simp]:
  shows "A ;; {} = {}"
  and   "{} ;; A = {}"
by (simp_all add: Seq_def)

section {* Kleene Star for Sets *}

inductive_set
  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
  for A :: "string set"
where
  start[intro]: "[] \<in> A\<star>"
| step[intro]:  "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"


text {* A Standard Property of Star *}

lemma star_cases:
  shows "A\<star> = {[]} \<union> A ;; A\<star>"
unfolding Seq_def
by (auto) (metis Star.simps)

lemma star_decomp: 
  assumes a: "c # x \<in> A\<star>" 
  shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>"
using a
by (induct x\<equiv>"c # x" rule: Star.induct) 
   (auto simp add: append_eq_Cons_conv)


section {* Semantics of Regular Expressions *}
 
fun
  L :: "rexp \<Rightarrow> string set"
where
  "L (NULL) = {}"
| "L (EMPTY) = {[]}"
| "L (CHAR c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"

section {* The Matcher *}

fun
 nullable :: "rexp \<Rightarrow> bool"
where
  "nullable (NULL) = False"
| "nullable (EMPTY) = True"
| "nullable (CHAR c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"

fun
 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "der c (NULL) = NULL"
| "der c (EMPTY) = NULL"
| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
| "der c (STAR r) = SEQ (der c r) (STAR r)"

fun 
 ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "ders [] r = r"
| "ders (c # s) r = ders s (der c r)"

fun
  matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool"
where
  "matcher r s = nullable (ders s r)"


section {* Correctness Proof of the Matcher *}

lemma nullable_correctness:
  shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
by (induct r) (auto simp add: Seq_def) 
section {* Left-Quotient of a Set *}

definition
  Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
where
  "Der c A \<equiv> {s. [c] @ s \<in> A}"

lemma Der_null [simp]:
  shows "Der c {} = {}"
unfolding Der_def
by auto

lemma Der_empty [simp]:
  shows "Der c {[]} = {}"
unfolding Der_def
by auto

lemma Der_char [simp]:
  shows "Der c {[d]} = (if c = d then {[]} else {})"
unfolding Der_def
by auto

lemma Der_union [simp]:
  shows "Der c (A \<union> B) = Der c A \<union> Der c B"
unfolding Der_def
by auto

lemma Der_seq [simp]:
  shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
unfolding Der_def Seq_def
by (auto simp add: Cons_eq_append_conv)

lemma Der_star [simp]:
  shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
proof -    
  have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
    by (simp only: star_cases[symmetric])
  also have "... = Der c (A ;; A\<star>)"
    by (simp only: Der_union Der_empty) (simp)
  also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
    by simp
  also have "... =  (Der c A) ;; A\<star>"
    unfolding Seq_def Der_def
    by (auto dest: star_decomp)
  finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
qed


lemma der_correctness:
  shows "L (der c r) = Der c (L r)"
by (induct r) 
   (simp_all add: nullable_correctness)

lemma matcher_correctness:
  shows "matcher r s \<longleftrightarrow> s \<in> L r"
by (induct s arbitrary: r)
   (simp_all add: nullable_correctness der_correctness Der_def)

section {* Examples *}

definition 
  "CHRA \<equiv> CHAR (CHR ''a'')"

definition 
  "ALT1 \<equiv> ALT CHRA EMPTY"

definition 
  "SEQ3 \<equiv> SEQ (SEQ ALT1 ALT1) ALT1"

value "matcher SEQ3 ''aaa''"

value "matcher NULL []"
value "matcher (CHAR (CHR ''a'')) [CHR ''a'']"
value "matcher (CHAR a) [a,a]"
value "matcher (STAR (CHAR a)) []"
value "matcher (STAR (CHAR a))  [a,a]"
value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"

section {* Incorrect Matcher - fun-definition rejected *}

fun
  match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
where
  "match [] [] = True"
| "match [] (c # s) = False"
| "match (NULL # rs) s = False"  
| "match (EMPTY # rs) s = match rs s"
| "match (CHAR c # rs) [] = False"
| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"         
| "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
| "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"


end