theory Matcher
imports "Main"
begin
section {* Sequential Composition of Sets *}
fun
lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100)
where
"L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"
section {* Kleene Star for Sets *}
inductive_set
Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
for L :: "string set"
where
start[intro]: "[] \<in> L\<star>"
| step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"
text {* A standard property of Star *}
lemma lang_star_cases:
shows "L\<star> = {[]} \<union> L ; L\<star>"
by (auto) (metis Star.simps)
section {* Regular Expressions *}
datatype rexp =
NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp
section {* Semantics of Regular Expressions *}
fun
L :: "rexp \<Rightarrow> string set"
where
"L (NULL) = {}"
| "L (EMPTY) = {[]}"
| "L (CHAR c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"
section {* The Matcher *}
fun
nullable :: "rexp \<Rightarrow> bool"
where
"nullable (NULL) = False"
| "nullable (EMPTY) = True"
| "nullable (CHAR c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"
fun
der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
where
"der c (NULL) = NULL"
| "der c (EMPTY) = NULL"
| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
| "der c (STAR r) = SEQ (der c r) (STAR r)"
fun
derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
where
"derivative [] r = r"
| "derivative (c # s) r = derivative s (der c r)"
fun
matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"
where
"matches r s = nullable (derivative s r)"
section {* Correctness Proof of the Matcher *}
lemma nullable_correctness:
shows "nullable r \<longleftrightarrow> [] \<in> L r"
by (induct r) (auto)
lemma der_correctness:
shows "s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r"
proof (induct r arbitrary: s)
case (SEQ r1 r2 s)
have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c # s \<in> L r1" by fact
have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c # s \<in> L r2" by fact
show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c # s \<in> L (SEQ r1 r2)"
using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)
next
case (STAR r s)
have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" by fact
show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c # s \<in> L (STAR r)"
proof
assume "s \<in> L (der c (STAR r))"
then have "s \<in> L (der c r) ; L r\<star>" by simp
then have "c # s \<in> L r ; (L r)\<star>"
by (auto simp add: ih Cons_eq_append_conv)
then have "c # s \<in> (L r)\<star>" using lang_star_cases by auto
then show "c # s \<in> L (STAR r)" by simp
next
assume "c # s \<in> L (STAR r)"
then have "c # s \<in> (L r)\<star>" by simp
then have "s \<in> L (der c r); (L r)\<star>"
by (induct x\<equiv>"c # s" rule: Star.induct)
(auto simp add: ih append_eq_Cons_conv)
then show "s \<in> L (der c (STAR r))" by simp
qed
qed (simp_all)
lemma matches_correctness:
shows "matches r s \<longleftrightarrow> s \<in> L r"
by (induct s arbitrary: r)
(simp_all add: nullable_correctness der_correctness)
section {* Examples *}
value "matches NULL []"
value "matches (CHAR (CHR ''a'')) [CHR ''a'']"
value "matches (CHAR a) [a,a]"
value "matches (STAR (CHAR a)) []"
value "matches (STAR (CHAR a)) [a,a]"
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
section {* Incorrect Matcher - fun-definition rejected *}
function
match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
where
"match [] [] = True"
| "match [] (c # s) = False"
| "match (NULL # rs) s = False"
| "match (EMPTY # rs) s = match rs s"
| "match (CHAR c # rs) [] = False"
| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"
| "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)"
| "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"
apply(pat_completeness)
apply(auto)
done
end