theory Matcher
  imports "Main" 
begin


section {* Regular Expressions *}

datatype rexp =
  NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp


section {* Sequential Composition of Sets *}

definition
  Seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
where 
  "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"

text {* Two Simple Properties about Sequential Composition *}

lemma seq_empty [simp]:
  shows "A ;; {[]} = A"
  and   "{[]} ;; A = A"
by (simp_all add: Seq_def)

lemma seq_null [simp]:
  shows "A ;; {} = {}"
  and   "{} ;; A = {}"
by (simp_all add: Seq_def)

section {* Kleene Star for Sets *}

inductive_set
  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
  for A :: "string set"
where
  start[intro]: "[] \<in> A\<star>"
| step[intro]:  "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"


text {* A Standard Property of Star *}

lemma star_cases:
  shows "A\<star> = {[]} \<union> A ;; A\<star>"
unfolding Seq_def
by (auto) (metis Star.simps)

lemma star_decomp: 
  assumes a: "c # x \<in> A\<star>" 
  shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>"
using a
by (induct x\<equiv>"c # x" rule: Star.induct) 
   (auto simp add: append_eq_Cons_conv)


section {* Semantics of Regular Expressions *}
 
fun
  L :: "rexp \<Rightarrow> string set"
where
  "L (NULL) = {}"
| "L (EMPTY) = {[]}"
| "L (CHAR c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"

section {* The Matcher *}

fun
 nullable :: "rexp \<Rightarrow> bool"
where
  "nullable (NULL) = False"
| "nullable (EMPTY) = True"
| "nullable (CHAR c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"

fun
 noccurs :: "rexp \<Rightarrow> bool"
where
  "noccurs (NULL) = True"
| "noccurs (EMPTY) = False"
| "noccurs (CHAR c) = False"
| "noccurs (ALT r1 r2) = (noccurs r1 \<or> noccurs r2)"
| "noccurs (SEQ r1 r2) = (noccurs r1 \<or> noccurs r2)"
| "noccurs (STAR r) = (noccurs r)"

lemma
  "\<not> noccurs r \<Longrightarrow> L r \<noteq> {}"
apply(induct r)
apply(auto simp add: Seq_def)
done

lemma
  "L r = {} \<Longrightarrow> noccurs r"
apply(induct r)
apply(auto simp add: Seq_def)
done

lemma does_not_hold:
  "noccurs r \<Longrightarrow> L r = {}"
apply(induct r)
apply(auto simp add: Seq_def)
oops

fun
 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "der c (NULL) = NULL"
| "der c (EMPTY) = NULL"
| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
| "der c (STAR r) = SEQ (der c r) (STAR r)"

fun 
 ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
where
  "ders [] r = r"
| "ders (c # s) r = ders s (der c r)"

fun
  matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool"
where
  "matcher r s = nullable (ders s r)"


section {* Correctness Proof of the Matcher *}

lemma nullable_correctness:
  shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
by (induct r) (auto simp add: Seq_def) 
section {* Left-Quotient of a Set *}

fun
 zeroable :: "rexp \<Rightarrow> bool"
where
  "zeroable (NULL) = True"
| "zeroable (EMPTY) = False"
| "zeroable (CHAR c) = False"
| "zeroable (ALT r1 r2) = (zeroable r1 \<and> zeroable r2)"
| "zeroable (SEQ r1 r2) = (zeroable r1 \<or> zeroable r2)"
| "zeroable (STAR r) = False"


lemma zeroable_correctness:
  shows "zeroable r  \<longleftrightarrow>  (L r = {})"
apply(induct r)
apply(auto simp add: Seq_def)[6]
done

section {* Left-Quotient of a Set *}

definition
  Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
where
  "Der c A \<equiv> {s. [c] @ s \<in> A}"

lemma Der_null [simp]:
  shows "Der c {} = {}"
unfolding Der_def
by auto

lemma Der_empty [simp]:
  shows "Der c {[]} = {}"
unfolding Der_def
by auto

lemma Der_char [simp]:
  shows "Der c {[d]} = (if c = d then {[]} else {})"
unfolding Der_def
by auto

lemma Der_union [simp]:
  shows "Der c (A \<union> B) = Der c A \<union> Der c B"
unfolding Der_def
by auto

lemma Der_seq [simp]:
  shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
unfolding Der_def Seq_def
by (auto simp add: Cons_eq_append_conv)

lemma Der_star [simp]:
  shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
proof -    
  have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
    by (simp only: star_cases[symmetric])
  also have "... = Der c (A ;; A\<star>)"
    by (simp only: Der_union Der_empty) (simp)
  also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
    by simp
  also have "... =  (Der c A) ;; A\<star>"
    unfolding Seq_def Der_def
    by (auto dest: star_decomp)
  finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
qed


lemma der_correctness:
  shows "L (der c r) = Der c (L r)"
by (induct r) 
   (simp_all add: nullable_correctness)

lemma matcher_correctness:
  shows "matcher r s \<longleftrightarrow> s \<in> L r"
by (induct s arbitrary: r)
   (simp_all add: nullable_correctness der_correctness Der_def)

section {* Examples *}

definition 
  "CHRA \<equiv> CHAR (CHR ''a'')"

definition 
  "ALT1 \<equiv> ALT CHRA EMPTY"

definition 
  "SEQ3 \<equiv> SEQ (SEQ ALT1 ALT1) ALT1"

value "matcher SEQ3 ''aaa''"

value "matcher NULL []"
value "matcher (CHAR (CHR ''a'')) [CHR ''a'']"
value "matcher (CHAR a) [a,a]"
value "matcher (STAR (CHAR a)) []"
value "matcher (STAR (CHAR a))  [a,a]"
value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"

section {* Incorrect Matcher - fun-definition rejected *}

fun
  match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
where
  "match [] [] = True"
| "match [] (c # s) = False"
| "match (NULL # rs) s = False"  
| "match (EMPTY # rs) s = match rs s"
| "match (CHAR c # rs) [] = False"
| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"         
| "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
| "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"


end    