regexp: Theories/Regular.thy@fd39492b187c


theory Regular
imports Main Folds
begin

section {* Preliminary definitions *}

type_synonym lang = "string set"


text {*  Sequential composition of two languages *}

definition 
  Seq :: "lang \<Rightarrow> lang \<Rightarrow> lang" (infixr ";;" 100)
where 
  "A ;; B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \<in> A \<and> s\<^isub>2 \<in> B}"


text {* Some properties of operator @{text ";;"}. *}

lemma seq_add_left:
  assumes a: "A = B"
  shows "C ;; A = C ;; B"
using a by simp

lemma seq_union_distrib_right:
  shows "(A \<union> B) ;; C = (A ;; C) \<union> (B ;; C)"
unfolding Seq_def by auto

lemma seq_union_distrib_left:
  shows "C ;; (A \<union> B) = (C ;; A) \<union> (C ;; B)"
unfolding Seq_def by  auto

lemma seq_intro:
  assumes a: "x \<in> A" "y \<in> B"
  shows "x @ y \<in> A ;; B "
using a by (auto simp: Seq_def)

lemma seq_assoc:
  shows "(A ;; B) ;; C = A ;; (B ;; C)"
unfolding Seq_def
apply(auto)
apply(blast)
by (metis append_assoc)

lemma seq_empty [simp]:
  shows "A ;; {[]} = A"
  and   "{[]} ;; A = A"
by (simp_all add: Seq_def)

lemma seq_null [simp]:
  shows "A ;; {} = {}"
  and   "{} ;; A = {}"
by (simp_all add: Seq_def)


text {* Power and Star of a language *}

fun 
  pow :: "lang \<Rightarrow> nat \<Rightarrow> lang" (infixl "\<up>" 100)
where
  "A \<up> 0 = {[]}"
| "A \<up> (Suc n) =  A ;; (A \<up> n)" 

definition
  Star :: "lang \<Rightarrow> lang" ("_\<star>" [101] 102)
where
  "A\<star> \<equiv> (\<Union>n. A \<up> n)"


lemma star_start[intro]:
  shows "[] \<in> A\<star>"
proof -
  have "[] \<in> A \<up> 0" by auto
  then show "[] \<in> A\<star>" unfolding Star_def by blast
qed

lemma star_step [intro]:
  assumes a: "s1 \<in> A" 
  and     b: "s2 \<in> A\<star>"
  shows "s1 @ s2 \<in> A\<star>"
proof -
  from b obtain n where "s2 \<in> A \<up> n" unfolding Star_def by auto
  then have "s1 @ s2 \<in> A \<up> (Suc n)" using a by (auto simp add: Seq_def)
  then show "s1 @ s2 \<in> A\<star>" unfolding Star_def by blast
qed

lemma star_induct[consumes 1, case_names start step]:
  assumes a: "x \<in> A\<star>" 
  and     b: "P []"
  and     c: "\<And>s1 s2. \<lbrakk>s1 \<in> A; s2 \<in> A\<star>; P s2\<rbrakk> \<Longrightarrow> P (s1 @ s2)"
  shows "P x"
proof -
  from a obtain n where "x \<in> A \<up> n" unfolding Star_def by auto
  then show "P x"
    by (induct n arbitrary: x)
       (auto intro!: b c simp add: Seq_def Star_def)
qed
    
lemma star_intro1:
  assumes a: "x \<in> A\<star>"
  and     b: "y \<in> A\<star>"
  shows "x @ y \<in> A\<star>"
using a b
by (induct rule: star_induct) (auto)

lemma star_intro2: 
  assumes a: "y \<in> A"
  shows "y \<in> A\<star>"
proof -
  from a have "y @ [] \<in> A\<star>" by blast
  then show "y \<in> A\<star>" by simp
qed

lemma star_intro3:
  assumes a: "x \<in> A\<star>"
  and     b: "y \<in> A"
  shows "x @ y \<in> A\<star>"
using a b by (blast intro: star_intro1 star_intro2)

lemma star_cases:
  shows "A\<star> =  {[]} \<union> A ;; A\<star>"
proof
  { fix x
    have "x \<in> A\<star> \<Longrightarrow> x \<in> {[]} \<union> A ;; A\<star>"
      unfolding Seq_def
    by (induct rule: star_induct) (auto)
  }
  then show "A\<star> \<subseteq> {[]} \<union> A ;; A\<star>" by auto
next
  show "{[]} \<union> A ;; A\<star> \<subseteq> A\<star>"
    unfolding Seq_def by auto
qed

lemma star_decom: 
  assumes a: "x \<in> A\<star>" "x \<noteq> []"
  shows "\<exists>a b. x = a @ b \<and> a \<noteq> [] \<and> a \<in> A \<and> b \<in> A\<star>"
using a
by (induct rule: star_induct) (blast)+

lemma
  shows seq_Union_left:  "B ;; (\<Union>n. A \<up> n) = (\<Union>n. B ;; (A \<up> n))"
  and   seq_Union_right: "(\<Union>n. A \<up> n) ;; B = (\<Union>n. (A \<up> n) ;; B)"
unfolding Seq_def by auto

lemma seq_pow_comm:
  shows "A ;; (A \<up> n) = (A \<up> n) ;; A"
by (induct n) (simp_all add: seq_assoc[symmetric])

lemma seq_star_comm:
  shows "A ;; A\<star> = A\<star> ;; A"
unfolding Star_def seq_Union_left
unfolding seq_pow_comm seq_Union_right 
by simp


text {* Two lemmas about the length of strings in @{text "A \<up> n"} *}

lemma pow_length:
  assumes a: "[] \<notin> A"
  and     b: "s \<in> A \<up> Suc n"
  shows "n < length s"
using b
proof (induct n arbitrary: s)
  case 0
  have "s \<in> A \<up> Suc 0" by fact
  with a have "s \<noteq> []" by auto
  then show "0 < length s" by auto
next
  case (Suc n)
  have ih: "\<And>s. s \<in> A \<up> Suc n \<Longrightarrow> n < length s" by fact
  have "s \<in> A \<up> Suc (Suc n)" by fact
  then obtain s1 s2 where eq: "s = s1 @ s2" and *: "s1 \<in> A" and **: "s2 \<in> A \<up> Suc n"
    by (auto simp add: Seq_def)
  from ih ** have "n < length s2" by simp
  moreover have "0 < length s1" using * a by auto
  ultimately show "Suc n < length s" unfolding eq 
    by (simp only: length_append)
qed

lemma seq_pow_length:
  assumes a: "[] \<notin> A"
  and     b: "s \<in> B ;; (A \<up> Suc n)"
  shows "n < length s"
proof -
  from b obtain s1 s2 where eq: "s = s1 @ s2" and *: "s2 \<in> A \<up> Suc n"
    unfolding Seq_def by auto
  from * have " n < length s2" by (rule pow_length[OF a])
  then show "n < length s" using eq by simp
qed


section {* A modified version of Arden's lemma *}

text {*  A helper lemma for Arden *}

lemma arden_helper:
  assumes eq: "X = X ;; A \<union> B"
  shows "X = X ;; (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B ;; (A \<up> m))"
proof (induct n)
  case 0 
  show "X = X ;; (A \<up> Suc 0) \<union> (\<Union>(m::nat)\<in>{0..0}. B ;; (A \<up> m))"
    using eq by simp
next
  case (Suc n)
  have ih: "X = X ;; (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B ;; (A \<up> m))" by fact
  also have "\<dots> = (X ;; A \<union> B) ;; (A \<up> Suc n) \<union> (\<Union>m\<in>{0..n}. B ;; (A \<up> m))" using eq by simp
  also have "\<dots> = X ;; (A \<up> Suc (Suc n)) \<union> (B ;; (A \<up> Suc n)) \<union> (\<Union>m\<in>{0..n}. B ;; (A \<up> m))"
    by (simp add: seq_union_distrib_right seq_assoc)
  also have "\<dots> = X ;; (A \<up> Suc (Suc n)) \<union> (\<Union>m\<in>{0..Suc n}. B ;; (A \<up> m))"
    by (auto simp add: le_Suc_eq)
  finally show "X = X ;; (A \<up> Suc (Suc n)) \<union> (\<Union>m\<in>{0..Suc n}. B ;; (A \<up> m))" .
qed

theorem arden:
  assumes nemp: "[] \<notin> A"
  shows "X = X ;; A \<union> B \<longleftrightarrow> X = B ;; A\<star>"
proof
  assume eq: "X = B ;; A\<star>"
  have "A\<star> = {[]} \<union> A\<star> ;; A" 
    unfolding seq_star_comm[symmetric]
    by (rule star_cases)
  then have "B ;; A\<star> = B ;; ({[]} \<union> A\<star> ;; A)"
    by (rule seq_add_left)
  also have "\<dots> = B \<union> B ;; (A\<star> ;; A)"
    unfolding seq_union_distrib_left by simp
  also have "\<dots> = B \<union> (B ;; A\<star>) ;; A" 
    by (simp only: seq_assoc)
  finally show "X = X ;; A \<union> B" 
    using eq by blast 
next
  assume eq: "X = X ;; A \<union> B"
  { fix n::nat
    have "B ;; (A \<up> n) \<subseteq> X" using arden_helper[OF eq, of "n"] by auto }
  then have "B ;; A\<star> \<subseteq> X" 
    unfolding Seq_def Star_def UNION_def by auto
  moreover
  { fix s::string
    obtain k where "k = length s" by auto
    then have not_in: "s \<notin> X ;; (A \<up> Suc k)" 
      using seq_pow_length[OF nemp] by blast
    assume "s \<in> X"
    then have "s \<in> X ;; (A \<up> Suc k) \<union> (\<Union>m\<in>{0..k}. B ;; (A \<up> m))"
      using arden_helper[OF eq, of "k"] by auto
    then have "s \<in> (\<Union>m\<in>{0..k}. B ;; (A \<up> m))" using not_in by auto
    moreover
    have "(\<Union>m\<in>{0..k}. B ;; (A \<up> m)) \<subseteq> (\<Union>n. B ;; (A \<up> n))" by auto
    ultimately 
    have "s \<in> B ;; A\<star>" 
      unfolding seq_Union_left Star_def by auto }
  then have "X \<subseteq> B ;; A\<star>" by auto
  ultimately 
  show "X = B ;; A\<star>" by simp
qed


section {* Regular Expressions *}

datatype rexp =
  NULL
| EMPTY
| CHAR char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp


text {* 
  The function @{text L} is overloaded, with the idea that @{text "L x"} 
  evaluates to the language represented by the object @{text x}.
*}

consts L:: "'a \<Rightarrow> lang"

overloading L_rexp \<equiv> "L::  rexp \<Rightarrow> lang"
begin
fun
  L_rexp :: "rexp \<Rightarrow> lang"
where
    "L_rexp (NULL) = {}"
  | "L_rexp (EMPTY) = {[]}"
  | "L_rexp (CHAR c) = {[c]}"
  | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)"
  | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)"
  | "L_rexp (STAR r) = (L_rexp r)\<star>"
end


text {* ALT-combination for a set of regular expressions *}

abbreviation
  Setalt  ("\<Uplus>_" [1000] 999) 
where
  "\<Uplus>A \<equiv> folds ALT NULL A"

text {* 
  For finite sets, @{term Setalt} is preserved under @{term L}.
*}

lemma folds_alt_simp [simp]:
  fixes rs::"rexp set"
  assumes a: "finite rs"
  shows "L (\<Uplus>rs) = \<Union> (L ` rs)"
unfolding folds_def
apply(rule set_eqI)
apply(rule someI2_ex)
apply(rule_tac finite_imp_fold_graph[OF a])
apply(erule fold_graph.induct)
apply(auto)
done

end
author	urbanc
	Thu, 21 Apr 2011 12:07:11 +0000
changeset 156	fd39492b187c
parent 149	e122cb146ecc
permissions	-rw-r--r--