# HG changeset patch # User urbanc # Date 1305748483 0 # Node ID e93760534354f67b52a10b232003f29e89389095 # Parent a8a442ba0dbf5802852829aedb6a51034932904c added directory for journal version; took uptodate version of the theory files diff -r a8a442ba0dbf -r e93760534354 Closure.thy --- a/Closure.thy Thu May 12 05:55:05 2011 +0000 +++ b/Closure.thy Wed May 18 19:54:43 2011 +0000 @@ -1,223 +1,140 @@ -theory "RegSet" - imports "Main" +theory Closure +imports Myhill_2 begin - -text {* Sequential composition of sets *} +section {* Closure properties of regular languages *} -definition - lang_seq :: "string set \ string set \ string set" ("_ ; _" [100,100] 100) -where - "L1 ; L2 = {s1@s2 | s1 s2. s1 \ L1 \ s2 \ L2}" +abbreviation + regular :: "lang \ bool" +where + "regular A \ \r::rexp. A = L r" -section {* Kleene star for sets *} +lemma closure_union[intro]: + assumes "regular A" "regular B" + shows "regular (A \ B)" +proof - + from assms obtain r1 r2::rexp where "L r1 = A" "L r2 = B" by auto + then have "A \ B = L (ALT r1 r2)" by simp + then show "regular (A \ B)" by blast +qed -inductive_set - Star :: "string set \ string set" ("_\" [101] 102) - for L :: "string set" -where - start[intro]: "[] \ L\" -| step[intro]: "\s1 \ L; s2 \ L\\ \ s1 @ s2 \ L\" +lemma closure_seq[intro]: + assumes "regular A" "regular B" + shows "regular (A ;; B)" +proof - + from assms obtain r1 r2::rexp where "L r1 = A" "L r2 = B" by auto + then have "A ;; B = L (SEQ r1 r2)" by simp + then show "regular (A ;; B)" by blast +qed - -text {* A standard property of star *} +lemma closure_star[intro]: + assumes "regular A" + shows "regular (A\)" +proof - + from assms obtain r::rexp where "L r = A" by auto + then have "A\ = L (STAR r)" by simp + then show "regular (A\)" by blast +qed -lemma lang_star_cases: - shows "L\ = {[]} \ L ; L\" -proof - { fix x - have "x \ L\ \ x \ {[]} \ L ; L\" - unfolding lang_seq_def - by (induct rule: Star.induct) (auto) - } - then show "L\ \ {[]} \ L ; L\" by auto -next - show "{[]} \ L ; L\ \ L\" - unfolding lang_seq_def by auto +lemma closure_complement[intro]: + assumes "regular A" + shows "regular (- A)" +proof - + from assms have "finite (UNIV // \A)" by (simp add: Myhill_Nerode) + then have "finite (UNIV // \(-A))" by (simp add: str_eq_rel_def) + then show "regular (- A)" by (simp add: Myhill_Nerode) +qed + +lemma closure_difference[intro]: + assumes "regular A" "regular B" + shows "regular (A - B)" +proof - + have "A - B = - (- A \ B)" by blast + moreover + have "regular (- (- A \ B))" + using assms by blast + ultimately show "regular (A - B)" by simp +qed + +lemma closure_intersection[intro]: + assumes "regular A" "regular B" + shows "regular (A \ B)" +proof - + have "A \ B = - (- A \ - B)" by blast + moreover + have "regular (- (- A \ - B))" + using assms by blast + ultimately show "regular (A \ B)" by simp qed -lemma lang_star_cases2: - shows "[] \ L \ L\ - {[]} = L ; L\" -by (subst lang_star_cases) - (simp add: lang_seq_def) - - -section {* Regular Expressions *} - -datatype rexp = - NULL -| EMPTY -| CHAR char -| SEQ rexp rexp -| ALT rexp rexp -| STAR rexp - - -section {* Semantics of Regular Expressions *} +text {* closure under string reversal *} fun - L :: "rexp \ string set" + Rev :: "rexp \ rexp" where - "L (NULL) = {}" -| "L (EMPTY) = {[]}" -| "L (CHAR c) = {[c]}" -| "L (SEQ r1 r2) = (L r1) ; (L r2)" -| "L (ALT r1 r2) = (L r1) \ (L r2)" -| "L (STAR r) = (L r)\" - -abbreviation - CUNIV :: "string set" -where - "CUNIV \ (\x. 
[x]) ` (UNIV::char set)" - -lemma CUNIV_star_minus: - "(CUNIV\ - {[c]}) = {[]} \ (CUNIV - {[c]}; (CUNIV\))" -apply(subst lang_star_cases) -apply(simp add: lang_seq_def) -oops - + "Rev NULL = NULL" +| "Rev EMPTY = EMPTY" +| "Rev (CHAR c) = CHAR c" +| "Rev (ALT r1 r2) = ALT (Rev r1) (Rev r2)" +| "Rev (SEQ r1 r2) = SEQ (Rev r2) (Rev r1)" +| "Rev (STAR r) = STAR (Rev r)" -lemma string_in_CUNIV: - shows "s \ CUNIV\" -proof (induct s) - case Nil - show "[] \ CUNIV\" by (rule start) -next - case (Cons c s) - have "[c] \ CUNIV" by simp - moreover - have "s \ CUNIV\" by fact - ultimately have "[c] @ s \ CUNIV\" by (rule step) - then show "c # s \ CUNIV\" by simp -qed - -lemma UNIV_CUNIV_star: - shows "UNIV = CUNIV\" -using string_in_CUNIV -by (auto) - -abbreviation - reg :: "string set => bool" -where - "reg L1 \ (\r. L r = L1)" - -lemma reg_null [intro]: - shows "reg {}" -by (metis L.simps(1)) +lemma rev_Seq: + "(rev ` A) ;; (rev ` B) = rev ` (B ;; A)" +unfolding Seq_def image_def +apply(auto) +apply(rule_tac x="xb @ xa" in exI) +apply(auto) +done -lemma reg_empty [intro]: - shows "reg {[]}" -by (metis L.simps(2)) - -lemma reg_star [intro]: - shows "reg L1 \ reg (L1\)" -by (metis L.simps(6)) - -lemma reg_seq [intro]: - assumes a: "reg L1" "reg L2" - shows "reg (L1 ; L2)" +lemma rev_Star1: + assumes a: "s \ (rev ` A)\" + shows "s \ rev ` (A\)" using a -by (metis L.simps(4)) - -lemma reg_union [intro]: - assumes a: "reg L1" "reg L2" - shows "reg (L1 \ L2)" -using a -by (metis L.simps(5)) +proof(induct rule: star_induct) + case (step s1 s2) + have inj: "inj (rev::string \ string)" unfolding inj_on_def by auto + have "s1 \ rev ` A" "s2 \ rev ` (A\)" by fact+ + then obtain x1 x2 where "x1 \ A" "x2 \ A\" and eqs: "s1 = rev x1" "s2 = rev x2" by auto + then have "x1 \ A\" "x2 \ A\" by (auto intro: star_intro2) + then have "x2 @ x1 \ A\" by (auto intro: star_intro1) + then have "rev (x2 @ x1) \ rev ` A\" using inj by (simp only: inj_image_mem_iff) + then show "s1 @ s2 \ rev ` A\" using eqs by simp +qed (auto) -lemma reg_string [intro]: - fixes s::string - shows "reg {s}" -proof (induct s) - case Nil - show "reg {[]}" by (rule reg_empty) -next - case (Cons c s) - have "reg {s}" by fact - then obtain r where "L r = {s}" by auto - then have "L (SEQ (CHAR c) r) = {[c]} ; {s}" by simp - also have "\ = {c # s}" by (simp add: lang_seq_def) - finally show "reg {c # s}" by blast -qed +lemma rev_Star2: + assumes a: "s \ A\" + shows "rev s \ (rev ` A)\" +using a +proof(induct rule: star_induct) + case (step s1 s2) + have inj: "inj (rev::string \ string)" unfolding inj_on_def by auto + have "s1 \ A"by fact + then have "rev s1 \ rev ` A" using inj by (simp only: inj_image_mem_iff) + then have "rev s1 \ (rev ` A)\" by (auto intro: star_intro2) + moreover + have "rev s2 \ (rev ` A)\" by fact + ultimately show "rev (s1 @ s2) \ (rev ` A)\" by (auto intro: star_intro1) +qed (auto) -lemma reg_finite [intro]: - assumes a: "finite L1" - shows "reg L1" -using a -proof(induct) - case empty - show "reg {}" by (rule reg_null) -next - case (insert s S) - have "reg {s}" by (rule reg_string) - moreover - have "reg S" by fact - ultimately have "reg ({s} \ S)" by (rule reg_union) - then show "reg (insert s S)" by simp +lemma rev_Star: + "(rev ` A)\ = rev ` (A\)" +using rev_Star1 rev_Star2 by auto + +lemma rev_lang: + "L (Rev r) = rev ` (L r)" +by (induct r) (simp_all add: rev_Star rev_Seq image_Un) + +lemma closure_reversal[intro]: + assumes "regular A" + shows "regular (rev ` A)" +proof - + from assms obtain r::rexp where "L r = A" by 
auto + then have "L (Rev r) = rev ` A" by (simp add: rev_lang) + then show "regular (rev` A)" by blast qed -lemma reg_cuniv [intro]: - shows "reg (CUNIV)" -by (rule reg_finite) (auto) -lemma reg_univ: - shows "reg (UNIV::string set)" -proof - - have "reg CUNIV" by (rule reg_cuniv) - then have "reg (CUNIV\)" by (rule reg_star) - then show "reg UNIV" by (simp add: UNIV_CUNIV_star) -qed - -lemma reg_finite_subset: - assumes a: "finite L1" - and b: "reg L1" "L2 \ L1" - shows "reg L2" -using a b -apply(induct arbitrary: L2) -apply(simp add: reg_empty) -oops - - -lemma reg_not: - shows "reg (UNIV - L r)" -proof (induct r) - case NULL - have "reg UNIV" by (rule reg_univ) - then show "reg (UNIV - L NULL)" by simp -next - case EMPTY - have "[] \ CUNIV" by auto - moreover - have "reg (CUNIV; CUNIV\)" by auto - ultimately have "reg (CUNIV\ - {[]})" - using lang_star_cases2 by simp - then show "reg (UNIV - L EMPTY)" by (simp add: UNIV_CUNIV_star) -next - case (CHAR c) - then show "?case" - apply(simp) - -using reg_UNIV -apply(simp) -apply(simp add: char_star2[symmetric]) -apply(rule reg_seq) -apply(rule reg_cuniv) -apply(rule reg_star) -apply(rule reg_cuniv) -oops - - - -end - - - - - - - - - - +end \ No newline at end of file diff -r a8a442ba0dbf -r e93760534354 Derivs.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Derivs.thy Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,521 @@ +theory Derivs +imports Closure +begin + +section {* Experiments with Derivatives -- independent of Myhill-Nerode *} + +subsection {* Left-Quotients *} + +definition + Delta :: "lang \ lang" +where + "Delta A = (if [] \ A then {[]} else {})" + +definition + Der :: "char \ lang \ lang" +where + "Der c A \ {s. [c] @ s \ A}" + +definition + Ders :: "string \ lang \ lang" +where + "Ders s A \ {s'. s @ s' \ A}" + +definition + Ders_set :: "lang \ lang \ lang" +where + "Ders_set A B \ {s' | s s'. s @ s' \ B \ s \ A}" + +lemma Ders_set_Ders: + shows "Ders_set A B = (\s \ A. Ders s B)" +unfolding Ders_set_def Ders_def +by auto + +lemma Der_null [simp]: + shows "Der c {} = {}" +unfolding Der_def +by auto + +lemma Der_empty [simp]: + shows "Der c {[]} = {}" +unfolding Der_def +by auto + +lemma Der_char [simp]: + shows "Der c {[d]} = (if c = d then {[]} else {})" +unfolding Der_def +by auto + +lemma Der_union [simp]: + shows "Der c (A \ B) = Der c A \ Der c B" +unfolding Der_def +by auto + +lemma Der_seq [simp]: + shows "Der c (A ;; B) = (Der c A) ;; B \ (Delta A ;; Der c B)" +unfolding Der_def Delta_def +unfolding Seq_def +by (auto simp add: Cons_eq_append_conv) + +lemma Der_star [simp]: + shows "Der c (A\) = (Der c A) ;; A\" +proof - + have incl: "Delta A ;; Der c (A\) \ (Der c A) ;; A\" + unfolding Der_def Delta_def Seq_def + apply(auto) + apply(drule star_decom) + apply(auto simp add: Cons_eq_append_conv) + done + + have "Der c (A\) = Der c ({[]} \ A ;; A\)" + by (simp only: star_cases[symmetric]) + also have "... = Der c (A ;; A\)" + by (simp only: Der_union Der_empty) (simp) + also have "... = (Der c A) ;; A\ \ (Delta A ;; Der c (A\))" + by simp + also have "... = (Der c A) ;; A\" + using incl by auto + finally show "Der c (A\) = (Der c A) ;; A\" . 
+qed + + +lemma Ders_singleton: + shows "Ders [c] A = Der c A" +unfolding Der_def Ders_def +by simp + +lemma Ders_append: + shows "Ders (s1 @ s2) A = Ders s2 (Ders s1 A)" +unfolding Ders_def by simp + +lemma MN_Rel_Ders: + shows "x \A y \ Ders x A = Ders y A" +unfolding Ders_def str_eq_def str_eq_rel_def +by auto + +subsection {* Brozowsky's derivatives of regular expressions *} + +fun + nullable :: "rexp \ bool" +where + "nullable (NULL) = False" +| "nullable (EMPTY) = True" +| "nullable (CHAR c) = False" +| "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" +| "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" +| "nullable (STAR r) = True" + +fun + der :: "char \ rexp \ rexp" +where + "der c (NULL) = NULL" +| "der c (EMPTY) = NULL" +| "der c (CHAR c') = (if c = c' then EMPTY else NULL)" +| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" +| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" +| "der c (STAR r) = SEQ (der c r) (STAR r)" + +function + ders :: "string \ rexp \ rexp" +where + "ders [] r = r" +| "ders (s @ [c]) r = der c (ders s r)" +by (auto) (metis rev_cases) + +termination + by (relation "measure (length o fst)") (auto) + +lemma Delta_nullable: + shows "Delta (L r) = (if nullable r then {[]} else {})" +unfolding Delta_def +by (induct r) (auto simp add: Seq_def split: if_splits) + +lemma Der_der: + fixes r::rexp + shows "Der c (L r) = L (der c r)" +by (induct r) (simp_all add: Delta_nullable) + +lemma Ders_ders: + fixes r::rexp + shows "Ders s (L r) = L (ders s r)" +apply(induct s rule: rev_induct) +apply(simp add: Ders_def) +apply(simp only: ders.simps) +apply(simp only: Ders_append) +apply(simp only: Ders_singleton) +apply(simp only: Der_der) +done + + +subsection {* Antimirov's Partial Derivatives *} + +abbreviation + "SEQS R r \ {SEQ r' r | r'. r' \ R}" + +fun + pder :: "char \ rexp \ rexp set" +where + "pder c NULL = {NULL}" +| "pder c EMPTY = {NULL}" +| "pder c (CHAR c') = (if c = c' then {EMPTY} else {NULL})" +| "pder c (ALT r1 r2) = (pder c r1) \ (pder c r2)" +| "pder c (SEQ r1 r2) = SEQS (pder c r1) r2 \ (if nullable r1 then pder c r2 else {})" +| "pder c (STAR r) = SEQS (pder c r) (STAR r)" + +abbreviation + "pder_set c R \ \r \ R. pder c r" + +function + pders :: "string \ rexp \ rexp set" +where + "pders [] r = {r}" +| "pders (s @ [c]) r = pder_set c (pders s r)" +by (auto) (metis rev_cases) + +termination + by (relation "measure (length o fst)") (auto) + +abbreviation + "pders_set A r \ \s \ A. pders s r" + +lemma pders_append: + "pders (s1 @ s2) r = \ (pders s2) ` (pders s1 r)" +apply(induct s2 arbitrary: s1 r rule: rev_induct) +apply(simp) +apply(subst append_assoc[symmetric]) +apply(simp only: pders.simps) +apply(auto) +done + +lemma pders_singleton: + "pders [c] r = pder c r" +apply(subst append_Nil[symmetric]) +apply(simp only: pders.simps) +apply(simp) +done + +lemma pder_set_lang: + shows "(\ (L ` pder_set c R)) = (\r \ R. (\L ` (pder c r)))" +unfolding image_def +by auto + +lemma + shows seq_UNION_left: "B ;; (\n\C. A n) = (\n\C. B ;; A n)" + and seq_UNION_right: "(\n\C. A n) ;; B = (\n\C. 
A n ;; B)" +unfolding Seq_def by auto + +lemma Der_pder: + fixes r::rexp + shows "Der c (L r) = \ L ` (pder c r)" +by (induct r) (auto simp add: Delta_nullable seq_UNION_right) + +lemma Ders_pders: + fixes r::rexp + shows "Ders s (L r) = \ L ` (pders s r)" +proof (induct s rule: rev_induct) + case (snoc c s) + have ih: "Ders s (L r) = \ L ` (pders s r)" by fact + have "Ders (s @ [c]) (L r) = Ders [c] (Ders s (L r))" + by (simp add: Ders_append) + also have "\ = Der c (\ L ` (pders s r))" using ih + by (simp add: Ders_singleton) + also have "\ = (\r\pders s r. Der c (L r))" + unfolding Der_def image_def by auto + also have "\ = (\r\pders s r. (\ L ` (pder c r)))" + by (simp add: Der_pder) + also have "\ = (\L ` (pder_set c (pders s r)))" + by (simp add: pder_set_lang) + also have "\ = (\L ` (pders (s @ [c]) r))" + by simp + finally show "Ders (s @ [c]) (L r) = \L ` pders (s @ [c]) r" . +qed (simp add: Ders_def) + +lemma Ders_set_pders_set: + fixes r::rexp + shows "Ders_set A (L r) = (\ L ` (pders_set A r))" +by (simp add: Ders_set_Ders Ders_pders) + +lemma pders_NULL [simp]: + shows "pders s NULL = {NULL}" +by (induct s rule: rev_induct) (simp_all) + +lemma pders_EMPTY [simp]: + shows "pders s EMPTY = (if s = [] then {EMPTY} else {NULL})" +by (induct s rule: rev_induct) (auto) + +lemma pders_CHAR [simp]: + shows "pders s (CHAR c) = (if s = [] then {CHAR c} else (if s = [c] then {EMPTY} else {NULL}))" +by (induct s rule: rev_induct) (auto) + +lemma pders_ALT [simp]: + shows "pders s (ALT r1 r2) = (if s = [] then {ALT r1 r2} else (pders s r1) \ (pders s r2))" +by (induct s rule: rev_induct) (auto) + +definition + "Suf s \ {v. v \ [] \ (\u. u @ v = s)}" + +lemma Suf: + shows "Suf (s @ [c]) = (Suf s) ;; {[c]} \ {[c]}" +unfolding Suf_def Seq_def +by (auto simp add: append_eq_append_conv2 append_eq_Cons_conv) + +lemma Suf_Union: + shows "(\v \ Suf s ;; {[c]}. P v) = (\v \ Suf s. P (v @ [c]))" +by (auto simp add: Seq_def) + +lemma inclusion1: + shows "pder_set c (SEQS R r2) \ SEQS (pder_set c R) r2 \ (pder c r2)" +apply(auto simp add: if_splits) +apply(blast) +done + +lemma pders_SEQ: + shows "pders s (SEQ r1 r2) \ SEQS (pders s r1) r2 \ (\v \ Suf s. pders v r2)" +proof (induct s rule: rev_induct) + case (snoc c s) + have ih: "pders s (SEQ r1 r2) \ SEQS (pders s r1) r2 \ (\v \ Suf s. pders v r2)" + by fact + have "pders (s @ [c]) (SEQ r1 r2) = pder_set c (pders s (SEQ r1 r2))" by simp + also have "\ \ pder_set c (SEQS (pders s r1) r2 \ (\v \ Suf s. pders v r2))" + using ih by auto + also have "\ = pder_set c (SEQS (pders s r1) r2) \ pder_set c (\v \ Suf s. pders v r2)" + by (simp) + also have "\ = pder_set c (SEQS (pders s r1) r2) \ (\v \ Suf s. pder_set c (pders v r2))" + by (simp) + also have "\ \ pder_set c (SEQS (pders s r1) r2) \ (pder c r2) \ (\v \ Suf s. pders (v @ [c]) r2)" + by (auto) + also have "\ \ SEQS (pder_set c (pders s r1)) r2 \ (pder c r2) \ (\v \ Suf s. pders (v @ [c]) r2)" + using inclusion1 by blast + also have "\ = SEQS (pders (s @ [c]) r1) r2 \ (\v \ Suf (s @ [c]). pders v r2)" + apply(subst (2) pders.simps) + apply(simp only: Suf) + apply(simp add: Suf_Union pders_singleton) + apply(auto) + done + finally show ?case . +qed (simp) + +lemma pders_STAR: + assumes a: "s \ []" + shows "pders s (STAR r) \ (\v \ Suf s. SEQS (pders v r) (STAR r))" +using a +proof (induct s rule: rev_induct) + case (snoc c s) + have ih: "s \ [] \ pders s (STAR r) \ (\v\Suf s. 
SEQS (pders v r) (STAR r))" by fact + { assume asm: "s \ []" + have "pders (s @ [c]) (STAR r) = pder_set c (pders s (STAR r))" by simp + also have "\ \ (pder_set c (\v\Suf s. SEQS (pders v r) (STAR r)))" + using ih[OF asm] by blast + also have "\ = (\v\Suf s. pder_set c (SEQS (pders v r) (STAR r)))" + by simp + also have "\ \ (\v\Suf s. (SEQS (pder_set c (pders v r)) (STAR r) \ pder c (STAR r)))" + using inclusion1 by blast + also have "\ = (\v\Suf s. (SEQS (pder_set c (pders v r)) (STAR r))) \ pder c (STAR r)" + using asm by (auto simp add: Suf_def) + also have "\ = (\v\Suf s. (SEQS (pders (v @ [c]) r) (STAR r))) \ (SEQS (pder c r) (STAR r))" + by simp + also have "\ = (\v\Suf (s @ [c]). (SEQS (pders v r) (STAR r)))" + apply(simp only: Suf) + apply(simp add: Suf_Union pders_singleton) + apply(auto) + done + finally have ?case . + } + moreover + { assume asm: "s = []" + then have ?case + apply(simp add: pders_singleton Suf_def) + apply(auto) + apply(rule_tac x="[c]" in exI) + apply(simp add: pders_singleton) + done + } + ultimately show ?case by blast +qed (simp) + +abbreviation + "UNIV1 \ UNIV - {[]}" + +lemma pders_set_NULL: + shows "pders_set UNIV1 NULL = {NULL}" +by auto + +lemma pders_set_EMPTY: + shows "pders_set UNIV1 EMPTY = {NULL}" +by (auto split: if_splits) + +lemma pders_set_CHAR: + shows "pders_set UNIV1 (CHAR c) \ {EMPTY, NULL}" +by (auto split: if_splits) + +lemma pders_set_ALT: + shows "pders_set UNIV1 (ALT r1 r2) = pders_set UNIV1 r1 \ pders_set UNIV1 r2" +by auto + +lemma pders_set_SEQ_aux: + assumes a: "s \ UNIV1" + shows "pders_set (Suf s) r2 \ pders_set UNIV1 r2" +using a by (auto simp add: Suf_def) + +lemma pders_set_SEQ: + shows "pders_set UNIV1 (SEQ r1 r2) \ SEQS (pders_set UNIV1 r1) r2 \ pders_set UNIV1 r2" +apply(rule UN_least) +apply(rule subset_trans) +apply(rule pders_SEQ) +apply(simp) +apply(rule conjI) +apply(auto)[1] +apply(rule subset_trans) +apply(rule pders_set_SEQ_aux) +apply(auto) +done + +lemma pders_set_STAR: + shows "pders_set UNIV1 (STAR r) \ SEQS (pders_set UNIV1 r) (STAR r)" +apply(rule UN_least) +apply(rule subset_trans) +apply(rule pders_STAR) +apply(simp) +apply(simp add: Suf_def) +apply(auto) +done + +lemma finite_SEQS: + assumes a: "finite A" + shows "finite (SEQS A r)" +using a by (auto) + +lemma finite_pders_set_UNIV1: + shows "finite (pders_set UNIV1 r)" +apply(induct r) +apply(simp) +apply(simp only: pders_set_EMPTY) +apply(simp) +apply(rule finite_subset) +apply(rule pders_set_CHAR) +apply(simp) +apply(rule finite_subset) +apply(rule pders_set_SEQ) +apply(simp only: finite_SEQS finite_Un) +apply(simp) +apply(simp only: pders_set_ALT) +apply(simp) +apply(rule finite_subset) +apply(rule pders_set_STAR) +apply(simp only: finite_SEQS) +done + +lemma pders_set_UNIV_UNIV1: + shows "pders_set UNIV r = pders [] r \ pders_set UNIV1 r" +apply(auto) +apply(rule_tac x="[]" in exI) +apply(simp) +done + +lemma finite_pders_set_UNIV: + shows "finite (pders_set UNIV r)" +unfolding pders_set_UNIV_UNIV1 +by (simp add: finite_pders_set_UNIV1) + +lemma finite_pders_set: + shows "finite (pders_set A r)" +apply(rule rev_finite_subset) +apply(rule_tac r="r" in finite_pders_set_UNIV) +apply(auto) +done + +lemma finite_pders: + shows "finite (pders s r)" +using finite_pders_set[where A="{s}" and r="r"] +by simp + +lemma finite_pders2: + shows "finite {pders s r | s. s \ A}" +proof - + have "{pders s r | s. 
s \ A} \ Pow (pders_set A r)" by auto + moreover + have "finite (Pow (pders_set A r))" + using finite_pders_set by simp + ultimately + show "finite {pders s r | s. s \ A}" + by(rule finite_subset) +qed + + +lemma Myhill_Nerode3: + fixes r::"rexp" + shows "finite (UNIV // \(L r))" +proof - + have "finite (UNIV // =(\x. pders x r)=)" + proof - + have "range (\x. pders x r) \ {pders s r | s. s \ UNIV}" by auto + moreover + have "finite {pders s r | s. s \ UNIV}" by (rule finite_pders2) + ultimately + have "finite (range (\x. pders x r))" + by (rule finite_subset) + then show "finite (UNIV // =(\x. pders x r)=)" + by (rule finite_eq_tag_rel) + qed + moreover + have " =(\x. pders x r)= \ \(L r)" + unfolding tag_eq_rel_def + by (auto simp add: str_eq_def[symmetric] MN_Rel_Ders Ders_pders) + moreover + have "equiv UNIV =(\x. pders x r)=" + unfolding equiv_def refl_on_def sym_def trans_def + unfolding tag_eq_rel_def + by auto + moreover + have "equiv UNIV (\(L r))" + unfolding equiv_def refl_on_def sym_def trans_def + unfolding str_eq_rel_def + by auto + ultimately show "finite (UNIV // \(L r))" + by (rule refined_partition_finite) +qed + + +section {* Closure under Left-Quotients *} + +lemma closure_left_quotient: + assumes "regular A" + shows "regular (Ders_set B A)" +proof - + from assms obtain r::rexp where eq: "L r = A" by auto + have fin: "finite (pders_set B r)" by (rule finite_pders_set) + + have "Ders_set B (L r) = (\ L ` (pders_set B r))" + by (simp add: Ders_set_pders_set) + also have "\ = L (\(pders_set B r))" using fin by simp + finally have "Ders_set B A = L (\(pders_set B r))" using eq + by simp + then show "regular (Ders_set B A)" by auto +qed + + +section {* Relating standard and partial derivations *} + +lemma + shows "(\ L ` (pder c r)) = L (der c r)" +unfolding Der_der[symmetric] Der_pder by simp + +lemma + shows "(\ L ` (pders s r)) = L (ders s r)" +unfolding Ders_ders[symmetric] Ders_pders by simp + + + +fun + width :: "rexp \ nat" +where + "width (NULL) = 0" +| "width (EMPTY) = 0" +| "width (CHAR c) = 1" +| "width (ALT r1 r2) = width r1 + width r2" +| "width (SEQ r1 r2) = width r1 + width r2" +| "width (STAR r) = width r" + + + +end \ No newline at end of file diff -r a8a442ba0dbf -r e93760534354 IsaMakefile --- a/IsaMakefile Thu May 12 05:55:05 2011 +0000 +++ b/IsaMakefile Wed May 18 19:54:43 2011 +0000 @@ -69,6 +69,20 @@ cd Paper/generated ; $(ISABELLE_TOOL) latex -o pdf root.tex cp Paper/generated/root.pdf paper.pdf +## Journal Version + +session4: Journal/ROOT.ML \ + Journal/document/root* \ + Journal/*.thy + @$(USEDIR) -D generated -f ROOT.ML HOL Journal + +journal: session4 + rm -f Journal/generated/*.aux # otherwise latex will fall over + cd Journal/generated ; $(ISABELLE_TOOL) latex -o pdf root.tex + cd Journal/generated ; bibtex root + cd Journal/generated ; $(ISABELLE_TOOL) latex -o pdf root.tex + cp Journal/generated/root.pdf journal.pdf + ## clean diff -r a8a442ba0dbf -r e93760534354 Journal/Paper.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Journal/Paper.thy Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,1403 @@ +(*<*) +theory Paper +imports "../Derivs" "~~/src/HOL/Library/LaTeXsugar" +begin + +declare [[show_question_marks = false]] + +consts + REL :: "(string \ string) \ bool" + UPLUS :: "'a set \ 'a set \ (nat \ 'a) set" + +abbreviation + "EClass x R \ R `` {x}" + +abbreviation + "Append_rexp2 r_itm r \ Append_rexp r r_itm" + + +notation (latex output) + str_eq_rel ("\\<^bsub>_\<^esub>") and + str_eq ("_ \\<^bsub>_\<^esub> _") and + Seq (infixr "\" 100) 
and + Star ("_\<^bsup>\\<^esup>") and + pow ("_\<^bsup>_\<^esup>" [100, 100] 100) and + Suc ("_+1" [100] 100) and + quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and + REL ("\") and + UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and + L ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and + Lam ("\'(_')" [100] 100) and + Trn ("'(_, _')" [100, 100] 100) and + EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and + transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and + Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and + Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and + Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and + uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and + tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and + tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and + tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _" [100, 100] 100) and + tag_str_SEQ ("tag\<^isub>S\<^isub>E\<^isub>Q _ _ _" [100, 100, 100] 100) and + tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _" [100] 100) and + tag_str_STAR ("tag\<^isub>S\<^isub>T\<^isub>A\<^isub>R _ _" [100, 100] 100) +lemma meta_eq_app: + shows "f \ \x. g x \ f x \ g x" + by auto + +(*>*) + + +section {* Introduction *} + +text {* + Regular languages are an important and well-understood subject in Computer + Science, with many beautiful theorems and many useful algorithms. There is a + wide range of textbooks on this subject, many of which are aimed at students + and contain very detailed `pencil-and-paper' proofs + (e.g.~\cite{Kozen97}). It seems natural to exercise theorem provers by + formalising the theorems and by verifying formally the algorithms. + + There is however a problem: the typical approach to regular languages is to + introduce finite automata and then define everything in terms of them. For + example, a regular language is normally defined as one whose strings are + recognised by a finite deterministic automaton. This approach has many + benefits. Among them is the fact that it is easy to convince oneself that + regular languages are closed under complementation: one just has to exchange + the accepting and non-accepting states in the corresponding automaton to + obtain an automaton for the complement language. The problem, however, lies with + formalising such reasoning in a HOL-based theorem prover, in our case + Isabelle/HOL. Automata are built up from states and transitions that + need to be represented as graphs, matrices or functions, none + of which can be defined as an inductive datatype. + + In case of graphs and matrices, this means we have to build our own + reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor + HOLlight support them with libraries. Even worse, reasoning about graphs and + matrices can be a real hassle in HOL-based theorem provers. 
Consider for + example the operation of sequencing two automata, say $A_1$ and $A_2$, by + connecting the accepting states of $A_1$ to the initial state of $A_2$:\\[-5.5mm] + % + \begin{center} + \begin{tabular}{ccc} + \begin{tikzpicture}[scale=0.8] + %\draw[step=2mm] (-1,-1) grid (1,1); + + \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); + \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3); + + \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \draw (-0.6,0.0) node {\footnotesize$A_1$}; + \draw ( 0.6,0.0) node {\footnotesize$A_2$}; + \end{tikzpicture} + + & + + \raisebox{1.1mm}{\bf\Large$\;\;\;\Rightarrow\,\;\;$} + + & + + \begin{tikzpicture}[scale=0.8] + %\draw[step=2mm] (-1,-1) grid (1,1); + + \draw[rounded corners=1mm, very thick] (-1.0,-0.3) rectangle (-0.2,0.3); + \draw[rounded corners=1mm, very thick] ( 0.2,-0.3) rectangle ( 1.0,0.3); + + \node (A) at (-1.0,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (B) at ( 0.2,0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (C) at (-0.2, 0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (D) at (-0.2,-0.13) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \node (E) at (1.0, 0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (F) at (1.0,-0.0) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + \node (G) at (1.0,-0.2) [circle, very thick, draw, fill=white, inner sep=0.4mm] {}; + + \draw (C) to [very thick, bend left=45] (B); + \draw (D) to [very thick, bend right=45] (B); + + \draw (-0.6,0.0) node {\footnotesize$A_1$}; + \draw ( 0.6,0.0) node {\footnotesize$A_2$}; + \end{tikzpicture} + + \end{tabular} + \end{center} + + \noindent + On `paper' we can define the corresponding graph in terms of the disjoint + union of the state nodes. Unfortunately in HOL, the standard definition for disjoint + union, namely + % + \begin{equation}\label{disjointunion} + @{term "UPLUS A\<^isub>1 A\<^isub>2 \ {(1, x) | x. x \ A\<^isub>1} \ {(2, y) | y. y \ A\<^isub>2}"} + \end{equation} + + \noindent + changes the type---the disjoint union is not a set, but a set of pairs. + Using this definition for disjoint union means we do not have a single type for automata + and hence will not be able to state certain properties about \emph{all} + automata, since there is no type quantification available in HOL (unlike in Coq, for example). An + alternative, which provides us with a single type for automata, is to give every + state node an identity, for example a natural + number, and then be careful to rename these identities apart whenever + connecting two automata. This results in clunky proofs + establishing that properties are invariant under renaming. Similarly, + connecting two automata represented as matrices results in very adhoc + constructions, which are not pleasant to reason about. 
+
+  Functions are much better supported in Isabelle/HOL, but they still lead to similar
+  problems as with graphs. Composing, for example, two non-deterministic automata in parallel
+  also requires the formalisation of disjoint unions. Nipkow \cite{Nipkow98}
+  dismisses the option of using identities for this purpose, because according to
+  him it leads to ``messy proofs''. He
+  opts for a variant of \eqref{disjointunion} using bit lists, but writes
+
+  \begin{quote}
+  \it%
+  \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
+  `` & All lemmas appear obvious given a picture of the composition of automata\ldots
+       Yet their proofs require a painful amount of detail.''
+  \end{tabular}
+  \end{quote}
+
+  \noindent
+  and
+
+  \begin{quote}
+  \it%
+  \begin{tabular}{@ {}l@ {}p{0.88\textwidth}@ {}}
+  `` & If the reader finds the above treatment in terms of bit lists revoltingly
+       concrete, I cannot disagree. A more abstract approach is clearly desirable.''
+  \end{tabular}
+  \end{quote}
+
+
+  \noindent
+  Moreover, it is not so clear how to conveniently impose a finiteness condition
+  upon functions in order to represent \emph{finite} automata. The best option is
+  probably to resort to more advanced reasoning frameworks, such as \emph{locales}
+  or \emph{type classes},
+  which are \emph{not} available in all HOL-based theorem provers.
+
+  {\bf add comments from Brzozowski}
+
+  Because of these problems with representing automata, there seems
+  to be no substantial formalisation of automata theory and regular languages
+  carried out in HOL-based theorem provers. Nipkow \cite{Nipkow98} establishes
+  the link between regular expressions and automata in
+  the context of lexing. Berghofer and Reiter \cite{BerghoferReiter09}
+  formalise automata working over
+  bit strings in the context of Presburger arithmetic.
+  The only larger formalisations of automata theory
+  are carried out in Nuprl \cite{Constable00} and in Coq \cite{Filliatre97}.
+
+  In this paper, we will not attempt to formalise automata theory in
+  Isabelle/HOL, but take a different approach to regular
+  languages. Instead of defining a regular language as one where there exists
+  an automaton that recognises all strings of the language, we define a
+  regular language as:
+
+  \begin{definition}
+  A language @{text A} is \emph{regular}, provided there is a regular expression that matches all
+  strings of @{text "A"}.
+  \end{definition}
+
+  \noindent
+  The reason is that regular expressions, unlike graphs, matrices and functions, can
+  easily be defined as an inductive datatype. Consequently a corresponding reasoning
+  infrastructure comes for free. This has recently been exploited in HOL4 with a formalisation
+  of regular expression matching based on derivatives \cite{OwensSlind08} and
+  with an equivalence checker for regular expressions in Isabelle/HOL \cite{KraussNipkow11}.
+  The purpose of this paper is to
+  show that a central result about regular languages---the Myhill-Nerode theorem---can
+  be recreated using only regular expressions. This theorem gives necessary
+  and sufficient conditions for when a language is regular. As a corollary of this
+  theorem we can easily establish the usual closure properties, including
+  complementation, for regular languages.\smallskip
+
+  \noindent
+  {\bf Contributions:}
+  There is an extensive literature on regular languages.
+  To the best of our knowledge, our proof of the Myhill-Nerode theorem is the
+  first that is based solely on regular expressions.
We prove the part of this theorem + stating that a regular expression has only finitely many partitions using certain + tagging-functions. Again to our best knowledge, these tagging-functions have + not been used before to establish the Myhill-Nerode theorem. +*} + +section {* Preliminaries *} + +text {* + Strings in Isabelle/HOL are lists of characters with the \emph{empty string} + being represented by the empty list, written @{term "[]"}. \emph{Languages} + are sets of strings. The language containing all strings is written in + Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages + is written @{term "A ;; B"} and a language raised to the power @{text n} is written + @{term "A \ n"}. They are defined as usual + + \begin{center} + @{thm Seq_def[THEN eq_reflection, where A1="A" and B1="B"]} + \hspace{7mm} + @{thm pow.simps(1)[THEN eq_reflection, where A1="A"]} + \hspace{7mm} + @{thm pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]} + \end{center} + + \noindent + where @{text "@"} is the list-append operation. The Kleene-star of a language @{text A} + is defined as the union over all powers, namely @{thm Star_def}. In the paper + we will make use of the following properties of these constructions. + + \begin{proposition}\label{langprops}\mbox{}\\ + \begin{tabular}{@ {}ll} + (i) & @{thm star_cases} \\ + (ii) & @{thm[mode=IfThen] pow_length}\\ + (iii) & @{thm seq_Union_left} \\ + \end{tabular} + \end{proposition} + + \noindent + In @{text "(ii)"} we use the notation @{term "length s"} for the length of a + string; this property states that if \mbox{@{term "[] \ A"}} then the lengths of + the strings in @{term "A \ (Suc n)"} must be longer than @{text n}. We omit + the proofs for these properties, but invite the reader to consult our + formalisation.\footnote{Available at \url{http://www4.in.tum.de/~urbanc/regexp.html}} + + The notation in Isabelle/HOL for the quotient of a language @{text A} according to an + equivalence relation @{term REL} is @{term "A // REL"}. We will write + @{text "\x\\<^isub>\"} for the equivalence class defined + as \mbox{@{text "{y | y \ x}"}}. + + + Central to our proof will be the solution of equational systems + involving equivalence classes of languages. For this we will use Arden's Lemma \cite{Brzozowski64}, + which solves equations of the form @{term "X = A ;; X \ B"} provided + @{term "[] \ A"}. However we will need the following `reverse' + version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A ;; X"} to + \mbox{@{term "X ;; A"}}). + + \begin{lemma}[Reverse Arden's Lemma]\label{arden}\mbox{}\\ + If @{thm (prem 1) arden} then + @{thm (lhs) arden} if and only if + @{thm (rhs) arden}. + \end{lemma} + + \begin{proof} + For the right-to-left direction we assume @{thm (rhs) arden} and show + that @{thm (lhs) arden} holds. From Prop.~\ref{langprops}@{text "(i)"} + we have @{term "A\ = {[]} \ A ;; A\"}, + which is equal to @{term "A\ = {[]} \ A\ ;; A"}. Adding @{text B} to both + sides gives @{term "B ;; A\ = B ;; ({[]} \ A\ ;; A)"}, whose right-hand side + is equal to @{term "(B ;; A\) ;; A \ B"}. This completes this direction. + + For the other direction we assume @{thm (lhs) arden}. By a simple induction + on @{text n}, we can establish the property + + \begin{center} + @{text "(*)"}\hspace{5mm} @{thm (concl) arden_helper} + \end{center} + + \noindent + Using this property we can show that @{term "B ;; (A \ n) \ X"} holds for + all @{text n}. 
From this we can infer @{term "B ;; A\ \ X"} using the definition + of @{text "\"}. + For the inclusion in the other direction we assume a string @{text s} + with length @{text k} is an element in @{text X}. Since @{thm (prem 1) arden} + we know by Prop.~\ref{langprops}@{text "(ii)"} that + @{term "s \ X ;; (A \ Suc k)"} since its length is only @{text k} + (the strings in @{term "X ;; (A \ Suc k)"} are all longer). + From @{text "(*)"} it follows then that + @{term s} must be an element in @{term "(\m\{0..k}. B ;; (A \ m))"}. This in turn + implies that @{term s} is in @{term "(\n. B ;; (A \ n))"}. Using Prop.~\ref{langprops}@{text "(iii)"} + this is equal to @{term "B ;; A\"}, as we needed to show.\qed + \end{proof} + + \noindent + Regular expressions are defined as the inductive datatype + + \begin{center} + @{text r} @{text "::="} + @{term NULL}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term EMPTY}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "CHAR c"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "SEQ r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "ALT r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} + @{term "STAR r"} + \end{center} + + \noindent + and the language matched by a regular expression is defined as + + \begin{center} + \begin{tabular}{c@ {\hspace{10mm}}c} + \begin{tabular}{rcl} + @{thm (lhs) L_rexp.simps(1)} & @{text "\"} & @{thm (rhs) L_rexp.simps(1)}\\ + @{thm (lhs) L_rexp.simps(2)} & @{text "\"} & @{thm (rhs) L_rexp.simps(2)}\\ + @{thm (lhs) L_rexp.simps(3)[where c="c"]} & @{text "\"} & @{thm (rhs) L_rexp.simps(3)[where c="c"]}\\ + \end{tabular} + & + \begin{tabular}{rcl} + @{thm (lhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & + @{thm (rhs) L_rexp.simps(4)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ + @{thm (lhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]} & @{text "\"} & + @{thm (rhs) L_rexp.simps(5)[where ?r1.0="r\<^isub>1" and ?r2.0="r\<^isub>2"]}\\ + @{thm (lhs) L_rexp.simps(6)[where r="r"]} & @{text "\"} & + @{thm (rhs) L_rexp.simps(6)[where r="r"]}\\ + \end{tabular} + \end{tabular} + \end{center} + + Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating + a regular expression that matches the union of all languages of @{text rs}. We only need to know the + existence + of such a regular expression and therefore we use Isabelle/HOL's @{const "fold_graph"} and Hilbert's + @{text "\"} to define @{term "\rs"}. This operation, roughly speaking, folds @{const ALT} over the + set @{text rs} with @{const NULL} for the empty set. We can prove that for a finite set @{text rs} + % + \begin{equation}\label{uplus} + \mbox{@{thm (lhs) folds_alt_simp} @{text "= \ (\ ` rs)"}} + \end{equation} + + \noindent + holds, whereby @{text "\ ` rs"} stands for the + image of the set @{text rs} under function @{text "\"}. +*} + + +section {* The Myhill-Nerode Theorem, First Part *} + +text {* + The key definition in the Myhill-Nerode theorem is the + \emph{Myhill-Nerode relation}, which states that w.r.t.~a language two + strings are related, provided there is no distinguishing extension in this + language. This can be defined as a tertiary relation. 
+ + \begin{definition}[Myhill-Nerode Relation] Given a language @{text A}, two strings @{text x} and + @{text y} are Myhill-Nerode related provided + \begin{center} + @{thm str_eq_def[simplified str_eq_rel_def Pair_Collect]} + \end{center} + \end{definition} + + \noindent + It is easy to see that @{term "\A"} is an equivalence relation, which + partitions the set of all strings, @{text "UNIV"}, into a set of disjoint + equivalence classes. To illustrate this quotient construction, let us give a simple + example: consider the regular language containing just + the string @{text "[c]"}. The relation @{term "\({[c]})"} partitions @{text UNIV} + into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and @{text "X\<^isub>3"} + as follows + + \begin{center} + @{text "X\<^isub>1 = {[]}"}\hspace{5mm} + @{text "X\<^isub>2 = {[c]}"}\hspace{5mm} + @{text "X\<^isub>3 = UNIV - {[], [c]}"} + \end{center} + + One direction of the Myhill-Nerode theorem establishes + that if there are finitely many equivalence classes, like in the example above, then + the language is regular. In our setting we therefore have to show: + + \begin{theorem}\label{myhillnerodeone} + @{thm[mode=IfThen] Myhill_Nerode1} + \end{theorem} + + \noindent + To prove this theorem, we first define the set @{term "finals A"} as those equivalence + classes from @{term "UNIV // \A"} that contain strings of @{text A}, namely + % + \begin{equation} + @{thm finals_def} + \end{equation} + + \noindent + In our running example, @{text "X\<^isub>2"} is the only + equivalence class in @{term "finals {[c]}"}. + It is straightforward to show that in general @{thm lang_is_union_of_finals} and + @{thm finals_in_partitions} hold. + Therefore if we know that there exists a regular expression for every + equivalence class in \mbox{@{term "finals A"}} (which by assumption must be + a finite set), then we can use @{text "\"} to obtain a regular expression + that matches every string in @{text A}. + + + Our proof of Thm.~\ref{myhillnerodeone} relies on a method that can calculate a + regular expression for \emph{every} equivalence class, not just the ones + in @{term "finals A"}. We + first define the notion of \emph{one-character-transition} between + two equivalence classes + % + \begin{equation} + @{thm transition_def} + \end{equation} + + \noindent + which means that if we concatenate the character @{text c} to the end of all + strings in the equivalence class @{text Y}, we obtain a subset of + @{text X}. Note that we do not define an automaton here, we merely relate two sets + (with the help of a character). In our concrete example we have + @{term "X\<^isub>1 \c\ X\<^isub>2"}, @{term "X\<^isub>1 \d\ X\<^isub>3"} with @{text d} being any + other character than @{text c}, and @{term "X\<^isub>3 \d\ X\<^isub>3"} for any @{text d}. + + Next we construct an \emph{initial equational system} that + contains an equation for each equivalence class. We first give + an informal description of this construction. Suppose we have + the equivalence classes @{text "X\<^isub>1,\,X\<^isub>n"}, there must be one and only one that + contains the empty string @{text "[]"} (since equivalence classes are disjoint). + Let us assume @{text "[] \ X\<^isub>1"}. 
We build the following equational system + + \begin{center} + \begin{tabular}{rcl} + @{text "X\<^isub>1"} & @{text "="} & @{text "(Y\<^isub>1\<^isub>1, CHAR c\<^isub>1\<^isub>1) + \ + (Y\<^isub>1\<^isub>p, CHAR c\<^isub>1\<^isub>p) + \(EMPTY)"} \\ + @{text "X\<^isub>2"} & @{text "="} & @{text "(Y\<^isub>2\<^isub>1, CHAR c\<^isub>2\<^isub>1) + \ + (Y\<^isub>2\<^isub>o, CHAR c\<^isub>2\<^isub>o)"} \\ + & $\vdots$ \\ + @{text "X\<^isub>n"} & @{text "="} & @{text "(Y\<^isub>n\<^isub>1, CHAR c\<^isub>n\<^isub>1) + \ + (Y\<^isub>n\<^isub>q, CHAR c\<^isub>n\<^isub>q)"}\\ + \end{tabular} + \end{center} + + \noindent + where the terms @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} + stand for all transitions @{term "Y\<^isub>i\<^isub>j \c\<^isub>i\<^isub>j\ + X\<^isub>i"}. + %The intuition behind the equational system is that every + %equation @{text "X\<^isub>i = rhs\<^isub>i"} in this system + %corresponds roughly to a state of an automaton whose name is @{text X\<^isub>i} and its predecessor states + %are the @{text "Y\<^isub>i\<^isub>j"}; the @{text "c\<^isub>i\<^isub>j"} are the labels of the transitions from these + %predecessor states to @{text X\<^isub>i}. + There can only be + finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} in a right-hand side + since by assumption there are only finitely many + equivalence classes and only finitely many characters. + The term @{text "\(EMPTY)"} in the first equation acts as a marker for the initial state, that + is the equivalence class + containing @{text "[]"}.\footnote{Note that we mark, roughly speaking, the + single `initial' state in the equational system, which is different from + the method by Brzozowski \cite{Brzozowski64}, where he marks the + `terminal' states. We are forced to set up the equational system in our + way, because the Myhill-Nerode relation determines the `direction' of the + transitions---the successor `state' of an equivalence class @{text Y} can + be reached by adding a character to the end of @{text Y}. This is also the + reason why we have to use our reverse version of Arden's Lemma.} + %In our initial equation system there can only be + %finitely many terms of the form @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} in a right-hand side + %since by assumption there are only finitely many + %equivalence classes and only finitely many characters. + Overloading the function @{text \} for the two kinds of terms in the + equational system, we have + + \begin{center} + @{text "\(Y, r) \"} % + @{thm (rhs) L_rhs_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} + @{thm L_rhs_trm.simps(1)[where r="r", THEN eq_reflection]} + \end{center} + + \noindent + and we can prove for @{text "X\<^isub>2\<^isub>.\<^isub>.\<^isub>n"} that the following equations + % + \begin{equation}\label{inv1} + @{text "X\<^isub>i = \(Y\<^isub>i\<^isub>1, CHAR c\<^isub>i\<^isub>1) \ \ \ \(Y\<^isub>i\<^isub>q, CHAR c\<^isub>i\<^isub>q)"}. + \end{equation} + + \noindent + hold. Similarly for @{text "X\<^isub>1"} we can show the following equation + % + \begin{equation}\label{inv2} + @{text "X\<^isub>1 = \(Y\<^isub>1\<^isub>1, CHAR c\<^isub>1\<^isub>1) \ \ \ \(Y\<^isub>1\<^isub>p, CHAR c\<^isub>1\<^isub>p) \ \(\(EMPTY))"}. + \end{equation} + + \noindent + holds. The reason for adding the @{text \}-marker to our initial equational system is + to obtain this equation: it only holds with the marker, since none of + the other terms contain the empty string. 
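+
+  To give a concrete impression of this construction (the following instance is spelled out
+  only for illustration), consider again the language containing just the string @{text "[c]"}
+  with its three equivalence classes @{text "X\<^isub>1 = {[]}"}, @{text "X\<^isub>2 = {[c]}"} and
+  @{text "X\<^isub>3 = UNIV - {[], [c]}"} from above. Its initial equational system consists of
+  the three equations
+
+  \begin{center}
+  \begin{tabular}{rcl}
+  @{text "X\<^isub>1"} & @{text "="} & @{text "\(EMPTY)"} \\
+  @{text "X\<^isub>2"} & @{text "="} & @{text "(X\<^isub>1, CHAR c)"} \\
+  @{text "X\<^isub>3"} & @{text "="} & @{text "(X\<^isub>1, CHAR d) + (X\<^isub>2, CHAR d') + (X\<^isub>3, CHAR d')"} \\
+  \end{tabular}
+  \end{center}
+
+  \noindent
+  where @{text d} ranges over the characters different from @{text c} and @{text "d'"} over all
+  characters. Only the first equation carries the @{text "\(EMPTY)"}-marker, since @{text "X\<^isub>1"}
+  is the class containing the empty string; the remaining terms record the transitions between the
+  classes (appending any character to @{text "[c]"}, or to a string in @{text "X\<^isub>3"}, always
+  yields a string in @{text "X\<^isub>3"}).
+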
The point of the initial equational system is + that solving it means we will be able to extract a regular expression for every equivalence class. + + Our representation for the equations in Isabelle/HOL are pairs, + where the first component is an equivalence class (a set of strings) + and the second component + is a set of terms. Given a set of equivalence + classes @{text CS}, our initial equational system @{term "Init CS"} is thus + formally defined as + % + \begin{equation}\label{initcs} + \mbox{\begin{tabular}{rcl} + @{thm (lhs) Init_rhs_def} & @{text "\"} & + @{text "if"}~@{term "[] \ X"}\\ + & & @{text "then"}~@{term "{Trn Y (CHAR c) | Y c. Y \ CS \ Y \c\ X} \ {Lam EMPTY}"}\\ + & & @{text "else"}~@{term "{Trn Y (CHAR c)| Y c. Y \ CS \ Y \c\ X}"}\\ + @{thm (lhs) Init_def} & @{text "\"} & @{thm (rhs) Init_def} + \end{tabular}} + \end{equation} + + + + \noindent + Because we use sets of terms + for representing the right-hand sides of equations, we can + prove \eqref{inv1} and \eqref{inv2} more concisely as + % + \begin{lemma}\label{inv} + If @{thm (prem 1) test} then @{text "X = \ \ ` rhs"}. + \end{lemma} + + \noindent + Our proof of Thm.~\ref{myhillnerodeone} will proceed by transforming the + initial equational system into one in \emph{solved form} maintaining the invariant + in Lem.~\ref{inv}. From the solved form we will be able to read + off the regular expressions. + + In order to transform an equational system into solved form, we have two + operations: one that takes an equation of the form @{text "X = rhs"} and removes + any recursive occurrences of @{text X} in the @{text rhs} using our variant of Arden's + Lemma. The other operation takes an equation @{text "X = rhs"} + and substitutes @{text X} throughout the rest of the equational system + adjusting the remaining regular expressions appropriately. To define this adjustment + we define the \emph{append-operation} taking a term and a regular expression as argument + + \begin{center} + @{thm Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\hspace{10mm} + @{thm Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + \end{center} + + \noindent + We lift this operation to entire right-hand sides of equations, written as + @{thm (lhs) Append_rexp_rhs_def[where rexp="r"]}. With this we can define + the \emph{arden-operation} for an equation of the form @{text "X = rhs"} as: + % + \begin{equation}\label{arden_def} + \mbox{\begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} + @{thm (lhs) Arden_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ + & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \ rhs}"} \\ + & & @{text "r' ="} & @{term "STAR (\ {r. Trn X r \ rhs})"}\\ + & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "append_rhs_rexp rhs' r'"}}\\ + \end{tabular}} + \end{equation} + + \noindent + In this definition, we first delete all terms of the form @{text "(X, r)"} from @{text rhs}; + then we calculate the combined regular expressions for all @{text r} coming + from the deleted @{text "(X, r)"}, and take the @{const STAR} of it; + finally we append this regular expression to @{text rhs'}. It can be easily seen + that this operation mimics Arden's Lemma on the level of equations. 
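+
+  For instance, applied to an equation of the simple shape @{text "X = (Y, r\<^isub>1) + (X, r\<^isub>2)"}
+  (a made-up instance, used here only for illustration), the @{const Arden} operation first deletes
+  the term @{text "(X, r\<^isub>2)"}, then builds the star of the deleted regular expression,
+  essentially @{text "STAR r\<^isub>2"}, and finally appends this star to the remaining term. The
+  result is, up to the exact form of the appended regular expression, the right-hand side
+  @{text "(Y, SEQ r\<^isub>1 (STAR r\<^isub>2))"}. On the level of languages this is precisely the
+  solution our reverse version of Arden's Lemma gives for such an equation.
+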
To ensure + the non-emptiness condition of Arden's Lemma we say that a right-hand side is + @{text ardenable} provided + + \begin{center} + @{thm ardenable_def} + \end{center} + + \noindent + This allows us to prove a version of Arden's Lemma on the level of equations. + + \begin{lemma}\label{ardenable} + Given an equation @{text "X = rhs"}. + If @{text "X = \\ ` rhs"}, + @{thm (prem 2) Arden_keeps_eq}, and + @{thm (prem 3) Arden_keeps_eq}, then + @{text "X = \\ ` (Arden X rhs)"}. + \end{lemma} + + \noindent + Our @{text ardenable} condition is slightly stronger than needed for applying Arden's Lemma, + but we can still ensure that it holds troughout our algorithm of transforming equations + into solved form. The \emph{substitution-operation} takes an equation + of the form @{text "X = xrhs"} and substitutes it into the right-hand side @{text rhs}. + + \begin{center} + \begin{tabular}{rc@ {\hspace{2mm}}r@ {\hspace{1mm}}l} + @{thm (lhs) Subst_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "let"}}\\ + & & @{text "rhs' ="} & @{term "rhs - {Trn X r | r. Trn X r \ rhs}"} \\ + & & @{text "r' ="} & @{term "\ {r. Trn X r \ rhs}"}\\ + & & \multicolumn{2}{@ {\hspace{-2mm}}l}{@{text "in"}~~@{term "rhs' \ append_rhs_rexp xrhs r'"}}\\ + \end{tabular} + \end{center} + + \noindent + We again delete first all occurrences of @{text "(X, r)"} in @{text rhs}; we then calculate + the regular expression corresponding to the deleted terms; finally we append this + regular expression to @{text "xrhs"} and union it up with @{text rhs'}. When we use + the substitution operation we will arrange it so that @{text "xrhs"} does not contain + any occurrence of @{text X}. + + With these two operations in place, we can define the operation that removes one equation + from an equational systems @{text ES}. The operation @{const Subst_all} + substitutes an equation @{text "X = xrhs"} throughout an equational system @{text ES}; + @{const Remove} then completely removes such an equation from @{text ES} by substituting + it to the rest of the equational system, but first eliminating all recursive occurrences + of @{text X} by applying @{const Arden} to @{text "xrhs"}. + + \begin{center} + \begin{tabular}{rcl} + @{thm (lhs) Subst_all_def} & @{text "\"} & @{thm (rhs) Subst_all_def}\\ + @{thm (lhs) Remove_def} & @{text "\"} & @{thm (rhs) Remove_def} + \end{tabular} + \end{center} + + \noindent + Finally, we can define how an equational system should be solved. For this + we will need to iterate the process of eliminating equations until only one equation + will be left in the system. However, we do not just want to have any equation + as being the last one, but the one involving the equivalence class for + which we want to calculate the regular + expression. Let us suppose this equivalence class is @{text X}. + Since @{text X} is the one to be solved, in every iteration step we have to pick an + equation to be eliminated that is different from @{text X}. In this way + @{text X} is kept to the final step. The choice is implemented using Hilbert's choice + operator, written @{text SOME} in the definition below. + + \begin{center} + \begin{tabular}{rc@ {\hspace{4mm}}r@ {\hspace{1mm}}l} + @{thm (lhs) Iter_def} & @{text "\"}~~\mbox{} & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "let"}}\\ + & & @{text "(Y, yrhs) ="} & @{term "SOME (Y, yrhs). 
(Y, yrhs) \ ES \ X \ Y"} \\ + & & \multicolumn{2}{@ {\hspace{-4mm}}l}{@{text "in"}~~@{term "Remove ES Y yrhs"}}\\ + \end{tabular} + \end{center} + + \noindent + The last definition we need applies @{term Iter} over and over until a condition + @{text Cond} is \emph{not} satisfied anymore. This condition states that there + are more than one equation left in the equational system @{text ES}. To solve + an equational system we use Isabelle/HOL's @{text while}-operator as follows: + + \begin{center} + @{thm Solve_def} + \end{center} + + \noindent + We are not concerned here with the definition of this operator + (see Berghofer and Nipkow \cite{BerghoferNipkow00}), but note that we eliminate + in each @{const Iter}-step a single equation, and therefore + have a well-founded termination order by taking the cardinality + of the equational system @{text ES}. This enables us to prove + properties about our definition of @{const Solve} when we `call' it with + the equivalence class @{text X} and the initial equational system + @{term "Init (UNIV // \A)"} from + \eqref{initcs} using the principle: + % + \begin{equation}\label{whileprinciple} + \mbox{\begin{tabular}{l} + @{term "invariant (Init (UNIV // \A))"} \\ + @{term "\ES. invariant ES \ Cond ES \ invariant (Iter X ES)"}\\ + @{term "\ES. invariant ES \ Cond ES \ card (Iter X ES) < card ES"}\\ + @{term "\ES. invariant ES \ \ Cond ES \ P ES"}\\ + \hline + \multicolumn{1}{c}{@{term "P (Solve X (Init (UNIV // \A)))"}} + \end{tabular}} + \end{equation} + + \noindent + This principle states that given an invariant (which we will specify below) + we can prove a property + @{text "P"} involving @{const Solve}. For this we have to discharge the following + proof obligations: first the + initial equational system satisfies the invariant; second the iteration + step @{text "Iter"} preserves the invariant as long as the condition @{term Cond} holds; + third @{text "Iter"} decreases the termination order, and fourth that + once the condition does not hold anymore then the property @{text P} must hold. + + The property @{term P} in our proof will state that @{term "Solve X (Init (UNIV // \A))"} + returns with a single equation @{text "X = xrhs"} for some @{text "xrhs"}, and + that this equational system still satisfies the invariant. In order to get + the proof through, the invariant is composed of the following six properties: + + \begin{center} + \begin{tabular}{@ {}rcl@ {\hspace{-13mm}}l @ {}} + @{text "invariant ES"} & @{text "\"} & + @{term "finite ES"} & @{text "(finiteness)"}\\ + & @{text "\"} & @{thm (rhs) finite_rhs_def} & @{text "(finiteness rhs)"}\\ + & @{text "\"} & @{text "\(X, rhs)\ES. X = \\ ` rhs"} & @{text "(soundness)"}\\ + & @{text "\"} & @{thm (rhs) distinctness_def}\\ + & & & @{text "(distinctness)"}\\ + & @{text "\"} & @{thm (rhs) ardenable_all_def} & @{text "(ardenable)"}\\ + & @{text "\"} & @{thm (rhs) validity_def} & @{text "(validity)"}\\ + \end{tabular} + \end{center} + + \noindent + The first two ensure that the equational system is always finite (number of equations + and number of terms in each equation); the third makes sure the `meaning' of the + equations is preserved under our transformations. The other properties are a bit more + technical, but are needed to get our proof through. Distinctness states that every + equation in the system is distinct. @{text Ardenable} ensures that we can always + apply the @{text Arden} operation. 
+ The last property states that every @{text rhs} can only contain equivalence classes + for which there is an equation. Therefore @{text lhss} is just the set containing + the first components of an equational system, + while @{text "rhss"} collects all equivalence classes @{text X} in the terms of the + form @{term "Trn X r"}. That means formally @{thm (lhs) lhss_def}~@{text "\ {X | (X, rhs) \ ES}"} + and @{thm (lhs) rhss_def}~@{text "\ {X | (X, r) \ rhs}"}. + + + It is straightforward to prove that the initial equational system satisfies the + invariant. + + \begin{lemma}\label{invzero} + @{thm[mode=IfThen] Init_ES_satisfies_invariant} + \end{lemma} + + \begin{proof} + Finiteness is given by the assumption and the way we set up the + initial equational system. Soundness is proved in Lem.~\ref{inv}. Distinctness + follows from the fact that the equivalence classes are disjoint. The @{text ardenable} + property also follows from the setup of the initial equational system, as does + validity.\qed + \end{proof} + + \noindent + Next we show that @{text Iter} preserves the invariant. + + \begin{lemma}\label{iterone} + @{thm[mode=IfThen] iteration_step_invariant[where xrhs="rhs"]} + \end{lemma} + + \begin{proof} + The argument boils down to choosing an equation @{text "Y = yrhs"} to be eliminated + and showing that @{term "Subst_all (ES - {(Y, yrhs)}) Y (Arden Y yrhs)"} + preserves the invariant. + We prove this as follows: + + \begin{center} + @{text "\ ES."} @{thm (prem 1) Subst_all_satisfies_invariant} implies + @{thm (concl) Subst_all_satisfies_invariant} + \end{center} + + \noindent + Finiteness is straightforward, as the @{const Subst} and @{const Arden} operations + keep the equational system finite. These operations also preserve soundness + and distinctness (we proved soundness for @{const Arden} in Lem.~\ref{ardenable}). + The property @{text ardenable} is clearly preserved because the append-operation + cannot make a regular expression match the empty string. Validity is + given because @{const Arden} removes an equivalence class from @{text yrhs} + and then @{const Subst_all} removes @{text Y} from the equational system. + Having proved the implication above, we can instantiate @{text "ES"} with @{text "ES - {(Y, yrhs)}"}, + which matches our proof obligation for @{const "Subst_all"}. Since + \mbox{@{term "ES = ES - {(Y, yrhs)} \ {(Y, yrhs)}"}}, we can use the assumption + to complete the proof.\qed + \end{proof} + + \noindent + We also need the fact that @{text Iter} decreases the termination measure. + + \begin{lemma}\label{itertwo} + @{thm[mode=IfThen] iteration_step_measure[simplified (no_asm), where xrhs="rhs"]} + \end{lemma} + + \begin{proof} + By assumption we know that @{text "ES"} is finite and has more than one element. + Therefore there must be an element @{term "(Y, yrhs) \ ES"} with + @{term "(Y, yrhs) \ (X, rhs)"}. Using the distinctness property we can infer + that @{term "Y \ X"}. We further know that @{text "Remove ES Y yrhs"} + removes the equation @{text "Y = yrhs"} from the system, and therefore + the cardinality of @{const Iter} strictly decreases.\qed + \end{proof} + + \noindent + This brings us to the property we want to establish for @{text Solve}. + + + \begin{lemma} + If @{thm (prem 1) Solve} and @{thm (prem 2) Solve} then there exists + a @{text rhs} such that @{term "Solve X (Init (UNIV // \A)) = {(X, rhs)}"} + and @{term "invariant {(X, rhs)}"}. 
+ \end{lemma} + + \begin{proof} + In order to prove this lemma using \eqref{whileprinciple}, we have to use a slightly + stronger invariant since Lem.~\ref{iterone} and \ref{itertwo} have the precondition + that @{term "(X, rhs) \ ES"} for some @{text rhs}. This precondition is needed + in order to choose in the @{const Iter}-step an equation that is not \mbox{@{term "X = rhs"}}. + Therefore our invariant cannot be just @{term "invariant ES"}, but must be + @{term "invariant ES \ (\rhs. (X, rhs) \ ES)"}. By assumption + @{thm (prem 2) Solve} and Lem.~\ref{invzero}, the more general invariant holds for + the initial equational system. This is premise 1 of~\eqref{whileprinciple}. + Premise 2 is given by Lem.~\ref{iterone} and the fact that @{const Iter} might + modify the @{text rhs} in the equation @{term "X = rhs"}, but does not remove it. + Premise 3 of~\eqref{whileprinciple} is given by Lem.~\ref{itertwo}. Now in premise 4 + we would like to show that there exists a @{text rhs} such that @{term "ES = {(X, rhs)}"} + and that @{text "invariant {(X, rhs)}"} holds, provided the condition @{text "Cond"} + does not hold. By the stronger invariant we know there exists such a @{text "rhs"} + with @{term "(X, rhs) \ ES"}. Because @{text Cond} is not true, we know the cardinality + of @{text ES} is @{text 1}. This means @{text "ES"} must actually be the set @{text "{(X, rhs)}"}, + for which the invariant holds. This allows us to conclude that + @{term "Solve X (Init (UNIV // \A)) = {(X, rhs)}"} and @{term "invariant {(X, rhs)}"} hold, + as needed.\qed + \end{proof} + + \noindent + With this lemma in place we can show that for every equivalence class in @{term "UNIV // \A"} + there exists a regular expression. + + \begin{lemma}\label{every_eqcl_has_reg} + @{thm[mode=IfThen] every_eqcl_has_reg} + \end{lemma} + + \begin{proof} + By the preceding lemma, we know that there exists a @{text "rhs"} such + that @{term "Solve X (Init (UNIV // \A))"} returns the equation @{text "X = rhs"}, + and that the invariant holds for this equation. That means we + know @{text "X = \\ ` rhs"}. We further know that + this is equal to \mbox{@{text "\\ ` (Arden X rhs)"}} using the properties of the + invariant and Lem.~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"}, + we can infer that @{term "rhss rhs \ {X}"} and, because the @{text Arden} operation + removes @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}. + This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}. + So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = L (\rs)"}. + With this we can conclude the proof.\qed + \end{proof} + + \noindent + Lem.~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction + of the Myhill-Nerode theorem. + + \begin{proof}[of Thm.~\ref{myhillnerodeone}] + By Lem.~\ref{every_eqcl_has_reg} we know that there exists a regular expression for + every equivalence class in @{term "UNIV // \A"}. Since @{text "finals A"} is + a subset of @{term "UNIV // \A"}, we also know that for every equivalence class + in @{term "finals A"} there exists a regular expression. Moreover by assumption + we know that @{term "finals A"} must be finite, and therefore there must be a finite + set of regular expressions @{text "rs"} such that + @{term "\(finals A) = L (\rs)"}. 
+ Since the left-hand side is equal to @{text A}, we can use @{term "\rs"} + as the regular expression that is needed in the theorem.\qed + \end{proof} +*} + + + + +section {* Myhill-Nerode, Second Part *} + +text {* + We will prove in this section the second part of the Myhill-Nerode + theorem. It can be formulated in our setting as follows: + + \begin{theorem} + Given @{text "r"} is a regular expression, then @{thm Myhill_Nerode2}. + \end{theorem} + + \noindent + The proof will be by induction on the structure of @{text r}. It turns out + the base cases are straightforward. + + + \begin{proof}[Base Cases] + The cases for @{const NULL}, @{const EMPTY} and @{const CHAR} are routine, because + we can easily establish that + + \begin{center} + \begin{tabular}{l} + @{thm quot_null_eq}\\ + @{thm quot_empty_subset}\\ + @{thm quot_char_subset} + \end{tabular} + \end{center} + + \noindent + hold, which shows that @{term "UNIV // \(L r)"} must be finite.\qed + \end{proof} + + \noindent + Much more interesting, however, are the inductive cases. They seem hard to solve + directly. The reader is invited to try. + + Our proof will rely on some + \emph{tagging-functions} defined over strings. Given the inductive hypothesis, it will + be easy to prove that the \emph{range} of these tagging-functions is finite + (the range of a function @{text f} is defined as @{text "range f \ f ` UNIV"}). + With this we will be able to infer that the tagging-functions, seen as relations, + give rise to finitely many equivalence classes of @{const UNIV}. Finally we + will show that the tagging-relations are more refined than @{term "\(L r)"}, which + implies that @{term "UNIV // \(L r)"} must also be finite (a relation @{text "R\<^isub>1"} + is said to \emph{refine} @{text "R\<^isub>2"} provided @{text "R\<^isub>1 \ R\<^isub>2"}). + We formally define the notion of a \emph{tagging-relation} as follows. + + \begin{definition}[Tagging-Relation] Given a tagging-function @{text tag}, then two strings @{text x} + and @{text y} are \emph{tag-related} provided + \begin{center} + @{text "x =tag= y \ tag x = tag y"}\;. + \end{center} + \end{definition} + + + In order to establish finiteness of a set @{text A}, we shall use the following powerful + principle from Isabelle/HOL's library. + % + \begin{equation}\label{finiteimageD} + @{thm[mode=IfThen] finite_imageD} + \end{equation} + + \noindent + It states that if an image of a set under an injective function @{text f} (injective over this set) + is finite, then the set @{text A} itself must be finite. We can use it to establish the following + two lemmas. + + \begin{lemma}\label{finone} + @{thm[mode=IfThen] finite_eq_tag_rel} + \end{lemma} + + \begin{proof} + We set in \eqref{finiteimageD}, @{text f} to be @{text "X \ tag ` X"}. We have + @{text "range f"} to be a subset of @{term "Pow (range tag)"}, which we know must be + finite by assumption. Now @{term "f (UNIV // =tag=)"} is a subset of @{text "range f"}, + and so also finite. Injectivity amounts to showing that @{text "X = Y"} under the + assumptions that @{text "X, Y \ "}~@{term "UNIV // =tag="} and @{text "f X = f Y"}. + From the assumptions we can obtain @{text "x \ X"} and @{text "y \ Y"} with + @{text "tag x = tag y"}. 
Since @{text x} and @{text y} are tag-related, this in + turn means that the equivalence classes @{text X} + and @{text Y} must be equal.\qed + \end{proof} + + \begin{lemma}\label{fintwo} + Given two equivalence relations @{text "R\<^isub>1"} and @{text "R\<^isub>2"}, whereby + @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}. + If @{thm (prem 1) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]} + then @{thm (concl) refined_partition_finite[where ?R1.0="R\<^isub>1" and ?R2.0="R\<^isub>2"]}. + \end{lemma} + + \begin{proof} + We prove this lemma again using \eqref{finiteimageD}. This time we set @{text f} to + be @{text "X \"}~@{term "{R\<^isub>1 `` {x} | x. x \ X}"}. It is easy to see that + @{term "finite (f ` (UNIV // R\<^isub>2))"} because it is a subset of @{term "Pow (UNIV // R\<^isub>1)"}, + which is finite by assumption. What remains to be shown is that @{text f} is injective + on @{term "UNIV // R\<^isub>2"}. This is equivalent to showing that two equivalence + classes, say @{text "X"} and @{text Y}, in @{term "UNIV // R\<^isub>2"} are equal, provided + @{text "f X = f Y"}. For @{text "X = Y"} to be equal, we have to find two elements + @{text "x \ X"} and @{text "y \ Y"} such that they are @{text R\<^isub>2} related. + We know there exists a @{text "x \ X"} with \mbox{@{term "X = R\<^isub>2 `` {x}"}}. + From the latter fact we can infer that @{term "R\<^isub>1 ``{x} \ f X"} + and further @{term "R\<^isub>1 ``{x} \ f Y"}. This means we can obtain a @{text y} + such that @{term "R\<^isub>1 `` {x} = R\<^isub>1 `` {y}"} holds. Consequently @{text x} and @{text y} + are @{text "R\<^isub>1"}-related. Since by assumption @{text "R\<^isub>1"} refines @{text "R\<^isub>2"}, + they must also be @{text "R\<^isub>2"}-related, as we need to show.\qed + \end{proof} + + \noindent + Chaining Lem.~\ref{finone} and \ref{fintwo} together, means in order to show + that @{term "UNIV // \(L r)"} is finite, we have to find a tagging-function whose + range can be shown to be finite and whose tagging-relation refines @{term "\(L r)"}. + Let us attempt the @{const ALT}-case first. + + \begin{proof}[@{const "ALT"}-Case] + We take as tagging-function + % + \begin{center} + @{thm tag_str_ALT_def[where A="A" and B="B", THEN meta_eq_app]} + \end{center} + + \noindent + where @{text "A"} and @{text "B"} are some arbitrary languages. + We can show in general, if @{term "finite (UNIV // \A)"} and @{term "finite (UNIV // \B)"} + then @{term "finite ((UNIV // \A) \ (UNIV // \B))"} holds. The range of + @{term "tag_str_ALT A B"} is a subset of this product set---so finite. It remains to be shown + that @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} refines @{term "\(A \ B)"}. This amounts to + showing + % + \begin{center} + @{term "tag\<^isub>A\<^isub>L\<^isub>T A B x = tag\<^isub>A\<^isub>L\<^isub>T A B y \ x \(A \ B) y"} + \end{center} + % + \noindent + which by unfolding the Myhill-Nerode relation is identical to + % + \begin{equation}\label{pattern} + @{text "\z. tag\<^isub>A\<^isub>L\<^isub>T A B x = tag\<^isub>A\<^isub>L\<^isub>T A B y \ x @ z \ A \ B \ y @ z \ A \ B"} + \end{equation} + % + \noindent + since both @{text "=tag\<^isub>A\<^isub>L\<^isub>T A B="} and @{term "\(A \ B)"} are symmetric. To solve + \eqref{pattern} we just have to unfold the definition of the tagging-function and analyse + in which set, @{text A} or @{text B}, the string @{term "x @ z"} is. + The definition of the tagging-function will give us in each case the + information to infer that @{text "y @ z \ A \ B"}. 
+ Finally we + can discharge this case by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed + \end{proof} + + + \noindent + The pattern in \eqref{pattern} is repeated for the other two cases. Unfortunately, + they are slightly more complicated. In the @{const SEQ}-case we essentially have + to be able to infer that + % + \begin{center} + @{text "\"}@{term "x @ z \ A ;; B \ y @ z \ A ;; B"} + \end{center} + % + \noindent + using the information given by the appropriate tagging-function. The complication + is to find out what the possible splits of @{text "x @ z"} are to be in @{term "A ;; B"} + (this was easy in case of @{term "A \ B"}). To deal with this complication we define the + notions of \emph{string prefixes} + % + \begin{center} + @{text "x \ y \ \z. y = x @ z"}\hspace{10mm} + @{text "x < y \ x \ y \ x \ y"} + \end{center} + % + \noindent + and \emph{string subtraction}: + % + \begin{center} + @{text "[] - y \ []"}\hspace{10mm} + @{text "x - [] \ x"}\hspace{10mm} + @{text "cx - dy \ if c = d then x - y else cx"} + \end{center} + % + \noindent + where @{text c} and @{text d} are characters, and @{text x} and @{text y} are strings. + + Now assuming @{term "x @ z \ A ;; B"} there are only two possible ways of how to `split' + this string to be in @{term "A ;; B"}: + % + \begin{center} + \begin{tabular}{@ {}c@ {\hspace{10mm}}c@ {}} + \scalebox{0.7}{ + \begin{tikzpicture} + \node[draw,minimum height=3.8ex] (xa) { $\hspace{3em}@{text "x'"}\hspace{3em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.2em}@{text "x - x'"}\hspace{0.2em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xxa] (z) { $\hspace{5em}@{text z}\hspace{5em}$ }; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (xa.north west) -- ($(xxa.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text x}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (z.north west) -- ($(z.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text z}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.north west)+(0em,3ex)$) -- ($(z.north east)+(0em,3ex)$) + node[midway, above=0.8em]{@{term "x @ z \ A ;; B"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(z.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "(x - x') @ z \ B"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "x' \ A"}}; + \end{tikzpicture}} + & + \scalebox{0.7}{ + \begin{tikzpicture} + \node[draw,minimum height=3.8ex] (x) { $\hspace{4.8em}@{text x}\hspace{4.8em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of x] (za) { $\hspace{0.6em}@{text "z'"}\hspace{0.6em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of za] (zza) { $\hspace{2.6em}@{text "z - z'"}\hspace{2.6em}$ }; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (x.north west) -- ($(za.north west)+(0em,0em)$) + node[midway, above=0.5em]{@{text x}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(za.north west)+(0em,0ex)$) -- ($(zza.north east)+(0em,0ex)$) + node[midway, above=0.5em]{@{text z}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(x.north west)+(0em,3ex)$) -- ($(zza.north east)+(0em,3ex)$) + node[midway, above=0.8em]{@{term "x @ z \ A ;; B"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(za.south east)+(0em,0ex)$) -- ($(x.south 
west)+(0em,0ex)$) + node[midway, below=0.5em]{@{text "x @ z' \ A"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(zza.south east)+(0em,0ex)$) -- ($(za.south east)+(0em,0ex)$) + node[midway, below=0.5em]{@{text "(z - z') \ B"}}; + \end{tikzpicture}} + \end{tabular} + \end{center} + % + \noindent + Either there is a prefix of @{text x} in @{text A} and the rest is in @{text B} (first picture), + or @{text x} and a prefix of @{text "z"} is in @{text A} and the rest in @{text B} (second picture). + In both cases we have to show that @{term "y @ z \ A ;; B"}. For this we use the + following tagging-function + % + \begin{center} + @{thm tag_str_SEQ_def[where ?L1.0="A" and ?L2.0="B", THEN meta_eq_app]} + \end{center} + + \noindent + with the idea that in the first split we have to make sure that @{text "(x - x') @ z"} + is in the language @{text B}. + + \begin{proof}[@{const SEQ}-Case] + If @{term "finite (UNIV // \A)"} and @{term "finite (UNIV // \B)"} + then @{term "finite ((UNIV // \A) \ (Pow (UNIV // \B)))"} holds. The range of + @{term "tag_str_SEQ A B"} is a subset of this product set, and therefore finite. + We have to show injectivity of this tagging-function as + % + \begin{center} + @{term "\z. tag_str_SEQ A B x = tag_str_SEQ A B y \ x @ z \ A ;; B \ y @ z \ A ;; B"} + \end{center} + % + \noindent + There are two cases to be considered (see pictures above). First, there exists + a @{text "x'"} such that + @{text "x' \ A"}, @{text "x' \ x"} and @{text "(x - x') @ z \ B"} hold. We therefore have + % + \begin{center} + @{term "(\B `` {x - x'}) \ ({\B `` {x - x'} |x'. x' \ x \ x' \ A})"} + \end{center} + % + \noindent + and by the assumption about @{term "tag_str_SEQ A B"} also + % + \begin{center} + @{term "(\B `` {x - x'}) \ ({\B `` {y - y'} |y'. y' \ y \ y' \ A})"} + \end{center} + % + \noindent + That means there must be a @{text "y'"} such that @{text "y' \ A"} and + @{term "\B `` {x - x'} = \B `` {y - y'}"}. This equality means that + @{term "(x - x') \B (y - y')"} holds. Unfolding the Myhill-Nerode + relation and together with the fact that @{text "(x - x') @ z \ B"}, we + have @{text "(y - y') @ z \ B"}. We already know @{text "y' \ A"}, therefore + @{term "y @ z \ A ;; B"}, as needed in this case. + + Second, there exists a @{text "z'"} such that @{term "x @ z' \ A"} and @{text "z - z' \ B"}. + By the assumption about @{term "tag_str_SEQ A B"} we have + @{term "\A `` {x} = \A `` {y}"} and thus @{term "x \A y"}. Which means by the Myhill-Nerode + relation that @{term "y @ z' \ A"} holds. Using @{text "z - z' \ B"}, we can conclude also in this case + with @{term "y @ z \ A ;; B"}. We again can complete the @{const SEQ}-case + by setting @{text A} to @{term "L r\<^isub>1"} and @{text B} to @{term "L r\<^isub>2"}.\qed + \end{proof} + + \noindent + The case for @{const STAR} is similar to @{const SEQ}, but poses a few extra challenges. 
When + we analyse the case that @{text "x @ z"} is an element in @{term "A\"} and @{text x} is not the + empty string, we + have the following picture: + % + \begin{center} + \scalebox{0.7}{ + \begin{tikzpicture} + \node[draw,minimum height=3.8ex] (xa) { $\hspace{4em}@{text "x'\<^isub>m\<^isub>a\<^isub>x"}\hspace{4em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xa] (xxa) { $\hspace{0.5em}@{text "x - x'\<^isub>m\<^isub>a\<^isub>x"}\hspace{0.5em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of xxa] (za) { $\hspace{2em}@{text "z\<^isub>a"}\hspace{2em}$ }; + \node[draw,minimum height=3.8ex, right=-0.03em of za] (zb) { $\hspace{7em}@{text "z\<^isub>b"}\hspace{7em}$ }; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (xa.north west) -- ($(xxa.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text x}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + (za.north west) -- ($(zb.north east)+(0em,0em)$) + node[midway, above=0.5em]{@{text z}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.north west)+(0em,3ex)$) -- ($(zb.north east)+(0em,3ex)$) + node[midway, above=0.8em]{@{term "x @ z \ A\"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(za.south east)+(0em,0ex)$) -- ($(xxa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z\<^isub>a \ A"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(xa.south east)+(0em,0ex)$) -- ($(xa.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "x'\<^isub>m\<^isub>a\<^isub>x \ A\"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(zb.south east)+(0em,0ex)$) -- ($(zb.south west)+(0em,0ex)$) + node[midway, below=0.5em]{@{term "z\<^isub>b \ A\"}}; + + \draw[decoration={brace,transform={yscale=3}},decorate] + ($(zb.south east)+(0em,-4ex)$) -- ($(xxa.south west)+(0em,-4ex)$) + node[midway, below=0.5em]{@{term "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z \ A\"}}; + \end{tikzpicture}} + \end{center} + % + \noindent + We can find a strict prefix @{text "x'"} of @{text x} such that @{term "x' \ A\"}, + @{text "x' < x"} and the rest @{term "(x - x') @ z \ A\"}. For example the empty string + @{text "[]"} would do. + There are potentially many such prefixes, but there can only be finitely many of them (the + string @{text x} is finite). Let us therefore choose the longest one and call it + @{text "x'\<^isub>m\<^isub>a\<^isub>x"}. Now for the rest of the string @{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z"} we + know it is in @{term "A\"}. By definition of @{term "A\"}, we can separate + this string into two parts, say @{text "a"} and @{text "b"}, such that @{text "a \ A"} + and @{term "b \ A\"}. Now @{text a} must be strictly longer than @{text "x - x'\<^isub>m\<^isub>a\<^isub>x"}, + otherwise @{text "x'\<^isub>m\<^isub>a\<^isub>x"} is not the longest prefix. That means @{text a} + `overlaps' with @{text z}, splitting it into two components @{text "z\<^isub>a"} and + @{text "z\<^isub>b"}. For this we know that @{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z\<^isub>a \ A"} and + @{term "z\<^isub>b \ A\"}. To cut a story short, we have divided @{term "x @ z \ A\"} + such that we have a string @{text a} with @{text "a \ A"} that lies just on the + `border' of @{text x} and @{text z}. This string is @{text "(x - x'\<^isub>m\<^isub>a\<^isub>x) @ z\<^isub>a"}. 
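+ + \noindent + For illustration, the string subtraction defined earlier computes, for example, + % + \begin{center} + @{text "abc - ab = c"}\hspace{10mm}@{text "abc - b = abc"} + \end{center} + % + \noindent + since in the first case the common characters are removed one by one and @{text "c - []"} + gives @{text c}, while in the second case the leading characters differ and the left-hand + string is returned unchanged. The string @{text "x - x'\<^isub>m\<^isub>a\<^isub>x"} in the picture above + is obtained in exactly this way. 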
+ + In order to show that @{term "x @ z \ A\"} implies @{term "y @ z \ A\"}, we use + the following tagging-function: + % + \begin{center} + @{thm tag_str_STAR_def[where ?L1.0="A", THEN meta_eq_app]}\smallskip + \end{center} + + \begin{proof}[@{const STAR}-Case] + If @{term "finite (UNIV // \A)"} + then @{term "finite (Pow (UNIV // \A))"} holds. The range of + @{term "tag_str_STAR A"} is a subset of this set, and therefore finite. + Again we have to show injectivity of this tagging-function as + % + \begin{center} + @{term "\z. tag_str_STAR A x = tag_str_STAR A y \ x @ z \ A\ \ y @ z \ A\"} + \end{center} + % + \noindent + We first need to consider the case that @{text x} is the empty string. + From the assumption we can infer @{text y} is the empty string and + clearly have @{term "y @ z \ A\"}. In case @{text x} is not the empty + string, we can divide the string @{text "x @ z"} as shown in the picture + above. By the tagging-function we have + % + \begin{center} + @{term "\A `` {(x - x'\<^isub>m\<^isub>a\<^isub>x)} \ ({\A `` {x - x'} |x'. x' < x \ x' \ A\})"} + \end{center} + % + \noindent + which by assumption is equal to + % + \begin{center} + @{term "\A `` {(x - x'\<^isub>m\<^isub>a\<^isub>x)} \ ({\A `` {y - y'} |y'. y' < y \ y' \ A\})"} + \end{center} + % + \noindent + and we know that we have a @{term "y' \ A\"} and @{text "y' < y"} + and also know @{term "(x - x'\<^isub>m\<^isub>a\<^isub>x) \A (y - y')"}. Unfolding the Myhill-Nerode + relation we know @{term "(y - y') @ z\<^isub>a \ A"}. We also know that @{term "z\<^isub>b \ A\"}. + Therefore @{term "y' @ ((y - y') @ z\<^isub>a) @ z\<^isub>b \ A\"}, which means + @{term "y @ z \ A\"}. As the last step we have to set @{text "A"} to @{term "L r"} and + complete the proof.\qed + \end{proof} +*} + +section {* Second Part based on Partial Derivatives *} + +text {* + We briefly considered using the method Brzozowski presented in the + Appendix of~\cite{Brzozowski64} in order to prove the second + direction of the Myhill-Nerode theorem. There he calculates the + derivatives for regular expressions and shows that for every + language there can be only finitely many of them %derivations (if + regarded equal modulo ACI). We could have used as tagging-function + the set of derivatives of a regular expression with respect to a + language. Using the fact that two strings are Myhill-Nerode related + whenever their derivative is the same, together with the fact that + there are only finitely such derivatives would give us a similar + argument as ours. However it seems not so easy to calculate the set + of derivatives modulo ACI. Therefore we preferred our direct method + of using tagging-functions. + +*} + +section {* Closure Properties *} + + +section {* Conclusion and Related Work *} + +text {* + In this paper we took the view that a regular language is one where there + exists a regular expression that matches all of its strings. Regular + expressions can conveniently be defined as a datatype in HOL-based theorem + provers. For us it was therefore interesting to find out how far we can push + this point of view. We have established in Isabelle/HOL both directions + of the Myhill-Nerode theorem. + % + \begin{theorem}[The Myhill-Nerode Theorem]\mbox{}\\ + A language @{text A} is regular if and only if @{thm (rhs) Myhill_Nerode}. + \end{theorem} + % + \noindent + Having formalised this theorem means we + pushed our point of view quite far. 
Using this theorem we can obviously prove when a language + is \emph{not} regular---by establishing that it has infinitely many + equivalence classes generated by the Myhill-Nerode relation (this is usually + the purpose of the pumping lemma \cite{Kozen97}). We can also use it to + establish the standard textbook results about closure properties of regular + languages. The case of closure under complement is particularly interesting, because + it seems difficult to construct a regular expression for the complement + language by direct means. However, the existence of such a regular expression + can be easily proved using the Myhill-Nerode theorem since + % + \begin{center} + @{term "s\<^isub>1 \A s\<^isub>2"} if and only if @{term "s\<^isub>1 \(-A) s\<^isub>2"} + \end{center} + % + \noindent + holds for any strings @{text "s\<^isub>1"} and @{text + "s\<^isub>2"}. Therefore @{text A} and the complement language @{term "-A"} give rise to the same + partitions. Proving the existence of such a regular expression via automata + using the standard method would + be quite involved. It includes the + steps: regular expression @{text "\"} non-deterministic automaton @{text + "\"} deterministic automaton @{text "\"} complement automaton @{text "\"} + regular expression. + + While regular expressions are convenient in formalisations, they have some + limitations. One is that there seems to be no method of calculating a + minimal regular expression (for example in terms of length) for a regular + language, like there is + for automata. On the other hand, efficient regular expression matching, + without using automata, poses no problem \cite{OwensReppyTuron09}. + For an implementation of a simple regular expression matcher, + whose correctness has been formally established, we refer the reader to + Owens and Slind \cite{OwensSlind08}. + + + Our formalisation consists of 780 lines of Isabelle/Isar code for the first + direction and 460 for the second, plus around 300 lines of standard material about + regular languages. While this might be seen as too large to count as a + concise proof pearl, this should be seen in the context of the work done by + Constable et al \cite{Constable00} who formalised the Myhill-Nerode theorem + in Nuprl using automata. They write that their four-member team needed + something on the order of 18 months for their formalisation. The + estimate for our formalisation is that we needed approximately 3 months and + this included the time to find our proof arguments. Unlike Constable et al, + who were able to follow the proofs from \cite{HopcroftUllman69}, we had to + find our own arguments. So for us the formalisation was not the + bottleneck. It is hard to gauge the size of a formalisation in Nuprl, but + from what is shown in the Nuprl Math Library about their development it + seems substantially larger than ours. Our code can be found in the + Mercurial repository at + \mbox{\url{http://www4.in.tum.de/~urbanc/regexp.html}}. + + + Our proof of the first direction is very much inspired by \emph{Brzozowski's + algebraic method} used to convert a finite automaton to a regular + expression \cite{Brzozowski64}. The close connection can be seen by considering the equivalence + classes as the states of the minimal automaton for the regular language. + However there are some subtle differences. 
Since we identify equivalence + classes with the states of the automaton, then the most natural choice is to + characterise each state with the set of strings starting from the initial + state leading up to that state. Usually, however, the states are characterised as the + strings starting from that state leading to the terminal states. The first + choice has consequences about how the initial equational system is set up. We have + the $\lambda$-term on our `initial state', while Brzozowski has it on the + terminal states. This means we also need to reverse the direction of Arden's + Lemma. + + This is also where our method shines, because we can completely + side-step the standard argument \cite{Kozen97} where automata need + to be composed, which as stated in the Introduction is not so easy + to formalise in a HOL-based theorem prover. However, it is also the + direction where we had to spend most of the `conceptual' time, as + our proof-argument based on tagging-functions is new for + establishing the Myhill-Nerode theorem. All standard proofs of this + direction proceed by arguments over automata.\medskip + + \noindent + {\bf Acknowledgements:} We are grateful for the comments we received from Larry + Paulson. + +*} + + +(*<*) +end +(*>*) \ No newline at end of file diff -r a8a442ba0dbf -r e93760534354 Journal/ROOT.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Journal/ROOT.ML Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,6 @@ +no_document use_thy "../Myhill"; +no_document use_thy "~~/src/HOL/Library/LaTeXsugar"; +no_document use_thy "../Derivs"; +no_document use_thy "../Closure"; + +use_thy "Paper" \ No newline at end of file diff -r a8a442ba0dbf -r e93760534354 Journal/document/llncs.cls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Journal/document/llncs.cls Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,1189 @@ +% LLNCS DOCUMENT CLASS -- version 2.13 (28-Jan-2002) +% Springer Verlag LaTeX2e support for Lecture Notes in Computer Science +% +%% +%% \CharacterTable +%% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z +%% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z +%% Digits \0\1\2\3\4\5\6\7\8\9 +%% Exclamation \! Double quote \" Hash (number) \# +%% Dollar \$ Percent \% Ampersand \& +%% Acute accent \' Left paren \( Right paren \) +%% Asterisk \* Plus \+ Comma \, +%% Minus \- Point \. Solidus \/ +%% Colon \: Semicolon \; Less than \< +%% Equals \= Greater than \> Question mark \? 
+%% Commercial at \@ Left bracket \[ Backslash \\ +%% Right bracket \] Circumflex \^ Underscore \_ +%% Grave accent \` Left brace \{ Vertical bar \| +%% Right brace \} Tilde \~} +%% +\NeedsTeXFormat{LaTeX2e}[1995/12/01] +\ProvidesClass{llncs}[2002/01/28 v2.13 +^^J LaTeX document class for Lecture Notes in Computer Science] +% Options +\let\if@envcntreset\iffalse +\DeclareOption{envcountreset}{\let\if@envcntreset\iftrue} +\DeclareOption{citeauthoryear}{\let\citeauthoryear=Y} +\DeclareOption{oribibl}{\let\oribibl=Y} +\let\if@custvec\iftrue +\DeclareOption{orivec}{\let\if@custvec\iffalse} +\let\if@envcntsame\iffalse +\DeclareOption{envcountsame}{\let\if@envcntsame\iftrue} +\let\if@envcntsect\iffalse +\DeclareOption{envcountsect}{\let\if@envcntsect\iftrue} +\let\if@runhead\iffalse +\DeclareOption{runningheads}{\let\if@runhead\iftrue} + +\let\if@openbib\iffalse +\DeclareOption{openbib}{\let\if@openbib\iftrue} + +% languages +\let\switcht@@therlang\relax +\def\ds@deutsch{\def\switcht@@therlang{\switcht@deutsch}} +\def\ds@francais{\def\switcht@@therlang{\switcht@francais}} + +\DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} + +\ProcessOptions + +\LoadClass[twoside]{article} +\RequirePackage{multicol} % needed for the list of participants, index + +\setlength{\textwidth}{12.2cm} +\setlength{\textheight}{19.3cm} +\renewcommand\@pnumwidth{2em} +\renewcommand\@tocrmarg{3.5em} +% +\def\@dottedtocline#1#2#3#4#5{% + \ifnum #1>\c@tocdepth \else + \vskip \z@ \@plus.2\p@ + {\leftskip #2\relax \rightskip \@tocrmarg \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \parindent #2\relax\@afterindenttrue + \interlinepenalty\@M + \leavevmode + \@tempdima #3\relax + \advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip + {#4}\nobreak + \leaders\hbox{$\m@th + \mkern \@dotsep mu\hbox{.}\mkern \@dotsep + mu$}\hfill + \nobreak + \hb@xt@\@pnumwidth{\hfil\normalfont \normalcolor #5}% + \par}% + \fi} +% +\def\switcht@albion{% +\def\abstractname{Abstract.} +\def\ackname{Acknowledgement.} +\def\andname{and} +\def\lastandname{\unskip, and} +\def\appendixname{Appendix} +\def\chaptername{Chapter} +\def\claimname{Claim} +\def\conjecturename{Conjecture} +\def\contentsname{Table of Contents} +\def\corollaryname{Corollary} +\def\definitionname{Definition} +\def\examplename{Example} +\def\exercisename{Exercise} +\def\figurename{Fig.} +\def\keywordname{{\bf Key words:}} +\def\indexname{Index} +\def\lemmaname{Lemma} +\def\contriblistname{List of Contributors} +\def\listfigurename{List of Figures} +\def\listtablename{List of Tables} +\def\mailname{{\it Correspondence to\/}:} +\def\noteaddname{Note added in proof} +\def\notename{Note} +\def\partname{Part} +\def\problemname{Problem} +\def\proofname{Proof} +\def\propertyname{Property} +\def\propositionname{Proposition} +\def\questionname{Question} +\def\remarkname{Remark} +\def\seename{see} +\def\solutionname{Solution} +\def\subclassname{{\it Subject Classifications\/}:} +\def\tablename{Table} +\def\theoremname{Theorem}} +\switcht@albion +% Names of theorem like environments are already defined +% but must be translated if another language is chosen +% +% French section +\def\switcht@francais{%\typeout{On parle francais.}% + \def\abstractname{R\'esum\'e.}% + \def\ackname{Remerciements.}% + \def\andname{et}% + \def\lastandname{ et}% + \def\appendixname{Appendice} + \def\chaptername{Chapitre}% + \def\claimname{Pr\'etention}% + \def\conjecturename{Hypoth\`ese}% + \def\contentsname{Table des mati\`eres}% + \def\corollaryname{Corollaire}% 
+ \def\definitionname{D\'efinition}% + \def\examplename{Exemple}% + \def\exercisename{Exercice}% + \def\figurename{Fig.}% + \def\keywordname{{\bf Mots-cl\'e:}} + \def\indexname{Index} + \def\lemmaname{Lemme}% + \def\contriblistname{Liste des contributeurs} + \def\listfigurename{Liste des figures}% + \def\listtablename{Liste des tables}% + \def\mailname{{\it Correspondence to\/}:} + \def\noteaddname{Note ajout\'ee \`a l'\'epreuve}% + \def\notename{Remarque}% + \def\partname{Partie}% + \def\problemname{Probl\`eme}% + \def\proofname{Preuve}% + \def\propertyname{Caract\'eristique}% +%\def\propositionname{Proposition}% + \def\questionname{Question}% + \def\remarkname{Remarque}% + \def\seename{voir} + \def\solutionname{Solution}% + \def\subclassname{{\it Subject Classifications\/}:} + \def\tablename{Tableau}% + \def\theoremname{Th\'eor\`eme}% +} +% +% German section +\def\switcht@deutsch{%\typeout{Man spricht deutsch.}% + \def\abstractname{Zusammenfassung.}% + \def\ackname{Danksagung.}% + \def\andname{und}% + \def\lastandname{ und}% + \def\appendixname{Anhang}% + \def\chaptername{Kapitel}% + \def\claimname{Behauptung}% + \def\conjecturename{Hypothese}% + \def\contentsname{Inhaltsverzeichnis}% + \def\corollaryname{Korollar}% +%\def\definitionname{Definition}% + \def\examplename{Beispiel}% + \def\exercisename{\"Ubung}% + \def\figurename{Abb.}% + \def\keywordname{{\bf Schl\"usselw\"orter:}} + \def\indexname{Index} +%\def\lemmaname{Lemma}% + \def\contriblistname{Mitarbeiter} + \def\listfigurename{Abbildungsverzeichnis}% + \def\listtablename{Tabellenverzeichnis}% + \def\mailname{{\it Correspondence to\/}:} + \def\noteaddname{Nachtrag}% + \def\notename{Anmerkung}% + \def\partname{Teil}% +%\def\problemname{Problem}% + \def\proofname{Beweis}% + \def\propertyname{Eigenschaft}% +%\def\propositionname{Proposition}% + \def\questionname{Frage}% + \def\remarkname{Anmerkung}% + \def\seename{siehe} + \def\solutionname{L\"osung}% + \def\subclassname{{\it Subject Classifications\/}:} + \def\tablename{Tabelle}% +%\def\theoremname{Theorem}% +} + +% Ragged bottom for the actual page +\def\thisbottomragged{\def\@textbottom{\vskip\z@ plus.0001fil +\global\let\@textbottom\relax}} + +\renewcommand\small{% + \@setfontsize\small\@ixpt{11}% + \abovedisplayskip 8.5\p@ \@plus3\p@ \@minus4\p@ + \abovedisplayshortskip \z@ \@plus2\p@ + \belowdisplayshortskip 4\p@ \@plus2\p@ \@minus2\p@ + \def\@listi{\leftmargin\leftmargini + \parsep 0\p@ \@plus1\p@ \@minus\p@ + \topsep 8\p@ \@plus2\p@ \@minus4\p@ + \itemsep0\p@}% + \belowdisplayskip \abovedisplayskip +} + +\frenchspacing +\widowpenalty=10000 +\clubpenalty=10000 + +\setlength\oddsidemargin {63\p@} +\setlength\evensidemargin {63\p@} +\setlength\marginparwidth {90\p@} + +\setlength\headsep {16\p@} + +\setlength\footnotesep{7.7\p@} +\setlength\textfloatsep{8mm\@plus 2\p@ \@minus 4\p@} +\setlength\intextsep {8mm\@plus 2\p@ \@minus 2\p@} + +\setcounter{secnumdepth}{2} + +\newcounter {chapter} +\renewcommand\thechapter {\@arabic\c@chapter} + +\newif\if@mainmatter \@mainmattertrue +\newcommand\frontmatter{\cleardoublepage + \@mainmatterfalse\pagenumbering{Roman}} +\newcommand\mainmatter{\cleardoublepage + \@mainmattertrue\pagenumbering{arabic}} +\newcommand\backmatter{\if@openright\cleardoublepage\else\clearpage\fi + \@mainmatterfalse} + +\renewcommand\part{\cleardoublepage + \thispagestyle{empty}% + \if@twocolumn + \onecolumn + \@tempswatrue + \else + \@tempswafalse + \fi + \null\vfil + \secdef\@part\@spart} + +\def\@part[#1]#2{% + \ifnum \c@secnumdepth >-2\relax + 
\refstepcounter{part}% + \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% + \else + \addcontentsline{toc}{part}{#1}% + \fi + \markboth{}{}% + {\centering + \interlinepenalty \@M + \normalfont + \ifnum \c@secnumdepth >-2\relax + \huge\bfseries \partname~\thepart + \par + \vskip 20\p@ + \fi + \Huge \bfseries #2\par}% + \@endpart} +\def\@spart#1{% + {\centering + \interlinepenalty \@M + \normalfont + \Huge \bfseries #1\par}% + \@endpart} +\def\@endpart{\vfil\newpage + \if@twoside + \null + \thispagestyle{empty}% + \newpage + \fi + \if@tempswa + \twocolumn + \fi} + +\newcommand\chapter{\clearpage + \thispagestyle{empty}% + \global\@topnum\z@ + \@afterindentfalse + \secdef\@chapter\@schapter} +\def\@chapter[#1]#2{\ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \refstepcounter{chapter}% + \typeout{\@chapapp\space\thechapter.}% + \addcontentsline{toc}{chapter}% + {\protect\numberline{\thechapter}#1}% + \else + \addcontentsline{toc}{chapter}{#1}% + \fi + \else + \addcontentsline{toc}{chapter}{#1}% + \fi + \chaptermark{#1}% + \addtocontents{lof}{\protect\addvspace{10\p@}}% + \addtocontents{lot}{\protect\addvspace{10\p@}}% + \if@twocolumn + \@topnewpage[\@makechapterhead{#2}]% + \else + \@makechapterhead{#2}% + \@afterheading + \fi} +\def\@makechapterhead#1{% +% \vspace*{50\p@}% + {\centering + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \large\bfseries \@chapapp{} \thechapter + \par\nobreak + \vskip 20\p@ + \fi + \fi + \interlinepenalty\@M + \Large \bfseries #1\par\nobreak + \vskip 40\p@ + }} +\def\@schapter#1{\if@twocolumn + \@topnewpage[\@makeschapterhead{#1}]% + \else + \@makeschapterhead{#1}% + \@afterheading + \fi} +\def\@makeschapterhead#1{% +% \vspace*{50\p@}% + {\centering + \normalfont + \interlinepenalty\@M + \Large \bfseries #1\par\nobreak + \vskip 40\p@ + }} + +\renewcommand\section{\@startsection{section}{1}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {12\p@ \@plus 4\p@ \@minus 4\p@}% + {\normalfont\large\bfseries\boldmath + \rightskip=\z@ \@plus 8em\pretolerance=10000 }} +\renewcommand\subsection{\@startsection{subsection}{2}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {8\p@ \@plus 4\p@ \@minus 4\p@}% + {\normalfont\normalsize\bfseries\boldmath + \rightskip=\z@ \@plus 8em\pretolerance=10000 }} +\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {-0.5em \@plus -0.22em \@minus -0.1em}% + {\normalfont\normalsize\bfseries\boldmath}} +\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}% + {-12\p@ \@plus -4\p@ \@minus -4\p@}% + {-0.5em \@plus -0.22em \@minus -0.1em}% + {\normalfont\normalsize\itshape}} +\renewcommand\subparagraph[1]{\typeout{LLNCS warning: You should not use + \string\subparagraph\space with this class}\vskip0.5cm +You should not use \verb|\subparagraph| with this class.\vskip0.5cm} + +\DeclareMathSymbol{\Gamma}{\mathalpha}{letters}{"00} +\DeclareMathSymbol{\Delta}{\mathalpha}{letters}{"01} +\DeclareMathSymbol{\Theta}{\mathalpha}{letters}{"02} +\DeclareMathSymbol{\Lambda}{\mathalpha}{letters}{"03} +\DeclareMathSymbol{\Xi}{\mathalpha}{letters}{"04} +\DeclareMathSymbol{\Pi}{\mathalpha}{letters}{"05} +\DeclareMathSymbol{\Sigma}{\mathalpha}{letters}{"06} +\DeclareMathSymbol{\Upsilon}{\mathalpha}{letters}{"07} +\DeclareMathSymbol{\Phi}{\mathalpha}{letters}{"08} +\DeclareMathSymbol{\Psi}{\mathalpha}{letters}{"09} +\DeclareMathSymbol{\Omega}{\mathalpha}{letters}{"0A} + +\let\footnotesize\small + +\if@custvec +\def\vec#1{\mathchoice{\mbox{\boldmath$\displaystyle#1$}} +{\mbox{\boldmath$\textstyle#1$}} 
+{\mbox{\boldmath$\scriptstyle#1$}} +{\mbox{\boldmath$\scriptscriptstyle#1$}}} +\fi + +\def\squareforqed{\hbox{\rlap{$\sqcap$}$\sqcup$}} +\def\qed{\ifmmode\squareforqed\else{\unskip\nobreak\hfil +\penalty50\hskip1em\null\nobreak\hfil\squareforqed +\parfillskip=0pt\finalhyphendemerits=0\endgraf}\fi} + +\def\getsto{\mathrel{\mathchoice {\vcenter{\offinterlineskip +\halign{\hfil +$\displaystyle##$\hfil\cr\gets\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr\gets +\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr\gets +\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +\gets\cr\to\cr}}}}} +\def\lid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil +$\displaystyle##$\hfil\cr<\cr\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr<\cr +\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr<\cr +\noalign{\vskip1pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +<\cr +\noalign{\vskip0.9pt}=\cr}}}}} +\def\gid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil +$\displaystyle##$\hfil\cr>\cr\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr>\cr +\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr>\cr +\noalign{\vskip1pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +>\cr +\noalign{\vskip0.9pt}=\cr}}}}} +\def\grole{\mathrel{\mathchoice {\vcenter{\offinterlineskip +\halign{\hfil +$\displaystyle##$\hfil\cr>\cr\noalign{\vskip-1pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr +>\cr\noalign{\vskip-1pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr +>\cr\noalign{\vskip-0.8pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +>\cr\noalign{\vskip-0.3pt}<\cr}}}}} +\def\bbbr{{\rm I\!R}} %reelle Zahlen +\def\bbbm{{\rm I\!M}} +\def\bbbn{{\rm I\!N}} %natuerliche Zahlen +\def\bbbf{{\rm I\!F}} +\def\bbbh{{\rm I\!H}} +\def\bbbk{{\rm I\!K}} +\def\bbbp{{\rm I\!P}} +\def\bbbone{{\mathchoice {\rm 1\mskip-4mu l} {\rm 1\mskip-4mu l} +{\rm 1\mskip-4.5mu l} {\rm 1\mskip-5mu l}}} +\def\bbbc{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}}}} +\def\bbbq{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm +Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}}}} +\def\bbbt{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm +T$}\hbox{\hbox to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule 
height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}}}} +\def\bbbs{{\mathchoice +{\setbox0=\hbox{$\displaystyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox +to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox +to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox +to0pt{\kern0.5\wd0\vrule height0.45\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.4\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox +to0pt{\kern0.55\wd0\vrule height0.45\ht0\hss}\box0}}}} +\def\bbbz{{\mathchoice {\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} +{\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} +{\hbox{$\mathsf\scriptstyle Z\kern-0.3em Z$}} +{\hbox{$\mathsf\scriptscriptstyle Z\kern-0.2em Z$}}}} + +\let\ts\, + +\setlength\leftmargini {17\p@} +\setlength\leftmargin {\leftmargini} +\setlength\leftmarginii {\leftmargini} +\setlength\leftmarginiii {\leftmargini} +\setlength\leftmarginiv {\leftmargini} +\setlength \labelsep {.5em} +\setlength \labelwidth{\leftmargini} +\addtolength\labelwidth{-\labelsep} + +\def\@listI{\leftmargin\leftmargini + \parsep 0\p@ \@plus1\p@ \@minus\p@ + \topsep 8\p@ \@plus2\p@ \@minus4\p@ + \itemsep0\p@} +\let\@listi\@listI +\@listi +\def\@listii {\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 0\p@ \@plus2\p@ \@minus\p@} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 0\p@ \@plus\p@\@minus\p@ + \parsep \z@ + \partopsep \p@ \@plus\z@ \@minus\p@} + +\renewcommand\labelitemi{\normalfont\bfseries --} +\renewcommand\labelitemii{$\m@th\bullet$} + +\setlength\arraycolsep{1.4\p@} +\setlength\tabcolsep{1.4\p@} + +\def\tableofcontents{\chapter*{\contentsname\@mkboth{{\contentsname}}% + {{\contentsname}}} + \def\authcount##1{\setcounter{auco}{##1}\setcounter{@auth}{1}} + \def\lastand{\ifnum\value{auco}=2\relax + \unskip{} \andname\ + \else + \unskip \lastandname\ + \fi}% + \def\and{\stepcounter{@auth}\relax + \ifnum\value{@auth}=\value{auco}% + \lastand + \else + \unskip, + \fi}% + \@starttoc{toc}\if@restonecol\twocolumn\fi} + +\def\l@part#1#2{\addpenalty{\@secpenalty}% + \addvspace{2em plus\p@}% % space above part line + \begingroup + \parindent \z@ + \rightskip \z@ plus 5em + \hrule\vskip5pt + \large % same size as for a contribution heading + \bfseries\boldmath % set line in boldface + \leavevmode % TeX command to enter horizontal mode. 
+ #1\par + \vskip5pt + \hrule + \vskip1pt + \nobreak % Never break after part entry + \endgroup} + +\def\@dotsep{2} + +\def\hyperhrefextend{\ifx\hyper@anchor\@undefined\else +{chapter.\thechapter}\fi} + +\def\addnumcontentsmark#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{\protect\numberline + {\thechapter}#3}{\thepage}\hyperhrefextend}} +\def\addcontentsmark#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{#3}{\thepage}\hyperhrefextend}} +\def\addcontentsmarkwop#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{#3}{0}\hyperhrefextend}} + +\def\@adcmk[#1]{\ifcase #1 \or +\def\@gtempa{\addnumcontentsmark}% + \or \def\@gtempa{\addcontentsmark}% + \or \def\@gtempa{\addcontentsmarkwop}% + \fi\@gtempa{toc}{chapter}} +\def\addtocmark{\@ifnextchar[{\@adcmk}{\@adcmk[3]}} + +\def\l@chapter#1#2{\addpenalty{-\@highpenalty} + \vskip 1.0em plus 1pt \@tempdima 1.5em \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip + {\large\bfseries\boldmath#1}\ifx0#2\hfil\null + \else + \nobreak + \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern + \@dotsep mu$}\hfill + \nobreak\hbox to\@pnumwidth{\hss #2}% + \fi\par + \penalty\@highpenalty \endgroup} + +\def\l@title#1#2{\addpenalty{-\@highpenalty} + \addvspace{8pt plus 1pt} + \@tempdima \z@ + \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip + #1\nobreak + \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern + \@dotsep mu$}\hfill + \nobreak\hbox to\@pnumwidth{\hss #2}\par + \penalty\@highpenalty \endgroup} + +\def\l@author#1#2{\addpenalty{\@highpenalty} + \@tempdima=\z@ %15\p@ + \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima %\hskip -\leftskip + \textit{#1}\par + \penalty\@highpenalty \endgroup} + +%\setcounter{tocdepth}{0} +\newdimen\tocchpnum +\newdimen\tocsecnum +\newdimen\tocsectotal +\newdimen\tocsubsecnum +\newdimen\tocsubsectotal +\newdimen\tocsubsubsecnum +\newdimen\tocsubsubsectotal +\newdimen\tocparanum +\newdimen\tocparatotal +\newdimen\tocsubparanum +\tocchpnum=\z@ % no chapter numbers +\tocsecnum=15\p@ % section 88. 
plus 2.222pt +\tocsubsecnum=23\p@ % subsection 88.8 plus 2.222pt +\tocsubsubsecnum=27\p@ % subsubsection 88.8.8 plus 1.444pt +\tocparanum=35\p@ % paragraph 88.8.8.8 plus 1.666pt +\tocsubparanum=43\p@ % subparagraph 88.8.8.8.8 plus 1.888pt +\def\calctocindent{% +\tocsectotal=\tocchpnum +\advance\tocsectotal by\tocsecnum +\tocsubsectotal=\tocsectotal +\advance\tocsubsectotal by\tocsubsecnum +\tocsubsubsectotal=\tocsubsectotal +\advance\tocsubsubsectotal by\tocsubsubsecnum +\tocparatotal=\tocsubsubsectotal +\advance\tocparatotal by\tocparanum} +\calctocindent + +\def\l@section{\@dottedtocline{1}{\tocchpnum}{\tocsecnum}} +\def\l@subsection{\@dottedtocline{2}{\tocsectotal}{\tocsubsecnum}} +\def\l@subsubsection{\@dottedtocline{3}{\tocsubsectotal}{\tocsubsubsecnum}} +\def\l@paragraph{\@dottedtocline{4}{\tocsubsubsectotal}{\tocparanum}} +\def\l@subparagraph{\@dottedtocline{5}{\tocparatotal}{\tocsubparanum}} + +\def\listoffigures{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \fi\section*{\listfigurename\@mkboth{{\listfigurename}}{{\listfigurename}}} + \@starttoc{lof}\if@restonecol\twocolumn\fi} +\def\l@figure{\@dottedtocline{1}{0em}{1.5em}} + +\def\listoftables{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \fi\section*{\listtablename\@mkboth{{\listtablename}}{{\listtablename}}} + \@starttoc{lot}\if@restonecol\twocolumn\fi} +\let\l@table\l@figure + +\renewcommand\listoffigures{% + \section*{\listfigurename + \@mkboth{\listfigurename}{\listfigurename}}% + \@starttoc{lof}% + } + +\renewcommand\listoftables{% + \section*{\listtablename + \@mkboth{\listtablename}{\listtablename}}% + \@starttoc{lot}% + } + +\ifx\oribibl\undefined +\ifx\citeauthoryear\undefined +\renewenvironment{thebibliography}[1] + {\section*{\refname} + \def\@biblabel##1{##1.} + \small + \list{\@biblabel{\@arabic\c@enumiv}}% + {\settowidth\labelwidth{\@biblabel{#1}}% + \leftmargin\labelwidth + \advance\leftmargin\labelsep + \if@openbib + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + \fi + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{\@arabic\c@enumiv}}% + \if@openbib + \renewcommand\newblock{\par}% + \else + \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% + \fi + \sloppy\clubpenalty4000\widowpenalty4000% + \sfcode`\.=\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} +\def\@lbibitem[#1]#2{\item[{[#1]}\hfill]\if@filesw + {\let\protect\noexpand\immediate + \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} +\newcount\@tempcntc +\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi + \@tempcnta\z@\@tempcntb\m@ne\def\@citea{}\@cite{\@for\@citeb:=#2\do + {\@ifundefined + {b@\@citeb}{\@citeo\@tempcntb\m@ne\@citea\def\@citea{,}{\bfseries + ?}\@warning + {Citation `\@citeb' on page \thepage \space undefined}}% + {\setbox\z@\hbox{\global\@tempcntc0\csname b@\@citeb\endcsname\relax}% + \ifnum\@tempcntc=\z@ \@citeo\@tempcntb\m@ne + \@citea\def\@citea{,}\hbox{\csname b@\@citeb\endcsname}% + \else + \advance\@tempcntb\@ne + \ifnum\@tempcntb=\@tempcntc + \else\advance\@tempcntb\m@ne\@citeo + \@tempcnta\@tempcntc\@tempcntb\@tempcntc\fi\fi}}\@citeo}{#1}} +\def\@citeo{\ifnum\@tempcnta>\@tempcntb\else + \@citea\def\@citea{,\,\hskip\z@skip}% + \ifnum\@tempcnta=\@tempcntb\the\@tempcnta\else + {\advance\@tempcnta\@ne\ifnum\@tempcnta=\@tempcntb \else + \def\@citea{--}\fi + \advance\@tempcnta\m@ne\the\@tempcnta\@citea\the\@tempcntb}\fi\fi} +\else 
+\renewenvironment{thebibliography}[1] + {\section*{\refname} + \small + \list{}% + {\settowidth\labelwidth{}% + \leftmargin\parindent + \itemindent=-\parindent + \labelsep=\z@ + \if@openbib + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + \fi + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{}}% + \if@openbib + \renewcommand\newblock{\par}% + \else + \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% + \fi + \sloppy\clubpenalty4000\widowpenalty4000% + \sfcode`\.=\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} + \def\@cite#1{#1}% + \def\@lbibitem[#1]#2{\item[]\if@filesw + {\def\protect##1{\string ##1\space}\immediate + \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} + \fi +\else +\@cons\@openbib@code{\noexpand\small} +\fi + +\def\idxquad{\hskip 10\p@}% space that divides entry from number + +\def\@idxitem{\par\hangindent 10\p@} + +\def\subitem{\par\setbox0=\hbox{--\enspace}% second order + \noindent\hangindent\wd0\box0}% index entry + +\def\subsubitem{\par\setbox0=\hbox{--\,--\enspace}% third + \noindent\hangindent\wd0\box0}% order index entry + +\def\indexspace{\par \vskip 10\p@ plus5\p@ minus3\p@\relax} + +\renewenvironment{theindex} + {\@mkboth{\indexname}{\indexname}% + \thispagestyle{empty}\parindent\z@ + \parskip\z@ \@plus .3\p@\relax + \let\item\par + \def\,{\relax\ifmmode\mskip\thinmuskip + \else\hskip0.2em\ignorespaces\fi}% + \normalfont\small + \begin{multicols}{2}[\@makeschapterhead{\indexname}]% + } + {\end{multicols}} + +\renewcommand\footnoterule{% + \kern-3\p@ + \hrule\@width 2truecm + \kern2.6\p@} + \newdimen\fnindent + \fnindent1em +\long\def\@makefntext#1{% + \parindent \fnindent% + \leftskip \fnindent% + \noindent + \llap{\hb@xt@1em{\hss\@makefnmark\ }}\ignorespaces#1} + +\long\def\@makecaption#1#2{% + \vskip\abovecaptionskip + \sbox\@tempboxa{{\bfseries #1.} #2}% + \ifdim \wd\@tempboxa >\hsize + {\bfseries #1.} #2\par + \else + \global \@minipagefalse + \hb@xt@\hsize{\hfil\box\@tempboxa\hfil}% + \fi + \vskip\belowcaptionskip} + +\def\fps@figure{htbp} +\def\fnum@figure{\figurename\thinspace\thefigure} +\def \@floatboxreset {% + \reset@font + \small + \@setnobreak + \@setminipage +} +\def\fps@table{htbp} +\def\fnum@table{\tablename~\thetable} +\renewenvironment{table} + {\setlength\abovecaptionskip{0\p@}% + \setlength\belowcaptionskip{10\p@}% + \@float{table}} + {\end@float} +\renewenvironment{table*} + {\setlength\abovecaptionskip{0\p@}% + \setlength\belowcaptionskip{10\p@}% + \@dblfloat{table}} + {\end@dblfloat} + +\long\def\@caption#1[#2]#3{\par\addcontentsline{\csname + ext@#1\endcsname}{#1}{\protect\numberline{\csname + the#1\endcsname}{\ignorespaces #2}}\begingroup + \@parboxrestore + \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par + \endgroup} + +% LaTeX does not provide a command to enter the authors institute +% addresses. The \institute command is defined here. 
+ +\newcounter{@inst} +\newcounter{@auth} +\newcounter{auco} +\newdimen\instindent +\newbox\authrun +\newtoks\authorrunning +\newtoks\tocauthor +\newbox\titrun +\newtoks\titlerunning +\newtoks\toctitle + +\def\clearheadinfo{\gdef\@author{No Author Given}% + \gdef\@title{No Title Given}% + \gdef\@subtitle{}% + \gdef\@institute{No Institute Given}% + \gdef\@thanks{}% + \global\titlerunning={}\global\authorrunning={}% + \global\toctitle={}\global\tocauthor={}} + +\def\institute#1{\gdef\@institute{#1}} + +\def\institutename{\par + \begingroup + \parskip=\z@ + \parindent=\z@ + \setcounter{@inst}{1}% + \def\and{\par\stepcounter{@inst}% + \noindent$^{\the@inst}$\enspace\ignorespaces}% + \setbox0=\vbox{\def\thanks##1{}\@institute}% + \ifnum\c@@inst=1\relax + \gdef\fnnstart{0}% + \else + \xdef\fnnstart{\c@@inst}% + \setcounter{@inst}{1}% + \noindent$^{\the@inst}$\enspace + \fi + \ignorespaces + \@institute\par + \endgroup} + +\def\@fnsymbol#1{\ensuremath{\ifcase#1\or\star\or{\star\star}\or + {\star\star\star}\or \dagger\or \ddagger\or + \mathchar "278\or \mathchar "27B\or \|\or **\or \dagger\dagger + \or \ddagger\ddagger \else\@ctrerr\fi}} + +\def\inst#1{\unskip$^{#1}$} +\def\fnmsep{\unskip$^,$} +\def\email#1{{\tt#1}} +\AtBeginDocument{\@ifundefined{url}{\def\url#1{#1}}{}% +\@ifpackageloaded{babel}{% +\@ifundefined{extrasenglish}{}{\addto\extrasenglish{\switcht@albion}}% +\@ifundefined{extrasfrenchb}{}{\addto\extrasfrenchb{\switcht@francais}}% +\@ifundefined{extrasgerman}{}{\addto\extrasgerman{\switcht@deutsch}}% +}{\switcht@@therlang}% +} +\def\homedir{\~{ }} + +\def\subtitle#1{\gdef\@subtitle{#1}} +\clearheadinfo + +\renewcommand\maketitle{\newpage + \refstepcounter{chapter}% + \stepcounter{section}% + \setcounter{section}{0}% + \setcounter{subsection}{0}% + \setcounter{figure}{0} + \setcounter{table}{0} + \setcounter{equation}{0} + \setcounter{footnote}{0}% + \begingroup + \parindent=\z@ + \renewcommand\thefootnote{\@fnsymbol\c@footnote}% + \if@twocolumn + \ifnum \col@number=\@ne + \@maketitle + \else + \twocolumn[\@maketitle]% + \fi + \else + \newpage + \global\@topnum\z@ % Prevents figures from going at top of page. + \@maketitle + \fi + \thispagestyle{empty}\@thanks +% + \def\\{\unskip\ \ignorespaces}\def\inst##1{\unskip{}}% + \def\thanks##1{\unskip{}}\def\fnmsep{\unskip}% + \instindent=\hsize + \advance\instindent by-\headlineindent +% \if!\the\toctitle!\addcontentsline{toc}{title}{\@title}\else +% \addcontentsline{toc}{title}{\the\toctitle}\fi + \if@runhead + \if!\the\titlerunning!\else + \edef\@title{\the\titlerunning}% + \fi + \global\setbox\titrun=\hbox{\small\rm\unboldmath\ignorespaces\@title}% + \ifdim\wd\titrun>\instindent + \typeout{Title too long for running head. Please supply}% + \typeout{a shorter form with \string\titlerunning\space prior to + \string\maketitle}% + \global\setbox\titrun=\hbox{\small\rm + Title Suppressed Due to Excessive Length}% + \fi + \xdef\@title{\copy\titrun}% + \fi +% + \if!\the\tocauthor!\relax + {\def\and{\noexpand\protect\noexpand\and}% + \protected@xdef\toc@uthor{\@author}}% + \else + \def\\{\noexpand\protect\noexpand\newline}% + \protected@xdef\scratch{\the\tocauthor}% + \protected@xdef\toc@uthor{\scratch}% + \fi +% \addcontentsline{toc}{author}{\toc@uthor}% + \if@runhead + \if!\the\authorrunning! 
+ \value{@inst}=\value{@auth}% + \setcounter{@auth}{1}% + \else + \edef\@author{\the\authorrunning}% + \fi + \global\setbox\authrun=\hbox{\small\unboldmath\@author\unskip}% + \ifdim\wd\authrun>\instindent + \typeout{Names of authors too long for running head. Please supply}% + \typeout{a shorter form with \string\authorrunning\space prior to + \string\maketitle}% + \global\setbox\authrun=\hbox{\small\rm + Authors Suppressed Due to Excessive Length}% + \fi + \xdef\@author{\copy\authrun}% + \markboth{\@author}{\@title}% + \fi + \endgroup + \setcounter{footnote}{\fnnstart}% + \clearheadinfo} +% +\def\@maketitle{\newpage + \markboth{}{}% + \def\lastand{\ifnum\value{@inst}=2\relax + \unskip{} \andname\ + \else + \unskip \lastandname\ + \fi}% + \def\and{\stepcounter{@auth}\relax + \ifnum\value{@auth}=\value{@inst}% + \lastand + \else + \unskip, + \fi}% + \begin{center}% + \let\newline\\ + {\Large \bfseries\boldmath + \pretolerance=10000 + \@title \par}\vskip .8cm +\if!\@subtitle!\else {\large \bfseries\boldmath + \vskip -.65cm + \pretolerance=10000 + \@subtitle \par}\vskip .8cm\fi + \setbox0=\vbox{\setcounter{@auth}{1}\def\and{\stepcounter{@auth}}% + \def\thanks##1{}\@author}% + \global\value{@inst}=\value{@auth}% + \global\value{auco}=\value{@auth}% + \setcounter{@auth}{1}% +{\lineskip .5em +\noindent\ignorespaces +\@author\vskip.35cm} + {\small\institutename} + \end{center}% + } + +% definition of the "\spnewtheorem" command. +% +% Usage: +% +% \spnewtheorem{env_nam}{caption}[within]{cap_font}{body_font} +% or \spnewtheorem{env_nam}[numbered_like]{caption}{cap_font}{body_font} +% or \spnewtheorem*{env_nam}{caption}{cap_font}{body_font} +% +% New is "cap_font" and "body_font". It stands for +% fontdefinition of the caption and the text itself. +% +% "\spnewtheorem*" gives a theorem without number. +% +% A defined spnewthoerem environment is used as described +% by Lamport. 
+% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\def\@thmcountersep{} +\def\@thmcounterend{.} + +\def\spnewtheorem{\@ifstar{\@sthm}{\@Sthm}} + +% definition of \spnewtheorem with number + +\def\@spnthm#1#2{% + \@ifnextchar[{\@spxnthm{#1}{#2}}{\@spynthm{#1}{#2}}} +\def\@Sthm#1{\@ifnextchar[{\@spothm{#1}}{\@spnthm{#1}}} + +\def\@spxnthm#1#2[#3]#4#5{\expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}\@addtoreset{#1}{#3}% + \expandafter\xdef\csname the#1\endcsname{\expandafter\noexpand + \csname the#3\endcsname \noexpand\@thmcountersep \@thmcounter{#1}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@spynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}% + \expandafter\xdef\csname the#1\endcsname{\@thmcounter{#1}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#3}{#4}}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@spothm#1[#2]#3#4#5{% + \@ifundefined{c@#2}{\@latexerr{No theorem environment `#2' defined}\@eha}% + {\expandafter\@ifdefinable\csname #1\endcsname + {\global\@namedef{the#1}{\@nameuse{the#2}}% + \expandafter\xdef\csname #1name\endcsname{#3}% + \global\@namedef{#1}{\@spthm{#2}{\csname #1name\endcsname}{#4}{#5}}% + \global\@namedef{end#1}{\@endtheorem}}}} + +\def\@spthm#1#2#3#4{\topsep 7\p@ \@plus2\p@ \@minus4\p@ +\refstepcounter{#1}% +\@ifnextchar[{\@spythm{#1}{#2}{#3}{#4}}{\@spxthm{#1}{#2}{#3}{#4}}} + +\def\@spxthm#1#2#3#4{\@spbegintheorem{#2}{\csname the#1\endcsname}{#3}{#4}% + \ignorespaces} + +\def\@spythm#1#2#3#4[#5]{\@spopargbegintheorem{#2}{\csname + the#1\endcsname}{#5}{#3}{#4}\ignorespaces} + +\def\@spbegintheorem#1#2#3#4{\trivlist + \item[\hskip\labelsep{#3#1\ #2\@thmcounterend}]#4} + +\def\@spopargbegintheorem#1#2#3#4#5{\trivlist + \item[\hskip\labelsep{#4#1\ #2}]{#4(#3)\@thmcounterend\ }#5} + +% definition of \spnewtheorem* without number + +\def\@sthm#1#2{\@Ynthm{#1}{#2}} + +\def\@Ynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname + {\global\@namedef{#1}{\@Thm{\csname #1name\endcsname}{#3}{#4}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@Thm#1#2#3{\topsep 7\p@ \@plus2\p@ \@minus4\p@ +\@ifnextchar[{\@Ythm{#1}{#2}{#3}}{\@Xthm{#1}{#2}{#3}}} + +\def\@Xthm#1#2#3{\@Begintheorem{#1}{#2}{#3}\ignorespaces} + +\def\@Ythm#1#2#3[#4]{\@Opargbegintheorem{#1} + {#4}{#2}{#3}\ignorespaces} + +\def\@Begintheorem#1#2#3{#3\trivlist + \item[\hskip\labelsep{#2#1\@thmcounterend}]} + +\def\@Opargbegintheorem#1#2#3#4{#4\trivlist + \item[\hskip\labelsep{#3#1}]{#3(#2)\@thmcounterend\ }} + +\if@envcntsect + \def\@thmcountersep{.} + \spnewtheorem{theorem}{Theorem}[section]{\bfseries}{\itshape} +\else + \spnewtheorem{theorem}{Theorem}{\bfseries}{\itshape} + \if@envcntreset + \@addtoreset{theorem}{section} + \else + \@addtoreset{theorem}{chapter} + \fi +\fi + +%definition of divers theorem environments +\spnewtheorem*{claim}{Claim}{\itshape}{\rmfamily} +\spnewtheorem*{proof}{Proof}{\itshape}{\rmfamily} +\if@envcntsame % alle Umgebungen wie Theorem. 
+ \def\spn@wtheorem#1#2#3#4{\@spothm{#1}[theorem]{#2}{#3}{#4}} +\else % alle Umgebungen mit eigenem Zaehler + \if@envcntsect % mit section numeriert + \def\spn@wtheorem#1#2#3#4{\@spxnthm{#1}{#2}[section]{#3}{#4}} + \else % nicht mit section numeriert + \if@envcntreset + \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} + \@addtoreset{#1}{section}} + \else + \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} + \@addtoreset{#1}{chapter}}% + \fi + \fi +\fi +\spn@wtheorem{case}{Case}{\itshape}{\rmfamily} +\spn@wtheorem{conjecture}{Conjecture}{\itshape}{\rmfamily} +\spn@wtheorem{corollary}{Corollary}{\bfseries}{\itshape} +\spn@wtheorem{definition}{Definition}{\bfseries}{\itshape} +\spn@wtheorem{example}{Example}{\itshape}{\rmfamily} +\spn@wtheorem{exercise}{Exercise}{\itshape}{\rmfamily} +\spn@wtheorem{lemma}{Lemma}{\bfseries}{\itshape} +\spn@wtheorem{note}{Note}{\itshape}{\rmfamily} +\spn@wtheorem{problem}{Problem}{\itshape}{\rmfamily} +\spn@wtheorem{property}{Property}{\itshape}{\rmfamily} +\spn@wtheorem{proposition}{Proposition}{\bfseries}{\itshape} +\spn@wtheorem{question}{Question}{\itshape}{\rmfamily} +\spn@wtheorem{solution}{Solution}{\itshape}{\rmfamily} +\spn@wtheorem{remark}{Remark}{\itshape}{\rmfamily} + +\def\@takefromreset#1#2{% + \def\@tempa{#1}% + \let\@tempd\@elt + \def\@elt##1{% + \def\@tempb{##1}% + \ifx\@tempa\@tempb\else + \@addtoreset{##1}{#2}% + \fi}% + \expandafter\expandafter\let\expandafter\@tempc\csname cl@#2\endcsname + \expandafter\def\csname cl@#2\endcsname{}% + \@tempc + \let\@elt\@tempd} + +\def\theopargself{\def\@spopargbegintheorem##1##2##3##4##5{\trivlist + \item[\hskip\labelsep{##4##1\ ##2}]{##4##3\@thmcounterend\ }##5} + \def\@Opargbegintheorem##1##2##3##4{##4\trivlist + \item[\hskip\labelsep{##3##1}]{##3##2\@thmcounterend\ }} + } + +\renewenvironment{abstract}{% + \list{}{\advance\topsep by0.35cm\relax\small + \leftmargin=1cm + \labelwidth=\z@ + \listparindent=\z@ + \itemindent\listparindent + \rightmargin\leftmargin}\item[\hskip\labelsep + \bfseries\abstractname]} + {\endlist} + +\newdimen\headlineindent % dimension for space between +\headlineindent=1.166cm % number and text of headings. 
+ +\def\ps@headings{\let\@mkboth\@gobbletwo + \let\@oddfoot\@empty\let\@evenfoot\@empty + \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% + \leftmark\hfil} + \def\@oddhead{\normalfont\small\hfil\rightmark\hspace{\headlineindent}% + \llap{\thepage}} + \def\chaptermark##1{}% + \def\sectionmark##1{}% + \def\subsectionmark##1{}} + +\def\ps@titlepage{\let\@mkboth\@gobbletwo + \let\@oddfoot\@empty\let\@evenfoot\@empty + \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% + \hfil} + \def\@oddhead{\normalfont\small\hfil\hspace{\headlineindent}% + \llap{\thepage}} + \def\chaptermark##1{}% + \def\sectionmark##1{}% + \def\subsectionmark##1{}} + +\if@runhead\ps@headings\else +\ps@empty\fi + +\setlength\arraycolsep{1.4\p@} +\setlength\tabcolsep{1.4\p@} + +\endinput +%end of file llncs.cls diff -r a8a442ba0dbf -r e93760534354 Journal/document/root.bib --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Journal/document/root.bib Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,111 @@ +@article{OwensReppyTuron09, + author = {S.~Owens and J.~Reppy and A.~Turon}, + title = {{R}egular-{E}xpression {D}erivatives {R}e-{E}xamined}, + journal = {Journal of Functional Programming}, + volume = 19, + number = {2}, + year = 2009, + pages = {173--190} +} + + + +@Unpublished{KraussNipkow11, + author = {A.~Kraus and T.~Nipkow}, + title = {{P}roof {P}earl: {R}egular {E}xpression {E}quivalence and {R}elation {A}lgebra}, + note = {To appear in Journal of Automated Reasoning}, + year = {2011} +} + +@Book{Kozen97, + author = {D.~Kozen}, + title = {{A}utomata and {C}omputability}, + publisher = {Springer Verlag}, + year = {1997} +} + + +@incollection{Constable00, + author = {R.~L.~Constable and + P.~B.~Jackson and + P.~Naumov and + J.~C.~Uribe}, + title = {{C}onstructively {F}ormalizing {A}utomata {T}heory}, + booktitle = {Proof, Language, and Interaction}, + year = {2000}, + publisher = {MIT Press}, + pages = {213-238} +} + + +@techreport{Filliatre97, + author = {J.-C. 
Filli\^atre}, + institution = {LIP - ENS Lyon}, + number = {97--04}, + title = {{F}inite {A}utomata {T}heory in {C}oq: + {A} {C}onstructive {P}roof of {K}leene's {T}heorem}, + type = {Research Report}, + year = {1997} +} + +@article{OwensSlind08, + author = {S.~Owens and K.~Slind}, + title = {{A}dapting {F}unctional {P}rograms to {H}igher {O}rder {L}ogic}, + journal = {Higher-Order and Symbolic Computation}, + volume = {21}, + number = {4}, + year = {2008}, + pages = {377--409} +} + +@article{Brzozowski64, + author = {J.~A.~Brzozowski}, + title = {{D}erivatives of {R}egular {E}xpressions}, + journal = {J.~ACM}, + volume = {11}, + issue = {4}, + year = {1964}, + pages = {481--494}, + publisher = {ACM} +} + +@inproceedings{Nipkow98, + author={T.~Nipkow}, + title={{V}erified {L}exical {A}nalysis}, + booktitle={Proc.~of the 11th International Conference on Theorem Proving in Higher Order Logics}, + series={LNCS}, + volume=1479, + pages={1--15}, + year=1998 +} + +@inproceedings{BerghoferNipkow00, + author={S.~Berghofer and T.~Nipkow}, + title={{E}xecuting {H}igher {O}rder {L}ogic}, + booktitle={Proc.~of the International Workshop on Types for Proofs and Programs}, + year=2002, + series={LNCS}, + volume=2277, + pages="24--40" +} + +@book{HopcroftUllman69, + author = {J.~E.~Hopcroft and + J.~D.~Ullman}, + title = {{F}ormal {L}anguages and {T}heir {R}elation to {A}utomata}, + publisher = {Addison-Wesley}, + year = {1969} +} + + +@inproceedings{BerghoferReiter09, + author = {S.~Berghofer and + M.~Reiter}, + title = {{F}ormalizing the {L}ogic-{A}utomaton {C}onnection}, + booktitle = {Proc.~of the 22nd International + Conference on Theorem Proving in Higher Order Logics}, + year = {2009}, + pages = {147-163}, + series = {LNCS}, + volume = {5674} +} \ No newline at end of file diff -r a8a442ba0dbf -r e93760534354 Journal/document/root.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Journal/document/root.tex Wed May 18 19:54:43 2011 +0000 @@ -0,0 +1,73 @@ +\documentclass[runningheads]{llncs} +\usepackage{isabelle} +\usepackage{isabellesym} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{tikz} +\usepackage{pgf} +\usetikzlibrary{arrows,automata,decorations,fit,calc} +\usetikzlibrary{shapes,shapes.arrows,snakes,positioning} +\usepgflibrary{shapes.misc} % LATEX and plain TEX and pure pgf +\usetikzlibrary{matrix} +\usepackage{pdfsetup} +\usepackage{ot1patch} +\usepackage{times} +%%\usepackage{proof} +%%\usepackage{mathabx} +\usepackage{stmaryrd} + +\titlerunning{Myhill-Nerode using Regular Expressions} + + +\urlstyle{rm} +\isabellestyle{it} +\renewcommand{\isastyleminor}{\it}% +\renewcommand{\isastyle}{\normalsize\it}% + + +\def\dn{\,\stackrel{\mbox{\scriptsize def}}{=}\,} +\renewcommand{\isasymequiv}{$\dn$} +\renewcommand{\isasymemptyset}{$\varnothing$} +\renewcommand{\isacharunderscore}{\mbox{$\_\!\_$}} + +\newcommand{\isasymcalL}{\ensuremath{\cal{L}}} +\newcommand{\isasymbigplus}{\ensuremath{\bigplus}} + +\newcommand{\bigplus}{\mbox{\Large\bf$+$}} +\begin{document} + +\title{A Formalisation of the Myhill-Nerode Theorem\\ based on Regular + Expressions (Proof Pearl)} +\author{Chunhan Wu\inst{1} \and Xingyuan Zhang\inst{1} \and Christian Urban\inst{2}} +\institute{PLA University of Science and Technology, China \and TU Munich, Germany} +\maketitle + +%\mbox{}\\[-10mm] +\begin{abstract} +There are numerous textbooks on regular languages. Nearly all of them +introduce the subject by describing finite automata and only mentioning on the +side a connection with regular expressions. 
Unfortunately, automata are difficult +to formalise in HOL-based theorem provers. The reason is that +they need to be represented as graphs, matrices or functions, none of which +are inductive datatypes. Also convenient operations for disjoint unions of +graphs and functions are not easily formalisiable in HOL. In contrast, regular +expressions can be defined conveniently as a datatype and a corresponding +reasoning infrastructure comes for free. We show in this paper that a central +result from formal language theory---the Myhill-Nerode theorem---can be +recreated using only regular expressions. + +\end{abstract} + + +\input{session} + +%%\mbox{}\\[-10mm] +\bibliographystyle{plain} +\bibliography{root} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r a8a442ba0dbf -r e93760534354 Myhill_1.thy --- a/Myhill_1.thy Thu May 12 05:55:05 2011 +0000 +++ b/Myhill_1.thy Wed May 18 19:54:43 2011 +0000 @@ -1,315 +1,10 @@ theory Myhill_1 -imports Main Folds - "~~/src/HOL/Library/While_Combinator" +imports Main Folds Regular + "~~/src/HOL/Library/While_Combinator" begin -section {* Preliminary definitions *} - -types lang = "string set" - - -text {* Sequential composition of two languages *} - -definition - Seq :: "lang \ lang \ lang" (infixr ";;" 100) -where - "A ;; B = {s\<^isub>1 @ s\<^isub>2 | s\<^isub>1 s\<^isub>2. s\<^isub>1 \ A \ s\<^isub>2 \ B}" - - -text {* Some properties of operator @{text ";;"}. *} - -lemma seq_add_left: - assumes a: "A = B" - shows "C ;; A = C ;; B" -using a by simp - -lemma seq_union_distrib_right: - shows "(A \ B) ;; C = (A ;; C) \ (B ;; C)" -unfolding Seq_def by auto - -lemma seq_union_distrib_left: - shows "C ;; (A \ B) = (C ;; A) \ (C ;; B)" -unfolding Seq_def by auto - -lemma seq_intro: - assumes a: "x \ A" "y \ B" - shows "x @ y \ A ;; B " -using a by (auto simp: Seq_def) - -lemma seq_assoc: - shows "(A ;; B) ;; C = A ;; (B ;; C)" -unfolding Seq_def -apply(auto) -apply(blast) -by (metis append_assoc) - -lemma seq_empty [simp]: - shows "A ;; {[]} = A" - and "{[]} ;; A = A" -by (simp_all add: Seq_def) - - -text {* Power and Star of a language *} - -fun - pow :: "lang \ nat \ lang" (infixl "\" 100) -where - "A \ 0 = {[]}" -| "A \ (Suc n) = A ;; (A \ n)" - -definition - Star :: "lang \ lang" ("_\" [101] 102) -where - "A\ \ (\n. A \ n)" - - -lemma star_start[intro]: - shows "[] \ A\" -proof - - have "[] \ A \ 0" by auto - then show "[] \ A\" unfolding Star_def by blast -qed - -lemma star_step [intro]: - assumes a: "s1 \ A" - and b: "s2 \ A\" - shows "s1 @ s2 \ A\" -proof - - from b obtain n where "s2 \ A \ n" unfolding Star_def by auto - then have "s1 @ s2 \ A \ (Suc n)" using a by (auto simp add: Seq_def) - then show "s1 @ s2 \ A\" unfolding Star_def by blast -qed - -lemma star_induct[consumes 1, case_names start step]: - assumes a: "x \ A\" - and b: "P []" - and c: "\s1 s2. 
\s1 \ A; s2 \ A\; P s2\ \ P (s1 @ s2)" - shows "P x" -proof - - from a obtain n where "x \ A \ n" unfolding Star_def by auto - then show "P x" - by (induct n arbitrary: x) - (auto intro!: b c simp add: Seq_def Star_def) -qed - -lemma star_intro1: - assumes a: "x \ A\" - and b: "y \ A\" - shows "x @ y \ A\" -using a b -by (induct rule: star_induct) (auto) - -lemma star_intro2: - assumes a: "y \ A" - shows "y \ A\" -proof - - from a have "y @ [] \ A\" by blast - then show "y \ A\" by simp -qed - -lemma star_intro3: - assumes a: "x \ A\" - and b: "y \ A" - shows "x @ y \ A\" -using a b by (blast intro: star_intro1 star_intro2) - -lemma star_cases: - shows "A\ = {[]} \ A ;; A\" -proof - { fix x - have "x \ A\ \ x \ {[]} \ A ;; A\" - unfolding Seq_def - by (induct rule: star_induct) (auto) - } - then show "A\ \ {[]} \ A ;; A\" by auto -next - show "{[]} \ A ;; A\ \ A\" - unfolding Seq_def by auto -qed - -lemma star_decom: - assumes a: "x \ A\" "x \ []" - shows "\a b. x = a @ b \ a \ [] \ a \ A \ b \ A\" -using a -by (induct rule: star_induct) (blast)+ - -lemma - shows seq_Union_left: "B ;; (\n. A \ n) = (\n. B ;; (A \ n))" - and seq_Union_right: "(\n. A \ n) ;; B = (\n. (A \ n) ;; B)" -unfolding Seq_def by auto - -lemma seq_pow_comm: - shows "A ;; (A \ n) = (A \ n) ;; A" -by (induct n) (simp_all add: seq_assoc[symmetric]) - -lemma seq_star_comm: - shows "A ;; A\ = A\ ;; A" -unfolding Star_def seq_Union_left -unfolding seq_pow_comm seq_Union_right -by simp - - -text {* Two lemmas about the length of strings in @{text "A \ n"} *} - -lemma pow_length: - assumes a: "[] \ A" - and b: "s \ A \ Suc n" - shows "n < length s" -using b -proof (induct n arbitrary: s) - case 0 - have "s \ A \ Suc 0" by fact - with a have "s \ []" by auto - then show "0 < length s" by auto -next - case (Suc n) - have ih: "\s. s \ A \ Suc n \ n < length s" by fact - have "s \ A \ Suc (Suc n)" by fact - then obtain s1 s2 where eq: "s = s1 @ s2" and *: "s1 \ A" and **: "s2 \ A \ Suc n" - by (auto simp add: Seq_def) - from ih ** have "n < length s2" by simp - moreover have "0 < length s1" using * a by auto - ultimately show "Suc n < length s" unfolding eq - by (simp only: length_append) -qed - -lemma seq_pow_length: - assumes a: "[] \ A" - and b: "s \ B ;; (A \ Suc n)" - shows "n < length s" -proof - - from b obtain s1 s2 where eq: "s = s1 @ s2" and *: "s2 \ A \ Suc n" - unfolding Seq_def by auto - from * have " n < length s2" by (rule pow_length[OF a]) - then show "n < length s" using eq by simp -qed - - -section {* A modified version of Arden's lemma *} - -text {* A helper lemma for Arden *} - -lemma arden_helper: - assumes eq: "X = X ;; A \ B" - shows "X = X ;; (A \ Suc n) \ (\m\{0..n}. B ;; (A \ m))" -proof (induct n) - case 0 - show "X = X ;; (A \ Suc 0) \ (\(m::nat)\{0..0}. B ;; (A \ m))" - using eq by simp -next - case (Suc n) - have ih: "X = X ;; (A \ Suc n) \ (\m\{0..n}. B ;; (A \ m))" by fact - also have "\ = (X ;; A \ B) ;; (A \ Suc n) \ (\m\{0..n}. B ;; (A \ m))" using eq by simp - also have "\ = X ;; (A \ Suc (Suc n)) \ (B ;; (A \ Suc n)) \ (\m\{0..n}. B ;; (A \ m))" - by (simp add: seq_union_distrib_right seq_assoc) - also have "\ = X ;; (A \ Suc (Suc n)) \ (\m\{0..Suc n}. B ;; (A \ m))" - by (auto simp add: le_Suc_eq) - finally show "X = X ;; (A \ Suc (Suc n)) \ (\m\{0..Suc n}. B ;; (A \ m))" . 
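In conventional language notation (writing "·" for the concatenation operator ";;", A^m for the m-fold power, and A* for star), the unfolding fact established by arden_helper and Arden's lemma proved immediately below read:

\[
X = X\cdot A \,\cup\, B \;\Longrightarrow\; X = X\cdot A^{\,n+1} \,\cup\, \bigcup_{m=0}^{n} B\cdot A^{m} \qquad\text{for every } n,
\]
\[
[\,]\notin A \;\Longrightarrow\; \bigl(\,X = X\cdot A \cup B \;\Longleftrightarrow\; X = B\cdot A^{\star}\,\bigr).
\]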
-qed - -theorem arden: - assumes nemp: "[] \ A" - shows "X = X ;; A \ B \ X = B ;; A\" -proof - assume eq: "X = B ;; A\" - have "A\ = {[]} \ A\ ;; A" - unfolding seq_star_comm[symmetric] - by (rule star_cases) - then have "B ;; A\ = B ;; ({[]} \ A\ ;; A)" - by (rule seq_add_left) - also have "\ = B \ B ;; (A\ ;; A)" - unfolding seq_union_distrib_left by simp - also have "\ = B \ (B ;; A\) ;; A" - by (simp only: seq_assoc) - finally show "X = X ;; A \ B" - using eq by blast -next - assume eq: "X = X ;; A \ B" - { fix n::nat - have "B ;; (A \ n) \ X" using arden_helper[OF eq, of "n"] by auto } - then have "B ;; A\ \ X" - unfolding Seq_def Star_def UNION_def by auto - moreover - { fix s::string - obtain k where "k = length s" by auto - then have not_in: "s \ X ;; (A \ Suc k)" - using seq_pow_length[OF nemp] by blast - assume "s \ X" - then have "s \ X ;; (A \ Suc k) \ (\m\{0..k}. B ;; (A \ m))" - using arden_helper[OF eq, of "k"] by auto - then have "s \ (\m\{0..k}. B ;; (A \ m))" using not_in by auto - moreover - have "(\m\{0..k}. B ;; (A \ m)) \ (\n. B ;; (A \ n))" by auto - ultimately - have "s \ B ;; A\" - unfolding seq_Union_left Star_def by auto } - then have "X \ B ;; A\" by auto - ultimately - show "X = B ;; A\" by simp -qed - - -section {* Regular Expressions *} - -datatype rexp = - NULL -| EMPTY -| CHAR char -| SEQ rexp rexp -| ALT rexp rexp -| STAR rexp - - -text {* - The function @{text L} is overloaded, with the idea that @{text "L x"} - evaluates to the language represented by the object @{text x}. -*} - -consts L:: "'a \ lang" - -overloading L_rexp \ "L:: rexp \ lang" -begin -fun - L_rexp :: "rexp \ lang" -where - "L_rexp (NULL) = {}" - | "L_rexp (EMPTY) = {[]}" - | "L_rexp (CHAR c) = {[c]}" - | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)" - | "L_rexp (ALT r1 r2) = (L_rexp r1) \ (L_rexp r2)" - | "L_rexp (STAR r) = (L_rexp r)\" -end - - -text {* ALT-combination of a set or regulare expressions *} - -abbreviation - Setalt ("\_" [1000] 999) -where - "\A \ folds ALT NULL A" - -text {* - For finite sets, @{term Setalt} is preserved under @{term L}. -*} - -lemma folds_alt_simp [simp]: - fixes rs::"rexp set" - assumes a: "finite rs" - shows "L (\rs) = \ (L ` rs)" -unfolding folds_def -apply(rule set_eqI) -apply(rule someI2_ex) -apply(rule_tac finite_imp_fold_graph[OF a]) -apply(erule fold_graph.induct) -apply(auto) -done - - section {* Direction @{text "finite partition \ regular language"} *} - -text {* Just a technical lemma for collections and pairs *} - lemma Pair_Collect[simp]: shows "(x, y) \ {(x, y). P x y} \ P x y" by simp @@ -321,26 +16,17 @@ where "\A \ {(x, y). (\z. x @ z \ A \ y @ z \ A)}" -text {* - Among the equivalence clases of @{text "\A"}, the set @{text "finals A"} - singles out those which contains the strings from @{text A}. -*} - definition finals :: "lang \ lang set" where "finals A \ {\A `` {s} | s . s \ A}" - lemma lang_is_union_of_finals: shows "A = \ finals A" unfolding finals_def unfolding Image_def unfolding str_eq_rel_def -apply(auto) -apply(drule_tac x = "[]" in spec) -apply(auto) -done +by (auto) (metis append_Nil2) lemma finals_in_partitions: shows "finals A \ (UNIV // \A)" @@ -351,28 +37,32 @@ text {* The two kinds of terms in the rhs of equations. 
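For a finite language the relation ≈A and the set finals A defined above become directly computable: the residual {z | x @ z ∈ A} of a string is a finite set, and two strings are ≈A-related exactly when their residuals agree. The Python sketch below is purely illustrative (the names residual, equiv, classes and finals are ad-hoc, not part of the development); it groups a handful of candidate strings into their ≈A-classes and picks out the final classes.

def residual(x, A):
    """All continuations z with x + z in A (exact, because A is finite)."""
    return frozenset(w[len(x):] for w in A if w.startswith(x))

def equiv(x, y, A):
    """x ~A y, decided via residuals."""
    return residual(x, A) == residual(y, A)

def classes(A, candidates):
    """Group candidate strings into their ~A equivalence classes."""
    groups = {}
    for x in candidates:
        groups.setdefault(residual(x, A), []).append(x)
    return list(groups.values())

def finals(A, candidates):
    """The classes containing a string of A, i.e. whose residual contains ''."""
    return [cls for cls in classes(A, candidates) if "" in residual(cls[0], A)]

if __name__ == "__main__":
    A = {"ab", "abb"}                          # a small finite language
    cands = ["", "a", "ab", "abb", "b", "ba"]  # a few representative strings
    print(equiv("b", "ba", A))  # True: neither can be extended into A
    print(classes(A, cands))    # [[''], ['a'], ['ab'], ['abb'], ['b', 'ba']]
    print(finals(A, cands))     # [['ab'], ['abb']]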
*} -datatype rhs_item = +datatype rhs_trm = Lam "rexp" (* Lambda-marker *) | Trn "lang" "rexp" (* Transition *) -overloading L_rhs_item \ "L:: rhs_item \ lang" +overloading L_rhs_trm \ "L:: rhs_trm \ lang" begin - fun L_rhs_item:: "rhs_item \ lang" + fun L_rhs_trm:: "rhs_trm \ lang" where - "L_rhs_item (Lam r) = L r" - | "L_rhs_item (Trn X r) = X ;; L r" + "L_rhs_trm (Lam r) = L r" + | "L_rhs_trm (Trn X r) = X ;; L r" end -overloading L_rhs \ "L:: rhs_item set \ lang" +overloading L_rhs \ "L:: rhs_trm set \ lang" begin - fun L_rhs:: "rhs_item set \ lang" + fun L_rhs:: "rhs_trm set \ lang" where "L_rhs rhs = \ (L ` rhs)" end +lemma L_rhs_set: + shows "L {Trn X r | r. P r} = \{L (Trn X r) | r. P r}" +by (auto simp del: L_rhs_trm.simps) + lemma L_rhs_union_distrib: - fixes A B::"rhs_item set" + fixes A B::"rhs_trm set" shows "L A \ L B = L (A \ B)" by simp @@ -398,60 +88,34 @@ "Init CS \ {(X, Init_rhs CS X) | X. X \ CS}" - section {* Arden Operation on equations *} -text {* - The function @{text "attach_rexp r item"} SEQ-composes @{text r} to the - right of every rhs-item. -*} - fun - append_rexp :: "rexp \ rhs_item \ rhs_item" + Append_rexp :: "rexp \ rhs_trm \ rhs_trm" where - "append_rexp r (Lam rexp) = Lam (SEQ rexp r)" -| "append_rexp r (Trn X rexp) = Trn X (SEQ rexp r)" + "Append_rexp r (Lam rexp) = Lam (SEQ rexp r)" +| "Append_rexp r (Trn X rexp) = Trn X (SEQ rexp r)" definition - "append_rhs_rexp rhs rexp \ (append_rexp rexp) ` rhs" + "Append_rexp_rhs rhs rexp \ (Append_rexp rexp) ` rhs" definition "Arden X rhs \ - append_rhs_rexp (rhs - {Trn X r | r. Trn X r \ rhs}) (STAR (\ {r. Trn X r \ rhs}))" + Append_rexp_rhs (rhs - {Trn X r | r. Trn X r \ rhs}) (STAR (\ {r. Trn X r \ rhs}))" section {* Substitution Operation on equations *} -text {* - Suppose and equation @{text "X = xrhs"}, @{text "Subst"} substitutes - all occurences of @{text "X"} in @{text "rhs"} by @{text "xrhs"}. -*} - definition "Subst rhs X xrhs \ - (rhs - {Trn X r | r. Trn X r \ rhs}) \ (append_rhs_rexp xrhs (\ {r. Trn X r \ rhs}))" - -text {* - @{text "eqs_subst ES X xrhs"} substitutes @{text xrhs} into every - equation of the equational system @{text ES}. -*} - -types esystem = "(lang \ rhs_item set) set" + (rhs - {Trn X r | r. Trn X r \ rhs}) \ (Append_rexp_rhs xrhs (\ {r. Trn X r \ rhs}))" definition - Subst_all :: "esystem \ lang \ rhs_item set \ esystem" + Subst_all :: "(lang \ rhs_trm set) set \ lang \ rhs_trm set \ (lang \ rhs_trm set) set" where "Subst_all ES X xrhs \ {(Y, Subst yrhs X xrhs) | Y yrhs. (Y, yrhs) \ ES}" -text {* - The following term @{text "remove ES Y yrhs"} removes the equation - @{text "Y = yrhs"} from equational system @{text "ES"} by replacing - all occurences of @{text "Y"} by its definition (using @{text "eqs_subst"}). - The @{text "Y"}-definition is made non-recursive using Arden's transformation - @{text "arden_variate Y yrhs"}. - *} - definition "Remove ES X xrhs \ Subst_all (ES - {(X, xrhs)}) X (Arden X xrhs)" @@ -459,11 +123,6 @@ section {* While-combinator *} -text {* - The following term @{text "Iter X ES"} represents one iteration in the while loop. - It arbitrarily chooses a @{text "Y"} different from @{text "X"} to remove. -*} - definition "Iter X ES \ (let (Y, yrhs) = SOME (Y, yrhs). (Y, yrhs) \ ES \ X \ Y in Remove ES Y yrhs)" @@ -476,64 +135,28 @@ unfolding Iter_def using assms by (rule_tac a="(Y, yrhs)" in someI2) (auto) - -text {* - The following term @{text "Reduce X ES"} repeatedly removes characteriztion equations - for unknowns other than @{text "X"} until one is left. 
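The operations Arden, Append_rexp_rhs and Subst defined above manipulate right-hand sides purely symbolically: Arden removes the X-transitions of an equation and SEQ-composes the starred sum of their regular expressions onto everything that remains, while Subst replaces X-transitions by a given right-hand side. A minimal Python sketch of these manipulations (illustrative only; the tuple encoding of regular expressions and all helper names are ad-hoc assumptions, not the development's code):

# Regular expressions as nested tuples: ("NULL",), ("EMPTY",), ("CHAR", c),
# ("SEQ", r1, r2), ("ALT", r1, r2), ("STAR", r).
# A right-hand side is a list of items:  ("Lam", r)  or  ("Trn", X, r).

def alts(rs):
    """Fold a list of regexes into one ALT-regex (NULL for the empty list)."""
    acc = ("NULL",)
    for r in rs:
        acc = ("ALT", r, acc)
    return acc

def append_rexp(r, item):
    """SEQ-compose r to the right of a single rhs item."""
    if item[0] == "Lam":
        return ("Lam", ("SEQ", item[1], r))
    _, var, rexp = item
    return ("Trn", var, ("SEQ", rexp, r))

def arden(X, rhs):
    """Make the equation  X = rhs  non-recursive in X."""
    x_regexes = [item[2] for item in rhs if item[0] == "Trn" and item[1] == X]
    rest      = [item for item in rhs if not (item[0] == "Trn" and item[1] == X)]
    star      = ("STAR", alts(x_regexes))
    return [append_rexp(star, item) for item in rest]

def subst(rhs, X, xrhs):
    """Replace the X-transitions of rhs by xrhs (composed with their regexes)."""
    x_regexes = [item[2] for item in rhs if item[0] == "Trn" and item[1] == X]
    rest      = [item for item in rhs if not (item[0] == "Trn" and item[1] == X)]
    return rest + [append_rexp(alts(x_regexes), item) for item in xrhs]

# Example: the recursive equation  X = X·a ∪ {[]}  becomes
#   arden("X", [("Trn", "X", ("CHAR", "a")), ("Lam", ("EMPTY",))])
#     == [("Lam", ("SEQ", ("EMPTY",), ("STAR", ("ALT", ("CHAR", "a"), ("NULL",)))))]
# which denotes the language a*.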
-*} - abbreviation "Cond ES \ card ES \ 1" definition "Solve X ES \ while Cond (Iter X) ES" -text {* - Since the @{text "while"} combinator from HOL library is used to implement @{text "Solve X ES"}, - the induction principle @{thm [source] while_rule} is used to proved the desired properties - of @{text "Solve X ES"}. For this purpose, an invariant predicate @{text "invariant"} is defined - in terms of a series of auxilliary predicates: -*} section {* Invariants *} -text {* Every variable is defined at most once in @{text ES}. *} - definition - "distinct_equas ES \ + "distinctness ES \ \ X rhs rhs'. (X, rhs) \ ES \ (X, rhs') \ ES \ rhs = rhs'" - -text {* - Every equation in @{text ES} (represented by @{text "(X, rhs)"}) - is valid, i.e. @{text "X = L rhs"}. -*} - definition - "sound_eqs ES \ \(X, rhs) \ ES. X = L rhs" - -text {* - @{text "ardenable rhs"} requires regular expressions occuring in - transitional items of @{text "rhs"} do not contain empty string. This is - necessary for the application of Arden's transformation to @{text "rhs"}. -*} + "soundness ES \ \(X, rhs) \ ES. X = L rhs" definition "ardenable rhs \ (\ Y r. Trn Y r \ rhs \ [] \ L r)" -text {* - The following @{text "ardenable_all ES"} requires that Arden's transformation - is applicable to every equation of equational system @{text "ES"}. -*} - definition "ardenable_all ES \ \(X, rhs) \ ES. ardenable rhs" - -text {* - @{text "finite_rhs ES"} requires every equation in @{text "rhs"} - be finite. -*} definition "finite_rhs ES \ \(X, rhs) \ ES. finite rhs" @@ -541,56 +164,42 @@ "finite_rhs ES = (\ X rhs. (X, rhs) \ ES \ finite rhs)" unfolding finite_rhs_def by auto -text {* - @{text "classes_of rhs"} returns all variables (or equivalent classes) - occuring in @{text "rhs"}. - *} - definition "rhss rhs \ {X | X r. Trn X r \ rhs}" -text {* - @{text "lefts_of ES"} returns all variables defined by an - equational system @{text "ES"}. -*} definition "lhss ES \ {Y | Y yrhs. (Y, yrhs) \ ES}" -text {* - The following @{text "valid_eqs ES"} requires that every variable occuring - on the right hand side of equations is already defined by some equation in @{text "ES"}. -*} definition - "valid_eqs ES \ \(X, rhs) \ ES. rhss rhs \ lhss ES" + "validity ES \ \(X, rhs) \ ES. rhss rhs \ lhss ES" + +lemma rhss_union_distrib: + shows "rhss (A \ B) = rhss A \ rhss B" +by (auto simp add: rhss_def) + +lemma lhss_union_distrib: + shows "lhss (A \ B) = lhss A \ lhss B" +by (auto simp add: lhss_def) -text {* - The invariant @{text "invariant(ES)"} is a conjunction of all the previously defined constaints. - *} definition "invariant ES \ finite ES \ finite_rhs ES - \ sound_eqs ES - \ distinct_equas ES + \ soundness ES + \ distinctness ES \ ardenable_all ES - \ valid_eqs ES" + \ validity ES" lemma invariantI: - assumes "sound_eqs ES" "finite ES" "distinct_equas ES" "ardenable_all ES" - "finite_rhs ES" "valid_eqs ES" + assumes "soundness ES" "finite ES" "distinctness ES" "ardenable_all ES" + "finite_rhs ES" "validity ES" shows "invariant ES" using assms by (simp add: invariant_def) + subsection {* The proof of this direction *} -subsubsection {* Basic properties *} - -text {* - The following are some basic properties of the above definitions. -*} - - lemma finite_Trn: assumes fin: "finite rhs" shows "finite {r. Trn Y r \ rhs}" @@ -618,55 +227,30 @@ done qed -lemma rexp_of_empty: - assumes finite: "finite rhs" - and nonempty: "ardenable rhs" - shows "[] \ L (\ {r. 
Trn X r \ rhs})" -using finite nonempty ardenable_def -using finite_Trn[OF finite] -by auto - -lemma lang_of_rexp_of: +lemma rhs_trm_soundness: assumes finite:"finite rhs" shows "L ({Trn X r| r. Trn X r \ rhs}) = X ;; (L (\{r. Trn X r \ rhs}))" proof - have "finite {r. Trn X r \ rhs}" by (rule finite_Trn[OF finite]) - then show ?thesis - apply(auto simp add: Seq_def) - apply(rule_tac x = "s\<^isub>1" in exI, rule_tac x = "s\<^isub>2" in exI) - apply(auto) - apply(rule_tac x= "Trn X xa" in exI) - apply(auto simp add: Seq_def) - done + then show "L ({Trn X r| r. Trn X r \ rhs}) = X ;; (L (\{r. Trn X r \ rhs}))" + by (simp only: L_rhs_set L_rhs_trm.simps) (auto simp add: Seq_def) qed -lemma lang_of_append: - "L (append_rexp r rhs_item) = L rhs_item ;; L r" -by (induct rule: append_rexp.induct) +lemma lang_of_append_rexp: + "L (Append_rexp r rhs_trm) = L rhs_trm ;; L r" +by (induct rule: Append_rexp.induct) (auto simp add: seq_assoc) -lemma lang_of_append_rhs: - "L (append_rhs_rexp rhs r) = L rhs ;; L r" -unfolding append_rhs_rexp_def -by (auto simp add: Seq_def lang_of_append) +lemma lang_of_append_rexp_rhs: + "L (Append_rexp_rhs rhs r) = L rhs ;; L r" +unfolding Append_rexp_rhs_def +by (auto simp add: Seq_def lang_of_append_rexp) -lemma rhss_union_distrib: - shows "rhss (A \ B) = rhss A \ rhss B" -by (auto simp add: rhss_def) - -lemma lhss_union_distrib: - shows "lhss (A \ B) = lhss A \ lhss B" -by (auto simp add: lhss_def) subsubsection {* Intialization *} -text {* - The following several lemmas until @{text "init_ES_satisfy_invariant"} shows that - the initial equational system satisfies invariant @{text "invariant"}. -*} - lemma defined_by_str: assumes "s \ X" "X \ UNIV // \A" shows "X = \A `` {s}" @@ -702,42 +286,37 @@ show "X \ L rhs" proof fix x - assume "(1)": "x \ X" - show "x \ L rhs" - proof (cases "x = []") - assume empty: "x = []" - thus ?thesis using X_in_eqs "(1)" - by (auto simp: Init_def Init_rhs_def) - next - assume not_empty: "x \ []" - then obtain clist c where decom: "x = clist @ [c]" - by (case_tac x rule:rev_cases, auto) - have "X \ UNIV // \A" using X_in_eqs by (auto simp:Init_def) - then obtain Y - where "Y \ UNIV // \A" - and "Y ;; {[c]} \ X" - and "clist \ Y" - using decom "(1)" every_eqclass_has_transition by blast - hence - "x \ L {Trn Y (CHAR c)| Y c. Y \ UNIV // \A \ Y \c\ X}" + assume in_X: "x \ X" + { assume empty: "x = []" + then have "x \ L rhs" using X_in_eqs in_X + unfolding Init_def Init_rhs_def + by auto + } + moreover + { assume not_empty: "x \ []" + then obtain s c where decom: "x = s @ [c]" + using rev_cases by blast + have "X \ UNIV // \A" using X_in_eqs unfolding Init_def by auto + then obtain Y where "Y \ UNIV // \A" "Y ;; {[c]} \ X" "s \ Y" + using decom in_X every_eqclass_has_transition by blast + then have "x \ L {Trn Y (CHAR c)| Y c. 
Y \ UNIV // \A \ Y \c\ X}" unfolding transition_def - using "(1)" decom - by (simp, rule_tac x = "Trn Y (CHAR c)" in exI, simp add:Seq_def) - thus ?thesis using X_in_eqs "(1)" - by (simp add: Init_def Init_rhs_def) - qed + using decom by (force simp add: Seq_def) + then have "x \ L rhs" using X_in_eqs in_X + unfolding Init_def Init_rhs_def by simp + } + ultimately show "x \ L rhs" by blast qed next show "L rhs \ X" using X_in_eqs - by (auto simp:Init_def Init_rhs_def transition_def) + unfolding Init_def Init_rhs_def transition_def + by auto qed lemma test: assumes X_in_eqs: "(X, rhs) \ Init (UNIV // \A)" shows "X = \ (L ` rhs)" -using assms -by (drule_tac l_eq_r_in_eqs) (simp) - +using assms l_eq_r_in_eqs by (simp) lemma finite_Init_rhs: assumes finite: "finite CS" @@ -759,31 +338,26 @@ assumes finite_CS: "finite (UNIV // \A)" shows "invariant (Init (UNIV // \A))" proof (rule invariantI) - show "sound_eqs (Init (UNIV // \A))" - unfolding sound_eqs_def + show "soundness (Init (UNIV // \A))" + unfolding soundness_def using l_eq_r_in_eqs by auto show "finite (Init (UNIV // \A))" using finite_CS unfolding Init_def by simp - show "distinct_equas (Init (UNIV // \A))" - unfolding distinct_equas_def Init_def by simp + show "distinctness (Init (UNIV // \A))" + unfolding distinctness_def Init_def by simp show "ardenable_all (Init (UNIV // \A))" unfolding ardenable_all_def Init_def Init_rhs_def ardenable_def by auto show "finite_rhs (Init (UNIV // \A))" using finite_Init_rhs[OF finite_CS] unfolding finite_rhs_def Init_def by auto - show "valid_eqs (Init (UNIV // \A))" - unfolding valid_eqs_def Init_def Init_rhs_def rhss_def lhss_def + show "validity (Init (UNIV // \A))" + unfolding validity_def Init_def Init_rhs_def rhss_def lhss_def by auto qed subsubsection {* Interation step *} -text {* - From this point until @{text "iteration_step"}, - the correctness of the iteration step @{text "Iter X ES"} is proved. -*} - lemma Arden_keeps_eq: assumes l_eq_r: "X = L rhs" and not_empty: "ardenable rhs" @@ -791,40 +365,39 @@ shows "X = L (Arden X rhs)" proof - def A \ "L (\{r. Trn X r \ rhs})" - def b \ "rhs - {Trn X r | r. Trn X r \ rhs}" - def B \ "L b" - have "X = B ;; A\" - proof - - have "L rhs = L({Trn X r | r. Trn X r \ rhs} \ b)" by (auto simp: b_def) - also have "\ = X ;; A \ B" - unfolding L_rhs_union_distrib[symmetric] - by (simp only: lang_of_rexp_of finite B_def A_def) - finally show ?thesis - apply(rule_tac arden[THEN iffD1]) - apply(simp (no_asm) add: A_def) - using finite_Trn[OF finite] not_empty - apply(simp add: ardenable_def) - using l_eq_r - apply(simp) - done - qed - moreover have "L (Arden X rhs) = B ;; A\" - by (simp only:Arden_def L_rhs_union_distrib lang_of_append_rhs - B_def A_def b_def L_rexp.simps seq_union_distrib_left) - ultimately show ?thesis by simp + def b \ "{Trn X r | r. Trn X r \ rhs}" + def B \ "L (rhs - b)" + have not_empty2: "[] \ A" + using finite_Trn[OF finite] not_empty + unfolding A_def ardenable_def by simp + have "X = L rhs" using l_eq_r by simp + also have "\ = L (b \ (rhs - b))" unfolding b_def by auto + also have "\ = L b \ B" unfolding B_def by (simp only: L_rhs_union_distrib) + also have "\ = X ;; A \ B" + unfolding b_def + unfolding rhs_trm_soundness[OF finite] + unfolding A_def + by blast + finally have "X = X ;; A \ B" . 
+ then have "X = B ;; A\" + by (simp add: arden[OF not_empty2]) + also have "\ = L (Arden X rhs)" + unfolding Arden_def A_def B_def b_def + by (simp only: lang_of_append_rexp_rhs L_rexp.simps) + finally show "X = L (Arden X rhs)" by simp qed -lemma append_keeps_finite: - "finite rhs \ finite (append_rhs_rexp rhs r)" -by (auto simp:append_rhs_rexp_def) +lemma Append_keeps_finite: + "finite rhs \ finite (Append_rexp_rhs rhs r)" +by (auto simp:Append_rexp_rhs_def) lemma Arden_keeps_finite: "finite rhs \ finite (Arden X rhs)" -by (auto simp:Arden_def append_keeps_finite) +by (auto simp:Arden_def Append_keeps_finite) -lemma append_keeps_nonempty: - "ardenable rhs \ ardenable (append_rhs_rexp rhs r)" -apply (auto simp:ardenable_def append_rhs_rexp_def) +lemma Append_keeps_nonempty: + "ardenable rhs \ ardenable (Append_rexp_rhs rhs r)" +apply (auto simp:ardenable_def Append_rexp_rhs_def) by (case_tac x, auto simp:Seq_def) lemma nonempty_set_sub: @@ -837,12 +410,12 @@ lemma Arden_keeps_nonempty: "ardenable rhs \ ardenable (Arden X rhs)" -by (simp only:Arden_def append_keeps_nonempty nonempty_set_sub) +by (simp only:Arden_def Append_keeps_nonempty nonempty_set_sub) lemma Subst_keeps_nonempty: "\ardenable rhs; ardenable xrhs\ \ ardenable (Subst rhs X xrhs)" -by (simp only:Subst_def append_keeps_nonempty nonempty_set_union nonempty_set_sub) +by (simp only: Subst_def Append_keeps_nonempty nonempty_set_union nonempty_set_sub) lemma Subst_keeps_eq: assumes substor: "X = L xrhs" @@ -850,7 +423,7 @@ shows "L (Subst rhs X xrhs) = L rhs" (is "?Left = ?Right") proof- def A \ "L (rhs - {Trn X r | r. Trn X r \ rhs})" - have "?Left = A \ L (append_rhs_rexp xrhs (\{r. Trn X r \ rhs}))" + have "?Left = A \ L (Append_rexp_rhs xrhs (\{r. Trn X r \ rhs}))" unfolding Subst_def unfolding L_rhs_union_distrib[symmetric] by (simp add: A_def) @@ -862,14 +435,14 @@ unfolding L_rhs_union_distrib by simp qed - moreover have "L (append_rhs_rexp xrhs (\{r. Trn X r \ rhs})) = L ({Trn X r | r. Trn X r \ rhs})" - using finite substor by (simp only:lang_of_append_rhs lang_of_rexp_of) + moreover have "L (Append_rexp_rhs xrhs (\{r. Trn X r \ rhs})) = L ({Trn X r | r. 
Trn X r \ rhs})" + using finite substor by (simp only: lang_of_append_rexp_rhs rhs_trm_soundness) ultimately show ?thesis by simp qed lemma Subst_keeps_finite_rhs: "\finite rhs; finite yrhs\ \ finite (Subst rhs Y yrhs)" -by (auto simp:Subst_def append_keeps_finite) +by (auto simp: Subst_def Append_keeps_finite) lemma Subst_all_keeps_finite: assumes finite: "finite ES" @@ -889,8 +462,8 @@ by (auto intro:Subst_keeps_finite_rhs simp add:Subst_all_def finite_rhs_def) lemma append_rhs_keeps_cls: - "rhss (append_rhs_rexp rhs r) = rhss rhs" -apply (auto simp:rhss_def append_rhs_rexp_def) + "rhss (Append_rexp_rhs rhs r) = rhss rhs" +apply (auto simp:rhss_def Append_rexp_rhs_def) apply (case_tac xa, auto simp:image_def) by (rule_tac x = "SEQ ra r" in exI, rule_tac x = "Trn x ra" in bexI, simp+) @@ -909,9 +482,9 @@ apply (simp only:Subst_def append_rhs_keeps_cls rhss_union_distrib) by (auto simp:rhss_def) -lemma Subst_all_keeps_valid_eqs: - assumes sc: "valid_eqs (ES \ {(Y, yrhs)})" (is "valid_eqs ?A") - shows "valid_eqs (Subst_all ES Y (Arden Y yrhs))" (is "valid_eqs ?B") +lemma Subst_all_keeps_validity: + assumes sc: "validity (ES \ {(Y, yrhs)})" (is "validity ?A") + shows "validity (Subst_all ES Y (Arden Y yrhs))" (is "validity ?B") proof - { fix X xrhs' assume "(X, xrhs') \ ?B" @@ -930,16 +503,16 @@ thus ?thesis using xrhs_xrhs' by (auto simp:Subst_updates_cls) qed moreover have "rhss xrhs \ lhss ES \ {Y}" using X_in sc - apply (simp only:valid_eqs_def lhss_union_distrib) + apply (simp only:validity_def lhss_union_distrib) by (drule_tac x = "(X, xrhs)" in bspec, auto simp:lhss_def) moreover have "rhss (Arden Y yrhs) \ lhss ES \ {Y}" using sc - by (auto simp add:Arden_removes_cl valid_eqs_def lhss_def) + by (auto simp add:Arden_removes_cl validity_def lhss_def) ultimately show ?thesis by auto qed ultimately show ?thesis by simp qed - } thus ?thesis by (auto simp only:Subst_all_def valid_eqs_def) + } thus ?thesis by (auto simp only:Subst_all_def validity_def) qed lemma Subst_all_satisfies_invariant: @@ -947,12 +520,12 @@ shows "invariant (Subst_all ES Y (Arden Y yrhs))" proof (rule invariantI) have Y_eq_yrhs: "Y = L yrhs" - using invariant_ES by (simp only:invariant_def sound_eqs_def, blast) + using invariant_ES by (simp only:invariant_def soundness_def, blast) have finite_yrhs: "finite yrhs" using invariant_ES by (auto simp:invariant_def finite_rhs_def) have nonempty_yrhs: "ardenable yrhs" using invariant_ES by (auto simp:invariant_def ardenable_all_def) - show "sound_eqs (Subst_all ES Y (Arden Y yrhs))" + show "soundness (Subst_all ES Y (Arden Y yrhs))" proof - have "Y = L (Arden Y yrhs)" using Y_eq_yrhs invariant_ES finite_yrhs @@ -963,19 +536,19 @@ apply(auto) done thus ?thesis using invariant_ES - unfolding invariant_def finite_rhs_def2 sound_eqs_def Subst_all_def + unfolding invariant_def finite_rhs_def2 soundness_def Subst_all_def by (auto simp add: Subst_keeps_eq simp del: L_rhs.simps) qed show "finite (Subst_all ES Y (Arden Y yrhs))" using invariant_ES by (simp add:invariant_def Subst_all_keeps_finite) - show "distinct_equas (Subst_all ES Y (Arden Y yrhs))" + show "distinctness (Subst_all ES Y (Arden Y yrhs))" using invariant_ES - unfolding distinct_equas_def Subst_all_def invariant_def by auto + unfolding distinctness_def Subst_all_def invariant_def by auto show "ardenable_all (Subst_all ES Y (Arden Y yrhs))" proof - { fix X rhs assume "(X, rhs) \ ES" - hence "ardenable rhs" using prems invariant_ES + hence "ardenable rhs" using invariant_ES by (auto simp add:invariant_def 
ardenable_all_def) with nonempty_yrhs have "ardenable (Subst rhs Y (Arden Y yrhs))" @@ -996,8 +569,8 @@ ultimately show ?thesis by (simp add:Subst_all_keeps_finite_rhs) qed - show "valid_eqs (Subst_all ES Y (Arden Y yrhs))" - using invariant_ES Subst_all_keeps_valid_eqs by (simp add:invariant_def) + show "validity (Subst_all ES Y (Arden Y yrhs))" + using invariant_ES Subst_all_keeps_validity by (simp add:invariant_def) qed lemma Remove_in_card_measure: @@ -1049,7 +622,7 @@ where Y_in_ES: "(Y, yrhs) \ ES" and not_eq: "(X, xrhs) \ (Y, yrhs)" using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto) then have "(Y, yrhs) \ ES " "X \ Y" - using X_in_ES Inv_ES unfolding invariant_def distinct_equas_def + using X_in_ES Inv_ES unfolding invariant_def distinctness_def by auto then show "(Iter X ES, ES) \ measure card" apply(rule IterI2) @@ -1069,7 +642,7 @@ where Y_in_ES: "(Y, yrhs) \ ES" and not_eq: "(X, xrhs) \ (Y, yrhs)" using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto) then have "(Y, yrhs) \ ES" "X \ Y" - using X_in_ES Inv_ES unfolding invariant_def distinct_equas_def + using X_in_ES Inv_ES unfolding invariant_def distinctness_def by auto then show "invariant (Iter X ES)" proof(rule IterI2) @@ -1078,7 +651,6 @@ then have "ES - {(Y, yrhs)} \ {(Y, yrhs)} = ES" by auto then show "invariant (Remove ES Y yrhs)" unfolding Remove_def using Inv_ES - thm Subst_all_satisfies_invariant by (rule_tac Subst_all_satisfies_invariant) (simp) qed qed @@ -1091,10 +663,10 @@ proof - have finite_ES: "finite ES" using Inv_ES by (simp add: invariant_def) then obtain Y yrhs - where Y_in_ES: "(Y, yrhs) \ ES" and not_eq: "(X, xrhs) \ (Y, yrhs)" + where "(Y, yrhs) \ ES" "(X, xrhs) \ (Y, yrhs)" using Cnd X_in_ES by (drule_tac card_noteq_1_has_more) (auto) then have "(Y, yrhs) \ ES " "X \ Y" - using X_in_ES Inv_ES unfolding invariant_def distinct_equas_def + using X_in_ES Inv_ES unfolding invariant_def distinctness_def by auto then show "\xrhs'. (X, xrhs') \ (Iter X ES)" apply(rule IterI2) @@ -1159,7 +731,7 @@ def A \ "Arden X xrhs" have "rhss xrhs \ {X}" using Inv_ES - unfolding valid_eqs_def invariant_def rhss_def lhss_def + unfolding validity_def invariant_def rhss_def lhss_def by auto then have "rhss A = {}" unfolding A_def by (simp add: Arden_removes_cl) @@ -1170,7 +742,7 @@ using Arden_keeps_finite by auto then have fin: "finite {r. Lam r \ A}" by (rule finite_Lam) - have "X = L xrhs" using Inv_ES unfolding invariant_def sound_eqs_def + have "X = L xrhs" using Inv_ES unfolding invariant_def soundness_def by simp then have "X = L A" using Inv_ES unfolding A_def invariant_def ardenable_all_def finite_rhs_def diff -r a8a442ba0dbf -r e93760534354 Myhill_2.thy --- a/Myhill_2.thy Thu May 12 05:55:05 2011 +0000 +++ b/Myhill_2.thy Wed May 18 19:54:43 2011 +0000 @@ -1,64 +1,15 @@ theory Myhill_2 - imports Myhill_1 - Prefix_subtract + imports Myhill_1 Prefix_subtract "~~/src/HOL/Library/List_Prefix" begin section {* Direction @{text "regular language \finite partition"} *} -subsection {* The scheme*} - -text {* - The following convenient notation @{text "x \A y"} means: - string @{text "x"} and @{text "y"} are equivalent with respect to - language @{text "A"}. - *} - definition str_eq :: "string \ lang \ string \ bool" ("_ \_ _") where "x \A y \ (x, y) \ (\A)" -text {* - The main lemma (@{text "rexp_imp_finite"}) is proved by a structural - induction over regular expressions. where base cases (cases for @{const - "NULL"}, @{const "EMPTY"}, @{const "CHAR"}) are quite straightforward to - proof. 
Real difficulty lies in inductive cases. By inductive hypothesis, - languages defined by sub-expressions induce finite partitiions. Under such - hypothsis, we need to prove that the language defined by the composite - regular expression gives rise to finite partion. The basic idea is to - attach a tag @{text "tag(x)"} to every string @{text "x"}. The tagging - fuction @{text "tag"} is carefully devised, which returns tags made of - equivalent classes of the partitions induced by subexpressoins, and - therefore has a finite range. Let @{text "Lang"} be the composite language, - it is proved that: - \begin{quote} - If strings with the same tag are equivalent with respect to @{text "Lang"}, expressed as: - \[ - @{text "tag(x) = tag(y) \ x \Lang y"} - \] - then the partition induced by @{text "Lang"} must be finite. - \end{quote} - There are two arguments for this. The first goes as the following: - \begin{enumerate} - \item First, the tagging function @{text "tag"} induces an equivalent relation @{text "(=tag=)"} - (defiintion of @{text "f_eq_rel"} and lemma @{text "equiv_f_eq_rel"}). - \item It is shown that: if the range of @{text "tag"} (denoted @{text "range(tag)"}) is finite, - the partition given rise by @{text "(=tag=)"} is finite (lemma @{text "finite_eq_f_rel"}). - Since tags are made from equivalent classes from component partitions, and the inductive - hypothesis ensures the finiteness of these partitions, it is not difficult to prove - the finiteness of @{text "range(tag)"}. - \item It is proved that if equivalent relation @{text "R1"} is more refined than @{text "R2"} - (expressed as @{text "R1 \ R2"}), - and the partition induced by @{text "R1"} is finite, then the partition induced by @{text "R2"} - is finite as well (lemma @{text "refined_partition_finite"}). - \item The injectivity assumption @{text "tag(x) = tag(y) \ x \Lang y"} implies that - @{text "(=tag=)"} is more refined than @{text "(\Lang)"}. - \item Combining the points above, we have: the partition induced by language @{text "Lang"} - is finite (lemma @{text "tag_finite_imageD"}). - \end{enumerate} -*} - definition tag_eq_rel :: "(string \ 'b) \ (string \ string) set" ("=_=") where @@ -69,7 +20,6 @@ shows "finite (UNIV // =tag=)" proof - let "?f" = "\X. tag ` X" and ?A = "(UNIV // =tag=)" - -- {* The finiteness of @{text "f"}-image is a consequence of @{text "rng_fnt"} *} have "finite (?f ` ?A)" proof - have "range ?f \ (Pow (range tag))" unfolding Pow_def by auto @@ -82,25 +32,23 @@ ultimately show "finite (?f ` ?A)" by (rule rev_finite_subset) qed moreover - -- {* The injectivity of @{text "f"}-image follows from the definition of @{text "(=tag=)"} *} have "inj_on ?f ?A" proof - { fix X Y assume X_in: "X \ ?A" and Y_in: "Y \ ?A" and tag_eq: "?f X = ?f Y" - then - obtain x y + then obtain x y where "x \ X" "y \ Y" "tag x = tag y" unfolding quotient_def Image_def image_def tag_eq_rel_def by (simp) (blast) with X_in Y_in have "X = Y" unfolding quotient_def tag_eq_rel_def by auto - } then show "inj_on ?f ?A" unfolding inj_on_def by auto + } + then show "inj_on ?f ?A" unfolding inj_on_def by auto qed - ultimately - show "finite (UNIV // =tag=)" by (rule finite_imageD) + ultimately show "finite (UNIV // =tag=)" by (rule finite_imageD) qed lemma refined_partition_finite: @@ -142,7 +90,7 @@ lemma tag_finite_imageD: assumes rng_fnt: "finite (range tag)" - and same_tag_eqvt: "\ m n. tag m = tag (n::string) \ m \A n" + and same_tag_eqvt: "\m n. 
tag m = tag n \ m \A n" shows "finite (UNIV // \A)" proof (rule_tac refined_partition_finite [of "=tag="]) show "finite (UNIV // =tag=)" by (rule finite_eq_tag_rel[OF rng_fnt]) @@ -161,48 +109,23 @@ qed -subsection {* The proof*} - -text {* - Each case is given in a separate section, as well as the final main lemma. Detailed explainations accompanied by - illustrations are given for non-trivial cases. - - For ever inductive case, there are two tasks, the easier one is to show the range finiteness of - of the tagging function based on the finiteness of component partitions, the - difficult one is to show that strings with the same tag are equivalent with respect to the - composite language. Suppose the composite language be @{text "Lang"}, tagging function be - @{text "tag"}, it amounts to show: - \[ - @{text "tag(x) = tag(y) \ x \Lang y"} - \] - expanding the definition of @{text "\Lang"}, it amounts to show: - \[ - @{text "tag(x) = tag(y) \ (\ z. x@z \ Lang \ y@z \ Lang)"} - \] - Because the assumed tag equlity @{text "tag(x) = tag(y)"} is symmetric, - it is suffcient to show just one direction: - \[ - @{text "\ x y z. \tag(x) = tag(y); x@z \ Lang\ \ y@z \ Lang"} - \] - This is the pattern followed by every inductive case. - *} +subsection {* The proof *} subsubsection {* The base case for @{const "NULL"} *} lemma quot_null_eq: - shows "(UNIV // \{}) = ({UNIV}::lang set)" - unfolding quotient_def Image_def str_eq_rel_def by auto + shows "UNIV // \{} = {UNIV}" +unfolding quotient_def Image_def str_eq_rel_def by auto lemma quot_null_finiteI [intro]: - shows "finite ((UNIV // \{})::lang set)" + shows "finite (UNIV // \{})" unfolding quot_null_eq by simp subsubsection {* The base case for @{const "EMPTY"} *} - lemma quot_empty_subset: - "UNIV // (\{[]}) \ {{[]}, UNIV - {[]}}" + shows "UNIV // \{[]} \ {{[]}, UNIV - {[]}}" proof fix x assume "x \ UNIV // \{[]}" @@ -221,7 +144,7 @@ qed lemma quot_empty_finiteI [intro]: - shows "finite (UNIV // (\{[]}))" + shows "finite (UNIV // \{[]})" by (rule finite_subset[OF quot_empty_subset]) (simp) @@ -237,23 +160,24 @@ show "x \ {{[]},{[c]}, UNIV - {[], [c]}}" proof - { assume "y = []" hence "x = {[]}" using h - by (auto simp:str_eq_rel_def) - } moreover { - assume "y = [c]" hence "x = {[c]}" using h - by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def) - } moreover { - assume "y \ []" and "y \ [c]" + by (auto simp:str_eq_rel_def) } + moreover + { assume "y = [c]" hence "x = {[c]}" using h + by (auto dest!:spec[where x = "[]"] simp:str_eq_rel_def) } + moreover + { assume "y \ []" and "y \ [c]" hence "\ z. (y @ z) \ [c]" by (case_tac y, auto) moreover have "\ p. (p \ [] \ p \ [c]) = (\ q. p @ q \ [c])" by (case_tac p, auto) ultimately have "x = UNIV - {[],[c]}" using h by (auto simp add:str_eq_rel_def) - } ultimately show ?thesis by blast + } + ultimately show ?thesis by blast qed qed lemma quot_char_finiteI [intro]: - shows "finite (UNIV // (\{[c]}))" + shows "finite (UNIV // \{[c]})" by (rule finite_subset[OF quot_char_subset]) (simp) @@ -265,7 +189,6 @@ "tag_str_ALT A B \ (\x. (\A `` {x}, \B `` {x}))" lemma quot_union_finiteI [intro]: - fixes L1 L2::"lang" assumes finite1: "finite (UNIV // \A)" and finite2: "finite (UNIV // \B)" shows "finite (UNIV // \(A \ B))" @@ -283,140 +206,79 @@ by auto qed + subsubsection {* The inductive case for @{text "SEQ"}*} -text {* - For case @{const "SEQ"}, the language @{text "L"} is @{text "L\<^isub>1 ;; L\<^isub>2"}. 
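For the union case, tag_str_ALT tags a string with the pair of its two component classes; both quotients are finite by the induction hypotheses, so the tag has a finite range, and tag_finite_imageD yields finiteness of the partition for A ∪ B. A small Python sketch of this counting argument for finite languages, where the class of a string can be represented by its residual set (all names are ad-hoc, not from the formalization):

def residual(x, A):
    """{z | x + z in A}; for a finite A this set represents the ~A class of x."""
    return frozenset(w[len(x):] for w in A if w.startswith(x))

def tag_alt(x, A, B):
    """The ALT tag: the pair of classes of x with respect to A and B."""
    return (residual(x, A), residual(x, B))

if __name__ == "__main__":
    A, B = {"ab"}, {"b", "bb"}
    cands = ["", "a", "ab", "b", "bb", "c"]
    tags          = {tag_alt(x, A, B)   for x in cands}
    union_classes = {residual(x, A | B) for x in cands}
    # equal tags force equivalence w.r.t. A | B, so there can be no more
    # union classes than tags (and hence only finitely many):
    assert len(union_classes) <= len(tags)
    print(len(tags), len(union_classes))    # here: 6 5

The SEQ and STAR cases below follow the same pattern, only with more involved tags that also record how a string can be split.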
- Given @{text "x @ z \ L\<^isub>1 ;; L\<^isub>2"}, according to the defintion of @{text " L\<^isub>1 ;; L\<^isub>2"}, - string @{text "x @ z"} can be splitted with the prefix in @{text "L\<^isub>1"} and suffix in @{text "L\<^isub>2"}. - The split point can either be in @{text "x"} (as shown in Fig. \ref{seq_first_split}), - or in @{text "z"} (as shown in Fig. \ref{seq_snd_split}). Whichever way it goes, the structure - on @{text "x @ z"} cn be transfered faithfully onto @{text "y @ z"} - (as shown in Fig. \ref{seq_trans_first_split} and \ref{seq_trans_snd_split}) with the the help of the assumed - tag equality. The following tag function @{text "tag_str_SEQ"} is such designed to facilitate - such transfers and lemma @{text "tag_str_SEQ_injI"} formalizes the informal argument above. The details - of structure transfer will be given their. -\input{fig_seq} - - *} - definition tag_str_SEQ :: "lang \ lang \ string \ (lang \ lang set)" where "tag_str_SEQ L1 L2 \ - (\x. (\L1 `` {x}, {(\L2 `` {x - x'}) | x'. x' \ x \ x' \ L1}))" - -text {* The following is a techical lemma which helps to split the @{text "x @ z \ L\<^isub>1 ;; L\<^isub>2"} mentioned above.*} + (\x. (\L1 `` {x}, {(\L2 `` {x - xa}) | xa. xa \ x \ xa \ L1}))" -lemma append_seq_elim: - assumes "x @ y \ L\<^isub>1 ;; L\<^isub>2" - shows "(\ xa \ x. xa \ L\<^isub>1 \ (x - xa) @ y \ L\<^isub>2) \ - (\ ya \ y. (x @ ya) \ L\<^isub>1 \ (y - ya) \ L\<^isub>2)" -proof- - from assms obtain s\<^isub>1 s\<^isub>2 - where eq_xys: "x @ y = s\<^isub>1 @ s\<^isub>2" - and in_seq: "s\<^isub>1 \ L\<^isub>1 \ s\<^isub>2 \ L\<^isub>2" - by (auto simp:Seq_def) - from app_eq_dest [OF eq_xys] - have - "(x \ s\<^isub>1 \ (s\<^isub>1 - x) @ s\<^isub>2 = y) \ (s\<^isub>1 \ x \ (x - s\<^isub>1) @ y = s\<^isub>2)" - (is "?Split1 \ ?Split2") . - moreover have "?Split1 \ \ ya \ y. (x @ ya) \ L\<^isub>1 \ (y - ya) \ L\<^isub>2" - using in_seq by (rule_tac x = "s\<^isub>1 - x" in exI, auto elim:prefixE) - moreover have "?Split2 \ \ xa \ x. xa \ L\<^isub>1 \ (x - xa) @ y \ L\<^isub>2" - using in_seq by (rule_tac x = s\<^isub>1 in exI, auto) - ultimately show ?thesis by blast -qed - +lemma Seq_in_cases: + assumes "x @ z \ A ;; B" + shows "(\ x' \ x. x' \ A \ (x - x') @ z \ B) \ + (\ z' \ z. (x @ z') \ A \ (z - z') \ B)" +using assms +unfolding Seq_def prefix_def +by (auto simp add: append_eq_append_conv2) lemma tag_str_SEQ_injI: - fixes v w - assumes eq_tag: "tag_str_SEQ L\<^isub>1 L\<^isub>2 v = tag_str_SEQ L\<^isub>1 L\<^isub>2 w" - shows "v \(L\<^isub>1 ;; L\<^isub>2) w" -proof- - -- {* As explained before, a pattern for just one direction needs to be dealt with:*} + assumes eq_tag: "tag_str_SEQ A B x = tag_str_SEQ A B y" + shows "x \(A ;; B) y" +proof - { fix x y z - assume xz_in_seq: "x @ z \ L\<^isub>1 ;; L\<^isub>2" - and tag_xy: "tag_str_SEQ L\<^isub>1 L\<^isub>2 x = tag_str_SEQ L\<^isub>1 L\<^isub>2 y" - have"y @ z \ L\<^isub>1 ;; L\<^isub>2" - proof- - -- {* There are two ways to split @{text "x@z"}: *} - from append_seq_elim [OF xz_in_seq] - have "(\ xa \ x. xa \ L\<^isub>1 \ (x - xa) @ z \ L\<^isub>2) \ - (\ za \ z. (x @ za) \ L\<^isub>1 \ (z - za) \ L\<^isub>2)" . 
- -- {* It can be shown that @{text "?thesis"} holds in either case: *} - moreover { - -- {* The case for the first split:*} - fix xa - assume h1: "xa \ x" and h2: "xa \ L\<^isub>1" and h3: "(x - xa) @ z \ L\<^isub>2" - -- {* The following subgoal implements the structure transfer:*} - obtain ya - where "ya \ y" - and "ya \ L\<^isub>1" - and "(y - ya) @ z \ L\<^isub>2" + assume xz_in_seq: "x @ z \ A ;; B" + and tag_xy: "tag_str_SEQ A B x = tag_str_SEQ A B y" + have"y @ z \ A ;; B" + proof - + { (* first case with x' in A and (x - x') @ z in B *) + fix x' + assume h1: "x' \ x" and h2: "x' \ A" and h3: "(x - x') @ z \ B" + obtain y' + where "y' \ y" + and "y' \ A" + and "(y - y') @ z \ B" proof - - -- {* - \begin{minipage}{0.8\textwidth} - By expanding the definition of - @{thm [display] "tag_xy"} - and extracting the second compoent, we get: - \end{minipage} - *} - have "{\L\<^isub>2 `` {x - xa} |xa. xa \ x \ xa \ L\<^isub>1} = - {\L\<^isub>2 `` {y - ya} |ya. ya \ y \ ya \ L\<^isub>1}" (is "?Left = ?Right") + have "{\B `` {x - x'} |x'. x' \ x \ x' \ A} = + {\B `` {y - y'} |y'. y' \ y \ y' \ A}" (is "?Left = ?Right") using tag_xy unfolding tag_str_SEQ_def by simp - -- {* Since @{thm "h1"} and @{thm "h2"} hold, it is not difficult to show: *} - moreover have "\L\<^isub>2 `` {x - xa} \ ?Left" using h1 h2 by auto - -- {* - \begin{minipage}{0.7\textwidth} - Through tag equality, equivalent class @{term "\L\<^isub>2 `` {x - xa}"} also - belongs to the @{text "?Right"}: - \end{minipage} - *} - ultimately have "\L\<^isub>2 `` {x - xa} \ ?Right" by simp - -- {* From this, the counterpart of @{text "xa"} in @{text "y"} is obtained:*} - then obtain ya - where eq_xya: "\L\<^isub>2 `` {x - xa} = \L\<^isub>2 `` {y - ya}" - and pref_ya: "ya \ y" and ya_in: "ya \ L\<^isub>1" + moreover + have "\B `` {x - x'} \ ?Left" using h1 h2 by auto + ultimately + have "\B `` {x - x'} \ ?Right" by simp + then obtain y' + where eq_xy': "\B `` {x - x'} = \B `` {y - y'}" + and pref_y': "y' \ y" and y'_in: "y' \ A" by simp blast - -- {* It can be proved that @{text "ya"} has the desired property:*} - have "(y - ya)@z \ L\<^isub>2" - proof - - from eq_xya have "(x - xa) \L\<^isub>2 (y - ya)" - unfolding Image_def str_eq_rel_def str_eq_def by auto - with h3 show ?thesis unfolding str_eq_rel_def str_eq_def by simp - qed - -- {* Now, @{text "ya"} has all properties to be a qualified candidate:*} - with pref_ya ya_in + + have "(x - x') \B (y - y')" using eq_xy' + unfolding Image_def str_eq_rel_def str_eq_def by auto + with h3 have "(y - y') @ z \ B" + unfolding str_eq_rel_def str_eq_def by simp + with pref_y' y'_in show ?thesis using that by blast qed - -- {* From the properties of @{text "ya"}, @{text "y @ z \ L\<^isub>1 ;; L\<^isub>2"} is derived easily.*} - hence "y @ z \ L\<^isub>1 ;; L\<^isub>2" by (erule_tac prefixE, auto simp:Seq_def) - } moreover { - -- {* The other case is even more simpler: *} - fix za - assume h1: "za \ z" and h2: "(x @ za) \ L\<^isub>1" and h3: "z - za \ L\<^isub>2" - have "y @ za \ L\<^isub>1" - proof- - have "\L\<^isub>1 `` {x} = \L\<^isub>1 `` {y}" - using tag_xy unfolding tag_str_SEQ_def by simp - with h2 show ?thesis + then have "y @ z \ A ;; B" by (erule_tac prefixE) (auto simp: Seq_def) + } + moreover + { (* second case with x @ z' in A and z - z' in B *) + fix z' + assume h1: "z' \ z" and h2: "(x @ z') \ A" and h3: "z - z' \ B" + have "\A `` {x} = \A `` {y}" + using tag_xy unfolding tag_str_SEQ_def by simp + with h2 have "y @ z' \ A" unfolding Image_def str_eq_rel_def str_eq_def by auto - 
qed - with h1 h3 have "y @ z \ L\<^isub>1 ;; L\<^isub>2" - by (drule_tac A = L\<^isub>1 in seq_intro, auto elim:prefixE) + with h1 h3 have "y @ z \ A ;; B" + unfolding prefix_def Seq_def + by (auto) (metis append_assoc) } - ultimately show ?thesis by blast + ultimately show "y @ z \ A ;; B" + using Seq_in_cases [OF xz_in_seq] by blast qed - } - -- {* - \begin{minipage}{0.8\textwidth} - @{text "?thesis"} is proved by exploiting the symmetry of - @{thm [source] "eq_tag"}: - \end{minipage} - *} + } from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]] - show ?thesis unfolding str_eq_def str_eq_rel_def by blast + show "x \(A ;; B) y" unfolding str_eq_def str_eq_rel_def by blast qed lemma quot_seq_finiteI [intro]: @@ -437,53 +299,13 @@ by auto qed + subsubsection {* The inductive case for @{const "STAR"} *} -text {* - This turned out to be the trickiest case. The essential goal is - to proved @{text "y @ z \ L\<^isub>1*"} under the assumptions that @{text "x @ z \ L\<^isub>1*"} - and that @{text "x"} and @{text "y"} have the same tag. The reasoning goes as the following: - \begin{enumerate} - \item Since @{text "x @ z \ L\<^isub>1*"} holds, a prefix @{text "xa"} of @{text "x"} can be found - such that @{text "xa \ L\<^isub>1*"} and @{text "(x - xa)@z \ L\<^isub>1*"}, as shown in Fig. \ref{first_split}. - Such a prefix always exists, @{text "xa = []"}, for example, is one. - \item There could be many but fintie many of such @{text "xa"}, from which we can find the longest - and name it @{text "xa_max"}, as shown in Fig. \ref{max_split}. - \item The next step is to split @{text "z"} into @{text "za"} and @{text "zb"} such that - @{text "(x - xa_max) @ za \ L\<^isub>1"} and @{text "zb \ L\<^isub>1*"} as shown in Fig. \ref{last_split}. - Such a split always exists because: - \begin{enumerate} - \item Because @{text "(x - x_max) @ z \ L\<^isub>1*"}, it can always be splitted into prefix @{text "a"} - and suffix @{text "b"}, such that @{text "a \ L\<^isub>1"} and @{text "b \ L\<^isub>1*"}, - as shown in Fig. \ref{ab_split}. - \item But the prefix @{text "a"} CANNOT be shorter than @{text "x - xa_max"} - (as shown in Fig. \ref{ab_split_wrong}), becasue otherwise, - @{text "ma_max@a"} would be in the same kind as @{text "xa_max"} but with - a larger size, conflicting with the fact that @{text "xa_max"} is the longest. - \end{enumerate} - \item \label{tansfer_step} - By the assumption that @{text "x"} and @{text "y"} have the same tag, the structure on @{text "x @ z"} - can be transferred to @{text "y @ z"} as shown in Fig. \ref{trans_split}. The detailed steps are: - \begin{enumerate} - \item A @{text "y"}-prefix @{text "ya"} corresponding to @{text "xa"} can be found, - which satisfies conditions: @{text "ya \ L\<^isub>1*"} and @{text "(y - ya)@za \ L\<^isub>1"}. - \item Since we already know @{text "zb \ L\<^isub>1*"}, we get @{text "(y - ya)@za@zb \ L\<^isub>1*"}, - and this is just @{text "(y - ya)@z \ L\<^isub>1*"}. - \item With fact @{text "ya \ L\<^isub>1*"}, we finally get @{text "y@z \ L\<^isub>1*"}. - \end{enumerate} - \end{enumerate} - - The formal proof of lemma @{text "tag_str_STAR_injI"} faithfully follows this informal argument - while the tagging function @{text "tag_str_STAR"} is defined to make the transfer in step - \ref{ansfer_step} feasible. - - \input{fig_star} -*} - definition tag_str_STAR :: "lang \ string \ lang set" where - "tag_str_STAR L1 \ (\x. {\L1 `` {x - x'} | x'. x' < x \ x' \ L1\})" + "tag_str_STAR L1 \ (\x. {\L1 `` {x - xa} | xa. 
xa < x \ xa \ L1\})" text {* A technical lemma. *} lemma finite_set_has_max: "\finite A; A \ {}\ \ @@ -513,7 +335,8 @@ qed -text {* The following is a technical lemma.which helps to show the range finiteness of tag function. *} +text {* The following is a technical lemma, which helps to show the range finiteness of tag function. *} + lemma finite_strict_prefix_set: "finite {xa. xa < (x::string)}" apply (induct x rule:rev_induct, simp) apply (subgoal_tac "{xa. xa < xs @ [x]} = {xa. xa < xs} \ {xs}") @@ -521,46 +344,26 @@ lemma tag_str_STAR_injI: - fixes v w assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w" - shows "(v::string) \(L\<^isub>1\) w" + shows "v \(L\<^isub>1\) w" proof- - -- {* As explained before, a pattern for just one direction needs to be dealt with:*} { fix x y z assume xz_in_star: "x @ z \ L\<^isub>1\" and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y" have "y @ z \ L\<^isub>1\" proof(cases "x = []") - -- {* - The degenerated case when @{text "x"} is a null string is easy to prove: - *} case True with tag_xy have "y = []" by (auto simp add: tag_str_STAR_def strict_prefix_def) thus ?thesis using xz_in_star True by simp next - -- {* The nontrival case: - *} case False - -- {* - \begin{minipage}{0.8\textwidth} - Since @{text "x @ z \ L\<^isub>1\"}, @{text "x"} can always be splitted - by a prefix @{text "xa"} together with its suffix @{text "x - xa"}, such - that both @{text "xa"} and @{text "(x - xa) @ z"} are in @{text "L\<^isub>1\"}, - and there could be many such splittings.Therefore, the following set @{text "?S"} - is nonempty, and finite as well: - \end{minipage} - *} let ?S = "{xa. xa < x \ xa \ L\<^isub>1\ \ (x - xa) @ z \ L\<^isub>1\}" have "finite ?S" by (rule_tac B = "{xa. xa < x}" in finite_subset, auto simp:finite_strict_prefix_set) moreover have "?S \ {}" using False xz_in_star by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def) - -- {* \begin{minipage}{0.7\textwidth} - Since @{text "?S"} is finite, we can always single out the longest and name it @{text "xa_max"}: - \end{minipage} - *} ultimately have "\ xa_max \ ?S. \ xa \ ?S. length xa \ length xa_max" using finite_set_has_max by blast then obtain xa_max @@ -570,12 +373,6 @@ and h4:"\ xa < x. 
xa \ L\<^isub>1\ \ (x - xa) @ z \ L\<^isub>1\ \ length xa \ length xa_max" by blast - -- {* - \begin{minipage}{0.8\textwidth} - By the equality of tags, the counterpart of @{text "xa_max"} among - @{text "y"}-prefixes, named @{text "ya"}, can be found: - \end{minipage} - *} obtain ya where h5: "ya < y" and h6: "ya \ L\<^isub>1\" and eq_xya: "(x - xa_max) \L\<^isub>1 (y - ya)" @@ -588,47 +385,25 @@ thus ?thesis using that apply (simp add:Image_def str_eq_rel_def str_eq_def) by blast qed - -- {* - \begin{minipage}{0.8\textwidth} - The @{text "?thesis"}, @{prop "y @ z \ L\<^isub>1\"}, is a simple consequence - of the following proposition: - \end{minipage} - *} have "(y - ya) @ z \ L\<^isub>1\" proof- - -- {* The idea is to split the suffix @{text "z"} into @{text "za"} and @{text "zb"}, - such that: *} obtain za zb where eq_zab: "z = za @ zb" and l_za: "(y - ya)@za \ L\<^isub>1" and ls_zb: "zb \ L\<^isub>1\" proof - - -- {* - \begin{minipage}{0.8\textwidth} - Since @{thm "h1"}, @{text "x"} can be splitted into - @{text "a"} and @{text "b"} such that: - \end{minipage} - *} from h1 have "(x - xa_max) @ z \ []" by (auto simp:strict_prefix_def elim:prefixE) from star_decom [OF h3 this] obtain a b where a_in: "a \ L\<^isub>1" and a_neq: "a \ []" and b_in: "b \ L\<^isub>1\" and ab_max: "(x - xa_max) @ z = a @ b" by blast - -- {* Now the candiates for @{text "za"} and @{text "zb"} are found:*} let ?za = "a - (x - xa_max)" and ?zb = "b" have pfx: "(x - xa_max) \ a" (is "?P1") and eq_z: "z = ?za @ ?zb" (is "?P2") proof - - -- {* - \begin{minipage}{0.8\textwidth} - Since @{text "(x - xa_max) @ z = a @ b"}, string @{text "(x - xa_max) @ z"} - can be splitted in two ways: - \end{minipage} - *} have "((x - xa_max) \ a \ (a - (x - xa_max)) @ b = z) \ (a < (x - xa_max) \ ((x - xa_max) - a) @ z = b)" - using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def) + using append_eq_dest[OF ab_max] by (auto simp:strict_prefix_def) moreover { - -- {* However, the undsired way can be refuted by absurdity: *} assume np: "a < (x - xa_max)" and b_eqs: "((x - xa_max) - a) @ z = b" have "False" @@ -639,24 +414,19 @@ moreover have "?xa_max' \ L\<^isub>1\" using a_in h2 by (simp add:star_intro3) moreover have "(x - ?xa_max') @ z \ L\<^isub>1\" - using b_eqs b_in np h1 by (simp add:diff_diff_appd) + using b_eqs b_in np h1 by (simp add:diff_diff_append) moreover have "\ (length ?xa_max' \ length xa_max)" using a_neq by simp ultimately show ?thesis using h4 by blast qed } - -- {* Now it can be shown that the splitting goes the way we desired. *} ultimately show ?P1 and ?P2 by auto qed hence "(x - xa_max)@?za \ L\<^isub>1" using a_in by (auto elim:prefixE) - -- {* Now candidates @{text "?za"} and @{text "?zb"} have all the requred properteis. *} with eq_xya have "(y - ya) @ ?za \ L\<^isub>1" by (auto simp:str_eq_def str_eq_rel_def) with eq_z and b_in show ?thesis using that by blast qed - -- {* - @{text "?thesis"} can easily be shown using properties of @{text "za"} and @{text "zb"}: - *} have "((y - ya) @ za) @ zb \ L\<^isub>1\" using l_za ls_zb by blast with eq_zab show ?thesis by simp qed @@ -664,123 +434,11 @@ by (drule_tac star_intro1) (auto simp:strict_prefix_def elim:prefixE) qed } - -- {* By instantiating the reasoning pattern just derived for both directions:*} from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]] - -- {* The thesis is proved as a trival consequence: *} - show ?thesis unfolding str_eq_def str_eq_rel_def by blast -qed - -lemma -- {* The oringal version with less explicit details. 
*} - fixes v w - assumes eq_tag: "tag_str_STAR L\<^isub>1 v = tag_str_STAR L\<^isub>1 w" - shows "(v::string) \(L\<^isub>1\) w" -proof- - -- {* - \begin{minipage}{0.8\textwidth} - According to the definition of @{text "\Lang"}, - proving @{text "v \(L\<^isub>1\) w"} amounts to - showing: for any string @{text "u"}, - if @{text "v @ u \ (L\<^isub>1\)"} then @{text "w @ u \ (L\<^isub>1\)"} and vice versa. - The reasoning pattern for both directions are the same, as derived - in the following: - \end{minipage} - *} - { fix x y z - assume xz_in_star: "x @ z \ L\<^isub>1\" - and tag_xy: "tag_str_STAR L\<^isub>1 x = tag_str_STAR L\<^isub>1 y" - have "y @ z \ L\<^isub>1\" - proof(cases "x = []") - -- {* - The degenerated case when @{text "x"} is a null string is easy to prove: - *} - case True - with tag_xy have "y = []" - by (auto simp:tag_str_STAR_def strict_prefix_def) - thus ?thesis using xz_in_star True by simp - next - -- {* - \begin{minipage}{0.8\textwidth} - The case when @{text "x"} is not null, and - @{text "x @ z"} is in @{text "L\<^isub>1\"}, - \end{minipage} - *} - case False - obtain x_max - where h1: "x_max < x" - and h2: "x_max \ L\<^isub>1\" - and h3: "(x - x_max) @ z \ L\<^isub>1\" - and h4:"\ xa < x. xa \ L\<^isub>1\ \ (x - xa) @ z \ L\<^isub>1\ - \ length xa \ length x_max" - proof- - let ?S = "{xa. xa < x \ xa \ L\<^isub>1\ \ (x - xa) @ z \ L\<^isub>1\}" - have "finite ?S" - by (rule_tac B = "{xa. xa < x}" in finite_subset, - auto simp:finite_strict_prefix_set) - moreover have "?S \ {}" using False xz_in_star - by (simp, rule_tac x = "[]" in exI, auto simp:strict_prefix_def) - ultimately have "\ max \ ?S. \ a \ ?S. length a \ length max" - using finite_set_has_max by blast - thus ?thesis using that by blast - qed - obtain ya - where h5: "ya < y" and h6: "ya \ L\<^isub>1\" and h7: "(x - x_max) \L\<^isub>1 (y - ya)" - proof- - from tag_xy have "{\L\<^isub>1 `` {x - xa} |xa. xa < x \ xa \ L\<^isub>1\} = - {\L\<^isub>1 `` {y - xa} |xa. 
xa < y \ xa \ L\<^isub>1\}" (is "?left = ?right") - by (auto simp:tag_str_STAR_def) - moreover have "\L\<^isub>1 `` {x - x_max} \ ?left" using h1 h2 by auto - ultimately have "\L\<^isub>1 `` {x - x_max} \ ?right" by simp - with that show ?thesis apply - (simp add:Image_def str_eq_rel_def str_eq_def) by blast - qed - have "(y - ya) @ z \ L\<^isub>1\" - proof- - from h3 h1 obtain a b where a_in: "a \ L\<^isub>1" - and a_neq: "a \ []" and b_in: "b \ L\<^isub>1\" - and ab_max: "(x - x_max) @ z = a @ b" - by (drule_tac star_decom, auto simp:strict_prefix_def elim:prefixE) - have "(x - x_max) \ a \ (a - (x - x_max)) @ b = z" - proof - - have "((x - x_max) \ a \ (a - (x - x_max)) @ b = z) \ - (a < (x - x_max) \ ((x - x_max) - a) @ z = b)" - using app_eq_dest[OF ab_max] by (auto simp:strict_prefix_def) - moreover { - assume np: "a < (x - x_max)" and b_eqs: " ((x - x_max) - a) @ z = b" - have "False" - proof - - let ?x_max' = "x_max @ a" - have "?x_max' < x" - using np h1 by (clarsimp simp:strict_prefix_def diff_prefix) - moreover have "?x_max' \ L\<^isub>1\" - using a_in h2 by (simp add:star_intro3) - moreover have "(x - ?x_max') @ z \ L\<^isub>1\" - using b_eqs b_in np h1 by (simp add:diff_diff_appd) - moreover have "\ (length ?x_max' \ length x_max)" - using a_neq by simp - ultimately show ?thesis using h4 by blast - qed - } ultimately show ?thesis by blast - qed - then obtain za where z_decom: "z = za @ b" - and x_za: "(x - x_max) @ za \ L\<^isub>1" - using a_in by (auto elim:prefixE) - from x_za h7 have "(y - ya) @ za \ L\<^isub>1" - by (auto simp:str_eq_def str_eq_rel_def) - with b_in have "((y - ya) @ za) @ b \ L\<^isub>1\" by blast - with z_decom show ?thesis by auto - qed - with h5 h6 show ?thesis - by (drule_tac star_intro1) (auto simp:strict_prefix_def elim:prefixE) - qed - } - -- {* By instantiating the reasoning pattern just derived for both directions:*} - from this [OF _ eq_tag] and this [OF _ eq_tag [THEN sym]] - -- {* The thesis is proved as a trival consequence: *} show ?thesis unfolding str_eq_def str_eq_rel_def by blast qed lemma quot_star_finiteI [intro]: - fixes L1::"lang" assumes finite1: "finite (UNIV // \L1)" shows "finite (UNIV // \(L1\))" proof (rule_tac tag = "tag_str_STAR L1" in tag_finite_imageD) @@ -803,76 +461,9 @@ shows "finite (UNIV // \(L r))" by (induct r) (auto) + theorem Myhill_Nerode: shows "(\r::rexp. A = L r) \ finite (UNIV // \A)" -using Myhill_Nerode1 Myhill_Nerode2 by metis - -(* -section {* Closure properties *} - -abbreviation - reg :: "lang \ bool" -where - "reg A \ \r::rexp. 
A = L r" - - - -lemma closure_union[intro]: - assumes "reg A" "reg B" - shows "reg (A \ B)" -using assms -apply(auto) -apply(rule_tac x="ALT r ra" in exI) -apply(auto) -done - -lemma closure_seq[intro]: - assumes "reg A" "reg B" - shows "reg (A ;; B)" -using assms -apply(auto) -apply(rule_tac x="SEQ r ra" in exI) -apply(auto) -done - -lemma closure_star[intro]: - assumes "reg A" - shows "reg (A\)" -using assms -apply(auto) -apply(rule_tac x="STAR r" in exI) -apply(auto) -done - -lemma closure_complement[intro]: - assumes "reg A" - shows "reg (- A)" -using assms -unfolding Myhill_Nerode -unfolding str_eq_rel_def -by auto - -lemma closure_difference[intro]: - assumes "reg A" "reg B" - shows "reg (A - B)" -proof - - have "A - B = - ((- A) \ B)" by blast - moreover - have "reg (- ((- A) \ B))" - using assms by blast - ultimately show "reg (A - B)" by simp -qed - -lemma closure_intersection[intro]: - assumes "reg A" "reg B" - shows "reg (A \ B)" -proof - - have "A \ B = - ((- A) \ (- B))" by blast - moreover - have "reg (- ((- A) \ (- B)))" - using assms by blast - ultimately show "reg (A \ B)" by simp -qed -*) +using Myhill_Nerode1 Myhill_Nerode2 by auto end diff -r a8a442ba0dbf -r e93760534354 Paper/Paper.thy --- a/Paper/Paper.thy Thu May 12 05:55:05 2011 +0000 +++ b/Paper/Paper.thy Wed May 18 19:54:43 2011 +0000 @@ -12,9 +12,8 @@ abbreviation "EClass x R \ R `` {x}" -abbreviation - "append_rexp2 r_itm r \ append_rexp r r_itm" - +abbreviation + "Append_rexp2 r_itm r == Append_rexp r r_itm" notation (latex output) str_eq_rel ("\\<^bsub>_\<^esub>") and @@ -32,8 +31,9 @@ EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and transition ("_ \<^raw:\ensuremath{\stackrel{\text{>_\<^raw:}}{\Longmapsto}}> _" [100, 100, 100] 100) and Setalt ("\<^raw:\ensuremath{\bigplus}>_" [1000] 999) and - append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and - append_rhs_rexp ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and + Append_rexp2 ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 100) and + Append_rexp_rhs ("_ \<^raw:\ensuremath{\triangleleft}> _" [100, 100] 50) and + uminus ("\<^raw:\ensuremath{\overline{>_\<^raw:}}>" [100] 100) and tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _" [100, 100] 100) and tag_str_ALT ("tag\<^isub>A\<^isub>L\<^isub>T _ _ _" [100, 100, 100] 100) and @@ -494,8 +494,8 @@ \begin{center} @{text "\(Y, r) \"} % - @{thm (rhs) L_rhs_item.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} - @{thm L_rhs_item.simps(1)[where r="r", THEN eq_reflection]} + @{thm (rhs) L_rhs_trm.simps(2)[where X="Y" and r="r", THEN eq_reflection]}\hspace{10mm} + @{thm L_rhs_trm.simps(1)[where r="r", THEN eq_reflection]} \end{center} \noindent @@ -561,13 +561,13 @@ we define the \emph{append-operation} taking a term and a regular expression as argument \begin{center} - @{thm append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\hspace{10mm} - @{thm append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} + @{thm Append_rexp.simps(2)[where X="Y" and r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]}\hspace{10mm} + @{thm Append_rexp.simps(1)[where r="r\<^isub>1" and rexp="r\<^isub>2", THEN eq_reflection]} \end{center} \noindent We lift this operation to entire right-hand sides of equations, written as - @{thm (lhs) append_rhs_rexp_def[where rexp="r"]}. With this we can define + @{thm (lhs) Append_rexp_rhs_def[where rexp="r"]}. 
With this we can define the \emph{arden-operation} for an equation of the form @{text "X = rhs"} as: % \begin{equation}\label{arden_def} @@ -712,10 +712,10 @@ @{term "finite ES"} & @{text "(finiteness)"}\\ & @{text "\"} & @{thm (rhs) finite_rhs_def} & @{text "(finiteness rhs)"}\\ & @{text "\"} & @{text "\(X, rhs)\ES. X = \\ ` rhs"} & @{text "(soundness)"}\\ - & @{text "\"} & @{thm (rhs) distinct_equas_def}\\ + & @{text "\"} & @{thm (rhs) distinctness_def}\\ & & & @{text "(distinctness)"}\\ & @{text "\"} & @{thm (rhs) ardenable_all_def} & @{text "(ardenable)"}\\ - & @{text "\"} & @{thm (rhs) valid_eqs_def} & @{text "(validity)"}\\ + & @{text "\"} & @{thm (rhs) validity_def} & @{text "(validity)"}\\ \end{tabular} \end{center} diff -r a8a442ba0dbf -r e93760534354 Prefix_subtract.thy --- a/Prefix_subtract.thy Thu May 12 05:55:05 2011 +0000 +++ b/Prefix_subtract.thy Wed May 18 19:54:43 2011 +0000 @@ -1,59 +1,60 @@ theory Prefix_subtract - imports Main - "~~/src/HOL/Library/List_Prefix" + imports Main "~~/src/HOL/Library/List_Prefix" begin + section {* A small theory of prefix subtraction *} text {* - The notion of @{text "prefix_subtract"} is need to make proofs more readable. - *} + The notion of @{text "prefix_subtract"} makes + the second direction of the Myhill-Nerode theorem + more readable. +*} + +instantiation list :: (type) minus +begin -fun prefix_subtract :: "'a list \ 'a list \ 'a list" (infix "-" 51) +fun minus_list :: "'a list \ 'a list \ 'a list" where - "prefix_subtract [] xs = []" -| "prefix_subtract (x#xs) [] = x#xs" -| "prefix_subtract (x#xs) (y#ys) = (if x = y then prefix_subtract xs ys else (x#xs))" + "minus_list [] xs = []" +| "minus_list (x#xs) [] = x#xs" +| "minus_list (x#xs) (y#ys) = (if x = y then minus_list xs ys else (x#xs))" + +instance by default + +end + +lemma [simp]: "x - [] = x" +by (induct x) (auto) lemma [simp]: "(x @ y) - x = y" -apply (induct x) -by (case_tac y, simp+) +by (induct x) (auto) lemma [simp]: "x - x = []" -by (induct x, auto) - -lemma [simp]: "x = xa @ y \ x - xa = y " -by (induct x, auto) - -lemma [simp]: "x - [] = x" -by (induct x, auto) +by (induct x) (auto) -lemma [simp]: "(x - y = []) \ (x \ y)" -proof- - have "\xa. x = xa @ (x - y) \ xa \ y" - apply (rule prefix_subtract.induct[of _ x y], simp+) - by (clarsimp, rule_tac x = "y # xa" in exI, simp+) - thus "(x - y = []) \ (x \ y)" by simp -qed +lemma [simp]: "x = z @ y \ x - z = y " +by (induct x) (auto) lemma diff_prefix: "\c \ a - b; b \ a\ \ b @ c \ a" -by (auto elim:prefixE) +by (auto elim: prefixE) -lemma diff_diff_appd: +lemma diff_diff_append: "\c < a - b; b < a\ \ (a - b) - c = a - (b @ c)" apply (clarsimp simp:strict_prefix_def) by (drule diff_prefix, auto elim:prefixE) -lemma app_eq_cases[rule_format]: - "\ x . x @ y = m @ n \ (x \ m \ m \ x)" -apply (induct y, simp) -apply (clarify, drule_tac x = "x @ [a]" in spec) -by (clarsimp, auto simp:prefix_def) +lemma append_eq_cases: + assumes a: "x @ y = m @ n" + shows "x \ m \ m \ x" +unfolding prefix_def using a +by (auto simp add: append_eq_append_conv2) -lemma app_eq_dest: - "x @ y = m @ n \ - (x \ m \ (m - x) @ n = y) \ (m \ x \ (x - m) @ y = n)" -by (frule_tac app_eq_cases, auto elim:prefixE) +lemma append_eq_dest: + assumes a: "x @ y = m @ n" + shows "(x \ m \ (m - x) @ n = y) \ (m \ x \ (x - m) @ y = n)" +using append_eq_cases[OF a] a +by (auto elim: prefixE) end
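
The prefix-subtraction operation and the tagging construction for the @{text "SEQ"} case can also be read operationally. The following sketch is only an informal illustration and is not part of the theory files above: the names prefix_subtract, mn_class and tag_seq, the example languages A and B, and the probe set Z are ad-hoc choices for the demo. It approximates the Myhill-Nerode classes on a finite, prefix-closed probe set and checks that strings carrying the same SEQ-tag cannot be distinguished by appending a probe.

# Illustrative sketch (not part of the theory files above): a finite Python
# approximation of two notions used in the development -- prefix subtraction
# and the SEQ tagging function built from Myhill-Nerode classes.  The names
# prefix_subtract, mn_class, tag_seq, the example languages A and B, and the
# probe set Z are ad-hoc choices for this demo, not definitions from the patch.

def prefix_subtract(x: str, y: str) -> str:
    # Mirrors the '-' operation of Prefix_subtract.thy: strip characters of y
    # from the front of x while they agree; if y is a prefix of x this yields
    # the remaining suffix, e.g. prefix_subtract('ab' + z, 'ab') == z.
    if not x:
        return ""
    if not y:
        return x
    return prefix_subtract(x[1:], y[1:]) if x[0] == y[0] else x

# A prefix-closed probe set: x ~A y is approximated by "x @ z and y @ z agree
# on membership in A for every probe z in Z".
Z = ["", "a", "b", "aa", "ab", "ba", "bb"]

def mn_class(A, x):
    # Approximate Myhill-Nerode class of x w.r.t. A, represented by the probes
    # that complete x into A (a frozenset, so classes can be collected in sets).
    return frozenset(z for z in Z if x + z in A)

def prefixes(x):
    return [x[:i] for i in range(len(x) + 1)]

def tag_seq(A, B, x):
    # Analogue of tag_str_SEQ: the class of x w.r.t. A, paired with the set of
    # classes w.r.t. B of the suffixes x - x', for every prefix x' of x in A.
    return (mn_class(A, x),
            frozenset(mn_class(B, prefix_subtract(x, p))
                      for p in prefixes(x) if p in A))

if __name__ == "__main__":
    A = {"a", "ab"}                      # small example languages over {a, b}
    B = {"b", "bb"}
    SEQ = {u + v for u in A for v in B}  # the concatenation A ;; B

    assert prefix_subtract("abba", "ab") == "ba"

    # Strings with equal tags cannot be told apart by appending a probe:
    # this is the finite shadow of lemma tag_str_SEQ_injI.
    samples = ["a", "ab", "abb", "b", "ba", "bb"]
    for x in samples:
        for y in samples:
            if tag_seq(A, B, x) == tag_seq(A, B, y):
                assert all((x + z in SEQ) == (y + z in SEQ) for z in Z)
    print("equal tags imply indistinguishability on all probes in Z")

With exact Myhill-Nerode classes in place of the probe approximation, this check corresponds to what tag_str_SEQ_injI establishes; keeping Z prefix-closed is what lets the sampled check also cover splits whose second component falls inside the appended string.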