# HG changeset patch # User Christian Urban # Date 1464086181 -3600 # Node ID 6bb15b8e63017e8459cba4f5d3313d5ce96f7231 # Parent 2a07222e2a8b6e71c8393cd21e198ba3a6ae2d0c added files that were submitted to afp diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/Derivatives.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/Derivatives.thy Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,370 @@ +section "Derivatives of regular expressions" + +(* Author: Christian Urban *) + +theory Derivatives +imports Regular_Exp +begin + +text{* This theory is based on work by Brozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}. *} + +subsection {* Brzozowski's derivatives of regular expressions *} + +primrec + deriv :: "'a \ 'a rexp \ 'a rexp" +where + "deriv c (Zero) = Zero" +| "deriv c (One) = Zero" +| "deriv c (Atom c') = (if c = c' then One else Zero)" +| "deriv c (Plus r1 r2) = Plus (deriv c r1) (deriv c r2)" +| "deriv c (Times r1 r2) = + (if nullable r1 then Plus (Times (deriv c r1) r2) (deriv c r2) else Times (deriv c r1) r2)" +| "deriv c (Star r) = Times (deriv c r) (Star r)" + +primrec + derivs :: "'a list \ 'a rexp \ 'a rexp" +where + "derivs [] r = r" +| "derivs (c # s) r = derivs s (deriv c r)" + + +lemma atoms_deriv_subset: "atoms (deriv x r) \ atoms r" +by (induction r) (auto) + +lemma atoms_derivs_subset: "atoms (derivs w r) \ atoms r" +by (induction w arbitrary: r) (auto dest: atoms_deriv_subset[THEN subsetD]) + +lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)" +by (induct r) (simp_all add: nullable_iff) + +lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)" +by (induct s arbitrary: r) (simp_all add: lang_deriv) + +text {* A regular expression matcher: *} + +definition matcher :: "'a rexp \ 'a list \ bool" where +"matcher r s = nullable (derivs s r)" + +lemma matcher_correctness: "matcher r s \ s \ lang r" +by (induct s arbitrary: r) + (simp_all add: nullable_iff lang_deriv matcher_def Deriv_def) + + +subsection {* Antimirov's partial derivatives *} + +abbreviation + "Timess rs r \ (\r' \ rs. {Times r' r})" + +primrec + pderiv :: "'a \ 'a rexp \ 'a rexp set" +where + "pderiv c Zero = {}" +| "pderiv c One = {}" +| "pderiv c (Atom c') = (if c = c' then {One} else {})" +| "pderiv c (Plus r1 r2) = (pderiv c r1) \ (pderiv c r2)" +| "pderiv c (Times r1 r2) = + (if nullable r1 then Timess (pderiv c r1) r2 \ pderiv c r2 else Timess (pderiv c r1) r2)" +| "pderiv c (Star r) = Timess (pderiv c r) (Star r)" + +primrec + pderivs :: "'a list \ 'a rexp \ ('a rexp) set" +where + "pderivs [] r = {r}" +| "pderivs (c # s) r = \ (pderivs s ` pderiv c r)" + +abbreviation + pderiv_set :: "'a \ 'a rexp set \ 'a rexp set" +where + "pderiv_set c rs \ \ (pderiv c ` rs)" + +abbreviation + pderivs_set :: "'a list \ 'a rexp set \ 'a rexp set" +where + "pderivs_set s rs \ \ (pderivs s ` rs)" + +lemma pderivs_append: + "pderivs (s1 @ s2) r = \ (pderivs s2 ` pderivs s1 r)" +by (induct s1 arbitrary: r) (simp_all) + +lemma pderivs_snoc: + shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)" +by (simp add: pderivs_append) + +lemma pderivs_simps [simp]: + shows "pderivs s Zero = (if s = [] then {Zero} else {})" + and "pderivs s One = (if s = [] then {One} else {})" + and "pderivs s (Plus r1 r2) = (if s = [] then {Plus r1 r2} else (pderivs s r1) \ (pderivs s r2))" +by (induct s) (simp_all) + +lemma pderivs_Atom: + shows "pderivs s (Atom c) \ {Atom c, One}" +by (induct s) (simp_all) + +subsection {* Relating left-quotients and partial derivatives *} + +lemma Deriv_pderiv: + shows "Deriv c (lang r) = \ (lang ` pderiv c r)" +by (induct r) (auto simp add: nullable_iff conc_UNION_distrib) + +lemma Derivs_pderivs: + shows "Derivs s (lang r) = \ (lang ` pderivs s r)" +proof (induct s arbitrary: r) + case (Cons c s) + have ih: "\r. Derivs s (lang r) = \ (lang ` pderivs s r)" by fact + have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp + also have "\ = Derivs s (\ (lang ` pderiv c r))" by (simp add: Deriv_pderiv) + also have "\ = Derivss s (lang ` (pderiv c r))" + by (auto simp add: Derivs_def) + also have "\ = \ (lang ` (pderivs_set s (pderiv c r)))" + using ih by auto + also have "\ = \ (lang ` (pderivs (c # s) r))" by simp + finally show "Derivs (c # s) (lang r) = \ (lang ` pderivs (c # s) r)" . +qed (simp add: Derivs_def) + +subsection {* Relating derivatives and partial derivatives *} + +lemma deriv_pderiv: + shows "\ (lang ` (pderiv c r)) = lang (deriv c r)" +unfolding lang_deriv Deriv_pderiv by simp + +lemma derivs_pderivs: + shows "\ (lang ` (pderivs s r)) = lang (derivs s r)" +unfolding lang_derivs Derivs_pderivs by simp + + +subsection {* Finiteness property of partial derivatives *} + +definition + pderivs_lang :: "'a lang \ 'a rexp \ 'a rexp set" +where + "pderivs_lang A r \ \x \ A. pderivs x r" + +lemma pderivs_lang_subsetI: + assumes "\s. s \ A \ pderivs s r \ C" + shows "pderivs_lang A r \ C" +using assms unfolding pderivs_lang_def by (rule UN_least) + +lemma pderivs_lang_union: + shows "pderivs_lang (A \ B) r = (pderivs_lang A r \ pderivs_lang B r)" +by (simp add: pderivs_lang_def) + +lemma pderivs_lang_subset: + shows "A \ B \ pderivs_lang A r \ pderivs_lang B r" +by (auto simp add: pderivs_lang_def) + +definition + "UNIV1 \ UNIV - {[]}" + +lemma pderivs_lang_Zero [simp]: + shows "pderivs_lang UNIV1 Zero = {}" +unfolding UNIV1_def pderivs_lang_def by auto + +lemma pderivs_lang_One [simp]: + shows "pderivs_lang UNIV1 One = {}" +unfolding UNIV1_def pderivs_lang_def by (auto split: if_splits) + +lemma pderivs_lang_Atom [simp]: + shows "pderivs_lang UNIV1 (Atom c) = {One}" +unfolding UNIV1_def pderivs_lang_def +apply(auto) +apply(frule rev_subsetD) +apply(rule pderivs_Atom) +apply(simp) +apply(case_tac xa) +apply(auto split: if_splits) +done + +lemma pderivs_lang_Plus [simp]: + shows "pderivs_lang UNIV1 (Plus r1 r2) = pderivs_lang UNIV1 r1 \ pderivs_lang UNIV1 r2" +unfolding UNIV1_def pderivs_lang_def by auto + + +text {* Non-empty suffixes of a string (needed for the cases of @{const Times} and @{const Star} below) *} + +definition + "PSuf s \ {v. v \ [] \ (\u. u @ v = s)}" + +lemma PSuf_snoc: + shows "PSuf (s @ [c]) = (PSuf s) @@ {[c]} \ {[c]}" +unfolding PSuf_def conc_def +by (auto simp add: append_eq_append_conv2 append_eq_Cons_conv) + +lemma PSuf_Union: + shows "(\v \ PSuf s @@ {[c]}. f v) = (\v \ PSuf s. f (v @ [c]))" +by (auto simp add: conc_def) + +lemma pderivs_lang_snoc: + shows "pderivs_lang (PSuf s @@ {[c]}) r = (pderiv_set c (pderivs_lang (PSuf s) r))" +unfolding pderivs_lang_def +by (simp add: PSuf_Union pderivs_snoc) + +lemma pderivs_Times: + shows "pderivs s (Times r1 r2) \ Timess (pderivs s r1) r2 \ (pderivs_lang (PSuf s) r2)" +proof (induct s rule: rev_induct) + case (snoc c s) + have ih: "pderivs s (Times r1 r2) \ Timess (pderivs s r1) r2 \ (pderivs_lang (PSuf s) r2)" + by fact + have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" + by (simp add: pderivs_snoc) + also have "\ \ pderiv_set c (Timess (pderivs s r1) r2 \ (pderivs_lang (PSuf s) r2))" + using ih by fast + also have "\ = pderiv_set c (Timess (pderivs s r1) r2) \ pderiv_set c (pderivs_lang (PSuf s) r2)" + by (simp) + also have "\ = pderiv_set c (Timess (pderivs s r1) r2) \ pderivs_lang (PSuf s @@ {[c]}) r2" + by (simp add: pderivs_lang_snoc) + also + have "\ \ pderiv_set c (Timess (pderivs s r1) r2) \ pderiv c r2 \ pderivs_lang (PSuf s @@ {[c]}) r2" + by auto + also + have "\ \ Timess (pderiv_set c (pderivs s r1)) r2 \ pderiv c r2 \ pderivs_lang (PSuf s @@ {[c]}) r2" + by (auto simp add: if_splits) + also have "\ = Timess (pderivs (s @ [c]) r1) r2 \ pderiv c r2 \ pderivs_lang (PSuf s @@ {[c]}) r2" + by (simp add: pderivs_snoc) + also have "\ \ Timess (pderivs (s @ [c]) r1) r2 \ pderivs_lang (PSuf (s @ [c])) r2" + unfolding pderivs_lang_def by (auto simp add: PSuf_snoc) + finally show ?case . +qed (simp) + +lemma pderivs_lang_Times_aux1: + assumes a: "s \ UNIV1" + shows "pderivs_lang (PSuf s) r \ pderivs_lang UNIV1 r" +using a unfolding UNIV1_def PSuf_def pderivs_lang_def by auto + +lemma pderivs_lang_Times_aux2: + assumes a: "s \ UNIV1" + shows "Timess (pderivs s r1) r2 \ Timess (pderivs_lang UNIV1 r1) r2" +using a unfolding pderivs_lang_def by auto + +lemma pderivs_lang_Times: + shows "pderivs_lang UNIV1 (Times r1 r2) \ Timess (pderivs_lang UNIV1 r1) r2 \ pderivs_lang UNIV1 r2" +apply(rule pderivs_lang_subsetI) +apply(rule subset_trans) +apply(rule pderivs_Times) +using pderivs_lang_Times_aux1 pderivs_lang_Times_aux2 +apply(blast) +done + +lemma pderivs_Star: + assumes a: "s \ []" + shows "pderivs s (Star r) \ Timess (pderivs_lang (PSuf s) r) (Star r)" +using a +proof (induct s rule: rev_induct) + case (snoc c s) + have ih: "s \ [] \ pderivs s (Star r) \ Timess (pderivs_lang (PSuf s) r) (Star r)" by fact + { assume asm: "s \ []" + have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc) + also have "\ \ pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))" + using ih[OF asm] by fast + also have "\ \ Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \ pderiv c (Star r)" + by (auto split: if_splits) + also have "\ \ Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \ (Timess (pderiv c r) (Star r))" + by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union) + (auto simp add: pderivs_lang_def) + also have "\ = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)" + by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def) + finally have ?case . + } + moreover + { assume asm: "s = []" + then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def) + } + ultimately show ?case by blast +qed (simp) + +lemma pderivs_lang_Star: + shows "pderivs_lang UNIV1 (Star r) \ Timess (pderivs_lang UNIV1 r) (Star r)" +apply(rule pderivs_lang_subsetI) +apply(rule subset_trans) +apply(rule pderivs_Star) +apply(simp add: UNIV1_def) +apply(simp add: UNIV1_def PSuf_def) +apply(auto simp add: pderivs_lang_def) +done + +lemma finite_Timess [simp]: + assumes a: "finite A" + shows "finite (Timess A r)" +using a by auto + +lemma finite_pderivs_lang_UNIV1: + shows "finite (pderivs_lang UNIV1 r)" +apply(induct r) +apply(simp_all add: + finite_subset[OF pderivs_lang_Times] + finite_subset[OF pderivs_lang_Star]) +done + +lemma pderivs_lang_UNIV: + shows "pderivs_lang UNIV r = pderivs [] r \ pderivs_lang UNIV1 r" +unfolding UNIV1_def pderivs_lang_def +by blast + +lemma finite_pderivs_lang_UNIV: + shows "finite (pderivs_lang UNIV r)" +unfolding pderivs_lang_UNIV +by (simp add: finite_pderivs_lang_UNIV1) + +lemma finite_pderivs_lang: + shows "finite (pderivs_lang A r)" +by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV) + + +text{* The following relationship between the alphabetic width of regular expressions +(called @{text awidth} below) and the number of partial derivatives was proved +by Antimirov~\cite{Antimirov95} and formalized by Max Haslbeck. *} + +fun awidth :: "'a rexp \ nat" where +"awidth Zero = 0" | +"awidth One = 0" | +"awidth (Atom a) = 1" | +"awidth (Plus r1 r2) = awidth r1 + awidth r2" | +"awidth (Times r1 r2) = awidth r1 + awidth r2" | +"awidth (Star r1) = awidth r1" + +lemma card_Timess_pderivs_lang_le: + "card (Timess (pderivs_lang A r) s) \ card (pderivs_lang A r)" +by (metis card_image_le finite_pderivs_lang image_eq_UN) + +lemma card_pderivs_lang_UNIV1_le_awidth: "card (pderivs_lang UNIV1 r) \ awidth r" +proof (induction r) + case (Plus r1 r2) + have "card (pderivs_lang UNIV1 (Plus r1 r2)) = card (pderivs_lang UNIV1 r1 \ pderivs_lang UNIV1 r2)" by simp + also have "\ \ card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)" + by(simp add: card_Un_le) + also have "\ \ awidth (Plus r1 r2)" using Plus.IH by simp + finally show ?case . +next + case (Times r1 r2) + have "card (pderivs_lang UNIV1 (Times r1 r2)) \ card (Timess (pderivs_lang UNIV1 r1) r2 \ pderivs_lang UNIV1 r2)" + by (simp add: card_mono finite_pderivs_lang pderivs_lang_Times) + also have "\ \ card (Timess (pderivs_lang UNIV1 r1) r2) + card (pderivs_lang UNIV1 r2)" + by (simp add: card_Un_le) + also have "\ \ card (pderivs_lang UNIV1 r1) + card (pderivs_lang UNIV1 r2)" + by (simp add: card_Timess_pderivs_lang_le) + also have "\ \ awidth (Times r1 r2)" using Times.IH by simp + finally show ?case . +next + case (Star r) + have "card (pderivs_lang UNIV1 (Star r)) \ card (Timess (pderivs_lang UNIV1 r) (Star r))" + by (simp add: card_mono finite_pderivs_lang pderivs_lang_Star) + also have "\ \ card (pderivs_lang UNIV1 r)" by (rule card_Timess_pderivs_lang_le) + also have "\ \ awidth (Star r)" by (simp add: Star.IH) + finally show ?case . +qed (auto) + +text{* Antimirov's Theorem 3.4: *} +theorem card_pderivs_lang_UNIV_le_awidth: "card (pderivs_lang UNIV r) \ awidth r + 1" +proof - + have "card (insert r (pderivs_lang UNIV1 r)) \ Suc (card (pderivs_lang UNIV1 r))" + by(auto simp: card_insert_if[OF finite_pderivs_lang_UNIV1]) + also have "\ \ Suc (awidth r)" by(simp add: card_pderivs_lang_UNIV1_le_awidth) + finally show ?thesis by(simp add: pderivs_lang_UNIV) +qed + +text{* Antimirov's Corollary 3.5: *} +corollary card_pderivs_lang_le_awidth: "card (pderivs_lang A r) \ awidth r + 1" +by(rule order_trans[OF + card_mono[OF finite_pderivs_lang_UNIV pderivs_lang_subset[OF subset_UNIV]] + card_pderivs_lang_UNIV_le_awidth]) + +end \ No newline at end of file diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/Lexer.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/Lexer.thy Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,493 @@ +(* Title: POSIX Lexing with Derivatives of Regular Expressions + Authors: Fahad Ausaf , 2016 + Roy Dyckhoff , 2016 + Christian Urban , 2016 + Maintainer: Christian Urban +*) + +theory Lexer + imports Derivatives +begin + +section {* Values *} + +datatype 'a val = + Void +| Atm 'a +| Seq "'a val" "'a val" +| Right "'a val" +| Left "'a val" +| Stars "('a val) list" + + +section {* The string behind a value *} + +fun + flat :: "'a val \ 'a list" +where + "flat (Void) = []" +| "flat (Atm c) = [c]" +| "flat (Left v) = flat v" +| "flat (Right v) = flat v" +| "flat (Seq v1 v2) = (flat v1) @ (flat v2)" +| "flat (Stars []) = []" +| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" + +lemma flat_Stars [simp]: + "flat (Stars vs) = concat (map flat vs)" +by (induct vs) (auto) + +section {* Relation between values and regular expressions *} + +inductive + Prf :: "'a val \ 'a rexp \ bool" ("\ _ : _" [100, 100] 100) +where + "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : Times r1 r2" +| "\ v1 : r1 \ \ Left v1 : Plus r1 r2" +| "\ v2 : r2 \ \ Right v2 : Plus r1 r2" +| "\ Void : One" +| "\ Atm c : Atom c" +| "\ Stars [] : Star r" +| "\\ v : r; \ Stars vs : Star r\ \ \ Stars (v # vs) : Star r" + +inductive_cases Prf_elims: + "\ v : Zero" + "\ v : Times r1 r2" + "\ v : Plus r1 r2" + "\ v : One" + "\ v : Atom c" +(* "\ vs : Star r"*) + +lemma Prf_flat_lang: + assumes "\ v : r" shows "flat v \ lang r" +using assms +by(induct v r rule: Prf.induct) (auto) + +lemma Prf_Stars: + assumes "\v \ set vs. \ v : r" + shows "\ Stars vs : Star r" +using assms +by(induct vs) (auto intro: Prf.intros) + +lemma Star_string: + assumes "s \ star A" + shows "\ss. concat ss = s \ (\s \ set ss. s \ A)" +using assms +by (metis in_star_iff_concat set_mp) + +lemma Star_val: + assumes "\s\set ss. \v. s = flat v \ \ v : r" + shows "\vs. concat (map flat vs) = concat ss \ (\v\set vs. \ v : r)" +using assms +apply(induct ss) +apply(auto) +apply (metis empty_iff list.set(1)) +by (metis concat.simps(2) list.simps(9) set_ConsD) + +lemma L_flat_Prf1: + assumes "\ v : r" shows "flat v \ lang r" +using assms +by (induct)(auto) + +lemma L_flat_Prf2: + assumes "s \ lang r" shows "\v. \ v : r \ flat v = s" +using assms +apply(induct r arbitrary: s) +apply(auto intro: Prf.intros) +using Prf.intros(2) flat.simps(3) apply blast +using Prf.intros(3) flat.simps(4) apply blast +apply (metis Prf.intros(1) concE flat.simps(5)) +apply(subgoal_tac "\vs::('a val) list. concat (map flat vs) = s \ (\v \ set vs. \ v : r)") +apply(auto)[1] +apply(rule_tac x="Stars vs" in exI) +apply(simp) +apply (simp add: Prf_Stars) +apply(drule Star_string) +apply(auto) +apply(rule Star_val) +apply(auto) +done + +lemma L_flat_Prf: + "lang r = {flat v | v. \ v : r}" +using L_flat_Prf1 L_flat_Prf2 by blast + + +section {* Sulzmann and Lu functions *} + +fun + mkeps :: "'a rexp \ 'a val" +where + "mkeps(One) = Void" +| "mkeps(Times r1 r2) = Seq (mkeps r1) (mkeps r2)" +| "mkeps(Plus r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))" +| "mkeps(Star r) = Stars []" + +fun injval :: "'a rexp \ 'a \ 'a val \ 'a val" +where + "injval (Atom d) c Void = Atm d" +| "injval (Plus r1 r2) c (Left v1) = Left(injval r1 c v1)" +| "injval (Plus r1 r2) c (Right v2) = Right(injval r2 c v2)" +| "injval (Times r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2" +| "injval (Times r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2" +| "injval (Times r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" +| "injval (Star r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" + + +section {* Mkeps, injval *} + +lemma mkeps_nullable: + assumes "nullable r" + shows "\ mkeps r : r" +using assms +by (induct r) + (auto intro: Prf.intros) + +lemma mkeps_flat: + assumes "nullable r" + shows "flat (mkeps r) = []" +using assms +by (induct r) (auto) + + +lemma Prf_injval: + assumes "\ v : deriv c r" + shows "\ (injval r c v) : r" +using assms +apply(induct r arbitrary: c v rule: rexp.induct) +apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) +(* Star *) +apply(rotate_tac 2) +apply(erule Prf.cases) +apply(simp_all)[7] +apply(auto) +apply (metis Prf.intros(6) Prf.intros(7)) +by (metis Prf.intros(7)) + +lemma Prf_injval_flat: + assumes "\ v : deriv c r" + shows "flat (injval r c v) = c # (flat v)" +using assms +apply(induct r arbitrary: v c) +apply(auto elim!: Prf_elims split: if_splits) +apply(metis mkeps_flat) +apply(rotate_tac 2) +apply(erule Prf.cases) +apply(simp_all)[7] +done + +(* HERE *) + +section {* Our Alternative Posix definition *} + +inductive + Posix :: "'a list \ 'a rexp \ 'a val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) +where + Posix_One: "[] \ One \ Void" +| Posix_Atom: "[c] \ (Atom c) \ (Atm c)" +| Posix_Plus1: "s \ r1 \ v \ s \ (Plus r1 r2) \ (Left v)" +| Posix_Plus2: "\s \ r2 \ v; s \ lang r1\ \ s \ (Plus r1 r2) \ (Right v)" +| Posix_Times: "\s1 \ r1 \ v1; s2 \ r2 \ v2; + \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ lang r1 \ s\<^sub>4 \ lang r2)\ \ + (s1 @ s2) \ (Times r1 r2) \ (Seq v1 v2)" +| Posix_Star1: "\s1 \ r \ v; s2 \ Star r \ Stars vs; flat v \ []; + \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ lang r \ s\<^sub>4 \ lang (Star r))\ + \ (s1 @ s2) \ Star r \ Stars (v # vs)" +| Posix_Star2: "[] \ Star r \ Stars []" + +inductive_cases Posix_elims: + "s \ Zero \ v" + "s \ One \ v" + "s \ Atom c \ v" + "s \ Plus r1 r2 \ v" + "s \ Times r1 r2 \ v" + "s \ Star r \ v" + +lemma Posix1: + assumes "s \ r \ v" + shows "s \ lang r" "flat v = s" +using assms +by (induct s r v rule: Posix.induct) (auto) + + +lemma Posix1a: + assumes "s \ r \ v" + shows "\ v : r" +using assms +by (induct s r v rule: Posix.induct)(auto intro: Prf.intros) + + +lemma Posix_mkeps: + assumes "nullable r" + shows "[] \ r \ mkeps r" +using assms +apply(induct r) +apply(auto intro: Posix.intros simp add: nullable_iff) +apply(subst append.simps(1)[symmetric]) +apply(rule Posix.intros) +apply(auto) +done + + +lemma Posix_determ: + assumes "s \ r \ v1" "s \ r \ v2" + shows "v1 = v2" +using assms +proof (induct s r v1 arbitrary: v2 rule: Posix.induct) + case (Posix_One v2) + have "[] \ One \ v2" by fact + then show "Void = v2" by cases auto +next + case (Posix_Atom c v2) + have "[c] \ Atom c \ v2" by fact + then show "Atm c = v2" by cases auto +next + case (Posix_Plus1 s r1 v r2 v2) + have "s \ Plus r1 r2 \ v2" by fact + moreover + have "s \ r1 \ v" by fact + then have "s \ lang r1" by (simp add: Posix1) + ultimately obtain v' where eq: "v2 = Left v'" "s \ r1 \ v'" by cases auto + moreover + have IH: "\v2. s \ r1 \ v2 \ v = v2" by fact + ultimately have "v = v'" by simp + then show "Left v = v2" using eq by simp +next + case (Posix_Plus2 s r2 v r1 v2) + have "s \ Plus r1 r2 \ v2" by fact + moreover + have "s \ lang r1" by fact + ultimately obtain v' where eq: "v2 = Right v'" "s \ r2 \ v'" + by cases (auto simp add: Posix1) + moreover + have IH: "\v2. s \ r2 \ v2 \ v = v2" by fact + ultimately have "v = v'" by simp + then show "Right v = v2" using eq by simp +next + case (Posix_Times s1 r1 v1 s2 r2 v2 v') + have "(s1 @ s2) \ Times r1 r2 \ v'" + "s1 \ r1 \ v1" "s2 \ r2 \ v2" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang r1 \ s\<^sub>4 \ lang r2)" by fact+ + then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \ r1 \ v1'" "s2 \ r2 \ v2'" + apply(cases) apply (auto simp add: append_eq_append_conv2) + using Posix1(1) by fastforce+ + moreover + have IHs: "\v1'. s1 \ r1 \ v1' \ v1 = v1'" + "\v2'. s2 \ r2 \ v2' \ v2 = v2'" by fact+ + ultimately show "Seq v1 v2 = v'" by simp +next + case (Posix_Star1 s1 r v s2 vs v2) + have "(s1 @ s2) \ Star r \ v2" + "s1 \ r \ v" "s2 \ Star r \ Stars vs" "flat v \ []" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang r \ s\<^sub>4 \ lang (Star r))" by fact+ + then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \ r \ v'" "s2 \ (Star r) \ (Stars vs')" + apply(cases) apply (auto simp add: append_eq_append_conv2) + using Posix1(1) apply fastforce + apply (metis Posix1(1) Posix_Star1.hyps(6) append_Nil append_Nil2) + using Posix1(2) by blast + moreover + have IHs: "\v2. s1 \ r \ v2 \ v = v2" + "\v2. s2 \ Star r \ v2 \ Stars vs = v2" by fact+ + ultimately show "Stars (v # vs) = v2" by auto +next + case (Posix_Star2 r v2) + have "[] \ Star r \ v2" by fact + then show "Stars [] = v2" by cases (auto simp add: Posix1) +qed + + +lemma Posix_injval: + assumes "s \ (deriv c r) \ v" + shows "(c # s) \ r \ (injval r c v)" +using assms +proof(induct r arbitrary: s v rule: rexp.induct) + case Zero + have "s \ deriv c Zero \ v" by fact + then have "s \ Zero \ v" by simp + then have "False" by cases + then show "(c # s) \ Zero \ (injval Zero c v)" by simp +next + case One + have "s \ deriv c One \ v" by fact + then have "s \ Zero \ v" by simp + then have "False" by cases + then show "(c # s) \ One \ (injval One c v)" by simp +next + case (Atom d) + consider (eq) "c = d" | (ineq) "c \ d" by blast + then show "(c # s) \ (Atom d) \ (injval (Atom d) c v)" + proof (cases) + case eq + have "s \ deriv c (Atom d) \ v" by fact + then have "s \ One \ v" using eq by simp + then have eqs: "s = [] \ v = Void" by cases simp + show "(c # s) \ Atom d \ injval (Atom d) c v" using eq eqs + by (auto intro: Posix.intros) + next + case ineq + have "s \ deriv c (Atom d) \ v" by fact + then have "s \ Zero \ v" using ineq by simp + then have "False" by cases + then show "(c # s) \ Atom d \ injval (Atom d) c v" by simp + qed +next + case (Plus r1 r2) + have IH1: "\s v. s \ deriv c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact + have IH2: "\s v. s \ deriv c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact + have "s \ deriv c (Plus r1 r2) \ v" by fact + then have "s \ Plus (deriv c r1) (deriv c r2) \ v" by simp + then consider (left) v' where "v = Left v'" "s \ deriv c r1 \ v'" + | (right) v' where "v = Right v'" "s \ lang (deriv c r1)" "s \ deriv c r2 \ v'" + by cases auto + then show "(c # s) \ Plus r1 r2 \ injval (Plus r1 r2) c v" + proof (cases) + case left + have "s \ deriv c r1 \ v'" by fact + then have "(c # s) \ r1 \ injval r1 c v'" using IH1 by simp + then have "(c # s) \ Plus r1 r2 \ injval (Plus r1 r2) c (Left v')" by (auto intro: Posix.intros) + then show "(c # s) \ Plus r1 r2 \ injval (Plus r1 r2) c v" using left by simp + next + case right + have "s \ lang (deriv c r1)" by fact + then have "c # s \ lang r1" by (simp add: lang_deriv Deriv_def) + moreover + have "s \ deriv c r2 \ v'" by fact + then have "(c # s) \ r2 \ injval r2 c v'" using IH2 by simp + ultimately have "(c # s) \ Plus r1 r2 \ injval (Plus r1 r2) c (Right v')" + by (auto intro: Posix.intros) + then show "(c # s) \ Plus r1 r2 \ injval (Plus r1 r2) c v" using right by simp + qed +next + case (Times r1 r2) + have IH1: "\s v. s \ deriv c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact + have IH2: "\s v. s \ deriv c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact + have "s \ deriv c (Times r1 r2) \ v" by fact + then consider + (left_nullable) v1 v2 s1 s2 where + "v = Left (Seq v1 v2)" "s = s1 @ s2" + "s1 \ deriv c r1 \ v1" "s2 \ r2 \ v2" "nullable r1" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang (deriv c r1) \ s\<^sub>4 \ lang r2)" + | (right_nullable) v1 s1 s2 where + "v = Right v1" "s = s1 @ s2" + "s \ deriv c r2 \ v1" "nullable r1" "s1 @ s2 \ lang (Times (deriv c r1) r2)" + | (not_nullable) v1 v2 s1 s2 where + "v = Seq v1 v2" "s = s1 @ s2" + "s1 \ deriv c r1 \ v1" "s2 \ r2 \ v2" "\nullable r1" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang (deriv c r1) \ s\<^sub>4 \ lang r2)" + by (force split: if_splits elim!: Posix_elims simp add: lang_deriv Deriv_def) + then show "(c # s) \ Times r1 r2 \ injval (Times r1 r2) c v" + proof (cases) + case left_nullable + have "s1 \ deriv c r1 \ v1" by fact + then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang (deriv c r1) \ s\<^sub>4 \ lang r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ lang r1 \ s\<^sub>4 \ lang r2)" + by (simp add: lang_deriv Deriv_def) + ultimately have "((c # s1) @ s2) \ Times r1 r2 \ Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros) + then show "(c # s) \ Times r1 r2 \ injval (Times r1 r2) c v" using left_nullable by simp + next + case right_nullable + have "nullable r1" by fact + then have "[] \ r1 \ (mkeps r1)" by (rule Posix_mkeps) + moreover + have "s \ deriv c r2 \ v1" by fact + then have "(c # s) \ r2 \ (injval r2 c v1)" using IH2 by simp + moreover + have "s1 @ s2 \ lang (Times (deriv c r1) r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = c # s \ [] @ s\<^sub>3 \ lang r1 \ s\<^sub>4 \ lang r2)" + using right_nullable + apply (auto simp add: lang_deriv Deriv_def append_eq_Cons_conv) + by (metis concI mem_Collect_eq) + ultimately have "([] @ (c # s)) \ Times r1 r2 \ Seq (mkeps r1) (injval r2 c v1)" + by(rule Posix.intros) + then show "(c # s) \ Times r1 r2 \ injval (Times r1 r2) c v" using right_nullable by simp + next + case not_nullable + have "s1 \ deriv c r1 \ v1" by fact + then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang (deriv c r1) \ s\<^sub>4 \ lang r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ lang r1 \ s\<^sub>4 \ lang r2)" by (simp add: lang_deriv Deriv_def) + ultimately have "((c # s1) @ s2) \ Times r1 r2 \ Seq (injval r1 c v1) v2" using not_nullable + by (rule_tac Posix.intros) (simp_all) + then show "(c # s) \ Times r1 r2 \ injval (Times r1 r2) c v" using not_nullable by simp + qed +next + case (Star r) + have IH: "\s v. s \ deriv c r \ v \ (c # s) \ r \ injval r c v" by fact + have "s \ deriv c (Star r) \ v" by fact + then consider + (cons) v1 vs s1 s2 where + "v = Seq v1 (Stars vs)" "s = s1 @ s2" + "s1 \ deriv c r \ v1" "s2 \ (Star r) \ (Stars vs)" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang (deriv c r) \ s\<^sub>4 \ lang (Star r))" + apply(auto elim!: Posix_elims(1-5) simp add: lang_deriv Deriv_def intro: Posix.intros) + apply(rotate_tac 3) + apply(erule_tac Posix_elims(6)) + apply (simp add: Posix.intros(6)) + using Posix.intros(7) by blast + then show "(c # s) \ Star r \ injval (Star r) c v" + proof (cases) + case cons + have "s1 \ deriv c r \ v1" by fact + then have "(c # s1) \ r \ injval r c v1" using IH by simp + moreover + have "s2 \ Star r \ Stars vs" by fact + moreover + have "(c # s1) \ r \ injval r c v1" by fact + then have "flat (injval r c v1) = (c # s1)" by (rule Posix1) + then have "flat (injval r c v1) \ []" by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang (deriv c r) \ s\<^sub>4 \ lang (Star r))" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ lang r \ s\<^sub>4 \ lang (Star r))" + by (simp add: lang_deriv Deriv_def) + ultimately + have "((c # s1) @ s2) \ Star r \ Stars (injval r c v1 # vs)" by (rule Posix.intros) + then show "(c # s) \ Star r \ injval (Star r) c v" using cons by(simp) + qed +qed + + +section {* The Lexer by Sulzmann and Lu *} + +fun + lexer :: "'a rexp \ 'a list \ ('a val) option" +where + "lexer r [] = (if nullable r then Some(mkeps r) else None)" +| "lexer r (c#s) = (case (lexer (deriv c r) s) of + None \ None + | Some(v) \ Some(injval r c v))" + + +lemma lexer_correct_None: + shows "s \ lang r \ lexer r s = None" +using assms +apply(induct s arbitrary: r) +apply(simp add: nullable_iff) +apply(drule_tac x="deriv a r" in meta_spec) +apply(auto simp add: lang_deriv Deriv_def) +done + +lemma lexer_correct_Some: + shows "s \ lang r \ (\v. lexer r s = Some(v) \ s \ r \ v)" +using assms +apply(induct s arbitrary: r) +apply(auto simp add: Posix_mkeps nullable_iff)[1] +apply(drule_tac x="deriv a r" in meta_spec) +apply(simp add: lang_deriv Deriv_def) +apply(rule iffI) +apply(auto intro: Posix_injval simp add: Posix1(1)) +done + +lemma lexer_correctness: + shows "(lexer r s = Some v) \ s \ r \ v" + and "(lexer r s = None) \ \(\v. s \ r \ v)" +apply(auto) +using lexer_correct_None lexer_correct_Some apply fastforce +using Posix1(1) Posix_determ lexer_correct_Some apply blast +using Posix1(1) lexer_correct_None apply blast +using lexer_correct_None lexer_correct_Some by blast + + +end \ No newline at end of file diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/README Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,53 @@ +Title: +====== +POSIX Lexing with Derivatives of Regular Expressions + + +Authors: +======== +Fahad Ausaf , 2016 +Roy Dyckhoff , 2016 +Christian Urban , 2016 + + +Abstract: +========= + +Brzozowski introduced the notion of derivatives for regular +expressions. They can be used for a very simple regular expression +matching algorithm. Sulzmann and Lu cleverly extended this algorithm +in order to deal with POSIX matching, which is the underlying +disambiguation strategy for regular expressions needed in +lexers. Sulzmann and Lu have made available on-line what they call a +``rigorous proof'' of the correctness of their algorithm w.r.t. their +specification; regrettably, it appears to us to have unfillable +gaps. In the first part of this paper we give our inductive definition +of what a POSIX value is and show (i) that such a value is unique (for +given regular expression and string being matched) and (ii) that +Sulzmann and Lu's algorithm always generates such a value (provided +that the regular expression matches the string). We also prove the +correctness of an optimised version of the POSIX matching +algorithm. Our definitions and proof are much simpler than those by +Sulzmann and Lu and can be easily formalised in Isabelle/HOL. In the +second part we analyse the correctness argument by Sulzmann and Lu and +explain why the gaps in this argument cannot be filled easily. + + +New Theories: +============= + + Lexer.thy + Simplifying.thy + +The repository can be checked using Isabelle 2016. + + isabelle build -c -v -d . Posix-Lexing + + + + + + + + + diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/ROOT --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/ROOT Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,23 @@ +chapter AFP + +(* Session name, add to AFP group, list base session: *) +session "Posix-Lexing" (AFP) = HOL + + +(* Timeout (in sec) in case of non-termination problems *) + options [timeout = 600] + +(* The top-level theories of the submission: *) + theories [document = false] + "Regular_Set" + "Regular_Exp" + "Derivatives" + + theories + "Lexer" + "Simplifying" + +(* Dependencies on document source files: *) + document_files + "root.bib" + "root.tex" + diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/Regular_Exp.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/Regular_Exp.thy Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,160 @@ +(* Author: Tobias Nipkow *) + +section "Regular expressions" + +theory Regular_Exp +imports Regular_Set +begin + +datatype (atoms: 'a) rexp = + is_Zero: Zero | + is_One: One | + Atom 'a | + Plus "('a rexp)" "('a rexp)" | + Times "('a rexp)" "('a rexp)" | + Star "('a rexp)" + +primrec lang :: "'a rexp => 'a lang" where +"lang Zero = {}" | +"lang One = {[]}" | +"lang (Atom a) = {[a]}" | +"lang (Plus r s) = (lang r) Un (lang s)" | +"lang (Times r s) = conc (lang r) (lang s)" | +"lang (Star r) = star(lang r)" + +primrec nullable :: "'a rexp \ bool" where +"nullable Zero = False" | +"nullable One = True" | +"nullable (Atom c) = False" | +"nullable (Plus r1 r2) = (nullable r1 \ nullable r2)" | +"nullable (Times r1 r2) = (nullable r1 \ nullable r2)" | +"nullable (Star r) = True" + +lemma nullable_iff: "nullable r \ [] \ lang r" +by (induct r) (auto simp add: conc_def split: if_splits) + +text{* Composition on rhs usually complicates matters: *} +lemma map_map_rexp: + "map_rexp f (map_rexp g r) = map_rexp (\r. f (g r)) r" + unfolding rexp.map_comp o_def .. + +lemma map_rexp_ident[simp]: "map_rexp (\x. x) = (\r. r)" + unfolding id_def[symmetric] fun_eq_iff rexp.map_id id_apply by (intro allI refl) + +lemma atoms_lang: "w : lang r \ set w \ atoms r" +proof(induction r arbitrary: w) + case Times thus ?case by fastforce +next + case Star thus ?case by (fastforce simp add: star_conv_concat) +qed auto + +lemma lang_eq_ext: "(lang r = lang s) = + (\w \ lists(atoms r \ atoms s). w \ lang r \ w \ lang s)" + by (auto simp: atoms_lang[unfolded subset_iff]) + +lemma lang_eq_ext_Nil_fold_Deriv: + fixes r s + defines "\ \ {(fold Deriv w (lang r), fold Deriv w (lang s))| w. w\lists (atoms r \ atoms s)}" + shows "lang r = lang s \ (\(K, L) \ \. [] \ K \ [] \ L)" + unfolding lang_eq_ext \_def by (subst (1 2) in_fold_Deriv[of "[]", simplified, symmetric]) auto + + +subsection {* Term ordering *} + +instantiation rexp :: (order) "{order}" +begin + +fun le_rexp :: "('a::order) rexp \ ('a::order) rexp \ bool" +where + "le_rexp Zero _ = True" +| "le_rexp _ Zero = False" +| "le_rexp One _ = True" +| "le_rexp _ One = False" +| "le_rexp (Atom a) (Atom b) = (a <= b)" +| "le_rexp (Atom _) _ = True" +| "le_rexp _ (Atom _) = False" +| "le_rexp (Star r) (Star s) = le_rexp r s" +| "le_rexp (Star _) _ = True" +| "le_rexp _ (Star _) = False" +| "le_rexp (Plus r r') (Plus s s') = + (if r = s then le_rexp r' s' else le_rexp r s)" +| "le_rexp (Plus _ _) _ = True" +| "le_rexp _ (Plus _ _) = False" +| "le_rexp (Times r r') (Times s s') = + (if r = s then le_rexp r' s' else le_rexp r s)" + +(* The class instance stuff is by Dmitriy Traytel *) + +definition less_eq_rexp where "r \ s \ le_rexp r s" +definition less_rexp where "r < s \ le_rexp r s \ r \ s" + +lemma le_rexp_Zero: "le_rexp r Zero \ r = Zero" +by (induction r) auto + +lemma le_rexp_refl: "le_rexp r r" +by (induction r) auto + +lemma le_rexp_antisym: "\le_rexp r s; le_rexp s r\ \ r = s" +by (induction r s rule: le_rexp.induct) (auto dest: le_rexp_Zero) + +lemma le_rexp_trans: "\le_rexp r s; le_rexp s t\ \ le_rexp r t" +proof (induction r s arbitrary: t rule: le_rexp.induct) + fix v t assume "le_rexp (Atom v) t" thus "le_rexp One t" by (cases t) auto +next + fix s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp One t" by (cases t) auto +next + fix s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp One t" by (cases t) auto +next + fix s t assume "le_rexp (Star s) t" thus "le_rexp One t" by (cases t) auto +next + fix v u t assume "le_rexp (Atom v) (Atom u)" "le_rexp (Atom u) t" + thus "le_rexp (Atom v) t" by (cases t) auto +next + fix v s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto +next + fix v s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto +next + fix v s t assume "le_rexp (Star s) t" thus "le_rexp (Atom v) t" by (cases t) auto +next + fix r s t + assume IH: "\t. le_rexp r s \ le_rexp s t \ le_rexp r t" + and "le_rexp (Star r) (Star s)" "le_rexp (Star s) t" + thus "le_rexp (Star r) t" by (cases t) auto +next + fix r s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto +next + fix r s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto +next + fix r1 r2 s1 s2 t + assume "\t. r1 = s1 \ le_rexp r2 s2 \ le_rexp s2 t \ le_rexp r2 t" + "\t. r1 \ s1 \ le_rexp r1 s1 \ le_rexp s1 t \ le_rexp r1 t" + "le_rexp (Plus r1 r2) (Plus s1 s2)" "le_rexp (Plus s1 s2) t" + thus "le_rexp (Plus r1 r2) t" by (cases t) (auto split: split_if_asm intro: le_rexp_antisym) +next + fix r1 r2 s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Plus r1 r2) t" by (cases t) auto +next + fix r1 r2 s1 s2 t + assume "\t. r1 = s1 \ le_rexp r2 s2 \ le_rexp s2 t \ le_rexp r2 t" + "\t. r1 \ s1 \ le_rexp r1 s1 \ le_rexp s1 t \ le_rexp r1 t" + "le_rexp (Times r1 r2) (Times s1 s2)" "le_rexp (Times s1 s2) t" + thus "le_rexp (Times r1 r2) t" by (cases t) (auto split: split_if_asm intro: le_rexp_antisym) +qed auto + +instance proof +qed (auto simp add: less_eq_rexp_def less_rexp_def + intro: le_rexp_refl le_rexp_antisym le_rexp_trans) + +end + +instantiation rexp :: (linorder) "{linorder}" +begin + +lemma le_rexp_total: "le_rexp (r :: 'a :: linorder rexp) s \ le_rexp s r" +by (induction r s rule: le_rexp.induct) auto + +instance proof +qed (unfold less_eq_rexp_def less_rexp_def, rule le_rexp_total) + +end + +end diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/Regular_Set.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/Regular_Set.thy Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,481 @@ +(* Author: Tobias Nipkow, Alex Krauss, Christian Urban *) + +section "Regular sets" + +theory Regular_Set +imports Main +begin + +type_synonym 'a lang = "'a list set" + +definition conc :: "'a lang \ 'a lang \ 'a lang" (infixr "@@" 75) where +"A @@ B = {xs@ys | xs ys. xs:A & ys:B}" + +text {* checks the code preprocessor for set comprehensions *} +export_code conc checking SML + +overloading lang_pow == "compow :: nat \ 'a lang \ 'a lang" +begin + primrec lang_pow :: "nat \ 'a lang \ 'a lang" where + "lang_pow 0 A = {[]}" | + "lang_pow (Suc n) A = A @@ (lang_pow n A)" +end + +text {* for code generation *} + +definition lang_pow :: "nat \ 'a lang \ 'a lang" where + lang_pow_code_def [code_abbrev]: "lang_pow = compow" + +lemma [code]: + "lang_pow (Suc n) A = A @@ (lang_pow n A)" + "lang_pow 0 A = {[]}" + by (simp_all add: lang_pow_code_def) + +hide_const (open) lang_pow + +definition star :: "'a lang \ 'a lang" where +"star A = (\n. A ^^ n)" + + +subsection{* @{term "op @@"} *} + +lemma concI[simp,intro]: "u : A \ v : B \ u@v : A @@ B" +by (auto simp add: conc_def) + +lemma concE[elim]: +assumes "w \ A @@ B" +obtains u v where "u \ A" "v \ B" "w = u@v" +using assms by (auto simp: conc_def) + +lemma conc_mono: "A \ C \ B \ D \ A @@ B \ C @@ D" +by (auto simp: conc_def) + +lemma conc_empty[simp]: shows "{} @@ A = {}" and "A @@ {} = {}" +by auto + +lemma conc_epsilon[simp]: shows "{[]} @@ A = A" and "A @@ {[]} = A" +by (simp_all add:conc_def) + +lemma conc_assoc: "(A @@ B) @@ C = A @@ (B @@ C)" +by (auto elim!: concE) (simp only: append_assoc[symmetric] concI) + +lemma conc_Un_distrib: +shows "A @@ (B \ C) = A @@ B \ A @@ C" +and "(A \ B) @@ C = A @@ C \ B @@ C" +by auto + +lemma conc_UNION_distrib: +shows "A @@ UNION I M = UNION I (%i. A @@ M i)" +and "UNION I M @@ A = UNION I (%i. M i @@ A)" +by auto + +lemma conc_subset_lists: "A \ lists S \ B \ lists S \ A @@ B \ lists S" +by(fastforce simp: conc_def in_lists_conv_set) + +lemma Nil_in_conc[simp]: "[] \ A @@ B \ [] \ A \ [] \ B" +by (metis append_is_Nil_conv concE concI) + +lemma concI_if_Nil1: "[] \ A \ xs : B \ xs \ A @@ B" +by (metis append_Nil concI) + +lemma conc_Diff_if_Nil1: "[] \ A \ A @@ B = (A - {[]}) @@ B \ B" +by (fastforce elim: concI_if_Nil1) + +lemma concI_if_Nil2: "[] \ B \ xs : A \ xs \ A @@ B" +by (metis append_Nil2 concI) + +lemma conc_Diff_if_Nil2: "[] \ B \ A @@ B = A @@ (B - {[]}) \ A" +by (fastforce elim: concI_if_Nil2) + +lemma singleton_in_conc: + "[x] : A @@ B \ [x] : A \ [] : B \ [] : A \ [x] : B" +by (fastforce simp: Cons_eq_append_conv append_eq_Cons_conv + conc_Diff_if_Nil1 conc_Diff_if_Nil2) + + +subsection{* @{term "A ^^ n"} *} + +lemma lang_pow_add: "A ^^ (n + m) = A ^^ n @@ A ^^ m" +by (induct n) (auto simp: conc_assoc) + +lemma lang_pow_empty: "{} ^^ n = (if n = 0 then {[]} else {})" +by (induct n) auto + +lemma lang_pow_empty_Suc[simp]: "({}::'a lang) ^^ Suc n = {}" +by (simp add: lang_pow_empty) + +lemma conc_pow_comm: + shows "A @@ (A ^^ n) = (A ^^ n) @@ A" +by (induct n) (simp_all add: conc_assoc[symmetric]) + +lemma length_lang_pow_ub: + "ALL w : A. length w \ k \ w : A^^n \ length w \ k*n" +by(induct n arbitrary: w) (fastforce simp: conc_def)+ + +lemma length_lang_pow_lb: + "ALL w : A. length w \ k \ w : A^^n \ length w \ k*n" +by(induct n arbitrary: w) (fastforce simp: conc_def)+ + +lemma lang_pow_subset_lists: "A \ lists S \ A ^^ n \ lists S" +by(induction n)(auto simp: conc_subset_lists[OF assms]) + + +subsection{* @{const star} *} + +lemma star_subset_lists: "A \ lists S \ star A \ lists S" +unfolding star_def by(blast dest: lang_pow_subset_lists) + +lemma star_if_lang_pow[simp]: "w : A ^^ n \ w : star A" +by (auto simp: star_def) + +lemma Nil_in_star[iff]: "[] : star A" +proof (rule star_if_lang_pow) + show "[] : A ^^ 0" by simp +qed + +lemma star_if_lang[simp]: assumes "w : A" shows "w : star A" +proof (rule star_if_lang_pow) + show "w : A ^^ 1" using `w : A` by simp +qed + +lemma append_in_starI[simp]: +assumes "u : star A" and "v : star A" shows "u@v : star A" +proof - + from `u : star A` obtain m where "u : A ^^ m" by (auto simp: star_def) + moreover + from `v : star A` obtain n where "v : A ^^ n" by (auto simp: star_def) + ultimately have "u@v : A ^^ (m+n)" by (simp add: lang_pow_add) + thus ?thesis by simp +qed + +lemma conc_star_star: "star A @@ star A = star A" +by (auto simp: conc_def) + +lemma conc_star_comm: + shows "A @@ star A = star A @@ A" +unfolding star_def conc_pow_comm conc_UNION_distrib +by simp + +lemma star_induct[consumes 1, case_names Nil append, induct set: star]: +assumes "w : star A" + and "P []" + and step: "!!u v. u : A \ v : star A \ P v \ P (u@v)" +shows "P w" +proof - + { fix n have "w : A ^^ n \ P w" + by (induct n arbitrary: w) (auto intro: `P []` step star_if_lang_pow) } + with `w : star A` show "P w" by (auto simp: star_def) +qed + +lemma star_empty[simp]: "star {} = {[]}" +by (auto elim: star_induct) + +lemma star_epsilon[simp]: "star {[]} = {[]}" +by (auto elim: star_induct) + +lemma star_idemp[simp]: "star (star A) = star A" +by (auto elim: star_induct) + +lemma star_unfold_left: "star A = A @@ star A \ {[]}" (is "?L = ?R") +proof + show "?L \ ?R" by (rule, erule star_induct) auto +qed auto + +lemma concat_in_star: "set ws \ A \ concat ws : star A" +by (induct ws) simp_all + +lemma in_star_iff_concat: + "w : star A = (EX ws. set ws \ A & w = concat ws)" + (is "_ = (EX ws. ?R w ws)") +proof + assume "w : star A" thus "EX ws. ?R w ws" + proof induct + case Nil have "?R [] []" by simp + thus ?case .. + next + case (append u v) + moreover + then obtain ws where "set ws \ A \ v = concat ws" by blast + ultimately have "?R (u@v) (u#ws)" by auto + thus ?case .. + qed +next + assume "EX us. ?R w us" thus "w : star A" + by (auto simp: concat_in_star) +qed + +lemma star_conv_concat: "star A = {concat ws|ws. set ws \ A}" +by (fastforce simp: in_star_iff_concat) + +lemma star_insert_eps[simp]: "star (insert [] A) = star(A)" +proof- + { fix us + have "set us \ insert [] A \ EX vs. concat us = concat vs \ set vs \ A" + (is "?P \ EX vs. ?Q vs") + proof + let ?vs = "filter (%u. u \ []) us" + show "?P \ ?Q ?vs" by (induct us) auto + qed + } thus ?thesis by (auto simp: star_conv_concat) +qed + +lemma star_unfold_left_Nil: "star A = (A - {[]}) @@ (star A) \ {[]}" +by (metis insert_Diff_single star_insert_eps star_unfold_left) + +lemma star_Diff_Nil_fold: "(A - {[]}) @@ star A = star A - {[]}" +proof - + have "[] \ (A - {[]}) @@ star A" by simp + thus ?thesis using star_unfold_left_Nil by blast +qed + +lemma star_decom: + assumes a: "x \ star A" "x \ []" + shows "\a b. x = a @ b \ a \ [] \ a \ A \ b \ star A" +using a by (induct rule: star_induct) (blast)+ + + +subsection {* Left-Quotients of languages *} + +definition Deriv :: "'a \ 'a lang \ 'a lang" +where "Deriv x A = { xs. x#xs \ A }" + +definition Derivs :: "'a list \ 'a lang \ 'a lang" +where "Derivs xs A = { ys. xs @ ys \ A }" + +abbreviation + Derivss :: "'a list \ 'a lang set \ 'a lang" +where + "Derivss s As \ \ (Derivs s ` As)" + + +lemma Deriv_empty[simp]: "Deriv a {} = {}" + and Deriv_epsilon[simp]: "Deriv a {[]} = {}" + and Deriv_char[simp]: "Deriv a {[b]} = (if a = b then {[]} else {})" + and Deriv_union[simp]: "Deriv a (A \ B) = Deriv a A \ Deriv a B" + and Deriv_inter[simp]: "Deriv a (A \ B) = Deriv a A \ Deriv a B" + and Deriv_compl[simp]: "Deriv a (-A) = - Deriv a A" + and Deriv_Union[simp]: "Deriv a (Union M) = Union(Deriv a ` M)" + and Deriv_UN[simp]: "Deriv a (UN x:I. S x) = (UN x:I. Deriv a (S x))" +by (auto simp: Deriv_def) + +lemma Der_conc [simp]: + shows "Deriv c (A @@ B) = (Deriv c A) @@ B \ (if [] \ A then Deriv c B else {})" +unfolding Deriv_def conc_def +by (auto simp add: Cons_eq_append_conv) + +lemma Deriv_star [simp]: + shows "Deriv c (star A) = (Deriv c A) @@ star A" +proof - + have "Deriv c (star A) = Deriv c ({[]} \ A @@ star A)" + by (metis star_unfold_left sup.commute) + also have "... = Deriv c (A @@ star A)" + unfolding Deriv_union by (simp) + also have "... = (Deriv c A) @@ (star A) \ (if [] \ A then Deriv c (star A) else {})" + by simp + also have "... = (Deriv c A) @@ star A" + unfolding conc_def Deriv_def + using star_decom by (force simp add: Cons_eq_append_conv) + finally show "Deriv c (star A) = (Deriv c A) @@ star A" . +qed + +lemma Deriv_diff[simp]: + shows "Deriv c (A - B) = Deriv c A - Deriv c B" +by(auto simp add: Deriv_def) + +lemma Deriv_lists[simp]: "c : S \ Deriv c (lists S) = lists S" +by(auto simp add: Deriv_def) + +lemma Derivs_simps [simp]: + shows "Derivs [] A = A" + and "Derivs (c # s) A = Derivs s (Deriv c A)" + and "Derivs (s1 @ s2) A = Derivs s2 (Derivs s1 A)" +unfolding Derivs_def Deriv_def by auto + +lemma in_fold_Deriv: "v \ fold Deriv w L \ w @ v \ L" + by (induct w arbitrary: L) (simp_all add: Deriv_def) + +lemma Derivs_alt_def: "Derivs w L = fold Deriv w L" + by (induct w arbitrary: L) simp_all + + +subsection {* Shuffle product *} + +fun shuffle where + "shuffle [] ys = {ys}" +| "shuffle xs [] = {xs}" +| "shuffle (x # xs) (y # ys) = + {x # w | w . w \ shuffle xs (y # ys)} \ + {y # w | w . w \ shuffle (x # xs) ys}" + +lemma shuffle_empty2[simp]: "shuffle xs [] = {xs}" + by (cases xs) auto + +lemma Nil_in_shuffle[simp]: "[] \ shuffle xs ys \ xs = [] \ ys = []" + by (induct xs ys rule: shuffle.induct) auto + +definition Shuffle (infixr "\" 80) where + "Shuffle A B = \{shuffle xs ys | xs ys. xs \ A \ ys \ B}" + +lemma shuffleE: + "zs \ shuffle xs ys \ + (zs = xs \ ys = [] \ P) \ + (zs = ys \ xs = [] \ P) \ + (\x xs' z zs'. xs = x # xs' \ zs = z # zs' \ x = z \ zs' \ shuffle xs' ys \ P) \ + (\y ys' z zs'. ys = y # ys' \ zs = z # zs' \ y = z \ zs' \ shuffle xs ys' \ P) \ P" + by (induct xs ys rule: shuffle.induct) auto + +lemma Cons_in_shuffle_iff: + "z # zs \ shuffle xs ys \ + (xs \ [] \ hd xs = z \ zs \ shuffle (tl xs) ys \ + ys \ [] \ hd ys = z \ zs \ shuffle xs (tl ys))" + by (induct xs ys rule: shuffle.induct) auto + +lemma Deriv_Shuffle[simp]: + "Deriv a (A \ B) = Deriv a A \ B \ A \ Deriv a B" + unfolding Shuffle_def Deriv_def by (fastforce simp: Cons_in_shuffle_iff neq_Nil_conv) + +lemma shuffle_subset_lists: + assumes "A \ lists S" "B \ lists S" + shows "A \ B \ lists S" +unfolding Shuffle_def proof safe + fix x and zs xs ys :: "'a list" + assume zs: "zs \ shuffle xs ys" "x \ set zs" and "xs \ A" "ys \ B" + with assms have "xs \ lists S" "ys \ lists S" by auto + with zs show "x \ S" by (induct xs ys arbitrary: zs rule: shuffle.induct) auto +qed + +lemma Nil_in_Shuffle[simp]: "[] \ A \ B \ [] \ A \ [] \ B" + unfolding Shuffle_def by force + +lemma shuffle_Un_distrib: +shows "A \ (B \ C) = A \ B \ A \ C" +and "A \ (B \ C) = A \ B \ A \ C" +unfolding Shuffle_def by fast+ + +lemma shuffle_UNION_distrib: +shows "A \ UNION I M = UNION I (%i. A \ M i)" +and "UNION I M \ A = UNION I (%i. M i \ A)" +unfolding Shuffle_def by fast+ + +lemma Shuffle_empty[simp]: + "A \ {} = {}" + "{} \ B = {}" + unfolding Shuffle_def by auto + +lemma Shuffle_eps[simp]: + "A \ {[]} = A" + "{[]} \ B = B" + unfolding Shuffle_def by auto + + +subsection {* Arden's Lemma *} + +lemma arden_helper: + assumes eq: "X = A @@ X \ B" + shows "X = (A ^^ Suc n) @@ X \ (\m\n. (A ^^ m) @@ B)" +proof (induct n) + case 0 + show "X = (A ^^ Suc 0) @@ X \ (\m\0. (A ^^ m) @@ B)" + using eq by simp +next + case (Suc n) + have ih: "X = (A ^^ Suc n) @@ X \ (\m\n. (A ^^ m) @@ B)" by fact + also have "\ = (A ^^ Suc n) @@ (A @@ X \ B) \ (\m\n. (A ^^ m) @@ B)" using eq by simp + also have "\ = (A ^^ Suc (Suc n)) @@ X \ ((A ^^ Suc n) @@ B) \ (\m\n. (A ^^ m) @@ B)" + by (simp add: conc_Un_distrib conc_assoc[symmetric] conc_pow_comm) + also have "\ = (A ^^ Suc (Suc n)) @@ X \ (\m\Suc n. (A ^^ m) @@ B)" + by (auto simp add: le_Suc_eq) + finally show "X = (A ^^ Suc (Suc n)) @@ X \ (\m\Suc n. (A ^^ m) @@ B)" . +qed + +lemma Arden: + assumes "[] \ A" + shows "X = A @@ X \ B \ X = star A @@ B" +proof + assume eq: "X = A @@ X \ B" + { fix w assume "w : X" + let ?n = "size w" + from `[] \ A` have "ALL u : A. length u \ 1" + by (metis Suc_eq_plus1 add_leD2 le_0_eq length_0_conv not_less_eq_eq) + hence "ALL u : A^^(?n+1). length u \ ?n+1" + by (metis length_lang_pow_lb nat_mult_1) + hence "ALL u : A^^(?n+1)@@X. length u \ ?n+1" + by(auto simp only: conc_def length_append) + hence "w \ A^^(?n+1)@@X" by auto + hence "w : star A @@ B" using `w : X` using arden_helper[OF eq, where n="?n"] + by (auto simp add: star_def conc_UNION_distrib) + } moreover + { fix w assume "w : star A @@ B" + hence "EX n. w : A^^n @@ B" by(auto simp: conc_def star_def) + hence "w : X" using arden_helper[OF eq] by blast + } ultimately show "X = star A @@ B" by blast +next + assume eq: "X = star A @@ B" + have "star A = A @@ star A \ {[]}" + by (rule star_unfold_left) + then have "star A @@ B = (A @@ star A \ {[]}) @@ B" + by metis + also have "\ = (A @@ star A) @@ B \ B" + unfolding conc_Un_distrib by simp + also have "\ = A @@ (star A @@ B) \ B" + by (simp only: conc_assoc) + finally show "X = A @@ X \ B" + using eq by blast +qed + + +lemma reversed_arden_helper: + assumes eq: "X = X @@ A \ B" + shows "X = X @@ (A ^^ Suc n) \ (\m\n. B @@ (A ^^ m))" +proof (induct n) + case 0 + show "X = X @@ (A ^^ Suc 0) \ (\m\0. B @@ (A ^^ m))" + using eq by simp +next + case (Suc n) + have ih: "X = X @@ (A ^^ Suc n) \ (\m\n. B @@ (A ^^ m))" by fact + also have "\ = (X @@ A \ B) @@ (A ^^ Suc n) \ (\m\n. B @@ (A ^^ m))" using eq by simp + also have "\ = X @@ (A ^^ Suc (Suc n)) \ (B @@ (A ^^ Suc n)) \ (\m\n. B @@ (A ^^ m))" + by (simp add: conc_Un_distrib conc_assoc) + also have "\ = X @@ (A ^^ Suc (Suc n)) \ (\m\Suc n. B @@ (A ^^ m))" + by (auto simp add: le_Suc_eq) + finally show "X = X @@ (A ^^ Suc (Suc n)) \ (\m\Suc n. B @@ (A ^^ m))" . +qed + +theorem reversed_Arden: + assumes nemp: "[] \ A" + shows "X = X @@ A \ B \ X = B @@ star A" +proof + assume eq: "X = X @@ A \ B" + { fix w assume "w : X" + let ?n = "size w" + from `[] \ A` have "ALL u : A. length u \ 1" + by (metis Suc_eq_plus1 add_leD2 le_0_eq length_0_conv not_less_eq_eq) + hence "ALL u : A^^(?n+1). length u \ ?n+1" + by (metis length_lang_pow_lb nat_mult_1) + hence "ALL u : X @@ A^^(?n+1). length u \ ?n+1" + by(auto simp only: conc_def length_append) + hence "w \ X @@ A^^(?n+1)" by auto + hence "w : B @@ star A" using `w : X` using reversed_arden_helper[OF eq, where n="?n"] + by (auto simp add: star_def conc_UNION_distrib) + } moreover + { fix w assume "w : B @@ star A" + hence "EX n. w : B @@ A^^n" by (auto simp: conc_def star_def) + hence "w : X" using reversed_arden_helper[OF eq] by blast + } ultimately show "X = B @@ star A" by blast +next + assume eq: "X = B @@ star A" + have "star A = {[]} \ star A @@ A" + unfolding conc_star_comm[symmetric] + by(metis Un_commute star_unfold_left) + then have "B @@ star A = B @@ ({[]} \ star A @@ A)" + by metis + also have "\ = B \ B @@ (star A @@ A)" + unfolding conc_Un_distrib by simp + also have "\ = B \ (B @@ star A) @@ A" + by (simp only: conc_assoc) + finally show "X = X @@ A \ B" + using eq by blast +qed + +end diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/Simplifying.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/Simplifying.thy Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,239 @@ +(* Title: POSIX Lexing with Derivatives of Regular Expressions + Authors: Fahad Ausaf , 2016 + Roy Dyckhoff , 2016 + Christian Urban , 2016 + Maintainer: Christian Urban +*) + +theory Simplifying + imports "Lexer" +begin + +section {* Lexer including simplifications *} + + +fun F_RIGHT where + "F_RIGHT f v = Right (f v)" + +fun F_LEFT where + "F_LEFT f v = Left (f v)" + +fun F_Plus where + "F_Plus f\<^sub>1 f\<^sub>2 (Right v) = Right (f\<^sub>2 v)" +| "F_Plus f\<^sub>1 f\<^sub>2 (Left v) = Left (f\<^sub>1 v)" +| "F_Plus f1 f2 v = v" + + +fun F_Times1 where + "F_Times1 f\<^sub>1 f\<^sub>2 v = Seq (f\<^sub>1 Void) (f\<^sub>2 v)" + +fun F_Times2 where + "F_Times2 f\<^sub>1 f\<^sub>2 v = Seq (f\<^sub>1 v) (f\<^sub>2 Void)" + +fun F_Times where + "F_Times f\<^sub>1 f\<^sub>2 (Seq v\<^sub>1 v\<^sub>2) = Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)" +| "F_Times f1 f2 v = v" + +fun simp_Plus where + "simp_Plus (Zero, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (r\<^sub>2, F_RIGHT f\<^sub>2)" +| "simp_Plus (r\<^sub>1, f\<^sub>1) (Zero, f\<^sub>2) = (r\<^sub>1, F_LEFT f\<^sub>1)" +| "simp_Plus (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (Plus r\<^sub>1 r\<^sub>2, F_Plus f\<^sub>1 f\<^sub>2)" + +fun simp_Times where + "simp_Times (One, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (r\<^sub>2, F_Times1 f\<^sub>1 f\<^sub>2)" +| "simp_Times (r\<^sub>1, f\<^sub>1) (One, f\<^sub>2) = (r\<^sub>1, F_Times2 f\<^sub>1 f\<^sub>2)" +| "simp_Times (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2) = (Times r\<^sub>1 r\<^sub>2, F_Times f\<^sub>1 f\<^sub>2)" + +lemma simp_Times_simps[simp]: + "simp_Times p1 p2 = (if (fst p1 = One) then (fst p2, F_Times1 (snd p1) (snd p2)) + else (if (fst p2 = One) then (fst p1, F_Times2 (snd p1) (snd p2)) + else (Times (fst p1) (fst p2), F_Times (snd p1) (snd p2))))" +by (induct p1 p2 rule: simp_Times.induct) (auto) + +lemma simp_Plus_simps[simp]: + "simp_Plus p1 p2 = (if (fst p1 = Zero) then (fst p2, F_RIGHT (snd p2)) + else (if (fst p2 = Zero) then (fst p1, F_LEFT (snd p1)) + else (Plus (fst p1) (fst p2), F_Plus (snd p1) (snd p2))))" +by (induct p1 p2 rule: simp_Plus.induct) (auto) + +fun + simp :: "'a rexp \ 'a rexp * ('a val \ 'a val)" +where + "simp (Plus r1 r2) = simp_Plus (simp r1) (simp r2)" +| "simp (Times r1 r2) = simp_Times (simp r1) (simp r2)" +| "simp r = (r, id)" + +fun + slexer :: "'a rexp \ 'a list \ ('a val) option" +where + "slexer r [] = (if nullable r then Some(mkeps r) else None)" +| "slexer r (c#s) = (let (rs, fr) = simp (deriv c r) in + (case (slexer rs s) of + None \ None + | Some(v) \ Some(injval r c (fr v))))" + +lemma slexer_better_simp: + "slexer r (c#s) = (case (slexer (fst (simp (deriv c r))) s) of + None \ None + | Some(v) \ Some(injval r c ((snd (simp (deriv c r))) v)))" +by (auto split: prod.split option.split) + + +lemma L_fst_simp: + shows "lang r = lang (fst (simp r))" +using assms +by (induct r) (auto) + +lemma Posix_simp: + assumes "s \ (fst (simp r)) \ v" + shows "s \ r \ ((snd (simp r)) v)" +using assms +proof(induct r arbitrary: s v rule: rexp.induct) + case (Plus r1 r2 s v) + have IH1: "\s v. s \ fst (simp r1) \ v \ s \ r1 \ snd (simp r1) v" by fact + have IH2: "\s v. s \ fst (simp r2) \ v \ s \ r2 \ snd (simp r2) v" by fact + have as: "s \ fst (simp (Plus r1 r2)) \ v" by fact + consider (Zero_Zero) "fst (simp r1) = Zero" "fst (simp r2) = Zero" + | (Zero_NZero) "fst (simp r1) = Zero" "fst (simp r2) \ Zero" + | (NZero_Zero) "fst (simp r1) \ Zero" "fst (simp r2) = Zero" + | (NZero_NZero) "fst (simp r1) \ Zero" "fst (simp r2) \ Zero" by auto + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" + proof(cases) + case (Zero_Zero) + with as have "s \ Zero \ v" by simp + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" by (rule Posix_elims(1)) + next + case (Zero_NZero) + with as have "s \ fst (simp r2) \ v" by simp + with IH2 have "s \ r2 \ snd (simp r2) v" by simp + moreover + from Zero_NZero have "fst (simp r1) = Zero" by simp + then have "lang (fst (simp r1)) = {}" by simp + then have "lang r1 = {}" using L_fst_simp by auto + then have "s \ lang r1" by simp + ultimately have "s \ Plus r1 r2 \ Right (snd (simp r2) v)" by (rule Posix_Plus2) + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" + using Zero_NZero by simp + next + case (NZero_Zero) + with as have "s \ fst (simp r1) \ v" by simp + with IH1 have "s \ r1 \ snd (simp r1) v" by simp + then have "s \ Plus r1 r2 \ Left (snd (simp r1) v)" by (rule Posix_Plus1) + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" using NZero_Zero by simp + next + case (NZero_NZero) + with as have "s \ Plus (fst (simp r1)) (fst (simp r2)) \ v" by simp + then consider (Left) v1 where "v = Left v1" "s \ (fst (simp r1)) \ v1" + | (Right) v2 where "v = Right v2" "s \ (fst (simp r2)) \ v2" "s \ lang (fst (simp r1))" + by (erule_tac Posix_elims(4)) + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" + proof(cases) + case (Left) + then have "v = Left v1" "s \ r1 \ (snd (simp r1) v1)" using IH1 by simp_all + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" using NZero_NZero + by (simp_all add: Posix_Plus1) + next + case (Right) + then have "v = Right v2" "s \ r2 \ (snd (simp r2) v2)" "s \ lang r1" using IH2 L_fst_simp by auto + then show "s \ Plus r1 r2 \ snd (simp (Plus r1 r2)) v" using NZero_NZero + by (simp_all add: Posix_Plus2) + qed + qed +next + case (Times r1 r2 s v) + have IH1: "\s v. s \ fst (simp r1) \ v \ s \ r1 \ snd (simp r1) v" by fact + have IH2: "\s v. s \ fst (simp r2) \ v \ s \ r2 \ snd (simp r2) v" by fact + have as: "s \ fst (simp (Times r1 r2)) \ v" by fact + consider (One_One) "fst (simp r1) = One" "fst (simp r2) = One" + | (One_NOne) "fst (simp r1) = One" "fst (simp r2) \ One" + | (NOne_One) "fst (simp r1) \ One" "fst (simp r2) = One" + | (NOne_NOne) "fst (simp r1) \ One" "fst (simp r2) \ One" by auto + then show "s \ Times r1 r2 \ snd (simp (Times r1 r2)) v" + proof(cases) + case (One_One) + with as have b: "s \ One \ v" by simp + from b have "s \ r1 \ snd (simp r1) v" using IH1 One_One by simp + moreover + from b have c: "s = []" "v = Void" using Posix_elims(2) by auto + moreover + have "[] \ One \ Void" by (simp add: Posix_One) + then have "[] \ fst (simp r2) \ Void" using One_One by simp + then have "[] \ r2 \ snd (simp r2) Void" using IH2 by simp + ultimately have "([] @ []) \ Times r1 r2 \ Seq (snd (simp r1) Void) (snd (simp r2) Void)" + using Posix_Times by blast + then show "s \ Times r1 r2 \ snd (simp (Times r1 r2)) v" using c One_One by simp + next + case (One_NOne) + with as have b: "s \ fst (simp r2) \ v" by simp + from b have "s \ r2 \ snd (simp r2) v" using IH2 One_NOne by simp + moreover + have "[] \ One \ Void" by (simp add: Posix_One) + then have "[] \ fst (simp r1) \ Void" using One_NOne by simp + then have "[] \ r1 \ snd (simp r1) Void" using IH1 by simp + moreover + from One_NOne(1) have "lang (fst (simp r1)) = {[]}" by simp + then have "lang r1 = {[]}" by (simp add: L_fst_simp[symmetric]) + ultimately have "([] @ s) \ Times r1 r2 \ Seq (snd (simp r1) Void) (snd (simp r2) v)" + by(rule_tac Posix_Times) auto + then show "s \ Times r1 r2 \ snd (simp (Times r1 r2)) v" using One_NOne by simp + next + case (NOne_One) + with as have "s \ fst (simp r1) \ v" by simp + with IH1 have "s \ r1 \ snd (simp r1) v" by simp + moreover + have "[] \ One \ Void" by (simp add: Posix_One) + then have "[] \ fst (simp r2) \ Void" using NOne_One by simp + then have "[] \ r2 \ snd (simp r2) Void" using IH2 by simp + ultimately have "(s @ []) \ Times r1 r2 \ Seq (snd (simp r1) v) (snd (simp r2) Void)" + by(rule_tac Posix_Times) auto + then show "s \ Times r1 r2 \ snd (simp (Times r1 r2)) v" using NOne_One by simp + next + case (NOne_NOne) + with as have "s \ Times (fst (simp r1)) (fst (simp r2)) \ v" by simp + then obtain s1 s2 v1 v2 where eqs: "s = s1 @ s2" "v = Seq v1 v2" + "s1 \ (fst (simp r1)) \ v1" "s2 \ (fst (simp r2)) \ v2" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ lang r1 \ s\<^sub>4 \ lang r2)" + by (erule_tac Posix_elims(5)) (auto simp add: L_fst_simp[symmetric]) + then have "s1 \ r1 \ (snd (simp r1) v1)" "s2 \ r2 \ (snd (simp r2) v2)" + using IH1 IH2 by auto + then show "s \ Times r1 r2 \ snd (simp (Times r1 r2)) v" using eqs NOne_NOne + by(auto intro: Posix_Times) + qed +qed (simp_all) + + +lemma slexer_correctness: + shows "slexer r s = lexer r s" +proof(induct s arbitrary: r) + case Nil + show "slexer r [] = lexer r []" by simp +next + case (Cons c s r) + have IH: "\r. slexer r s = lexer r s" by fact + show "slexer r (c # s) = lexer r (c # s)" + proof (cases "s \ lang (deriv c r)") + case True + assume a1: "s \ lang (deriv c r)" + then obtain v1 where a2: "lexer (deriv c r) s = Some v1" "s \ deriv c r \ v1" + using lexer_correct_Some by auto + from a1 have "s \ lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto + then obtain v2 where a3: "lexer (fst (simp (deriv c r))) s = Some v2" "s \ (fst (simp (deriv c r))) \ v2" + using lexer_correct_Some by auto + then have a4: "slexer (fst (simp (deriv c r))) s = Some v2" using IH by simp + from a3(2) have "s \ deriv c r \ (snd (simp (deriv c r))) v2" using Posix_simp by auto + with a2(2) have "v1 = (snd (simp (deriv c r))) v2" using Posix_determ by auto + with a2(1) a4 show "slexer r (c # s) = lexer r (c # s)" by (auto split: prod.split) + next + case False + assume b1: "s \ lang (deriv c r)" + then have "lexer (deriv c r) s = None" using lexer_correct_None by auto + moreover + from b1 have "s \ lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto + then have "lexer (fst (simp (deriv c r))) s = None" using lexer_correct_None by auto + then have "slexer (fst (simp (deriv c r))) s = None" using IH by simp + ultimately show "slexer r (c # s) = lexer r (c # s)" + by (simp del: slexer.simps add: slexer_better_simp) + qed +qed + +end \ No newline at end of file diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/document/root.bib --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/document/root.bib Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,10 @@ + +@inproceedings{Sulzmann2014, + author = {M.~Sulzmann and K.~Lu}, + title = {{POSIX} {R}egular {E}xpression {P}arsing with {D}erivatives}, + booktitle = {Proc.~of the 12th International Conference on Functional and Logic Programming (FLOPS)}, + pages = {203--220}, + year = {2014}, + volume = {8475}, + series = {LNCS} +} \ No newline at end of file diff -r 2a07222e2a8b -r 6bb15b8e6301 AFP-Submission/document/root.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AFP-Submission/document/root.tex Tue May 24 11:36:21 2016 +0100 @@ -0,0 +1,41 @@ +\documentclass[11pt,a4paper]{article} +\usepackage{isabelle,isabellesym} + +% this should be the last package used +\usepackage{pdfsetup} + +% urls in roman style, theory text in math-similar italics +\urlstyle{rm} +\isabellestyle{it} + + +\begin{document} + +\title{POSIX Lexing with Derivatives of Regular Expressions} +\author{Fahad Ausaf \and Roy Dyckhoff \and Christian Urban} +\maketitle + +\begin{abstract} + Brzozowski introduced the notion of derivatives for regular + expressions. They can be used for a very simple regular expression + matching algorithm. Sulzmann and Lu \cite{Sulzmann2014} cleverly extended this algorithm + in order to deal with POSIX matching, which is the underlying + disambiguation strategy for regular expressions needed in + lexers. In this entry we give our inductive definition + of what a POSIX value is and show (i) that such a value is unique (for + given regular expression and string being matched) and (ii) that + Sulzmann and Lu's algorithm always generates such a value (provided + that the regular expression matches the string). We also prove the + correctness of an optimised version of the POSIX matching + algorithm. +\end{abstract} + +\tableofcontents + +% include generated text of all theories +\input{session} + +\bibliographystyle{abbrv} +\bibliography{root} + +\end{document}