# HG changeset patch # User Christian Urban # Date 1463583466 -3600 # Node ID 841f7b9c0a6a32a1a2509711d334a4cec5b0579a # Parent a42c773ec8abd08c461dd88d7df9893947433283 updated diff -r a42c773ec8ab -r 841f7b9c0a6a thys/Lexer.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/thys/Lexer.thy Wed May 18 15:57:46 2016 +0100 @@ -0,0 +1,642 @@ + +theory Lexer + imports Main +begin + + +section {* Sequential Composition of Languages *} + +definition + Sequ :: "string set \ string set \ string set" ("_ ;; _" [100,100] 100) +where + "A ;; B = {s1 @ s2 | s1 s2. s1 \ A \ s2 \ B}" + +text {* Two Simple Properties about Sequential Composition *} + +lemma seq_empty [simp]: + shows "A ;; {[]} = A" + and "{[]} ;; A = A" +by (simp_all add: Sequ_def) + +lemma seq_null [simp]: + shows "A ;; {} = {}" + and "{} ;; A = {}" +by (simp_all add: Sequ_def) + + +section {* Semantic Derivative (Left Quotient) of Languages *} + +definition + Der :: "char \ string set \ string set" +where + "Der c A \ {s. c # s \ A}" + +lemma Der_null [simp]: + shows "Der c {} = {}" +unfolding Der_def +by auto + +lemma Der_empty [simp]: + shows "Der c {[]} = {}" +unfolding Der_def +by auto + +lemma Der_char [simp]: + shows "Der c {[d]} = (if c = d then {[]} else {})" +unfolding Der_def +by auto + +lemma Der_union [simp]: + shows "Der c (A \ B) = Der c A \ Der c B" +unfolding Der_def +by auto + +lemma Der_Sequ [simp]: + shows "Der c (A ;; B) = (Der c A) ;; B \ (if [] \ A then Der c B else {})" +unfolding Der_def Sequ_def +by (auto simp add: Cons_eq_append_conv) + + +section {* Kleene Star for Languages *} + +inductive_set + Star :: "string set \ string set" ("_\" [101] 102) + for A :: "string set" +where + start[intro]: "[] \ A\" +| step[intro]: "\s1 \ A; s2 \ A\\ \ s1 @ s2 \ A\" + +lemma star_cases: + shows "A\ = {[]} \ A ;; A\" +unfolding Sequ_def +by (auto) (metis Star.simps) + +lemma star_decomp: + assumes a: "c # x \ A\" + shows "\a b. x = a @ b \ c # a \ A \ b \ A\" +using a +by (induct x\"c # x" rule: Star.induct) + (auto simp add: append_eq_Cons_conv) + +lemma Der_star [simp]: + shows "Der c (A\) = (Der c A) ;; A\" +proof - + have "Der c (A\) = Der c ({[]} \ A ;; A\)" + by (simp only: star_cases[symmetric]) + also have "... = Der c (A ;; A\)" + by (simp only: Der_union Der_empty) (simp) + also have "... = (Der c A) ;; A\ \ (if [] \ A then Der c (A\) else {})" + by simp + also have "... = (Der c A) ;; A\" + unfolding Sequ_def Der_def + by (auto dest: star_decomp) + finally show "Der c (A\) = (Der c A) ;; A\" . +qed + + +section {* Regular Expressions *} + +datatype rexp = + ZERO +| ONE +| CHAR char +| SEQ rexp rexp +| ALT rexp rexp +| STAR rexp + +section {* Semantics of Regular Expressions *} + +fun + L :: "rexp \ string set" +where + "L (ZERO) = {}" +| "L (ONE) = {[]}" +| "L (CHAR c) = {[c]}" +| "L (SEQ r1 r2) = (L r1) ;; (L r2)" +| "L (ALT r1 r2) = (L r1) \ (L r2)" +| "L (STAR r) = (L r)\" + + +section {* Nullable, Derivatives *} + +fun + nullable :: "rexp \ bool" +where + "nullable (ZERO) = False" +| "nullable (ONE) = True" +| "nullable (CHAR c) = False" +| "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" +| "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" +| "nullable (STAR r) = True" + + +fun + der :: "char \ rexp \ rexp" +where + "der c (ZERO) = ZERO" +| "der c (ONE) = ZERO" +| "der c (CHAR d) = (if c = d then ONE else ZERO)" +| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" +| "der c (SEQ r1 r2) = + (if nullable r1 + then ALT (SEQ (der c r1) r2) (der c r2) + else SEQ (der c r1) r2)" +| "der c (STAR r) = SEQ (der c r) (STAR r)" + +fun + ders :: "string \ rexp \ rexp" +where + "ders [] r = r" +| "ders (c # s) r = ders s (der c r)" + + +lemma nullable_correctness: + shows "nullable r \ [] \ (L r)" +by (induct r) (auto simp add: Sequ_def) + + +lemma der_correctness: + shows "L (der c r) = Der c (L r)" +by (induct r) (simp_all add: nullable_correctness) + + +section {* Values *} + +datatype val = + Void +| Char char +| Seq val val +| Right val +| Left val +| Stars "val list" + + +section {* The string behind a value *} + +fun + flat :: "val \ string" +where + "flat (Void) = []" +| "flat (Char c) = [c]" +| "flat (Left v) = flat v" +| "flat (Right v) = flat v" +| "flat (Seq v1 v2) = (flat v1) @ (flat v2)" +| "flat (Stars []) = []" +| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" + +lemma flat_Stars [simp]: + "flat (Stars vs) = concat (map flat vs)" +by (induct vs) (auto) + + +section {* Relation between values and regular expressions *} + +inductive + Prf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) +where + "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" +| "\ v1 : r1 \ \ Left v1 : ALT r1 r2" +| "\ v2 : r2 \ \ Right v2 : ALT r1 r2" +| "\ Void : ONE" +| "\ Char c : CHAR c" +| "\ Stars [] : STAR r" +| "\\ v : r; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" + +inductive_cases Prf_elims: + "\ v : ZERO" + "\ v : SEQ r1 r2" + "\ v : ALT r1 r2" + "\ v : ONE" + "\ v : CHAR c" +(* "\ vs : STAR r"*) + +lemma Prf_flat_L: + assumes "\ v : r" shows "flat v \ L r" +using assms +by(induct v r rule: Prf.induct) + (auto simp add: Sequ_def) + +lemma Prf_Stars: + assumes "\v \ set vs. \ v : r" + shows "\ Stars vs : STAR r" +using assms +by(induct vs) (auto intro: Prf.intros) + +lemma Star_string: + assumes "s \ A\" + shows "\ss. concat ss = s \ (\s \ set ss. s \ A)" +using assms +apply(induct rule: Star.induct) +apply(auto) +apply(rule_tac x="[]" in exI) +apply(simp) +apply(rule_tac x="s1#ss" in exI) +apply(simp) +done + + +lemma Star_val: + assumes "\s\set ss. \v. s = flat v \ \ v : r" + shows "\vs. concat (map flat vs) = concat ss \ (\v\set vs. \ v : r)" +using assms +apply(induct ss) +apply(auto) +apply (metis empty_iff list.set(1)) +by (metis concat.simps(2) list.simps(9) set_ConsD) + +lemma L_flat_Prf1: + assumes "\ v : r" shows "flat v \ L r" +using assms +by (induct)(auto simp add: Sequ_def) + +lemma L_flat_Prf2: + assumes "s \ L r" shows "\v. \ v : r \ flat v = s" +using assms +apply(induct r arbitrary: s) +apply(auto simp add: Sequ_def intro: Prf.intros) +using Prf.intros(1) flat.simps(5) apply blast +using Prf.intros(2) flat.simps(3) apply blast +using Prf.intros(3) flat.simps(4) apply blast +apply(subgoal_tac "\vs::val list. concat (map flat vs) = s \ (\v \ set vs. \ v : r)") +apply(auto)[1] +apply(rule_tac x="Stars vs" in exI) +apply(simp) +apply (simp add: Prf_Stars) +apply(drule Star_string) +apply(auto) +apply(rule Star_val) +apply(auto) +done + +lemma L_flat_Prf: + "L(r) = {flat v | v. \ v : r}" +using L_flat_Prf1 L_flat_Prf2 by blast + + +section {* Sulzmann and Lu functions *} + +fun + mkeps :: "rexp \ val" +where + "mkeps(ONE) = Void" +| "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)" +| "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))" +| "mkeps(STAR r) = Stars []" + +fun injval :: "rexp \ char \ val \ val" +where + "injval (CHAR d) c Void = Char d" +| "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)" +| "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)" +| "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2" +| "injval (SEQ r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2" +| "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" +| "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" + + +section {* Mkeps, injval *} + +lemma mkeps_nullable: + assumes "nullable(r)" + shows "\ mkeps r : r" +using assms +by (induct rule: nullable.induct) + (auto intro: Prf.intros) + +lemma mkeps_flat: + assumes "nullable(r)" + shows "flat (mkeps r) = []" +using assms +by (induct rule: nullable.induct) (auto) + + +lemma Prf_injval: + assumes "\ v : der c r" + shows "\ (injval r c v) : r" +using assms +apply(induct r arbitrary: c v rule: rexp.induct) +apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) +(* STAR *) +apply(rotate_tac 2) +apply(erule Prf.cases) +apply(simp_all)[7] +apply(auto) +apply (metis Prf.intros(6) Prf.intros(7)) +by (metis Prf.intros(7)) + +lemma Prf_injval_flat: + assumes "\ v : der c r" + shows "flat (injval r c v) = c # (flat v)" +using assms +apply(induct arbitrary: v rule: der.induct) +apply(auto elim!: Prf_elims split: if_splits) +apply(metis mkeps_flat) +apply(rotate_tac 2) +apply(erule Prf.cases) +apply(simp_all)[7] +done + + + +section {* Our Alternative Posix definition *} + +inductive + Posix :: "string \ rexp \ val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) +where + Posix_ONE: "[] \ ONE \ Void" +| Posix_CHAR: "[c] \ (CHAR c) \ (Char c)" +| Posix_ALT1: "s \ r1 \ v \ s \ (ALT r1 r2) \ (Left v)" +| Posix_ALT2: "\s \ r2 \ v; s \ L(r1)\ \ s \ (ALT r1 r2) \ (Right v)" +| Posix_SEQ: "\s1 \ r1 \ v1; s2 \ r2 \ v2; + \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r1 \ s\<^sub>4 \ L r2)\ \ + (s1 @ s2) \ (SEQ r1 r2) \ (Seq v1 v2)" +| Posix_STAR1: "\s1 \ r \ v; s2 \ STAR r \ Stars vs; flat v \ []; + \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r \ s\<^sub>4 \ L (STAR r))\ + \ (s1 @ s2) \ STAR r \ Stars (v # vs)" +| Posix_STAR2: "[] \ STAR r \ Stars []" + +inductive_cases Posix_elims: + "s \ ZERO \ v" + "s \ ONE \ v" + "s \ CHAR c \ v" + "s \ ALT r1 r2 \ v" + "s \ SEQ r1 r2 \ v" + "s \ STAR r \ v" + +lemma Posix1: + assumes "s \ r \ v" + shows "s \ L r" "flat v = s" +using assms +by (induct s r v rule: Posix.induct) + (auto simp add: Sequ_def) + + +lemma Posix1a: + assumes "s \ r \ v" + shows "\ v : r" +using assms +by (induct s r v rule: Posix.induct)(auto intro: Prf.intros) + + +lemma Posix_mkeps: + assumes "nullable r" + shows "[] \ r \ mkeps r" +using assms +apply(induct r rule: nullable.induct) +apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def) +apply(subst append.simps(1)[symmetric]) +apply(rule Posix.intros) +apply(auto) +done + + +lemma Posix_determ: + assumes "s \ r \ v1" "s \ r \ v2" + shows "v1 = v2" +using assms +proof (induct s r v1 arbitrary: v2 rule: Posix.induct) + case (Posix_ONE v2) + have "[] \ ONE \ v2" by fact + then show "Void = v2" by cases auto +next + case (Posix_CHAR c v2) + have "[c] \ CHAR c \ v2" by fact + then show "Char c = v2" by cases auto +next + case (Posix_ALT1 s r1 v r2 v2) + have "s \ ALT r1 r2 \ v2" by fact + moreover + have "s \ r1 \ v" by fact + then have "s \ L r1" by (simp add: Posix1) + ultimately obtain v' where eq: "v2 = Left v'" "s \ r1 \ v'" by cases auto + moreover + have IH: "\v2. s \ r1 \ v2 \ v = v2" by fact + ultimately have "v = v'" by simp + then show "Left v = v2" using eq by simp +next + case (Posix_ALT2 s r2 v r1 v2) + have "s \ ALT r1 r2 \ v2" by fact + moreover + have "s \ L r1" by fact + ultimately obtain v' where eq: "v2 = Right v'" "s \ r2 \ v'" + by cases (auto simp add: Posix1) + moreover + have IH: "\v2. s \ r2 \ v2 \ v = v2" by fact + ultimately have "v = v'" by simp + then show "Right v = v2" using eq by simp +next + case (Posix_SEQ s1 r1 v1 s2 r2 v2 v') + have "(s1 @ s2) \ SEQ r1 r2 \ v'" + "s1 \ r1 \ v1" "s2 \ r2 \ v2" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by fact+ + then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \ r1 \ v1'" "s2 \ r2 \ v2'" + apply(cases) apply (auto simp add: append_eq_append_conv2) + using Posix1(1) by fastforce+ + moreover + have IHs: "\v1'. s1 \ r1 \ v1' \ v1 = v1'" + "\v2'. s2 \ r2 \ v2' \ v2 = v2'" by fact+ + ultimately show "Seq v1 v2 = v'" by simp +next + case (Posix_STAR1 s1 r v s2 vs v2) + have "(s1 @ s2) \ STAR r \ v2" + "s1 \ r \ v" "s2 \ STAR r \ Stars vs" "flat v \ []" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" by fact+ + then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \ r \ v'" "s2 \ (STAR r) \ (Stars vs')" + apply(cases) apply (auto simp add: append_eq_append_conv2) + using Posix1(1) apply fastforce + apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2) + using Posix1(2) by blast + moreover + have IHs: "\v2. s1 \ r \ v2 \ v = v2" + "\v2. s2 \ STAR r \ v2 \ Stars vs = v2" by fact+ + ultimately show "Stars (v # vs) = v2" by auto +next + case (Posix_STAR2 r v2) + have "[] \ STAR r \ v2" by fact + then show "Stars [] = v2" by cases (auto simp add: Posix1) +qed + + +lemma Posix_injval: + assumes "s \ (der c r) \ v" + shows "(c # s) \ r \ (injval r c v)" +using assms +proof(induct r arbitrary: s v rule: rexp.induct) + case ZERO + have "s \ der c ZERO \ v" by fact + then have "s \ ZERO \ v" by simp + then have "False" by cases + then show "(c # s) \ ZERO \ (injval ZERO c v)" by simp +next + case ONE + have "s \ der c ONE \ v" by fact + then have "s \ ZERO \ v" by simp + then have "False" by cases + then show "(c # s) \ ONE \ (injval ONE c v)" by simp +next + case (CHAR d) + consider (eq) "c = d" | (ineq) "c \ d" by blast + then show "(c # s) \ (CHAR d) \ (injval (CHAR d) c v)" + proof (cases) + case eq + have "s \ der c (CHAR d) \ v" by fact + then have "s \ ONE \ v" using eq by simp + then have eqs: "s = [] \ v = Void" by cases simp + show "(c # s) \ CHAR d \ injval (CHAR d) c v" using eq eqs + by (auto intro: Posix.intros) + next + case ineq + have "s \ der c (CHAR d) \ v" by fact + then have "s \ ZERO \ v" using ineq by simp + then have "False" by cases + then show "(c # s) \ CHAR d \ injval (CHAR d) c v" by simp + qed +next + case (ALT r1 r2) + have IH1: "\s v. s \ der c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact + have IH2: "\s v. s \ der c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact + have "s \ der c (ALT r1 r2) \ v" by fact + then have "s \ ALT (der c r1) (der c r2) \ v" by simp + then consider (left) v' where "v = Left v'" "s \ der c r1 \ v'" + | (right) v' where "v = Right v'" "s \ L (der c r1)" "s \ der c r2 \ v'" + by cases auto + then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" + proof (cases) + case left + have "s \ der c r1 \ v'" by fact + then have "(c # s) \ r1 \ injval r1 c v'" using IH1 by simp + then have "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c (Left v')" by (auto intro: Posix.intros) + then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" using left by simp + next + case right + have "s \ L (der c r1)" by fact + then have "c # s \ L r1" by (simp add: der_correctness Der_def) + moreover + have "s \ der c r2 \ v'" by fact + then have "(c # s) \ r2 \ injval r2 c v'" using IH2 by simp + ultimately have "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c (Right v')" + by (auto intro: Posix.intros) + then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" using right by simp + qed +next + case (SEQ r1 r2) + have IH1: "\s v. s \ der c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact + have IH2: "\s v. s \ der c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact + have "s \ der c (SEQ r1 r2) \ v" by fact + then consider + (left_nullable) v1 v2 s1 s2 where + "v = Left (Seq v1 v2)" "s = s1 @ s2" + "s1 \ der c r1 \ v1" "s2 \ r2 \ v2" "nullable r1" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" + | (right_nullable) v1 s1 s2 where + "v = Right v1" "s = s1 @ s2" + "s \ der c r2 \ v1" "nullable r1" "s1 @ s2 \ L (SEQ (der c r1) r2)" + | (not_nullable) v1 v2 s1 s2 where + "v = Seq v1 v2" "s = s1 @ s2" + "s1 \ der c r1 \ v1" "s2 \ r2 \ v2" "\nullable r1" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" + by (force split: if_splits elim!: Posix_elims simp add: Sequ_def der_correctness Der_def) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" + proof (cases) + case left_nullable + have "s1 \ der c r1 \ v1" by fact + then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by (simp add: der_correctness Der_def) + ultimately have "((c # s1) @ s2) \ SEQ r1 r2 \ Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using left_nullable by simp + next + case right_nullable + have "nullable r1" by fact + then have "[] \ r1 \ (mkeps r1)" by (rule Posix_mkeps) + moreover + have "s \ der c r2 \ v1" by fact + then have "(c # s) \ r2 \ (injval r2 c v1)" using IH2 by simp + moreover + have "s1 @ s2 \ L (SEQ (der c r1) r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = c # s \ [] @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" using right_nullable + by(auto simp add: der_correctness Der_def append_eq_Cons_conv Sequ_def) + ultimately have "([] @ (c # s)) \ SEQ r1 r2 \ Seq (mkeps r1) (injval r2 c v1)" + by(rule Posix.intros) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using right_nullable by simp + next + case not_nullable + have "s1 \ der c r1 \ v1" by fact + then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by (simp add: der_correctness Der_def) + ultimately have "((c # s1) @ s2) \ SEQ r1 r2 \ Seq (injval r1 c v1) v2" using not_nullable + by (rule_tac Posix.intros) (simp_all) + then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using not_nullable by simp + qed +next + case (STAR r) + have IH: "\s v. s \ der c r \ v \ (c # s) \ r \ injval r c v" by fact + have "s \ der c (STAR r) \ v" by fact + then consider + (cons) v1 vs s1 s2 where + "v = Seq v1 (Stars vs)" "s = s1 @ s2" + "s1 \ der c r \ v1" "s2 \ (STAR r) \ (Stars vs)" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (STAR r))" + apply(auto elim!: Posix_elims(1-5) simp add: der_correctness Der_def intro: Posix.intros) + apply(rotate_tac 3) + apply(erule_tac Posix_elims(6)) + apply (simp add: Posix.intros(6)) + using Posix.intros(7) by blast + then show "(c # s) \ STAR r \ injval (STAR r) c v" + proof (cases) + case cons + have "s1 \ der c r \ v1" by fact + then have "(c # s1) \ r \ injval r c v1" using IH by simp + moreover + have "s2 \ STAR r \ Stars vs" by fact + moreover + have "(c # s1) \ r \ injval r c v1" by fact + then have "flat (injval r c v1) = (c # s1)" by (rule Posix1) + then have "flat (injval r c v1) \ []" by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (STAR r))" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" + by (simp add: der_correctness Der_def) + ultimately + have "((c # s1) @ s2) \ STAR r \ Stars (injval r c v1 # vs)" by (rule Posix.intros) + then show "(c # s) \ STAR r \ injval (STAR r) c v" using cons by(simp) + qed +qed + + +section {* The Lexer by Sulzmann and Lu *} + +fun + lexer :: "rexp \ string \ val option" +where + "lexer r [] = (if nullable r then Some(mkeps r) else None)" +| "lexer r (c#s) = (case (lexer (der c r) s) of + None \ None + | Some(v) \ Some(injval r c v))" + + +lemma lexer_correct_None: + shows "s \ L r \ lexer r s = None" +using assms +apply(induct s arbitrary: r) +apply(simp add: nullable_correctness) +apply(drule_tac x="der a r" in meta_spec) +apply(auto simp add: der_correctness Der_def) +done + +lemma lexer_correct_Some: + shows "s \ L r \ (\v. lexer r s = Some(v) \ s \ r \ v)" +using assms +apply(induct s arbitrary: r) +apply(auto simp add: Posix_mkeps nullable_correctness)[1] +apply(drule_tac x="der a r" in meta_spec) +apply(simp add: der_correctness Der_def) +apply(rule iffI) +apply(auto intro: Posix_injval simp add: Posix1(1)) +done + + +end \ No newline at end of file diff -r a42c773ec8ab -r 841f7b9c0a6a thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Tue May 17 14:28:22 2016 +0100 +++ b/thys/Paper/Paper.thy Wed May 18 15:57:46 2016 +0100 @@ -97,7 +97,7 @@ derivatives is that they are neatly expressible in any functional language, and easily definable and reasoned about in theorem provers---the definitions just consist of inductive datatypes and simple recursive functions. A -completely formalised correctness proof of this matcher in for example HOL4 +mechanised correctness proof of Brzozowski's matcher in for example HOL4 has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}. And another one in Coq is given by Coquand and Siles \cite{Coquand2012}. @@ -135,7 +135,7 @@ that can match determines the token. \end{itemize} -\noindent Consider for example @{text "r\<^bsub>key\<^esub>"} recognising keywords +\noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} recognising identifiers (say, a single character followed by characters or numbers). Then we can form the regular expression @@ -269,10 +269,15 @@ "s\<^sub>1 @ s\<^sub>2"} is in the star of this language. It will also be convenient to use the following notion of a \emph{semantic derivative} (or \emph{left quotient}) of a language defined as - @{thm (lhs) Der_def} $\dn$ @{thm (rhs) Der_def}. + % + \begin{center} + @{thm Der_def}\;. + \end{center} + + \noindent For semantic derivatives we have the following equations (for example mechanically proved in \cite{Krauss2011}): - + % \begin{equation}\label{SemDer} \begin{array}{lcl} @{thm (lhs) Der_null} & \dn & @{thm (rhs) Der_null}\\ @@ -322,9 +327,6 @@ \begin{center} \begin{tabular}{lcl} @{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\ - \end{tabular} - \hspace{20mm} - \begin{tabular}{lcl} @{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\ \end{tabular} \end{center} @@ -405,10 +407,10 @@ \begin{tabular}{c} \\[-8mm] @{thm[mode=Axiom] Prf.intros(4)} \qquad - @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[3mm] + @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm] @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad - @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[3mm] - @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\\[3mm] + @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm] + @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\\[4mm] @{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad @{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]} \end{tabular} @@ -472,7 +474,7 @@ left to right) is \Brz's matcher building successive derivatives. If the last regular expression is @{term nullable}, then the functions of the second phase are called (the top-down and right-to-left arrows): first -@{term mkeps} calculates a value witnessing +@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing how the empty string has been recognised by @{term "r\<^sub>4"}. After that the function @{term inj} ``injects back'' the characters of the string into the values. @@ -502,12 +504,12 @@ The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is the construction of a value for how @{term "r\<^sub>1"} can match the string @{term "[a,b,c]"} from the value how the last derivative, @{term - "r\<^sub>4"} in Fig~\ref{Sulz}, can match the empty string. Sulzmann and + "r\<^sub>4"} in Fig.~\ref{Sulz}, can match the empty string. Sulzmann and Lu achieve this by stepwise ``injecting back'' the characters into the values thus inverting the operation of building derivatives, but on the level of values. The corresponding function, called @{term inj}, takes three arguments, a regular expression, a character and a value. For example in - the first (or right-most) @{term inj}-step in Fig~\ref{Sulz} the regular + the first (or right-most) @{term inj}-step in Fig.~\ref{Sulz} the regular expression @{term "r\<^sub>3"}, the character @{term c} from the last derivative step and @{term "v\<^sub>4"}, which is the value corresponding to the derivative regular expression @{term "r\<^sub>4"}. The result is @@ -538,7 +540,7 @@ might be instructive to look first at the three sequence cases (clauses (4)--(6)). In each case we need to construct an ``injected value'' for @{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term - "Seq DUMMY DUMMY"}. Recall the clause of the @{text derivative}-function + "Seq DUMMY DUMMY"}\,. Recall the clause of the @{text derivative}-function for sequence regular expressions: \begin{center} @@ -674,7 +676,7 @@ "PS"}). The first two premises state that @{term "v\<^sub>1"} and @{term "v\<^sub>2"} are the POSIX values for @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} respectively. Consider now the third premise and note that the POSIX value - of this rule should match the string @{term "s\<^sub>1 @ s\<^sub>2"}. According to the + of this rule should match the string \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}}. According to the longest match rule, we want that the @{term "s\<^sub>1"} is the longest initial split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} such that @{term "s\<^sub>2"} is still recognised by @{term "r\<^sub>2"}. Let us assume, contrary to the third premise, that there @@ -685,7 +687,7 @@ matched by @{term "r\<^sub>2"}. In this case @{term "s\<^sub>1"} would \emph{not} be the longest initial split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} and therefore @{term "Seq v\<^sub>1 v\<^sub>2"} cannot be a POSIX value for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}. - The main point is that this side-condition ensures the longest + The main point is that our side-condition ensures the longest match rule is satisfied. A similar condition is imposed on the POSIX value in the @{text @@ -716,8 +718,10 @@ \end{lemma} \begin{proof} + By induction on @{text r}. We explain two cases. - By induction on @{text r}. Suppose @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are + \begin{itemize} + \item[$\bullet$] Case @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are two subcases, namely @{text "(a)"} \mbox{@{term "v = Left v'"}} and @{term "s \ der c r\<^sub>1 \ v'"}; and @{text "(b)"} @{term "v = Right v'"}, @{term "s \ L (der c r\<^sub>1)"} and @{term "s \ der c r\<^sub>2 \ v'"}. In @{text "(a)"} we @@ -728,7 +732,7 @@ Prop.~\ref{derprop}(2) in order to infer @{term "c # s \ L r\<^sub>1"} from @{term "s \ L (der c r\<^sub>1)"}. - Suppose @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases: + \item[$\bullet$] Case @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases: \begin{quote} \begin{description} @@ -768,6 +772,7 @@ c v\<^sub>1) \ []"}. This follows from @{term "(c # s\<^sub>1) \ r\<^sub>1 \ injval r\<^sub>1 c v\<^sub>1"} (which in turn follows from @{term "s\<^sub>1 \ der c r\<^sub>1 \ v\<^sub>1"} and the induction hypothesis).\qed + \end{itemize} \end{proof} \noindent @@ -787,11 +792,14 @@ By induction on @{term s} using Lem.~\ref{lemmkeps} and \ref{Posix2}.\qed \end{proof} - \noindent This concludes our correctness proof. Note that we have not - changed the algorithm of Sulzmann and Lu,\footnote{All deviations we - introduced are harmless.} but introduced our own specification for what a - correct result---a POSIX value---should be. A strong point in favour of - Sulzmann and Lu's algorithm is that it can be extended in various ways. + \noindent In (2) we further know by Thm.~\ref{posixdeterm} that the + value returned by the lexer must be unique. This concludes our + correctness proof. Note that we have not changed the algorithm of + Sulzmann and Lu,\footnote{All deviations we introduced are + harmless.} but introduced our own specification for what a correct + result---a POSIX value---should be. A strong point in favour of + Sulzmann and Lu's algorithm is that it can be extended in various + ways. *} @@ -843,9 +851,7 @@ algorithms considerably, as noted in \cite{Sulzmann2014}. One of the advantages of having a simple specification and correctness proof is that the latter can be refined to prove the correctness of such simplification - steps. - - While the simplification of regular expressions according to + steps. While the simplification of regular expressions according to rules like \begin{equation}\label{Simpl} @@ -930,7 +936,7 @@ @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness of @{term "slexer"}, we need to show that simplification preserves the language and simplification preserves our POSIX relation once the value is rectified - (recall @{const "simp"} generates a regular expression / rectification function pair): + (recall @{const "simp"} generates a (regular expression, rectification function) pair): \begin{lemma}\mbox{}\smallskip\\\label{slexeraux} \begin{tabular}{ll} @@ -1108,7 +1114,7 @@ Although they do not give an explicit proof of the transitivity property, they give a closely related property about the existence of maximal - elements. They state that this can be verified by an induction on $r$. We + elements. They state that this can be verified by an induction on @{term r}. We disagree with this as we shall show next in case of transitivity. The case where the reasoning breaks down is the sequence case, say @{term "SEQ r\<^sub>1 r\<^sub>2"}. The induction hypotheses in this case are diff -r a42c773ec8ab -r 841f7b9c0a6a thys/README --- a/thys/README Tue May 17 14:28:22 2016 +0100 +++ b/thys/README Wed May 18 15:57:46 2016 +0100 @@ -1,9 +1,10 @@ Theories: ========= - ReStar.thy + Lexer.thy + Simplifying.thy -The repository can be checked using Isabelle 2014. +The repository can be checked using Isabelle 2016. isabelle build -c -v -d . Lex diff -r a42c773ec8ab -r 841f7b9c0a6a thys/ROOT --- a/thys/ROOT Tue May 17 14:28:22 2016 +0100 +++ b/thys/ROOT Wed May 18 15:57:46 2016 +0100 @@ -1,6 +1,6 @@ session "Lex" = HOL + theories [document = false] - "ReStar" + "Lexer" "Simplifying" "Sulzmann" diff -r a42c773ec8ab -r 841f7b9c0a6a thys/ReStar.thy --- a/thys/ReStar.thy Tue May 17 14:28:22 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,639 +0,0 @@ - -theory ReStar - imports "Main" -begin - - -section {* Sequential Composition of Languages *} - -definition - Sequ :: "string set \ string set \ string set" ("_ ;; _" [100,100] 100) -where - "A ;; B = {s1 @ s2 | s1 s2. s1 \ A \ s2 \ B}" - -text {* Two Simple Properties about Sequential Composition *} - -lemma seq_empty [simp]: - shows "A ;; {[]} = A" - and "{[]} ;; A = A" -by (simp_all add: Sequ_def) - -lemma seq_null [simp]: - shows "A ;; {} = {}" - and "{} ;; A = {}" -by (simp_all add: Sequ_def) - - -section {* Semantic Derivative (Left Quotient) of Languages *} - -definition - Der :: "char \ string set \ string set" -where - "Der c A \ {s. c # s \ A}" - -lemma Der_null [simp]: - shows "Der c {} = {}" -unfolding Der_def -by auto - -lemma Der_empty [simp]: - shows "Der c {[]} = {}" -unfolding Der_def -by auto - -lemma Der_char [simp]: - shows "Der c {[d]} = (if c = d then {[]} else {})" -unfolding Der_def -by auto - -lemma Der_union [simp]: - shows "Der c (A \ B) = Der c A \ Der c B" -unfolding Der_def -by auto - -lemma Der_Sequ [simp]: - shows "Der c (A ;; B) = (Der c A) ;; B \ (if [] \ A then Der c B else {})" -unfolding Der_def Sequ_def -by (auto simp add: Cons_eq_append_conv) - - -section {* Kleene Star for Languages *} - -inductive_set - Star :: "string set \ string set" ("_\" [101] 102) - for A :: "string set" -where - start[intro]: "[] \ A\" -| step[intro]: "\s1 \ A; s2 \ A\\ \ s1 @ s2 \ A\" - -lemma star_cases: - shows "A\ = {[]} \ A ;; A\" -unfolding Sequ_def -by (auto) (metis Star.simps) - -lemma star_decomp: - assumes a: "c # x \ A\" - shows "\a b. x = a @ b \ c # a \ A \ b \ A\" -using a -by (induct x\"c # x" rule: Star.induct) - (auto simp add: append_eq_Cons_conv) - -lemma Der_star [simp]: - shows "Der c (A\) = (Der c A) ;; A\" -proof - - have "Der c (A\) = Der c ({[]} \ A ;; A\)" - by (simp only: star_cases[symmetric]) - also have "... = Der c (A ;; A\)" - by (simp only: Der_union Der_empty) (simp) - also have "... = (Der c A) ;; A\ \ (if [] \ A then Der c (A\) else {})" - by simp - also have "... = (Der c A) ;; A\" - unfolding Sequ_def Der_def - by (auto dest: star_decomp) - finally show "Der c (A\) = (Der c A) ;; A\" . -qed - - -section {* Regular Expressions *} - -datatype rexp = - ZERO -| ONE -| CHAR char -| SEQ rexp rexp -| ALT rexp rexp -| STAR rexp - -section {* Semantics of Regular Expressions *} - -fun - L :: "rexp \ string set" -where - "L (ZERO) = {}" -| "L (ONE) = {[]}" -| "L (CHAR c) = {[c]}" -| "L (SEQ r1 r2) = (L r1) ;; (L r2)" -| "L (ALT r1 r2) = (L r1) \ (L r2)" -| "L (STAR r) = (L r)\" - - -section {* Nullable, Derivatives *} - -fun - nullable :: "rexp \ bool" -where - "nullable (ZERO) = False" -| "nullable (ONE) = True" -| "nullable (CHAR c) = False" -| "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" -| "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" -| "nullable (STAR r) = True" - - -fun - der :: "char \ rexp \ rexp" -where - "der c (ZERO) = ZERO" -| "der c (ONE) = ZERO" -| "der c (CHAR d) = (if c = d then ONE else ZERO)" -| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" -| "der c (SEQ r1 r2) = - (if nullable r1 - then ALT (SEQ (der c r1) r2) (der c r2) - else SEQ (der c r1) r2)" -| "der c (STAR r) = SEQ (der c r) (STAR r)" - -fun - ders :: "string \ rexp \ rexp" -where - "ders [] r = r" -| "ders (c # s) r = ders s (der c r)" - - -lemma nullable_correctness: - shows "nullable r \ [] \ (L r)" -by (induct r) (auto simp add: Sequ_def) - - -lemma der_correctness: - shows "L (der c r) = Der c (L r)" -by (induct r) (simp_all add: nullable_correctness) - - -section {* Values *} - -datatype val = - Void -| Char char -| Seq val val -| Right val -| Left val -| Stars "val list" - - -section {* The string behind a value *} - -fun - flat :: "val \ string" -where - "flat (Void) = []" -| "flat (Char c) = [c]" -| "flat (Left v) = flat v" -| "flat (Right v) = flat v" -| "flat (Seq v1 v2) = (flat v1) @ (flat v2)" -| "flat (Stars []) = []" -| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" - -lemma flat_Stars [simp]: - "flat (Stars vs) = concat (map flat vs)" -by (induct vs) (auto) - - -section {* Relation between values and regular expressions *} - -inductive - Prf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) -where - "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" -| "\ v1 : r1 \ \ Left v1 : ALT r1 r2" -| "\ v2 : r2 \ \ Right v2 : ALT r1 r2" -| "\ Void : ONE" -| "\ Char c : CHAR c" -| "\ Stars [] : STAR r" -| "\\ v : r; \ Stars vs : STAR r\ \ \ Stars (v # vs) : STAR r" - -inductive_cases Prf_elims: - "\ v : ZERO" - "\ v : SEQ r1 r2" - "\ v : ALT r1 r2" - "\ v : ONE" - "\ v : CHAR c" -(* "\ vs : STAR r"*) - -lemma Prf_flat_L: - assumes "\ v : r" shows "flat v \ L r" -using assms -by(induct v r rule: Prf.induct) - (auto simp add: Sequ_def) - -lemma Prf_Stars: - assumes "\v \ set vs. \ v : r" - shows "\ Stars vs : STAR r" -using assms -apply(induct vs) -apply (metis Prf.intros(6)) -by (metis Prf.intros(7) insert_iff set_simps(2)) - -lemma Star_string: - assumes "s \ A\" - shows "\ss. concat ss = s \ (\s \ set ss. s \ A)" -using assms -apply(induct rule: Star.induct) -apply(auto) -apply(rule_tac x="[]" in exI) -apply(simp) -apply(rule_tac x="s1#ss" in exI) -apply(simp) -done - - -lemma Star_val: - assumes "\s\set ss. \v. s = flat v \ \ v : r" - shows "\vs. concat (map flat vs) = concat ss \ (\v\set vs. \ v : r)" -using assms -apply(induct ss) -apply(auto) -apply (metis empty_iff list.set(1)) -by (metis concat.simps(2) list.simps(9) set_ConsD) - -lemma L_flat_Prf: - "L(r) = {flat v | v. \ v : r}" -apply(induct r) -apply(auto dest: Prf_flat_L simp add: Sequ_def) -apply (metis Prf.intros(4) flat.simps(1)) -apply (metis Prf.intros(5) flat.simps(2)) -apply (metis Prf.intros(1) flat.simps(5)) -apply (metis Prf.intros(2) flat.simps(3)) -apply (metis Prf.intros(3) flat.simps(4)) -apply(auto elim!: Prf_elims) -apply(subgoal_tac "\vs::val list. concat (map flat vs) = x \ (\v \ set vs. \ v : r)") -apply(auto)[1] -apply(rule_tac x="Stars vs" in exI) -apply(simp) -apply(rule Prf_Stars) -apply(simp) -apply(drule Star_string) -apply(auto) -apply(rule Star_val) -apply(simp) -done - - -section {* Sulzmann and Lu functions *} - -fun - mkeps :: "rexp \ val" -where - "mkeps(ONE) = Void" -| "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)" -| "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))" -| "mkeps(STAR r) = Stars []" - -fun injval :: "rexp \ char \ val \ val" -where - "injval (CHAR d) c Void = Char d" -| "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)" -| "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)" -| "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2" -| "injval (SEQ r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2" -| "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" -| "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" - - -section {* Mkeps, injval *} - -lemma mkeps_nullable: - assumes "nullable(r)" - shows "\ mkeps r : r" -using assms -by (induct rule: nullable.induct) - (auto intro: Prf.intros) - -lemma mkeps_flat: - assumes "nullable(r)" - shows "flat (mkeps r) = []" -using assms -by (induct rule: nullable.induct) (auto) - - -lemma Prf_injval: - assumes "\ v : der c r" - shows "\ (injval r c v) : r" -using assms -apply(induct r arbitrary: c v rule: rexp.induct) -apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) -(* STAR *) -apply(rotate_tac 2) -apply(erule Prf.cases) -apply(simp_all)[7] -apply(auto) -apply (metis Prf.intros(6) Prf.intros(7)) -by (metis Prf.intros(7)) - -lemma Prf_injval_flat: - assumes "\ v : der c r" - shows "flat (injval r c v) = c # (flat v)" -using assms -apply(induct arbitrary: v rule: der.induct) -apply(auto elim!: Prf_elims split: if_splits) -apply(metis mkeps_flat) -apply(rotate_tac 2) -apply(erule Prf.cases) -apply(simp_all)[7] -done - - - -section {* Our Alternative Posix definition *} - -inductive - Posix :: "string \ rexp \ val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) -where - Posix_ONE: "[] \ ONE \ Void" -| Posix_CHAR: "[c] \ (CHAR c) \ (Char c)" -| Posix_ALT1: "s \ r1 \ v \ s \ (ALT r1 r2) \ (Left v)" -| Posix_ALT2: "\s \ r2 \ v; s \ L(r1)\ \ s \ (ALT r1 r2) \ (Right v)" -| Posix_SEQ: "\s1 \ r1 \ v1; s2 \ r2 \ v2; - \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r1 \ s\<^sub>4 \ L r2)\ \ - (s1 @ s2) \ (SEQ r1 r2) \ (Seq v1 v2)" -| Posix_STAR1: "\s1 \ r \ v; s2 \ STAR r \ Stars vs; flat v \ []; - \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r \ s\<^sub>4 \ L (STAR r))\ - \ (s1 @ s2) \ STAR r \ Stars (v # vs)" -| Posix_STAR2: "[] \ STAR r \ Stars []" - -inductive_cases Posix_elims: - "s \ ZERO \ v" - "s \ ONE \ v" - "s \ CHAR c \ v" - "s \ ALT r1 r2 \ v" - "s \ SEQ r1 r2 \ v" - "s \ STAR r \ v" - -lemma Posix1: - assumes "s \ r \ v" - shows "s \ L r" "flat v = s" -using assms -by (induct s r v rule: Posix.induct) - (auto simp add: Sequ_def) - - -lemma Posix1a: - assumes "s \ r \ v" - shows "\ v : r" -using assms -apply(induct s r v rule: Posix.induct) -apply(auto intro: Prf.intros) -done - -lemma Posix_mkeps: - assumes "nullable r" - shows "[] \ r \ mkeps r" -using assms -apply(induct r) -apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def) -apply(subst append.simps(1)[symmetric]) -apply(rule Posix.intros) -apply(auto) -done - - -lemma Posix_determ: - assumes "s \ r \ v1" "s \ r \ v2" - shows "v1 = v2" -using assms -proof (induct s r v1 arbitrary: v2 rule: Posix.induct) - case (Posix_ONE v2) - have "[] \ ONE \ v2" by fact - then show "Void = v2" by cases auto -next - case (Posix_CHAR c v2) - have "[c] \ CHAR c \ v2" by fact - then show "Char c = v2" by cases auto -next - case (Posix_ALT1 s r1 v r2 v2) - have "s \ ALT r1 r2 \ v2" by fact - moreover - have "s \ r1 \ v" by fact - then have "s \ L r1" by (simp add: Posix1) - ultimately obtain v' where eq: "v2 = Left v'" "s \ r1 \ v'" by cases auto - moreover - have IH: "\v2. s \ r1 \ v2 \ v = v2" by fact - ultimately have "v = v'" by simp - then show "Left v = v2" using eq by simp -next - case (Posix_ALT2 s r2 v r1 v2) - have "s \ ALT r1 r2 \ v2" by fact - moreover - have "s \ L r1" by fact - ultimately obtain v' where eq: "v2 = Right v'" "s \ r2 \ v'" - by cases (auto simp add: Posix1) - moreover - have IH: "\v2. s \ r2 \ v2 \ v = v2" by fact - ultimately have "v = v'" by simp - then show "Right v = v2" using eq by simp -next - case (Posix_SEQ s1 r1 v1 s2 r2 v2 v') - have "(s1 @ s2) \ SEQ r1 r2 \ v'" - "s1 \ r1 \ v1" "s2 \ r2 \ v2" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by fact+ - then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \ r1 \ v1'" "s2 \ r2 \ v2'" - apply(cases) apply (auto simp add: append_eq_append_conv2) - using Posix1(1) by fastforce+ - moreover - have IHs: "\v1'. s1 \ r1 \ v1' \ v1 = v1'" - "\v2'. s2 \ r2 \ v2' \ v2 = v2'" by fact+ - ultimately show "Seq v1 v2 = v'" by simp -next - case (Posix_STAR1 s1 r v s2 vs v2) - have "(s1 @ s2) \ STAR r \ v2" - "s1 \ r \ v" "s2 \ STAR r \ Stars vs" "flat v \ []" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" by fact+ - then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \ r \ v'" "s2 \ (STAR r) \ (Stars vs')" - apply(cases) apply (auto simp add: append_eq_append_conv2) - using Posix1(1) apply fastforce - apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2) - using Posix1(2) by blast - moreover - have IHs: "\v2. s1 \ r \ v2 \ v = v2" - "\v2. s2 \ STAR r \ v2 \ Stars vs = v2" by fact+ - ultimately show "Stars (v # vs) = v2" by auto -next - case (Posix_STAR2 r v2) - have "[] \ STAR r \ v2" by fact - then show "Stars [] = v2" by cases (auto simp add: Posix1) -qed - - -lemma Posix_injval: - assumes "s \ (der c r) \ v" - shows "(c # s) \ r \ (injval r c v)" -using assms -proof(induct r arbitrary: s v rule: rexp.induct) - case ZERO - have "s \ der c ZERO \ v" by fact - then have "s \ ZERO \ v" by simp - then have "False" by cases - then show "(c # s) \ ZERO \ (injval ZERO c v)" by simp -next - case ONE - have "s \ der c ONE \ v" by fact - then have "s \ ZERO \ v" by simp - then have "False" by cases - then show "(c # s) \ ONE \ (injval ONE c v)" by simp -next - case (CHAR d) - consider (eq) "c = d" | (ineq) "c \ d" by blast - then show "(c # s) \ (CHAR d) \ (injval (CHAR d) c v)" - proof (cases) - case eq - have "s \ der c (CHAR d) \ v" by fact - then have "s \ ONE \ v" using eq by simp - then have eqs: "s = [] \ v = Void" by cases simp - show "(c # s) \ CHAR d \ injval (CHAR d) c v" using eq eqs - by (auto intro: Posix.intros) - next - case ineq - have "s \ der c (CHAR d) \ v" by fact - then have "s \ ZERO \ v" using ineq by simp - then have "False" by cases - then show "(c # s) \ CHAR d \ injval (CHAR d) c v" by simp - qed -next - case (ALT r1 r2) - have IH1: "\s v. s \ der c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact - have IH2: "\s v. s \ der c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact - have "s \ der c (ALT r1 r2) \ v" by fact - then have "s \ ALT (der c r1) (der c r2) \ v" by simp - then consider (left) v' where "v = Left v'" "s \ der c r1 \ v'" - | (right) v' where "v = Right v'" "s \ L (der c r1)" "s \ der c r2 \ v'" - by cases auto - then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" - proof (cases) - case left - have "s \ der c r1 \ v'" by fact - then have "(c # s) \ r1 \ injval r1 c v'" using IH1 by simp - then have "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c (Left v')" by (auto intro: Posix.intros) - then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" using left by simp - next - case right - have "s \ L (der c r1)" by fact - then have "c # s \ L r1" by (simp add: der_correctness Der_def) - moreover - have "s \ der c r2 \ v'" by fact - then have "(c # s) \ r2 \ injval r2 c v'" using IH2 by simp - ultimately have "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c (Right v')" - by (auto intro: Posix.intros) - then show "(c # s) \ ALT r1 r2 \ injval (ALT r1 r2) c v" using right by simp - qed -next - case (SEQ r1 r2) - have IH1: "\s v. s \ der c r1 \ v \ (c # s) \ r1 \ injval r1 c v" by fact - have IH2: "\s v. s \ der c r2 \ v \ (c # s) \ r2 \ injval r2 c v" by fact - have "s \ der c (SEQ r1 r2) \ v" by fact - then consider - (left_nullable) v1 v2 s1 s2 where - "v = Left (Seq v1 v2)" "s = s1 @ s2" - "s1 \ der c r1 \ v1" "s2 \ r2 \ v2" "nullable r1" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" - | (right_nullable) v1 s1 s2 where - "v = Right v1" "s = s1 @ s2" - "s \ der c r2 \ v1" "nullable r1" "s1 @ s2 \ L (SEQ (der c r1) r2)" - | (not_nullable) v1 v2 s1 s2 where - "v = Seq v1 v2" "s = s1 @ s2" - "s1 \ der c r1 \ v1" "s2 \ r2 \ v2" "\nullable r1" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" - by (force split: if_splits elim!: Posix_elims simp add: Sequ_def der_correctness Der_def) - then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" - proof (cases) - case left_nullable - have "s1 \ der c r1 \ v1" by fact - then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp - moreover - have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" by fact - then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by (simp add: der_correctness Der_def) - ultimately have "((c # s1) @ s2) \ SEQ r1 r2 \ Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros) - then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using left_nullable by simp - next - case right_nullable - have "nullable r1" by fact - then have "[] \ r1 \ (mkeps r1)" by (rule Posix_mkeps) - moreover - have "s \ der c r2 \ v1" by fact - then have "(c # s) \ r2 \ (injval r2 c v1)" using IH2 by simp - moreover - have "s1 @ s2 \ L (SEQ (der c r1) r2)" by fact - then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = c # s \ [] @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" using right_nullable - by(auto simp add: der_correctness Der_def append_eq_Cons_conv Sequ_def) - ultimately have "([] @ (c # s)) \ SEQ r1 r2 \ Seq (mkeps r1) (injval r2 c v1)" - by(rule Posix.intros) - then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using right_nullable by simp - next - case not_nullable - have "s1 \ der c r1 \ v1" by fact - then have "(c # s1) \ r1 \ injval r1 c v1" using IH1 by simp - moreover - have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r1) \ s\<^sub>4 \ L r2)" by fact - then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by (simp add: der_correctness Der_def) - ultimately have "((c # s1) @ s2) \ SEQ r1 r2 \ Seq (injval r1 c v1) v2" using not_nullable - by (rule_tac Posix.intros) (simp_all) - then show "(c # s) \ SEQ r1 r2 \ injval (SEQ r1 r2) c v" using not_nullable by simp - qed -next - case (STAR r) - have IH: "\s v. s \ der c r \ v \ (c # s) \ r \ injval r c v" by fact - have "s \ der c (STAR r) \ v" by fact - then consider - (cons) v1 vs s1 s2 where - "v = Seq v1 (Stars vs)" "s = s1 @ s2" - "s1 \ der c r \ v1" "s2 \ (STAR r) \ (Stars vs)" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (STAR r))" - apply(auto elim!: Posix_elims(1-5) simp add: der_correctness Der_def intro: Posix.intros) - apply(rotate_tac 3) - apply(erule_tac Posix_elims(6)) - apply (simp add: Posix.intros(6)) - using Posix.intros(7) by blast - then show "(c # s) \ STAR r \ injval (STAR r) c v" - proof (cases) - case cons - have "s1 \ der c r \ v1" by fact - then have "(c # s1) \ r \ injval r c v1" using IH by simp - moreover - have "s2 \ STAR r \ Stars vs" by fact - moreover - have "(c # s1) \ r \ injval r c v1" by fact - then have "flat (injval r c v1) = (c # s1)" by (rule Posix1) - then have "flat (injval r c v1) \ []" by simp - moreover - have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (STAR r))" by fact - then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" - by (simp add: der_correctness Der_def) - ultimately - have "((c # s1) @ s2) \ STAR r \ Stars (injval r c v1 # vs)" by (rule Posix.intros) - then show "(c # s) \ STAR r \ injval (STAR r) c v" using cons by(simp) - qed -qed - - -section {* The Lexer by Sulzmann and Lu *} - -fun - lexer :: "rexp \ string \ val option" -where - "lexer r [] = (if nullable r then Some(mkeps r) else None)" -| "lexer r (c#s) = (case (lexer (der c r) s) of - None \ None - | Some(v) \ Some(injval r c v))" - - -lemma lexer_correct_None: - shows "s \ L r \ lexer r s = None" -using assms -apply(induct s arbitrary: r) -apply(simp add: nullable_correctness) -apply(drule_tac x="der a r" in meta_spec) -apply(auto simp add: der_correctness Der_def) -done - -lemma lexer_correct_Some: - shows "s \ L r \ (\!v. lexer r s = Some(v) \ s \ r \ v)" -using assms -apply(induct s arbitrary: r) -apply(auto simp add: Posix_mkeps nullable_correctness)[1] -apply(drule_tac x="der a r" in meta_spec) -apply(simp add: der_correctness Der_def) -apply(rule iffI) -apply(auto intro: Posix_injval simp add: Posix1(1)) -done - - -end \ No newline at end of file diff -r a42c773ec8ab -r 841f7b9c0a6a thys/Simplifying.thy --- a/thys/Simplifying.thy Tue May 17 14:28:22 2016 +0100 +++ b/thys/Simplifying.thy Wed May 18 15:57:46 2016 +0100 @@ -1,5 +1,5 @@ theory Simplifying - imports "ReStar" + imports "Lexer" begin section {* Lexer including simplifications *} diff -r a42c773ec8ab -r 841f7b9c0a6a thys/Sulzmann.thy --- a/thys/Sulzmann.thy Tue May 17 14:28:22 2016 +0100 +++ b/thys/Sulzmann.thy Wed May 18 15:57:46 2016 +0100 @@ -1,6 +1,6 @@ theory Sulzmann - imports "ReStar" + imports "Lexer" begin diff -r a42c773ec8ab -r 841f7b9c0a6a thys/notes.tex --- a/thys/notes.tex Tue May 17 14:28:22 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,442 +0,0 @@ -\documentclass[11pt]{article} -\usepackage[left]{lineno} -\usepackage{amsmath} -\usepackage{stmaryrd} - -\begin{document} -%%%\linenumbers - -\noindent -We already proved that - -\[ -\text{If}\;nullable(r)\;\text{then}\;POSIX\;(mkeps\; r)\;r -\] - -\noindent -holds. This is essentially the ``base case'' for the -correctness proof of the algorithm. For the ``induction -case'' we need the following main theorem, which we are -currently after: - -\begin{center} -\begin{tabular}{lll} -If & (*) & $POSIX\;v\;(der\;c\;r)$ and $\vdash v : der\;c\;r$\\ -then & & $POSIX\;(inj\;r\;c\;v)\;r$ -\end{tabular} -\end{center} - -\noindent -That means a POSIX value $v$ is still $POSIX$ after injection. -I am not sure whether this theorem is actually true in this -full generality. Maybe it requires some restrictions. - -If we unfold the $POSIX$ definition in the then-part, we -arrive at - -\[ -\forall v'.\; -\text{if}\;\vdash v' : r\; \text{and} \;|inj\;r\;c\;v| = |v'|\; -\text{then}\; |inj\;r\;c\;v| \succ_r v' -\] - -\noindent -which is what we need to prove assuming the if-part (*) in the -theorem above. Since this is a universally quantified formula, -we just need to fix a $v'$. We can then prove the implication -by assuming - -\[ -\text{(a)}\;\;\vdash v' : r\;\; \text{and} \;\; -\text{(b)}\;\;inj\;r\;c\;v = |v'| -\] - -\noindent -and our goal is - -\[ -(goal)\;\;inj\;r\;c\;v \succ_r v' -\] - -\noindent -There are already two lemmas proved that can transform -the assumptions (a) and (b) into - -\[ -\text{(a*)}\;\;\vdash proj\;r\;c\;v' : der\;c\;r\;\; \text{and} \;\; -\text{(b*)}\;\;c\,\#\,|v| = |v'| -\] - -\noindent -Another lemma shows that - -\[ -|v'| = c\,\#\,|proj\;r\;c\;v| -\] - -\noindent -Using (b*) we can therefore infer - -\[ -\text{(b**)}\;\;|v| = |proj\;r\;c\;v| -\] - -\noindent -The main idea of the proof is now a simple instantiation -of the assumption $POSIX\;v\;(der\;c\;r)$. If we unfold -the $POSIX$ definition, we get - -\[ -\forall v'.\; -\text{if}\;\vdash v' : der\;c\;r\; \text{and} \;|v| = |v'|\; -\text{then}\; v \succ_{der\;c\;r}\; v' -\] - -\noindent -We can instantiate this $v'$ with $proj\;r\;c\;v'$ and can use -(a*) and (b**) in order to infer - -\[ -v \succ_{der\;c\;r}\; proj\;r\;c\;v' -\] - -\noindent -The point of the side-lemma below is that we can ``add'' an -$inj$ to both sides to obtain - -\[ -inj\;r\;c\;v \succ_r\; inj\;r\;c\;(proj\;r\;c\;v') -\] - -\noindent Finally there is already a lemma proved that shows -that an injection and projection is the identity, meaning - -\[ -inj\;r\;c\;(proj\;r\;c\;v') = v' -\] - -\noindent -With this we have shown our goal (pending a proof of the side-lemma -next). - - -\subsection*{Side-Lemma} - -A side-lemma needed for the theorem above which might be true, but can also be false, is as follows: - -\begin{center} -\begin{tabular}{lll} -If & (1) & $v_1 \succ_{der\;c\;r} v_2$,\\ - & (2) & $\vdash v_1 : der\;c\;r$, and\\ - & (3) & $\vdash v_2 : der\;c\;r$ holds,\\ -then & & $inj\;r\;c\;v_1 \succ_r inj\;r\;c\;v_2$ also holds. -\end{tabular} -\end{center} - -\noindent It essentially states that if one value $v_1$ is -bigger than $v_2$ then this ordering is preserved under -injections. This is proved by induction (on the definition of -$der$\ldots this is very similar to an induction on $r$). -\bigskip - -\noindent -The case that is still unproved is the sequence case where we -assume $r = r_1\cdot r_2$ and also $r_1$ being nullable. -The derivative $der\;c\;r$ is then - -\begin{center} -$der\;c\;r = ((der\;c\;r_1) \cdot r_2) + (der\;c\;r_2)$ -\end{center} - -\noindent -or without the parentheses - -\begin{center} -$der\;c\;r = (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$ -\end{center} - -\noindent -In this case the assumptions are - -\begin{center} -\begin{tabular}{ll} -(a) & $v_1 \succ_{(der\;c\;r_1) \cdot r_2 + der\;c\;r_2} v_2$\\ -(b) & $\vdash v_1 : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\ -(c) & $\vdash v_2 : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\ -(d) & $nullable(r_1)$ -\end{tabular} -\end{center} - -\noindent -The induction hypotheses are - -\begin{center} -\begin{tabular}{ll} -(IH1) & $\forall v_1 v_2.\;v_1 \succ_{der\;c\;r_1} v_2 -\;\wedge\; \vdash v_1 : der\;c\;r_1 \;\wedge\; -\vdash v_2 : der\;c\;r_1\qquad$\\ - & $\hfill\longrightarrow - inj\;r_1\;c\;v_1 \succ{r_1} \;inj\;r_1\;c\;v_2$\smallskip\\ -(IH2) & $\forall v_1 v_2.\;v_1 \succ_{der\;c\;r_2} v_2 -\;\wedge\; \vdash v_2 : der\;c\;r_2 \;\wedge\; -\vdash v_2 : der\;c\;r_2\qquad$\\ - & $\hfill\longrightarrow - inj\;r_2\;c\;v_1 \succ{r_2} \;inj\;r_2\;c\;v_2$\\ -\end{tabular} -\end{center} - -\noindent -The goal is - -\[ -(goal)\qquad -inj\; (r_1 \cdot r_2)\;c\;v_1 \succ_{r_1 \cdot r_2} -inj\; (r_1 \cdot r_2)\;c\;v_2 -\] - -\noindent -If we analyse how (a) could have arisen (that is make a case -distinction), then we will find four cases: - -\begin{center} -\begin{tabular}{ll} -LL & $v_1 = Left(w_1)$, $v_2 = Left(w_2)$\\ -LR & $v_1 = Left(w_1)$, $v_2 = Right(w_2)$\\ -RL & $v_1 = Right(w_1)$, $v_2 = Left(w_2)$\\ -RR & $v_1 = Right(w_1)$, $v_2 = Right(w_2)$\\ -\end{tabular} -\end{center} - - -\noindent -We have to establish our goal in all four cases. - - -\subsubsection*{Case LR} - -The corresponding rule (instantiated) is: - -\begin{center} -\begin{tabular}{c} -$len\,|w_1| \geq len\,|w_2|$\\ -\hline -$Left(w_1) \succ_{(der\;c\;r_1) \cdot r_2 + der\;c\;r_2} Right(w_2)$ -\end{tabular} -\end{center} - -\noindent -This means we can also assume in this case - -\[ -(e)\quad len\,|w_1| \geq len\,|w_2| -\] - -\noindent -which is the premise of the rule above. -Instantiating $v_1$ and $v_2$ in the assumptions (b) and (c) -gives us - -\begin{center} -\begin{tabular}{ll} -(b*) & $\vdash Left(w_1) : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\ -(c*) & $\vdash Right(w_2) : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\ -\end{tabular} -\end{center} - -\noindent Since these are assumptions, we can further analyse -how they could have arisen according to the rules of $\vdash -\_ : \_\,$. This gives us two new assumptions - -\begin{center} -\begin{tabular}{ll} -(b**) & $\vdash w_1 : (der\;c\;r_1) \cdot r_2$\\ -(c**) & $\vdash w_2 : der\;c\;r_2$\\ -\end{tabular} -\end{center} - -\noindent -Looking at (b**) we can further analyse how this -judgement could have arisen. This tells us that $w_1$ -must have been a sequence, say $u_1\cdot u_2$, with - -\begin{center} -\begin{tabular}{ll} -(b***) & $\vdash u_1 : der\;c\;r_1$\\ - & $\vdash u_2 : r_2$\\ -\end{tabular} -\end{center} - -\noindent -Instantiating the goal means we need to prove - -\[ -inj\; (r_1 \cdot r_2)\;c\;(Left(u_1\cdot u_2)) \succ_{r_1 \cdot r_2} -inj\; (r_1 \cdot r_2)\;c\;(Right(w_2)) -\] - -\noindent -We can simplify this according to the rules of $inj$: - -\[ -(inj\; r_1\;c\;u_1)\cdot u_2 \succ_{r_1 \cdot r_2} -(mkeps\;r_1) \cdot (inj\; r_2\;c\;w_2) -\] - -\noindent -This is what we need to prove. There are only two rules that -can be used to prove this judgement: - -\begin{center} -\begin{tabular}{cc} -\begin{tabular}{c} -$v_1 = v_1'$\qquad $v_2 \succ_{r_2} v_2'$\\ -\hline -$v_1\cdot v_2 \succ_{r_1\cdot r_2} v_1'\cdot v_2'$ -\end{tabular} & -\begin{tabular}{c} -$v_1 \succ_{r_1} v_1'$\\ -\hline -$v_1\cdot v_2 \succ_{r_1\cdot r_2} v_1'\cdot v_2'$ -\end{tabular} -\end{tabular} -\end{center} - -\noindent -Using the left rule would mean we need to show that - -\[ -inj\; r_1\;c\;u_1 = mkeps\;r_1 -\] - -\noindent -but this can never be the case.\footnote{Actually Isabelle -found this out after analysing its argument. ;o)} Lets assume -it would be true, then also if we flat each side, it must hold -that - -\[ -|inj\; r_1\;c\;u_1| = |mkeps\;r_1| -\] - -\noindent -But this leads to a contradiction, because the right-hand side -will be equal to the empty list, or empty string. This is -because we assumed $nullable(r_1)$ and there is a lemma -called \texttt{mkeps\_flat} which shows this. On the other -side we know by assumption (b***) and lemma \texttt{v4} that -the other side needs to be a string starting with $c$ (since -we inject $c$ into $u_1$). The empty string can never be equal -to something starting with $c$\ldots therefore there is a -contradiction. - -That means we can only use the rule on the right-hand side to -prove our goal. This implies we need to prove - -\[ -inj\; r_1\;c\;u_1 \succ_{r_1} mkeps\;r_1 -\] - - -\subsubsection*{Case RL} - -The corresponding rule (instantiated) is: - -\begin{center} -\begin{tabular}{c} -$len\,|w_1| > len\,|w_2|$\\ -\hline -$Right(w_1) \succ_{(der\;c\;r_1) \cdot r_2 + der\;c\;r_2} Left(w_2)$ -\end{tabular} -\end{center} - -\subsection*{Test Proof} - -We want to prove that - -\[ -nullable(r) \;\text{implies}\; POSIX (mkeps\; r)\; r -\] - -\noindent -We prove this by induction on $r$. There are 5 subcases, and -only the $r_1 + r_2$-case is interesting. In this case we know the -induction hypotheses are - -\begin{center} -\begin{tabular}{ll} -(IMP1) & $nullable(r_1) \;\text{implies}\; - POSIX (mkeps\; r_1)\; r_1$ \\ -(IMP2) & $nullable(r_2) \;\text{implies}\; - POSIX (mkeps\; r_2)\; r_2$ -\end{tabular} -\end{center} - -\noindent and know that $nullable(r_1 + r_2)$ holds. From this -we know that either $nullable(r_1)$ holds or $nullable(r_2)$. -Let us consider the first case where we know $nullable(r_1)$. - - -\subsection*{Problems in the paper proof} - -I cannot verify\ldots - - - -\newpage -\section*{Isabelle Cheat-Sheet} - -\begin{itemize} -\item The main notion in Isabelle is a \emph{theorem}. - Definitions, inductive predicates and recursive - functions all have underlying theorems. If a definition - is called \texttt{foo}, then the theorem will be called - \texttt{foo\_def}. Take a recursive function, say - \texttt{bar}, it will have a theorem that is called - \texttt{bar.simps} and will be added to the simplifier. - That means the simplifier will automatically - Inductive predicates called \texttt{baz} will be called - \texttt{baz.intros}. For inductive predicates, there are - also theorems \texttt{baz.induct} and - \texttt{baz.cases}. - -\item A \emph{goal-state} consists of one or more subgoals. If - there are \texttt{No more subgoals!} then the theorem is - proved. Each subgoal is of the form - - \[ - \llbracket \ldots{}premises\ldots \rrbracket \Longrightarrow - conclusion - \] - - \noindent - where $premises$ and $conclusion$ are formulas of type - \texttt{bool}. - -\item There are three low-level methods for applying one or - more theorem to a subgoal, called \texttt{rule}, - \texttt{drule} and \texttt{erule}. The first applies a - theorem to a conclusion of a goal. For example - - \[\texttt{apply}(\texttt{rule}\;thm) - \] - - If the conclusion is of the form $\_ \wedge \_$, - $\_ \longrightarrow \_$ and $\forall\,x. \_$ the - $thm$ is called - - \begin{center} - \begin{tabular}{lcl} - $\_ \wedge \_$ & $\Rightarrow$ & $conjI$\\ - $\_ \longrightarrow \_$ & $\Rightarrow$ & $impI$\\ - $\forall\,x.\_$ & $\Rightarrow$ & $allI$ - \end{tabular} - \end{center} - - Many of such rule are called intro-rules and end with - an ``$I$'', or in case of inductive predicates $\_.intros$. - - -\end{itemize} - - -\end{document} diff -r a42c773ec8ab -r 841f7b9c0a6a thys/paper.pdf Binary file thys/paper.pdf has changed