diff -r d36be1e356c0 -r fff2e1b40dfc thys/Lexer.thy --- a/thys/Lexer.thy Tue Jul 18 18:39:20 2017 +0100 +++ b/thys/Lexer.thy Wed Jul 19 14:55:46 2017 +0100 @@ -1,349 +1,10 @@ theory Lexer - imports Main + imports Spec begin -section {* Sequential Composition of Languages *} - -definition - Sequ :: "string set \ string set \ string set" ("_ ;; _" [100,100] 100) -where - "A ;; B = {s1 @ s2 | s1 s2. s1 \ A \ s2 \ B}" - -text {* Two Simple Properties about Sequential Composition *} - -lemma Sequ_empty_string [simp]: - shows "A ;; {[]} = A" - and "{[]} ;; A = A" -by (simp_all add: Sequ_def) - -lemma Sequ_empty [simp]: - shows "A ;; {} = {}" - and "{} ;; A = {}" -by (simp_all add: Sequ_def) - - -section {* Semantic Derivative (Left Quotient) of Languages *} - -definition - Der :: "char \ string set \ string set" -where - "Der c A \ {s. c # s \ A}" - -definition - Ders :: "string \ string set \ string set" -where - "Ders s A \ {s'. s @ s' \ A}" - -lemma Der_null [simp]: - shows "Der c {} = {}" -unfolding Der_def -by auto - -lemma Der_empty [simp]: - shows "Der c {[]} = {}" -unfolding Der_def -by auto - -lemma Der_char [simp]: - shows "Der c {[d]} = (if c = d then {[]} else {})" -unfolding Der_def -by auto - -lemma Der_union [simp]: - shows "Der c (A \ B) = Der c A \ Der c B" -unfolding Der_def -by auto - -lemma Der_Sequ [simp]: - shows "Der c (A ;; B) = (Der c A) ;; B \ (if [] \ A then Der c B else {})" -unfolding Der_def Sequ_def -by (auto simp add: Cons_eq_append_conv) - - -section {* Kleene Star for Languages *} - -inductive_set - Star :: "string set \ string set" ("_\" [101] 102) - for A :: "string set" -where - start[intro]: "[] \ A\" -| step[intro]: "\s1 \ A; s2 \ A\\ \ s1 @ s2 \ A\" - -(* Arden's lemma *) - -lemma Star_cases: - shows "A\ = {[]} \ A ;; A\" -unfolding Sequ_def -by (auto) (metis Star.simps) - -lemma Star_decomp: - assumes "c # x \ A\" - shows "\s1 s2. x = s1 @ s2 \ c # s1 \ A \ s2 \ A\" -using assms -by (induct x\"c # x" rule: Star.induct) - (auto simp add: append_eq_Cons_conv) - -lemma Star_Der_Sequ: - shows "Der c (A\) \ (Der c A) ;; A\" -unfolding Der_def -apply(rule subsetI) -apply(simp) -unfolding Sequ_def -apply(simp) -by(auto simp add: Sequ_def Star_decomp) - - -lemma Der_star [simp]: - shows "Der c (A\) = (Der c A) ;; A\" -proof - - have "Der c (A\) = Der c ({[]} \ A ;; A\)" - by (simp only: Star_cases[symmetric]) - also have "... = Der c (A ;; A\)" - by (simp only: Der_union Der_empty) (simp) - also have "... = (Der c A) ;; A\ \ (if [] \ A then Der c (A\) else {})" - by simp - also have "... = (Der c A) ;; A\" - using Star_Der_Sequ by auto - finally show "Der c (A\) = (Der c A) ;; A\" . -qed - - -section {* Regular Expressions *} - -datatype rexp = - ZERO -| ONE -| CHAR char -| SEQ rexp rexp -| ALT rexp rexp -| STAR rexp - -section {* Semantics of Regular Expressions *} - -fun - L :: "rexp \ string set" -where - "L (ZERO) = {}" -| "L (ONE) = {[]}" -| "L (CHAR c) = {[c]}" -| "L (SEQ r1 r2) = (L r1) ;; (L r2)" -| "L (ALT r1 r2) = (L r1) \ (L r2)" -| "L (STAR r) = (L r)\" - - -section {* Nullable, Derivatives *} - -fun - nullable :: "rexp \ bool" -where - "nullable (ZERO) = False" -| "nullable (ONE) = True" -| "nullable (CHAR c) = False" -| "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" -| "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" -| "nullable (STAR r) = True" - - -fun - der :: "char \ rexp \ rexp" -where - "der c (ZERO) = ZERO" -| "der c (ONE) = ZERO" -| "der c (CHAR d) = (if c = d then ONE else ZERO)" -| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" -| "der c (SEQ r1 r2) = - (if nullable r1 - then ALT (SEQ (der c r1) r2) (der c r2) - else SEQ (der c r1) r2)" -| "der c (STAR r) = SEQ (der c r) (STAR r)" - -fun - ders :: "string \ rexp \ rexp" -where - "ders [] r = r" -| "ders (c # s) r = ders s (der c r)" - - -lemma nullable_correctness: - shows "nullable r \ [] \ (L r)" -by (induct r) (auto simp add: Sequ_def) - -lemma der_correctness: - shows "L (der c r) = Der c (L r)" -by (induct r) (simp_all add: nullable_correctness) - -lemma ders_correctness: - shows "L (ders s r) = Ders s (L r)" -apply(induct s arbitrary: r) -apply(simp_all add: Ders_def der_correctness Der_def) -done - - -section {* Lemmas about ders *} - -lemma ders_ZERO: - shows "ders s (ZERO) = ZERO" -apply(induct s) -apply(simp_all) -done - -lemma ders_ONE: - shows "ders s (ONE) = (if s = [] then ONE else ZERO)" -apply(induct s) -apply(simp_all add: ders_ZERO) -done - -lemma ders_CHAR: - shows "ders s (CHAR c) = - (if s = [c] then ONE else - (if s = [] then (CHAR c) else ZERO))" -apply(induct s) -apply(simp_all add: ders_ZERO ders_ONE) -done - -lemma ders_ALT: - shows "ders s (ALT r1 r2) = ALT (ders s r1) (ders s r2)" -apply(induct s arbitrary: r1 r2) -apply(simp_all) -done - -section {* Values *} - -datatype val = - Void -| Char char -| Seq val val -| Right val -| Left val -| Stars "val list" - - -section {* The string behind a value *} - -fun - flat :: "val \ string" -where - "flat (Void) = []" -| "flat (Char c) = [c]" -| "flat (Left v) = flat v" -| "flat (Right v) = flat v" -| "flat (Seq v1 v2) = (flat v1) @ (flat v2)" -| "flat (Stars []) = []" -| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" - -lemma flat_Stars [simp]: - "flat (Stars vs) = concat (map flat vs)" -by (induct vs) (auto) - - -section {* Relation between values and regular expressions *} - -inductive - Prf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) -where - "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" -| "\ v1 : r1 \ \ Left v1 : ALT r1 r2" -| "\ v2 : r2 \ \ Right v2 : ALT r1 r2" -| "\ Void : ONE" -| "\ Char c : CHAR c" -| "\v \ set vs. \ v : r \ \ Stars vs : STAR r" - -inductive_cases Prf_elims: - "\ v : ZERO" - "\ v : SEQ r1 r2" - "\ v : ALT r1 r2" - "\ v : ONE" - "\ v : CHAR c" - "\ vs : STAR r" - -lemma Star_concat: - assumes "\s \ set ss. s \ A" - shows "concat ss \ A\" -using assms by (induct ss) (auto) - - -lemma Prf_flat_L: - assumes "\ v : r" shows "flat v \ L r" -using assms -by (induct v r rule: Prf.induct) - (auto simp add: Sequ_def Star_concat) - -lemma Prf_Stars_append: - assumes "\ Stars vs1 : STAR r" "\ Stars vs2 : STAR r" - shows "\ Stars (vs1 @ vs2) : STAR r" -using assms -by (auto intro!: Prf.intros elim!: Prf_elims) - -lemma Star_string: - assumes "s \ A\" - shows "\ss. concat ss = s \ (\s \ set ss. s \ A)" -using assms -apply(induct rule: Star.induct) -apply(auto) -apply(rule_tac x="[]" in exI) -apply(simp) -apply(rule_tac x="s1#ss" in exI) -apply(simp) -done - -lemma Star_val: - assumes "\s\set ss. \v. s = flat v \ \ v : r" - shows "\vs. concat (map flat vs) = concat ss \ (\v\set vs. \ v : r)" -using assms -apply(induct ss) -apply(auto) -apply (metis empty_iff list.set(1)) -by (metis concat.simps(2) list.simps(9) set_ConsD) - - -lemma L_flat_Prf1: - assumes "\ v : r" - shows "flat v \ L r" -using assms -by (induct) (auto simp add: Sequ_def Star_concat) - -lemma L_flat_Prf2: - assumes "s \ L r" - shows "\v. \ v : r \ flat v = s" -using assms -proof(induct r arbitrary: s) - case (STAR r s) - have IH: "\s. s \ L r \ \v. \ v : r \ flat v = s" by fact - have "s \ L (STAR r)" by fact - then obtain ss where "concat ss = s" "\s \ set ss. s \ L r" - using Star_string by auto - then obtain vs where "concat (map flat vs) = s" "\v\set vs. \ v : r" - using IH Star_val by blast - then show "\v. \ v : STAR r \ flat v = s" - using Prf.intros(6) flat_Stars by blast -next - case (SEQ r1 r2 s) - then show "\v. \ v : SEQ r1 r2 \ flat v = s" - unfolding Sequ_def L.simps by (fastforce intro: Prf.intros) -next - case (ALT r1 r2 s) - then show "\v. \ v : ALT r1 r2 \ flat v = s" - unfolding L.simps by (fastforce intro: Prf.intros) -qed (auto intro: Prf.intros) - -lemma L_flat_Prf: - "L(r) = {flat v | v. \ v : r}" -using L_flat_Prf1 L_flat_Prf2 by blast - -(* -lemma Star_values_exists: - assumes "s \ (L r)\" - shows "\vs. concat (map flat vs) = s \ \ Stars vs : STAR r" -using assms -apply(drule_tac Star_string) -apply(auto) -by (metis L_flat_Prf2 Prf.intros(6) Star_val) -*) - - -section {* Sulzmann and Lu functions *} +section {* The Lexer Functions by Sulzmann and Lu *} fun mkeps :: "rexp \ val" @@ -363,8 +24,17 @@ | "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" | "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" +fun + lexer :: "rexp \ string \ val option" +where + "lexer r [] = (if nullable r then Some(mkeps r) else None)" +| "lexer r (c#s) = (case (lexer (der c r) s) of + None \ None + | Some(v) \ Some(injval r c v))" -section {* Mkeps, injval *} + + +section {* Mkeps, Injval Properties *} lemma mkeps_nullable: assumes "nullable(r)" @@ -379,7 +49,6 @@ using assms by (induct rule: nullable.induct) (auto) - lemma Prf_injval: assumes "\ v : der c r" shows "\ (injval r c v) : r" @@ -396,49 +65,9 @@ apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits) done - - -section {* Our Alternative Posix definition *} - -inductive - Posix :: "string \ rexp \ val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) -where - Posix_ONE: "[] \ ONE \ Void" -| Posix_CHAR: "[c] \ (CHAR c) \ (Char c)" -| Posix_ALT1: "s \ r1 \ v \ s \ (ALT r1 r2) \ (Left v)" -| Posix_ALT2: "\s \ r2 \ v; s \ L(r1)\ \ s \ (ALT r1 r2) \ (Right v)" -| Posix_SEQ: "\s1 \ r1 \ v1; s2 \ r2 \ v2; - \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r1 \ s\<^sub>4 \ L r2)\ \ - (s1 @ s2) \ (SEQ r1 r2) \ (Seq v1 v2)" -| Posix_STAR1: "\s1 \ r \ v; s2 \ STAR r \ Stars vs; flat v \ []; - \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r \ s\<^sub>4 \ L (STAR r))\ - \ (s1 @ s2) \ STAR r \ Stars (v # vs)" -| Posix_STAR2: "[] \ STAR r \ Stars []" - -inductive_cases Posix_elims: - "s \ ZERO \ v" - "s \ ONE \ v" - "s \ CHAR c \ v" - "s \ ALT r1 r2 \ v" - "s \ SEQ r1 r2 \ v" - "s \ STAR r \ v" - -lemma Posix1: - assumes "s \ r \ v" - shows "s \ L r" "flat v = s" -using assms -by (induct s r v rule: Posix.induct) - (auto simp add: Sequ_def) - - -lemma Posix1a: - assumes "s \ r \ v" - shows "\ v : r" -using assms -apply(induct s r v rule: Posix.induct) -apply(auto intro!: Prf.intros elim!: Prf_elims) -done - +text {* + Mkeps and injval produce, or preserve, Posix values. +*} lemma Posix_mkeps: assumes "nullable r" @@ -451,74 +80,6 @@ apply(auto) done - -lemma Posix_determ: - assumes "s \ r \ v1" "s \ r \ v2" - shows "v1 = v2" -using assms -proof (induct s r v1 arbitrary: v2 rule: Posix.induct) - case (Posix_ONE v2) - have "[] \ ONE \ v2" by fact - then show "Void = v2" by cases auto -next - case (Posix_CHAR c v2) - have "[c] \ CHAR c \ v2" by fact - then show "Char c = v2" by cases auto -next - case (Posix_ALT1 s r1 v r2 v2) - have "s \ ALT r1 r2 \ v2" by fact - moreover - have "s \ r1 \ v" by fact - then have "s \ L r1" by (simp add: Posix1) - ultimately obtain v' where eq: "v2 = Left v'" "s \ r1 \ v'" by cases auto - moreover - have IH: "\v2. s \ r1 \ v2 \ v = v2" by fact - ultimately have "v = v'" by simp - then show "Left v = v2" using eq by simp -next - case (Posix_ALT2 s r2 v r1 v2) - have "s \ ALT r1 r2 \ v2" by fact - moreover - have "s \ L r1" by fact - ultimately obtain v' where eq: "v2 = Right v'" "s \ r2 \ v'" - by cases (auto simp add: Posix1) - moreover - have IH: "\v2. s \ r2 \ v2 \ v = v2" by fact - ultimately have "v = v'" by simp - then show "Right v = v2" using eq by simp -next - case (Posix_SEQ s1 r1 v1 s2 r2 v2 v') - have "(s1 @ s2) \ SEQ r1 r2 \ v'" - "s1 \ r1 \ v1" "s2 \ r2 \ v2" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by fact+ - then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \ r1 \ v1'" "s2 \ r2 \ v2'" - apply(cases) apply (auto simp add: append_eq_append_conv2) - using Posix1(1) by fastforce+ - moreover - have IHs: "\v1'. s1 \ r1 \ v1' \ v1 = v1'" - "\v2'. s2 \ r2 \ v2' \ v2 = v2'" by fact+ - ultimately show "Seq v1 v2 = v'" by simp -next - case (Posix_STAR1 s1 r v s2 vs v2) - have "(s1 @ s2) \ STAR r \ v2" - "s1 \ r \ v" "s2 \ STAR r \ Stars vs" "flat v \ []" - "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" by fact+ - then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \ r \ v'" "s2 \ (STAR r) \ (Stars vs')" - apply(cases) apply (auto simp add: append_eq_append_conv2) - using Posix1(1) apply fastforce - apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2) - using Posix1(2) by blast - moreover - have IHs: "\v2. s1 \ r \ v2 \ v = v2" - "\v2. s2 \ STAR r \ v2 \ Stars vs = v2" by fact+ - ultimately show "Stars (v # vs) = v2" by auto -next - case (Posix_STAR2 r v2) - have "[] \ STAR r \ v2" by fact - then show "Stars [] = v2" by cases (auto simp add: Posix1) -qed - - lemma Posix_injval: assumes "s \ (der c r) \ v" shows "(c # s) \ r \ (injval r c v)" @@ -669,15 +230,7 @@ qed -section {* The Lexer by Sulzmann and Lu *} - -fun - lexer :: "rexp \ string \ val option" -where - "lexer r [] = (if nullable r then Some(mkeps r) else None)" -| "lexer r (c#s) = (case (lexer (der c r) s) of - None \ None - | Some(v) \ Some(injval r c v))" +section {* Lexer Correctness *} lemma lexer_correct_None: