--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/thys/Lexer.thy Wed May 18 15:57:46 2016 +0100
@@ -0,0 +1,642 @@
+
+theory Lexer
+ imports Main
+begin
+
+
+section {* Sequential Composition of Languages *}
+
+definition
+ Sequ :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
+where
+ "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
+
+text {* Two Simple Properties about Sequential Composition *}
+
+lemma seq_empty [simp]:
+ shows "A ;; {[]} = A"
+ and "{[]} ;; A = A"
+by (simp_all add: Sequ_def)
+
+lemma seq_null [simp]:
+ shows "A ;; {} = {}"
+ and "{} ;; A = {}"
+by (simp_all add: Sequ_def)
+
+
+section {* Semantic Derivative (Left Quotient) of Languages *}
+
+definition
+ Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
+where
+ "Der c A \<equiv> {s. c # s \<in> A}"
+
+lemma Der_null [simp]:
+ shows "Der c {} = {}"
+unfolding Der_def
+by auto
+
+lemma Der_empty [simp]:
+ shows "Der c {[]} = {}"
+unfolding Der_def
+by auto
+
+lemma Der_char [simp]:
+ shows "Der c {[d]} = (if c = d then {[]} else {})"
+unfolding Der_def
+by auto
+
+lemma Der_union [simp]:
+ shows "Der c (A \<union> B) = Der c A \<union> Der c B"
+unfolding Der_def
+by auto
+
+lemma Der_Sequ [simp]:
+ shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
+unfolding Der_def Sequ_def
+by (auto simp add: Cons_eq_append_conv)
+
+
+section {* Kleene Star for Languages *}
+
+inductive_set
+ Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
+ for A :: "string set"
+where
+ start[intro]: "[] \<in> A\<star>"
+| step[intro]: "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"
+
+lemma star_cases:
+ shows "A\<star> = {[]} \<union> A ;; A\<star>"
+unfolding Sequ_def
+by (auto) (metis Star.simps)
+
+lemma star_decomp:
+ assumes a: "c # x \<in> A\<star>"
+ shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>"
+using a
+by (induct x\<equiv>"c # x" rule: Star.induct)
+ (auto simp add: append_eq_Cons_conv)
+
+lemma Der_star [simp]:
+ shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
+proof -
+ have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
+ by (simp only: star_cases[symmetric])
+ also have "... = Der c (A ;; A\<star>)"
+ by (simp only: Der_union Der_empty) (simp)
+ also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
+ by simp
+ also have "... = (Der c A) ;; A\<star>"
+ unfolding Sequ_def Der_def
+ by (auto dest: star_decomp)
+ finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
+qed
+
+
+section {* Regular Expressions *}
+
+datatype rexp =
+ ZERO
+| ONE
+| CHAR char
+| SEQ rexp rexp
+| ALT rexp rexp
+| STAR rexp
+
+section {* Semantics of Regular Expressions *}
+
+fun
+ L :: "rexp \<Rightarrow> string set"
+where
+ "L (ZERO) = {}"
+| "L (ONE) = {[]}"
+| "L (CHAR c) = {[c]}"
+| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
+| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
+| "L (STAR r) = (L r)\<star>"
+
+
+section {* Nullable, Derivatives *}
+
+fun
+ nullable :: "rexp \<Rightarrow> bool"
+where
+ "nullable (ZERO) = False"
+| "nullable (ONE) = True"
+| "nullable (CHAR c) = False"
+| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
+| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
+| "nullable (STAR r) = True"
+
+
+fun
+ der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "der c (ZERO) = ZERO"
+| "der c (ONE) = ZERO"
+| "der c (CHAR d) = (if c = d then ONE else ZERO)"
+| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
+| "der c (SEQ r1 r2) =
+ (if nullable r1
+ then ALT (SEQ (der c r1) r2) (der c r2)
+ else SEQ (der c r1) r2)"
+| "der c (STAR r) = SEQ (der c r) (STAR r)"
+
+fun
+ ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "ders [] r = r"
+| "ders (c # s) r = ders s (der c r)"
+
+
+lemma nullable_correctness:
+ shows "nullable r \<longleftrightarrow> [] \<in> (L r)"
+by (induct r) (auto simp add: Sequ_def)
+
+
+lemma der_correctness:
+ shows "L (der c r) = Der c (L r)"
+by (induct r) (simp_all add: nullable_correctness)
+
+
+section {* Values *}
+
+datatype val =
+ Void
+| Char char
+| Seq val val
+| Right val
+| Left val
+| Stars "val list"
+
+
+section {* The string behind a value *}
+
+fun
+ flat :: "val \<Rightarrow> string"
+where
+ "flat (Void) = []"
+| "flat (Char c) = [c]"
+| "flat (Left v) = flat v"
+| "flat (Right v) = flat v"
+| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
+| "flat (Stars []) = []"
+| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))"
+
+lemma flat_Stars [simp]:
+ "flat (Stars vs) = concat (map flat vs)"
+by (induct vs) (auto)
+
+
+section {* Relation between values and regular expressions *}
+
+inductive
+ Prf :: "val \<Rightarrow> rexp \<Rightarrow> bool" ("\<turnstile> _ : _" [100, 100] 100)
+where
+ "\<lbrakk>\<turnstile> v1 : r1; \<turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<turnstile> Seq v1 v2 : SEQ r1 r2"
+| "\<turnstile> v1 : r1 \<Longrightarrow> \<turnstile> Left v1 : ALT r1 r2"
+| "\<turnstile> v2 : r2 \<Longrightarrow> \<turnstile> Right v2 : ALT r1 r2"
+| "\<turnstile> Void : ONE"
+| "\<turnstile> Char c : CHAR c"
+| "\<turnstile> Stars [] : STAR r"
+| "\<lbrakk>\<turnstile> v : r; \<turnstile> Stars vs : STAR r\<rbrakk> \<Longrightarrow> \<turnstile> Stars (v # vs) : STAR r"
+
+inductive_cases Prf_elims:
+ "\<turnstile> v : ZERO"
+ "\<turnstile> v : SEQ r1 r2"
+ "\<turnstile> v : ALT r1 r2"
+ "\<turnstile> v : ONE"
+ "\<turnstile> v : CHAR c"
+(* "\<turnstile> vs : STAR r"*)
+
+lemma Prf_flat_L:
+ assumes "\<turnstile> v : r" shows "flat v \<in> L r"
+using assms
+by(induct v r rule: Prf.induct)
+ (auto simp add: Sequ_def)
+
+lemma Prf_Stars:
+ assumes "\<forall>v \<in> set vs. \<turnstile> v : r"
+ shows "\<turnstile> Stars vs : STAR r"
+using assms
+by(induct vs) (auto intro: Prf.intros)
+
+lemma Star_string:
+ assumes "s \<in> A\<star>"
+ shows "\<exists>ss. concat ss = s \<and> (\<forall>s \<in> set ss. s \<in> A)"
+using assms
+apply(induct rule: Star.induct)
+apply(auto)
+apply(rule_tac x="[]" in exI)
+apply(simp)
+apply(rule_tac x="s1#ss" in exI)
+apply(simp)
+done
+
+
+lemma Star_val:
+ assumes "\<forall>s\<in>set ss. \<exists>v. s = flat v \<and> \<turnstile> v : r"
+ shows "\<exists>vs. concat (map flat vs) = concat ss \<and> (\<forall>v\<in>set vs. \<turnstile> v : r)"
+using assms
+apply(induct ss)
+apply(auto)
+apply (metis empty_iff list.set(1))
+by (metis concat.simps(2) list.simps(9) set_ConsD)
+
+lemma L_flat_Prf1:
+ assumes "\<turnstile> v : r" shows "flat v \<in> L r"
+using assms
+by (induct)(auto simp add: Sequ_def)
+
+lemma L_flat_Prf2:
+ assumes "s \<in> L r" shows "\<exists>v. \<turnstile> v : r \<and> flat v = s"
+using assms
+apply(induct r arbitrary: s)
+apply(auto simp add: Sequ_def intro: Prf.intros)
+using Prf.intros(1) flat.simps(5) apply blast
+using Prf.intros(2) flat.simps(3) apply blast
+using Prf.intros(3) flat.simps(4) apply blast
+apply(subgoal_tac "\<exists>vs::val list. concat (map flat vs) = s \<and> (\<forall>v \<in> set vs. \<turnstile> v : r)")
+apply(auto)[1]
+apply(rule_tac x="Stars vs" in exI)
+apply(simp)
+apply (simp add: Prf_Stars)
+apply(drule Star_string)
+apply(auto)
+apply(rule Star_val)
+apply(auto)
+done
+
+lemma L_flat_Prf:
+ "L(r) = {flat v | v. \<turnstile> v : r}"
+using L_flat_Prf1 L_flat_Prf2 by blast
+
+
+section {* Sulzmann and Lu functions *}
+
+fun
+ mkeps :: "rexp \<Rightarrow> val"
+where
+ "mkeps(ONE) = Void"
+| "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)"
+| "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))"
+| "mkeps(STAR r) = Stars []"
+
+fun injval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
+where
+ "injval (CHAR d) c Void = Char d"
+| "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)"
+| "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)"
+| "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2"
+| "injval (SEQ r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2"
+| "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
+| "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)"
+
+
+section {* Mkeps, injval *}
+
+lemma mkeps_nullable:
+ assumes "nullable(r)"
+ shows "\<turnstile> mkeps r : r"
+using assms
+by (induct rule: nullable.induct)
+ (auto intro: Prf.intros)
+
+lemma mkeps_flat:
+ assumes "nullable(r)"
+ shows "flat (mkeps r) = []"
+using assms
+by (induct rule: nullable.induct) (auto)
+
+
+lemma Prf_injval:
+ assumes "\<turnstile> v : der c r"
+ shows "\<turnstile> (injval r c v) : r"
+using assms
+apply(induct r arbitrary: c v rule: rexp.induct)
+apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits)
+(* STAR *)
+apply(rotate_tac 2)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+apply(auto)
+apply (metis Prf.intros(6) Prf.intros(7))
+by (metis Prf.intros(7))
+
+lemma Prf_injval_flat:
+ assumes "\<turnstile> v : der c r"
+ shows "flat (injval r c v) = c # (flat v)"
+using assms
+apply(induct arbitrary: v rule: der.induct)
+apply(auto elim!: Prf_elims split: if_splits)
+apply(metis mkeps_flat)
+apply(rotate_tac 2)
+apply(erule Prf.cases)
+apply(simp_all)[7]
+done
+
+
+
+section {* Our Alternative Posix definition *}
+
+inductive
+ Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
+where
+ Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
+| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
+| Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
+| Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
+| Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
+ \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r1 \<and> s\<^sub>4 \<in> L r2)\<rbrakk> \<Longrightarrow>
+ (s1 @ s2) \<in> (SEQ r1 r2) \<rightarrow> (Seq v1 v2)"
+| Posix_STAR1: "\<lbrakk>s1 \<in> r \<rightarrow> v; s2 \<in> STAR r \<rightarrow> Stars vs; flat v \<noteq> [];
+ \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))\<rbrakk>
+ \<Longrightarrow> (s1 @ s2) \<in> STAR r \<rightarrow> Stars (v # vs)"
+| Posix_STAR2: "[] \<in> STAR r \<rightarrow> Stars []"
+
+inductive_cases Posix_elims:
+ "s \<in> ZERO \<rightarrow> v"
+ "s \<in> ONE \<rightarrow> v"
+ "s \<in> CHAR c \<rightarrow> v"
+ "s \<in> ALT r1 r2 \<rightarrow> v"
+ "s \<in> SEQ r1 r2 \<rightarrow> v"
+ "s \<in> STAR r \<rightarrow> v"
+
+lemma Posix1:
+ assumes "s \<in> r \<rightarrow> v"
+ shows "s \<in> L r" "flat v = s"
+using assms
+by (induct s r v rule: Posix.induct)
+ (auto simp add: Sequ_def)
+
+
+lemma Posix1a:
+ assumes "s \<in> r \<rightarrow> v"
+ shows "\<turnstile> v : r"
+using assms
+by (induct s r v rule: Posix.induct)(auto intro: Prf.intros)
+
+
+lemma Posix_mkeps:
+ assumes "nullable r"
+ shows "[] \<in> r \<rightarrow> mkeps r"
+using assms
+apply(induct r rule: nullable.induct)
+apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def)
+apply(subst append.simps(1)[symmetric])
+apply(rule Posix.intros)
+apply(auto)
+done
+
+
+lemma Posix_determ:
+ assumes "s \<in> r \<rightarrow> v1" "s \<in> r \<rightarrow> v2"
+ shows "v1 = v2"
+using assms
+proof (induct s r v1 arbitrary: v2 rule: Posix.induct)
+ case (Posix_ONE v2)
+ have "[] \<in> ONE \<rightarrow> v2" by fact
+ then show "Void = v2" by cases auto
+next
+ case (Posix_CHAR c v2)
+ have "[c] \<in> CHAR c \<rightarrow> v2" by fact
+ then show "Char c = v2" by cases auto
+next
+ case (Posix_ALT1 s r1 v r2 v2)
+ have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
+ moreover
+ have "s \<in> r1 \<rightarrow> v" by fact
+ then have "s \<in> L r1" by (simp add: Posix1)
+ ultimately obtain v' where eq: "v2 = Left v'" "s \<in> r1 \<rightarrow> v'" by cases auto
+ moreover
+ have IH: "\<And>v2. s \<in> r1 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
+ ultimately have "v = v'" by simp
+ then show "Left v = v2" using eq by simp
+next
+ case (Posix_ALT2 s r2 v r1 v2)
+ have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
+ moreover
+ have "s \<notin> L r1" by fact
+ ultimately obtain v' where eq: "v2 = Right v'" "s \<in> r2 \<rightarrow> v'"
+ by cases (auto simp add: Posix1)
+ moreover
+ have IH: "\<And>v2. s \<in> r2 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
+ ultimately have "v = v'" by simp
+ then show "Right v = v2" using eq by simp
+next
+ case (Posix_SEQ s1 r1 v1 s2 r2 v2 v')
+ have "(s1 @ s2) \<in> SEQ r1 r2 \<rightarrow> v'"
+ "s1 \<in> r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by fact+
+ then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \<in> r1 \<rightarrow> v1'" "s2 \<in> r2 \<rightarrow> v2'"
+ apply(cases) apply (auto simp add: append_eq_append_conv2)
+ using Posix1(1) by fastforce+
+ moreover
+ have IHs: "\<And>v1'. s1 \<in> r1 \<rightarrow> v1' \<Longrightarrow> v1 = v1'"
+ "\<And>v2'. s2 \<in> r2 \<rightarrow> v2' \<Longrightarrow> v2 = v2'" by fact+
+ ultimately show "Seq v1 v2 = v'" by simp
+next
+ case (Posix_STAR1 s1 r v s2 vs v2)
+ have "(s1 @ s2) \<in> STAR r \<rightarrow> v2"
+ "s1 \<in> r \<rightarrow> v" "s2 \<in> STAR r \<rightarrow> Stars vs" "flat v \<noteq> []"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))" by fact+
+ then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \<in> r \<rightarrow> v'" "s2 \<in> (STAR r) \<rightarrow> (Stars vs')"
+ apply(cases) apply (auto simp add: append_eq_append_conv2)
+ using Posix1(1) apply fastforce
+ apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2)
+ using Posix1(2) by blast
+ moreover
+ have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
+ "\<And>v2. s2 \<in> STAR r \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
+ ultimately show "Stars (v # vs) = v2" by auto
+next
+ case (Posix_STAR2 r v2)
+ have "[] \<in> STAR r \<rightarrow> v2" by fact
+ then show "Stars [] = v2" by cases (auto simp add: Posix1)
+qed
+
+
+lemma Posix_injval:
+ assumes "s \<in> (der c r) \<rightarrow> v"
+ shows "(c # s) \<in> r \<rightarrow> (injval r c v)"
+using assms
+proof(induct r arbitrary: s v rule: rexp.induct)
+ case ZERO
+ have "s \<in> der c ZERO \<rightarrow> v" by fact
+ then have "s \<in> ZERO \<rightarrow> v" by simp
+ then have "False" by cases
+ then show "(c # s) \<in> ZERO \<rightarrow> (injval ZERO c v)" by simp
+next
+ case ONE
+ have "s \<in> der c ONE \<rightarrow> v" by fact
+ then have "s \<in> ZERO \<rightarrow> v" by simp
+ then have "False" by cases
+ then show "(c # s) \<in> ONE \<rightarrow> (injval ONE c v)" by simp
+next
+ case (CHAR d)
+ consider (eq) "c = d" | (ineq) "c \<noteq> d" by blast
+ then show "(c # s) \<in> (CHAR d) \<rightarrow> (injval (CHAR d) c v)"
+ proof (cases)
+ case eq
+ have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
+ then have "s \<in> ONE \<rightarrow> v" using eq by simp
+ then have eqs: "s = [] \<and> v = Void" by cases simp
+ show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" using eq eqs
+ by (auto intro: Posix.intros)
+ next
+ case ineq
+ have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
+ then have "s \<in> ZERO \<rightarrow> v" using ineq by simp
+ then have "False" by cases
+ then show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" by simp
+ qed
+next
+ case (ALT r1 r2)
+ have IH1: "\<And>s v. s \<in> der c r1 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r1 \<rightarrow> injval r1 c v" by fact
+ have IH2: "\<And>s v. s \<in> der c r2 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r2 \<rightarrow> injval r2 c v" by fact
+ have "s \<in> der c (ALT r1 r2) \<rightarrow> v" by fact
+ then have "s \<in> ALT (der c r1) (der c r2) \<rightarrow> v" by simp
+ then consider (left) v' where "v = Left v'" "s \<in> der c r1 \<rightarrow> v'"
+ | (right) v' where "v = Right v'" "s \<notin> L (der c r1)" "s \<in> der c r2 \<rightarrow> v'"
+ by cases auto
+ then show "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c v"
+ proof (cases)
+ case left
+ have "s \<in> der c r1 \<rightarrow> v'" by fact
+ then have "(c # s) \<in> r1 \<rightarrow> injval r1 c v'" using IH1 by simp
+ then have "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c (Left v')" by (auto intro: Posix.intros)
+ then show "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c v" using left by simp
+ next
+ case right
+ have "s \<notin> L (der c r1)" by fact
+ then have "c # s \<notin> L r1" by (simp add: der_correctness Der_def)
+ moreover
+ have "s \<in> der c r2 \<rightarrow> v'" by fact
+ then have "(c # s) \<in> r2 \<rightarrow> injval r2 c v'" using IH2 by simp
+ ultimately have "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c (Right v')"
+ by (auto intro: Posix.intros)
+ then show "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c v" using right by simp
+ qed
+next
+ case (SEQ r1 r2)
+ have IH1: "\<And>s v. s \<in> der c r1 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r1 \<rightarrow> injval r1 c v" by fact
+ have IH2: "\<And>s v. s \<in> der c r2 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r2 \<rightarrow> injval r2 c v" by fact
+ have "s \<in> der c (SEQ r1 r2) \<rightarrow> v" by fact
+ then consider
+ (left_nullable) v1 v2 s1 s2 where
+ "v = Left (Seq v1 v2)" "s = s1 @ s2"
+ "s1 \<in> der c r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2" "nullable r1"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)"
+ | (right_nullable) v1 s1 s2 where
+ "v = Right v1" "s = s1 @ s2"
+ "s \<in> der c r2 \<rightarrow> v1" "nullable r1" "s1 @ s2 \<notin> L (SEQ (der c r1) r2)"
+ | (not_nullable) v1 v2 s1 s2 where
+ "v = Seq v1 v2" "s = s1 @ s2"
+ "s1 \<in> der c r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2" "\<not>nullable r1"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)"
+ by (force split: if_splits elim!: Posix_elims simp add: Sequ_def der_correctness Der_def)
+ then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v"
+ proof (cases)
+ case left_nullable
+ have "s1 \<in> der c r1 \<rightarrow> v1" by fact
+ then have "(c # s1) \<in> r1 \<rightarrow> injval r1 c v1" using IH1 by simp
+ moreover
+ have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)" by fact
+ then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (c # s1) @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by (simp add: der_correctness Der_def)
+ ultimately have "((c # s1) @ s2) \<in> SEQ r1 r2 \<rightarrow> Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros)
+ then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v" using left_nullable by simp
+ next
+ case right_nullable
+ have "nullable r1" by fact
+ then have "[] \<in> r1 \<rightarrow> (mkeps r1)" by (rule Posix_mkeps)
+ moreover
+ have "s \<in> der c r2 \<rightarrow> v1" by fact
+ then have "(c # s) \<in> r2 \<rightarrow> (injval r2 c v1)" using IH2 by simp
+ moreover
+ have "s1 @ s2 \<notin> L (SEQ (der c r1) r2)" by fact
+ then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = c # s \<and> [] @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" using right_nullable
+ by(auto simp add: der_correctness Der_def append_eq_Cons_conv Sequ_def)
+ ultimately have "([] @ (c # s)) \<in> SEQ r1 r2 \<rightarrow> Seq (mkeps r1) (injval r2 c v1)"
+ by(rule Posix.intros)
+ then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v" using right_nullable by simp
+ next
+ case not_nullable
+ have "s1 \<in> der c r1 \<rightarrow> v1" by fact
+ then have "(c # s1) \<in> r1 \<rightarrow> injval r1 c v1" using IH1 by simp
+ moreover
+ have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)" by fact
+ then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (c # s1) @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by (simp add: der_correctness Der_def)
+ ultimately have "((c # s1) @ s2) \<in> SEQ r1 r2 \<rightarrow> Seq (injval r1 c v1) v2" using not_nullable
+ by (rule_tac Posix.intros) (simp_all)
+ then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v" using not_nullable by simp
+ qed
+next
+ case (STAR r)
+ have IH: "\<And>s v. s \<in> der c r \<rightarrow> v \<Longrightarrow> (c # s) \<in> r \<rightarrow> injval r c v" by fact
+ have "s \<in> der c (STAR r) \<rightarrow> v" by fact
+ then consider
+ (cons) v1 vs s1 s2 where
+ "v = Seq v1 (Stars vs)" "s = s1 @ s2"
+ "s1 \<in> der c r \<rightarrow> v1" "s2 \<in> (STAR r) \<rightarrow> (Stars vs)"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r) \<and> s\<^sub>4 \<in> L (STAR r))"
+ apply(auto elim!: Posix_elims(1-5) simp add: der_correctness Der_def intro: Posix.intros)
+ apply(rotate_tac 3)
+ apply(erule_tac Posix_elims(6))
+ apply (simp add: Posix.intros(6))
+ using Posix.intros(7) by blast
+ then show "(c # s) \<in> STAR r \<rightarrow> injval (STAR r) c v"
+ proof (cases)
+ case cons
+ have "s1 \<in> der c r \<rightarrow> v1" by fact
+ then have "(c # s1) \<in> r \<rightarrow> injval r c v1" using IH by simp
+ moreover
+ have "s2 \<in> STAR r \<rightarrow> Stars vs" by fact
+ moreover
+ have "(c # s1) \<in> r \<rightarrow> injval r c v1" by fact
+ then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
+ then have "flat (injval r c v1) \<noteq> []" by simp
+ moreover
+ have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r) \<and> s\<^sub>4 \<in> L (STAR r))" by fact
+ then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (c # s1) @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))"
+ by (simp add: der_correctness Der_def)
+ ultimately
+ have "((c # s1) @ s2) \<in> STAR r \<rightarrow> Stars (injval r c v1 # vs)" by (rule Posix.intros)
+ then show "(c # s) \<in> STAR r \<rightarrow> injval (STAR r) c v" using cons by(simp)
+ qed
+qed
+
+
+section {* The Lexer by Sulzmann and Lu *}
+
+fun
+ lexer :: "rexp \<Rightarrow> string \<Rightarrow> val option"
+where
+ "lexer r [] = (if nullable r then Some(mkeps r) else None)"
+| "lexer r (c#s) = (case (lexer (der c r) s) of
+ None \<Rightarrow> None
+ | Some(v) \<Rightarrow> Some(injval r c v))"
+
+
+lemma lexer_correct_None:
+ shows "s \<notin> L r \<longleftrightarrow> lexer r s = None"
+using assms
+apply(induct s arbitrary: r)
+apply(simp add: nullable_correctness)
+apply(drule_tac x="der a r" in meta_spec)
+apply(auto simp add: der_correctness Der_def)
+done
+
+lemma lexer_correct_Some:
+ shows "s \<in> L r \<longleftrightarrow> (\<exists>v. lexer r s = Some(v) \<and> s \<in> r \<rightarrow> v)"
+using assms
+apply(induct s arbitrary: r)
+apply(auto simp add: Posix_mkeps nullable_correctness)[1]
+apply(drule_tac x="der a r" in meta_spec)
+apply(simp add: der_correctness Der_def)
+apply(rule iffI)
+apply(auto intro: Posix_injval simp add: Posix1(1))
+done
+
+
+end
\ No newline at end of file
--- a/thys/Paper/Paper.thy Tue May 17 14:28:22 2016 +0100
+++ b/thys/Paper/Paper.thy Wed May 18 15:57:46 2016 +0100
@@ -97,7 +97,7 @@
derivatives is that they are neatly expressible in any functional language,
and easily definable and reasoned about in theorem provers---the definitions
just consist of inductive datatypes and simple recursive functions. A
-completely formalised correctness proof of this matcher in for example HOL4
+mechanised correctness proof of Brzozowski's matcher in for example HOL4
has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in Isabelle/HOL is part
of the work by Krauss and Nipkow \cite{Krauss2011}. And another one in Coq is given
by Coquand and Siles \cite{Coquand2012}.
@@ -135,7 +135,7 @@
that can match determines the token.
\end{itemize}
-\noindent Consider for example @{text "r\<^bsub>key\<^esub>"} recognising keywords
+\noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords
such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"}
recognising identifiers (say, a single character followed by
characters or numbers). Then we can form the regular expression
@@ -269,10 +269,15 @@
"s\<^sub>1 @ s\<^sub>2"} is in the star of this language. It will also be convenient
to use the following notion of a \emph{semantic derivative} (or \emph{left
quotient}) of a language defined as
- @{thm (lhs) Der_def} $\dn$ @{thm (rhs) Der_def}.
+ %
+ \begin{center}
+ @{thm Der_def}\;.
+ \end{center}
+
+ \noindent
For semantic derivatives we have the following equations (for example
mechanically proved in \cite{Krauss2011}):
-
+ %
\begin{equation}\label{SemDer}
\begin{array}{lcl}
@{thm (lhs) Der_null} & \dn & @{thm (rhs) Der_null}\\
@@ -322,9 +327,6 @@
\begin{center}
\begin{tabular}{lcl}
@{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\
- \end{tabular}
- \hspace{20mm}
- \begin{tabular}{lcl}
@{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\
\end{tabular}
\end{center}
@@ -405,10 +407,10 @@
\begin{tabular}{c}
\\[-8mm]
@{thm[mode=Axiom] Prf.intros(4)} \qquad
- @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[3mm]
+ @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm]
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad
- @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[3mm]
- @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\\[3mm]
+ @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm]
+ @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\\[4mm]
@{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad
@{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}
\end{tabular}
@@ -472,7 +474,7 @@
left to right) is \Brz's matcher building successive derivatives. If the
last regular expression is @{term nullable}, then the functions of the
second phase are called (the top-down and right-to-left arrows): first
-@{term mkeps} calculates a value witnessing
+@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
how the empty string has been recognised by @{term "r\<^sub>4"}. After
that the function @{term inj} ``injects back'' the characters of the string into
the values.
@@ -502,12 +504,12 @@
The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is
the construction of a value for how @{term "r\<^sub>1"} can match the
string @{term "[a,b,c]"} from the value how the last derivative, @{term
- "r\<^sub>4"} in Fig~\ref{Sulz}, can match the empty string. Sulzmann and
+ "r\<^sub>4"} in Fig.~\ref{Sulz}, can match the empty string. Sulzmann and
Lu achieve this by stepwise ``injecting back'' the characters into the
values thus inverting the operation of building derivatives, but on the level
of values. The corresponding function, called @{term inj}, takes three
arguments, a regular expression, a character and a value. For example in
- the first (or right-most) @{term inj}-step in Fig~\ref{Sulz} the regular
+ the first (or right-most) @{term inj}-step in Fig.~\ref{Sulz} the regular
expression @{term "r\<^sub>3"}, the character @{term c} from the last
derivative step and @{term "v\<^sub>4"}, which is the value corresponding
to the derivative regular expression @{term "r\<^sub>4"}. The result is
@@ -538,7 +540,7 @@
might be instructive to look first at the three sequence cases (clauses
(4)--(6)). In each case we need to construct an ``injected value'' for
@{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term
- "Seq DUMMY DUMMY"}. Recall the clause of the @{text derivative}-function
+ "Seq DUMMY DUMMY"}\,. Recall the clause of the @{text derivative}-function
for sequence regular expressions:
\begin{center}
@@ -674,7 +676,7 @@
"PS"}). The first two premises state that @{term "v\<^sub>1"} and @{term "v\<^sub>2"}
are the POSIX values for @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"}
respectively. Consider now the third premise and note that the POSIX value
- of this rule should match the string @{term "s\<^sub>1 @ s\<^sub>2"}. According to the
+ of this rule should match the string \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}}. According to the
longest match rule, we want that the @{term "s\<^sub>1"} is the longest initial
split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} such that @{term "s\<^sub>2"} is still recognised
by @{term "r\<^sub>2"}. Let us assume, contrary to the third premise, that there
@@ -685,7 +687,7 @@
matched by @{term "r\<^sub>2"}. In this case @{term "s\<^sub>1"} would \emph{not} be the
longest initial split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} and therefore @{term "Seq v\<^sub>1
v\<^sub>2"} cannot be a POSIX value for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}.
- The main point is that this side-condition ensures the longest
+ The main point is that our side-condition ensures the longest
match rule is satisfied.
A similar condition is imposed on the POSIX value in the @{text
@@ -716,8 +718,10 @@
\end{lemma}
\begin{proof}
+ By induction on @{text r}. We explain two cases.
- By induction on @{text r}. Suppose @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are
+ \begin{itemize}
+ \item[$\bullet$] Case @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are
two subcases, namely @{text "(a)"} \mbox{@{term "v = Left v'"}} and @{term
"s \<in> der c r\<^sub>1 \<rightarrow> v'"}; and @{text "(b)"} @{term "v = Right v'"}, @{term
"s \<notin> L (der c r\<^sub>1)"} and @{term "s \<in> der c r\<^sub>2 \<rightarrow> v'"}. In @{text "(a)"} we
@@ -728,7 +732,7 @@
Prop.~\ref{derprop}(2) in order to infer @{term "c # s \<notin> L r\<^sub>1"} from @{term
"s \<notin> L (der c r\<^sub>1)"}.
- Suppose @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases:
+ \item[$\bullet$] Case @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases:
\begin{quote}
\begin{description}
@@ -768,6 +772,7 @@
c v\<^sub>1) \<noteq> []"}. This follows from @{term "(c # s\<^sub>1)
\<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} (which in turn follows from @{term "s\<^sub>1 \<in> der c
r\<^sub>1 \<rightarrow> v\<^sub>1"} and the induction hypothesis).\qed
+ \end{itemize}
\end{proof}
\noindent
@@ -787,11 +792,14 @@
By induction on @{term s} using Lem.~\ref{lemmkeps} and \ref{Posix2}.\qed
\end{proof}
- \noindent This concludes our correctness proof. Note that we have not
- changed the algorithm of Sulzmann and Lu,\footnote{All deviations we
- introduced are harmless.} but introduced our own specification for what a
- correct result---a POSIX value---should be. A strong point in favour of
- Sulzmann and Lu's algorithm is that it can be extended in various ways.
+ \noindent In (2) we further know by Thm.~\ref{posixdeterm} that the
+ value returned by the lexer must be unique. This concludes our
+ correctness proof. Note that we have not changed the algorithm of
+ Sulzmann and Lu,\footnote{All deviations we introduced are
+ harmless.} but introduced our own specification for what a correct
+ result---a POSIX value---should be. A strong point in favour of
+ Sulzmann and Lu's algorithm is that it can be extended in various
+ ways.
*}
@@ -843,9 +851,7 @@
algorithms considerably, as noted in \cite{Sulzmann2014}. One of the
advantages of having a simple specification and correctness proof is that
the latter can be refined to prove the correctness of such simplification
- steps.
-
- While the simplification of regular expressions according to
+ steps. While the simplification of regular expressions according to
rules like
\begin{equation}\label{Simpl}
@@ -930,7 +936,7 @@
@{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness
of @{term "slexer"}, we need to show that simplification preserves the language
and simplification preserves our POSIX relation once the value is rectified
- (recall @{const "simp"} generates a regular expression / rectification function pair):
+ (recall @{const "simp"} generates a (regular expression, rectification function) pair):
\begin{lemma}\mbox{}\smallskip\\\label{slexeraux}
\begin{tabular}{ll}
@@ -1108,7 +1114,7 @@
Although they do not give an explicit proof of the transitivity property,
they give a closely related property about the existence of maximal
- elements. They state that this can be verified by an induction on $r$. We
+ elements. They state that this can be verified by an induction on @{term r}. We
disagree with this as we shall show next in case of transitivity. The case
where the reasoning breaks down is the sequence case, say @{term "SEQ r\<^sub>1 r\<^sub>2"}.
The induction hypotheses in this case are
--- a/thys/README Tue May 17 14:28:22 2016 +0100
+++ b/thys/README Wed May 18 15:57:46 2016 +0100
@@ -1,9 +1,10 @@
Theories:
=========
- ReStar.thy
+ Lexer.thy
+ Simplifying.thy
-The repository can be checked using Isabelle 2014.
+The repository can be checked using Isabelle 2016.
isabelle build -c -v -d . Lex
--- a/thys/ROOT Tue May 17 14:28:22 2016 +0100
+++ b/thys/ROOT Wed May 18 15:57:46 2016 +0100
@@ -1,6 +1,6 @@
session "Lex" = HOL +
theories [document = false]
- "ReStar"
+ "Lexer"
"Simplifying"
"Sulzmann"
--- a/thys/ReStar.thy Tue May 17 14:28:22 2016 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,639 +0,0 @@
-
-theory ReStar
- imports "Main"
-begin
-
-
-section {* Sequential Composition of Languages *}
-
-definition
- Sequ :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
-where
- "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
-
-text {* Two Simple Properties about Sequential Composition *}
-
-lemma seq_empty [simp]:
- shows "A ;; {[]} = A"
- and "{[]} ;; A = A"
-by (simp_all add: Sequ_def)
-
-lemma seq_null [simp]:
- shows "A ;; {} = {}"
- and "{} ;; A = {}"
-by (simp_all add: Sequ_def)
-
-
-section {* Semantic Derivative (Left Quotient) of Languages *}
-
-definition
- Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
-where
- "Der c A \<equiv> {s. c # s \<in> A}"
-
-lemma Der_null [simp]:
- shows "Der c {} = {}"
-unfolding Der_def
-by auto
-
-lemma Der_empty [simp]:
- shows "Der c {[]} = {}"
-unfolding Der_def
-by auto
-
-lemma Der_char [simp]:
- shows "Der c {[d]} = (if c = d then {[]} else {})"
-unfolding Der_def
-by auto
-
-lemma Der_union [simp]:
- shows "Der c (A \<union> B) = Der c A \<union> Der c B"
-unfolding Der_def
-by auto
-
-lemma Der_Sequ [simp]:
- shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
-unfolding Der_def Sequ_def
-by (auto simp add: Cons_eq_append_conv)
-
-
-section {* Kleene Star for Languages *}
-
-inductive_set
- Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
- for A :: "string set"
-where
- start[intro]: "[] \<in> A\<star>"
-| step[intro]: "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"
-
-lemma star_cases:
- shows "A\<star> = {[]} \<union> A ;; A\<star>"
-unfolding Sequ_def
-by (auto) (metis Star.simps)
-
-lemma star_decomp:
- assumes a: "c # x \<in> A\<star>"
- shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>"
-using a
-by (induct x\<equiv>"c # x" rule: Star.induct)
- (auto simp add: append_eq_Cons_conv)
-
-lemma Der_star [simp]:
- shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
-proof -
- have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
- by (simp only: star_cases[symmetric])
- also have "... = Der c (A ;; A\<star>)"
- by (simp only: Der_union Der_empty) (simp)
- also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
- by simp
- also have "... = (Der c A) ;; A\<star>"
- unfolding Sequ_def Der_def
- by (auto dest: star_decomp)
- finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
-qed
-
-
-section {* Regular Expressions *}
-
-datatype rexp =
- ZERO
-| ONE
-| CHAR char
-| SEQ rexp rexp
-| ALT rexp rexp
-| STAR rexp
-
-section {* Semantics of Regular Expressions *}
-
-fun
- L :: "rexp \<Rightarrow> string set"
-where
- "L (ZERO) = {}"
-| "L (ONE) = {[]}"
-| "L (CHAR c) = {[c]}"
-| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
-| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
-| "L (STAR r) = (L r)\<star>"
-
-
-section {* Nullable, Derivatives *}
-
-fun
- nullable :: "rexp \<Rightarrow> bool"
-where
- "nullable (ZERO) = False"
-| "nullable (ONE) = True"
-| "nullable (CHAR c) = False"
-| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
-| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
-| "nullable (STAR r) = True"
-
-
-fun
- der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
-where
- "der c (ZERO) = ZERO"
-| "der c (ONE) = ZERO"
-| "der c (CHAR d) = (if c = d then ONE else ZERO)"
-| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
-| "der c (SEQ r1 r2) =
- (if nullable r1
- then ALT (SEQ (der c r1) r2) (der c r2)
- else SEQ (der c r1) r2)"
-| "der c (STAR r) = SEQ (der c r) (STAR r)"
-
-fun
- ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
-where
- "ders [] r = r"
-| "ders (c # s) r = ders s (der c r)"
-
-
-lemma nullable_correctness:
- shows "nullable r \<longleftrightarrow> [] \<in> (L r)"
-by (induct r) (auto simp add: Sequ_def)
-
-
-lemma der_correctness:
- shows "L (der c r) = Der c (L r)"
-by (induct r) (simp_all add: nullable_correctness)
-
-
-section {* Values *}
-
-datatype val =
- Void
-| Char char
-| Seq val val
-| Right val
-| Left val
-| Stars "val list"
-
-
-section {* The string behind a value *}
-
-fun
- flat :: "val \<Rightarrow> string"
-where
- "flat (Void) = []"
-| "flat (Char c) = [c]"
-| "flat (Left v) = flat v"
-| "flat (Right v) = flat v"
-| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
-| "flat (Stars []) = []"
-| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))"
-
-lemma flat_Stars [simp]:
- "flat (Stars vs) = concat (map flat vs)"
-by (induct vs) (auto)
-
-
-section {* Relation between values and regular expressions *}
-
-inductive
- Prf :: "val \<Rightarrow> rexp \<Rightarrow> bool" ("\<turnstile> _ : _" [100, 100] 100)
-where
- "\<lbrakk>\<turnstile> v1 : r1; \<turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<turnstile> Seq v1 v2 : SEQ r1 r2"
-| "\<turnstile> v1 : r1 \<Longrightarrow> \<turnstile> Left v1 : ALT r1 r2"
-| "\<turnstile> v2 : r2 \<Longrightarrow> \<turnstile> Right v2 : ALT r1 r2"
-| "\<turnstile> Void : ONE"
-| "\<turnstile> Char c : CHAR c"
-| "\<turnstile> Stars [] : STAR r"
-| "\<lbrakk>\<turnstile> v : r; \<turnstile> Stars vs : STAR r\<rbrakk> \<Longrightarrow> \<turnstile> Stars (v # vs) : STAR r"
-
-inductive_cases Prf_elims:
- "\<turnstile> v : ZERO"
- "\<turnstile> v : SEQ r1 r2"
- "\<turnstile> v : ALT r1 r2"
- "\<turnstile> v : ONE"
- "\<turnstile> v : CHAR c"
-(* "\<turnstile> vs : STAR r"*)
-
-lemma Prf_flat_L:
- assumes "\<turnstile> v : r" shows "flat v \<in> L r"
-using assms
-by(induct v r rule: Prf.induct)
- (auto simp add: Sequ_def)
-
-lemma Prf_Stars:
- assumes "\<forall>v \<in> set vs. \<turnstile> v : r"
- shows "\<turnstile> Stars vs : STAR r"
-using assms
-apply(induct vs)
-apply (metis Prf.intros(6))
-by (metis Prf.intros(7) insert_iff set_simps(2))
-
-lemma Star_string:
- assumes "s \<in> A\<star>"
- shows "\<exists>ss. concat ss = s \<and> (\<forall>s \<in> set ss. s \<in> A)"
-using assms
-apply(induct rule: Star.induct)
-apply(auto)
-apply(rule_tac x="[]" in exI)
-apply(simp)
-apply(rule_tac x="s1#ss" in exI)
-apply(simp)
-done
-
-
-lemma Star_val:
- assumes "\<forall>s\<in>set ss. \<exists>v. s = flat v \<and> \<turnstile> v : r"
- shows "\<exists>vs. concat (map flat vs) = concat ss \<and> (\<forall>v\<in>set vs. \<turnstile> v : r)"
-using assms
-apply(induct ss)
-apply(auto)
-apply (metis empty_iff list.set(1))
-by (metis concat.simps(2) list.simps(9) set_ConsD)
-
-lemma L_flat_Prf:
- "L(r) = {flat v | v. \<turnstile> v : r}"
-apply(induct r)
-apply(auto dest: Prf_flat_L simp add: Sequ_def)
-apply (metis Prf.intros(4) flat.simps(1))
-apply (metis Prf.intros(5) flat.simps(2))
-apply (metis Prf.intros(1) flat.simps(5))
-apply (metis Prf.intros(2) flat.simps(3))
-apply (metis Prf.intros(3) flat.simps(4))
-apply(auto elim!: Prf_elims)
-apply(subgoal_tac "\<exists>vs::val list. concat (map flat vs) = x \<and> (\<forall>v \<in> set vs. \<turnstile> v : r)")
-apply(auto)[1]
-apply(rule_tac x="Stars vs" in exI)
-apply(simp)
-apply(rule Prf_Stars)
-apply(simp)
-apply(drule Star_string)
-apply(auto)
-apply(rule Star_val)
-apply(simp)
-done
-
-
-section {* Sulzmann and Lu functions *}
-
-fun
- mkeps :: "rexp \<Rightarrow> val"
-where
- "mkeps(ONE) = Void"
-| "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)"
-| "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))"
-| "mkeps(STAR r) = Stars []"
-
-fun injval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
-where
- "injval (CHAR d) c Void = Char d"
-| "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)"
-| "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)"
-| "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2"
-| "injval (SEQ r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2"
-| "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
-| "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)"
-
-
-section {* Mkeps, injval *}
-
-lemma mkeps_nullable:
- assumes "nullable(r)"
- shows "\<turnstile> mkeps r : r"
-using assms
-by (induct rule: nullable.induct)
- (auto intro: Prf.intros)
-
-lemma mkeps_flat:
- assumes "nullable(r)"
- shows "flat (mkeps r) = []"
-using assms
-by (induct rule: nullable.induct) (auto)
-
-
-lemma Prf_injval:
- assumes "\<turnstile> v : der c r"
- shows "\<turnstile> (injval r c v) : r"
-using assms
-apply(induct r arbitrary: c v rule: rexp.induct)
-apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits)
-(* STAR *)
-apply(rotate_tac 2)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-apply(auto)
-apply (metis Prf.intros(6) Prf.intros(7))
-by (metis Prf.intros(7))
-
-lemma Prf_injval_flat:
- assumes "\<turnstile> v : der c r"
- shows "flat (injval r c v) = c # (flat v)"
-using assms
-apply(induct arbitrary: v rule: der.induct)
-apply(auto elim!: Prf_elims split: if_splits)
-apply(metis mkeps_flat)
-apply(rotate_tac 2)
-apply(erule Prf.cases)
-apply(simp_all)[7]
-done
-
-
-
-section {* Our Alternative Posix definition *}
-
-inductive
- Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
-where
- Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
-| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
-| Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
-| Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
-| Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
- \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r1 \<and> s\<^sub>4 \<in> L r2)\<rbrakk> \<Longrightarrow>
- (s1 @ s2) \<in> (SEQ r1 r2) \<rightarrow> (Seq v1 v2)"
-| Posix_STAR1: "\<lbrakk>s1 \<in> r \<rightarrow> v; s2 \<in> STAR r \<rightarrow> Stars vs; flat v \<noteq> [];
- \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))\<rbrakk>
- \<Longrightarrow> (s1 @ s2) \<in> STAR r \<rightarrow> Stars (v # vs)"
-| Posix_STAR2: "[] \<in> STAR r \<rightarrow> Stars []"
-
-inductive_cases Posix_elims:
- "s \<in> ZERO \<rightarrow> v"
- "s \<in> ONE \<rightarrow> v"
- "s \<in> CHAR c \<rightarrow> v"
- "s \<in> ALT r1 r2 \<rightarrow> v"
- "s \<in> SEQ r1 r2 \<rightarrow> v"
- "s \<in> STAR r \<rightarrow> v"
-
-lemma Posix1:
- assumes "s \<in> r \<rightarrow> v"
- shows "s \<in> L r" "flat v = s"
-using assms
-by (induct s r v rule: Posix.induct)
- (auto simp add: Sequ_def)
-
-
-lemma Posix1a:
- assumes "s \<in> r \<rightarrow> v"
- shows "\<turnstile> v : r"
-using assms
-apply(induct s r v rule: Posix.induct)
-apply(auto intro: Prf.intros)
-done
-
-lemma Posix_mkeps:
- assumes "nullable r"
- shows "[] \<in> r \<rightarrow> mkeps r"
-using assms
-apply(induct r)
-apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def)
-apply(subst append.simps(1)[symmetric])
-apply(rule Posix.intros)
-apply(auto)
-done
-
-
-lemma Posix_determ:
- assumes "s \<in> r \<rightarrow> v1" "s \<in> r \<rightarrow> v2"
- shows "v1 = v2"
-using assms
-proof (induct s r v1 arbitrary: v2 rule: Posix.induct)
- case (Posix_ONE v2)
- have "[] \<in> ONE \<rightarrow> v2" by fact
- then show "Void = v2" by cases auto
-next
- case (Posix_CHAR c v2)
- have "[c] \<in> CHAR c \<rightarrow> v2" by fact
- then show "Char c = v2" by cases auto
-next
- case (Posix_ALT1 s r1 v r2 v2)
- have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
- moreover
- have "s \<in> r1 \<rightarrow> v" by fact
- then have "s \<in> L r1" by (simp add: Posix1)
- ultimately obtain v' where eq: "v2 = Left v'" "s \<in> r1 \<rightarrow> v'" by cases auto
- moreover
- have IH: "\<And>v2. s \<in> r1 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
- ultimately have "v = v'" by simp
- then show "Left v = v2" using eq by simp
-next
- case (Posix_ALT2 s r2 v r1 v2)
- have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
- moreover
- have "s \<notin> L r1" by fact
- ultimately obtain v' where eq: "v2 = Right v'" "s \<in> r2 \<rightarrow> v'"
- by cases (auto simp add: Posix1)
- moreover
- have IH: "\<And>v2. s \<in> r2 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
- ultimately have "v = v'" by simp
- then show "Right v = v2" using eq by simp
-next
- case (Posix_SEQ s1 r1 v1 s2 r2 v2 v')
- have "(s1 @ s2) \<in> SEQ r1 r2 \<rightarrow> v'"
- "s1 \<in> r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2"
- "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by fact+
- then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \<in> r1 \<rightarrow> v1'" "s2 \<in> r2 \<rightarrow> v2'"
- apply(cases) apply (auto simp add: append_eq_append_conv2)
- using Posix1(1) by fastforce+
- moreover
- have IHs: "\<And>v1'. s1 \<in> r1 \<rightarrow> v1' \<Longrightarrow> v1 = v1'"
- "\<And>v2'. s2 \<in> r2 \<rightarrow> v2' \<Longrightarrow> v2 = v2'" by fact+
- ultimately show "Seq v1 v2 = v'" by simp
-next
- case (Posix_STAR1 s1 r v s2 vs v2)
- have "(s1 @ s2) \<in> STAR r \<rightarrow> v2"
- "s1 \<in> r \<rightarrow> v" "s2 \<in> STAR r \<rightarrow> Stars vs" "flat v \<noteq> []"
- "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))" by fact+
- then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \<in> r \<rightarrow> v'" "s2 \<in> (STAR r) \<rightarrow> (Stars vs')"
- apply(cases) apply (auto simp add: append_eq_append_conv2)
- using Posix1(1) apply fastforce
- apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2)
- using Posix1(2) by blast
- moreover
- have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
- "\<And>v2. s2 \<in> STAR r \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
- ultimately show "Stars (v # vs) = v2" by auto
-next
- case (Posix_STAR2 r v2)
- have "[] \<in> STAR r \<rightarrow> v2" by fact
- then show "Stars [] = v2" by cases (auto simp add: Posix1)
-qed
-
-
-lemma Posix_injval:
- assumes "s \<in> (der c r) \<rightarrow> v"
- shows "(c # s) \<in> r \<rightarrow> (injval r c v)"
-using assms
-proof(induct r arbitrary: s v rule: rexp.induct)
- case ZERO
- have "s \<in> der c ZERO \<rightarrow> v" by fact
- then have "s \<in> ZERO \<rightarrow> v" by simp
- then have "False" by cases
- then show "(c # s) \<in> ZERO \<rightarrow> (injval ZERO c v)" by simp
-next
- case ONE
- have "s \<in> der c ONE \<rightarrow> v" by fact
- then have "s \<in> ZERO \<rightarrow> v" by simp
- then have "False" by cases
- then show "(c # s) \<in> ONE \<rightarrow> (injval ONE c v)" by simp
-next
- case (CHAR d)
- consider (eq) "c = d" | (ineq) "c \<noteq> d" by blast
- then show "(c # s) \<in> (CHAR d) \<rightarrow> (injval (CHAR d) c v)"
- proof (cases)
- case eq
- have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
- then have "s \<in> ONE \<rightarrow> v" using eq by simp
- then have eqs: "s = [] \<and> v = Void" by cases simp
- show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" using eq eqs
- by (auto intro: Posix.intros)
- next
- case ineq
- have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
- then have "s \<in> ZERO \<rightarrow> v" using ineq by simp
- then have "False" by cases
- then show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" by simp
- qed
-next
- case (ALT r1 r2)
- have IH1: "\<And>s v. s \<in> der c r1 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r1 \<rightarrow> injval r1 c v" by fact
- have IH2: "\<And>s v. s \<in> der c r2 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r2 \<rightarrow> injval r2 c v" by fact
- have "s \<in> der c (ALT r1 r2) \<rightarrow> v" by fact
- then have "s \<in> ALT (der c r1) (der c r2) \<rightarrow> v" by simp
- then consider (left) v' where "v = Left v'" "s \<in> der c r1 \<rightarrow> v'"
- | (right) v' where "v = Right v'" "s \<notin> L (der c r1)" "s \<in> der c r2 \<rightarrow> v'"
- by cases auto
- then show "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c v"
- proof (cases)
- case left
- have "s \<in> der c r1 \<rightarrow> v'" by fact
- then have "(c # s) \<in> r1 \<rightarrow> injval r1 c v'" using IH1 by simp
- then have "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c (Left v')" by (auto intro: Posix.intros)
- then show "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c v" using left by simp
- next
- case right
- have "s \<notin> L (der c r1)" by fact
- then have "c # s \<notin> L r1" by (simp add: der_correctness Der_def)
- moreover
- have "s \<in> der c r2 \<rightarrow> v'" by fact
- then have "(c # s) \<in> r2 \<rightarrow> injval r2 c v'" using IH2 by simp
- ultimately have "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c (Right v')"
- by (auto intro: Posix.intros)
- then show "(c # s) \<in> ALT r1 r2 \<rightarrow> injval (ALT r1 r2) c v" using right by simp
- qed
-next
- case (SEQ r1 r2)
- have IH1: "\<And>s v. s \<in> der c r1 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r1 \<rightarrow> injval r1 c v" by fact
- have IH2: "\<And>s v. s \<in> der c r2 \<rightarrow> v \<Longrightarrow> (c # s) \<in> r2 \<rightarrow> injval r2 c v" by fact
- have "s \<in> der c (SEQ r1 r2) \<rightarrow> v" by fact
- then consider
- (left_nullable) v1 v2 s1 s2 where
- "v = Left (Seq v1 v2)" "s = s1 @ s2"
- "s1 \<in> der c r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2" "nullable r1"
- "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)"
- | (right_nullable) v1 s1 s2 where
- "v = Right v1" "s = s1 @ s2"
- "s \<in> der c r2 \<rightarrow> v1" "nullable r1" "s1 @ s2 \<notin> L (SEQ (der c r1) r2)"
- | (not_nullable) v1 v2 s1 s2 where
- "v = Seq v1 v2" "s = s1 @ s2"
- "s1 \<in> der c r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2" "\<not>nullable r1"
- "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)"
- by (force split: if_splits elim!: Posix_elims simp add: Sequ_def der_correctness Der_def)
- then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v"
- proof (cases)
- case left_nullable
- have "s1 \<in> der c r1 \<rightarrow> v1" by fact
- then have "(c # s1) \<in> r1 \<rightarrow> injval r1 c v1" using IH1 by simp
- moreover
- have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)" by fact
- then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (c # s1) @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by (simp add: der_correctness Der_def)
- ultimately have "((c # s1) @ s2) \<in> SEQ r1 r2 \<rightarrow> Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros)
- then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v" using left_nullable by simp
- next
- case right_nullable
- have "nullable r1" by fact
- then have "[] \<in> r1 \<rightarrow> (mkeps r1)" by (rule Posix_mkeps)
- moreover
- have "s \<in> der c r2 \<rightarrow> v1" by fact
- then have "(c # s) \<in> r2 \<rightarrow> (injval r2 c v1)" using IH2 by simp
- moreover
- have "s1 @ s2 \<notin> L (SEQ (der c r1) r2)" by fact
- then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = c # s \<and> [] @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" using right_nullable
- by(auto simp add: der_correctness Der_def append_eq_Cons_conv Sequ_def)
- ultimately have "([] @ (c # s)) \<in> SEQ r1 r2 \<rightarrow> Seq (mkeps r1) (injval r2 c v1)"
- by(rule Posix.intros)
- then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v" using right_nullable by simp
- next
- case not_nullable
- have "s1 \<in> der c r1 \<rightarrow> v1" by fact
- then have "(c # s1) \<in> r1 \<rightarrow> injval r1 c v1" using IH1 by simp
- moreover
- have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r1) \<and> s\<^sub>4 \<in> L r2)" by fact
- then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (c # s1) @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by (simp add: der_correctness Der_def)
- ultimately have "((c # s1) @ s2) \<in> SEQ r1 r2 \<rightarrow> Seq (injval r1 c v1) v2" using not_nullable
- by (rule_tac Posix.intros) (simp_all)
- then show "(c # s) \<in> SEQ r1 r2 \<rightarrow> injval (SEQ r1 r2) c v" using not_nullable by simp
- qed
-next
- case (STAR r)
- have IH: "\<And>s v. s \<in> der c r \<rightarrow> v \<Longrightarrow> (c # s) \<in> r \<rightarrow> injval r c v" by fact
- have "s \<in> der c (STAR r) \<rightarrow> v" by fact
- then consider
- (cons) v1 vs s1 s2 where
- "v = Seq v1 (Stars vs)" "s = s1 @ s2"
- "s1 \<in> der c r \<rightarrow> v1" "s2 \<in> (STAR r) \<rightarrow> (Stars vs)"
- "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r) \<and> s\<^sub>4 \<in> L (STAR r))"
- apply(auto elim!: Posix_elims(1-5) simp add: der_correctness Der_def intro: Posix.intros)
- apply(rotate_tac 3)
- apply(erule_tac Posix_elims(6))
- apply (simp add: Posix.intros(6))
- using Posix.intros(7) by blast
- then show "(c # s) \<in> STAR r \<rightarrow> injval (STAR r) c v"
- proof (cases)
- case cons
- have "s1 \<in> der c r \<rightarrow> v1" by fact
- then have "(c # s1) \<in> r \<rightarrow> injval r c v1" using IH by simp
- moreover
- have "s2 \<in> STAR r \<rightarrow> Stars vs" by fact
- moreover
- have "(c # s1) \<in> r \<rightarrow> injval r c v1" by fact
- then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
- then have "flat (injval r c v1) \<noteq> []" by simp
- moreover
- have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L (der c r) \<and> s\<^sub>4 \<in> L (STAR r))" by fact
- then have "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (c # s1) @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))"
- by (simp add: der_correctness Der_def)
- ultimately
- have "((c # s1) @ s2) \<in> STAR r \<rightarrow> Stars (injval r c v1 # vs)" by (rule Posix.intros)
- then show "(c # s) \<in> STAR r \<rightarrow> injval (STAR r) c v" using cons by(simp)
- qed
-qed
-
-
-section {* The Lexer by Sulzmann and Lu *}
-
-fun
- lexer :: "rexp \<Rightarrow> string \<Rightarrow> val option"
-where
- "lexer r [] = (if nullable r then Some(mkeps r) else None)"
-| "lexer r (c#s) = (case (lexer (der c r) s) of
- None \<Rightarrow> None
- | Some(v) \<Rightarrow> Some(injval r c v))"
-
-
-lemma lexer_correct_None:
- shows "s \<notin> L r \<longleftrightarrow> lexer r s = None"
-using assms
-apply(induct s arbitrary: r)
-apply(simp add: nullable_correctness)
-apply(drule_tac x="der a r" in meta_spec)
-apply(auto simp add: der_correctness Der_def)
-done
-
-lemma lexer_correct_Some:
- shows "s \<in> L r \<longleftrightarrow> (\<exists>!v. lexer r s = Some(v) \<and> s \<in> r \<rightarrow> v)"
-using assms
-apply(induct s arbitrary: r)
-apply(auto simp add: Posix_mkeps nullable_correctness)[1]
-apply(drule_tac x="der a r" in meta_spec)
-apply(simp add: der_correctness Der_def)
-apply(rule iffI)
-apply(auto intro: Posix_injval simp add: Posix1(1))
-done
-
-
-end
\ No newline at end of file
--- a/thys/Simplifying.thy Tue May 17 14:28:22 2016 +0100
+++ b/thys/Simplifying.thy Wed May 18 15:57:46 2016 +0100
@@ -1,5 +1,5 @@
theory Simplifying
- imports "ReStar"
+ imports "Lexer"
begin
section {* Lexer including simplifications *}
--- a/thys/Sulzmann.thy Tue May 17 14:28:22 2016 +0100
+++ b/thys/Sulzmann.thy Wed May 18 15:57:46 2016 +0100
@@ -1,6 +1,6 @@
theory Sulzmann
- imports "ReStar"
+ imports "Lexer"
begin
--- a/thys/notes.tex Tue May 17 14:28:22 2016 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,442 +0,0 @@
-\documentclass[11pt]{article}
-\usepackage[left]{lineno}
-\usepackage{amsmath}
-\usepackage{stmaryrd}
-
-\begin{document}
-%%%\linenumbers
-
-\noindent
-We already proved that
-
-\[
-\text{If}\;nullable(r)\;\text{then}\;POSIX\;(mkeps\; r)\;r
-\]
-
-\noindent
-holds. This is essentially the ``base case'' for the
-correctness proof of the algorithm. For the ``induction
-case'' we need the following main theorem, which we are
-currently after:
-
-\begin{center}
-\begin{tabular}{lll}
-If & (*) & $POSIX\;v\;(der\;c\;r)$ and $\vdash v : der\;c\;r$\\
-then & & $POSIX\;(inj\;r\;c\;v)\;r$
-\end{tabular}
-\end{center}
-
-\noindent
-That means a POSIX value $v$ is still $POSIX$ after injection.
-I am not sure whether this theorem is actually true in this
-full generality. Maybe it requires some restrictions.
-
-If we unfold the $POSIX$ definition in the then-part, we
-arrive at
-
-\[
-\forall v'.\;
-\text{if}\;\vdash v' : r\; \text{and} \;|inj\;r\;c\;v| = |v'|\;
-\text{then}\; |inj\;r\;c\;v| \succ_r v'
-\]
-
-\noindent
-which is what we need to prove assuming the if-part (*) in the
-theorem above. Since this is a universally quantified formula,
-we just need to fix a $v'$. We can then prove the implication
-by assuming
-
-\[
-\text{(a)}\;\;\vdash v' : r\;\; \text{and} \;\;
-\text{(b)}\;\;inj\;r\;c\;v = |v'|
-\]
-
-\noindent
-and our goal is
-
-\[
-(goal)\;\;inj\;r\;c\;v \succ_r v'
-\]
-
-\noindent
-There are already two lemmas proved that can transform
-the assumptions (a) and (b) into
-
-\[
-\text{(a*)}\;\;\vdash proj\;r\;c\;v' : der\;c\;r\;\; \text{and} \;\;
-\text{(b*)}\;\;c\,\#\,|v| = |v'|
-\]
-
-\noindent
-Another lemma shows that
-
-\[
-|v'| = c\,\#\,|proj\;r\;c\;v|
-\]
-
-\noindent
-Using (b*) we can therefore infer
-
-\[
-\text{(b**)}\;\;|v| = |proj\;r\;c\;v|
-\]
-
-\noindent
-The main idea of the proof is now a simple instantiation
-of the assumption $POSIX\;v\;(der\;c\;r)$. If we unfold
-the $POSIX$ definition, we get
-
-\[
-\forall v'.\;
-\text{if}\;\vdash v' : der\;c\;r\; \text{and} \;|v| = |v'|\;
-\text{then}\; v \succ_{der\;c\;r}\; v'
-\]
-
-\noindent
-We can instantiate this $v'$ with $proj\;r\;c\;v'$ and can use
-(a*) and (b**) in order to infer
-
-\[
-v \succ_{der\;c\;r}\; proj\;r\;c\;v'
-\]
-
-\noindent
-The point of the side-lemma below is that we can ``add'' an
-$inj$ to both sides to obtain
-
-\[
-inj\;r\;c\;v \succ_r\; inj\;r\;c\;(proj\;r\;c\;v')
-\]
-
-\noindent Finally there is already a lemma proved that shows
-that an injection and projection is the identity, meaning
-
-\[
-inj\;r\;c\;(proj\;r\;c\;v') = v'
-\]
-
-\noindent
-With this we have shown our goal (pending a proof of the side-lemma
-next).
-
-
-\subsection*{Side-Lemma}
-
-A side-lemma needed for the theorem above which might be true, but can also be false, is as follows:
-
-\begin{center}
-\begin{tabular}{lll}
-If & (1) & $v_1 \succ_{der\;c\;r} v_2$,\\
- & (2) & $\vdash v_1 : der\;c\;r$, and\\
- & (3) & $\vdash v_2 : der\;c\;r$ holds,\\
-then & & $inj\;r\;c\;v_1 \succ_r inj\;r\;c\;v_2$ also holds.
-\end{tabular}
-\end{center}
-
-\noindent It essentially states that if one value $v_1$ is
-bigger than $v_2$ then this ordering is preserved under
-injections. This is proved by induction (on the definition of
-$der$\ldots this is very similar to an induction on $r$).
-\bigskip
-
-\noindent
-The case that is still unproved is the sequence case where we
-assume $r = r_1\cdot r_2$ and also $r_1$ being nullable.
-The derivative $der\;c\;r$ is then
-
-\begin{center}
-$der\;c\;r = ((der\;c\;r_1) \cdot r_2) + (der\;c\;r_2)$
-\end{center}
-
-\noindent
-or without the parentheses
-
-\begin{center}
-$der\;c\;r = (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$
-\end{center}
-
-\noindent
-In this case the assumptions are
-
-\begin{center}
-\begin{tabular}{ll}
-(a) & $v_1 \succ_{(der\;c\;r_1) \cdot r_2 + der\;c\;r_2} v_2$\\
-(b) & $\vdash v_1 : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\
-(c) & $\vdash v_2 : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\
-(d) & $nullable(r_1)$
-\end{tabular}
-\end{center}
-
-\noindent
-The induction hypotheses are
-
-\begin{center}
-\begin{tabular}{ll}
-(IH1) & $\forall v_1 v_2.\;v_1 \succ_{der\;c\;r_1} v_2
-\;\wedge\; \vdash v_1 : der\;c\;r_1 \;\wedge\;
-\vdash v_2 : der\;c\;r_1\qquad$\\
- & $\hfill\longrightarrow
- inj\;r_1\;c\;v_1 \succ{r_1} \;inj\;r_1\;c\;v_2$\smallskip\\
-(IH2) & $\forall v_1 v_2.\;v_1 \succ_{der\;c\;r_2} v_2
-\;\wedge\; \vdash v_2 : der\;c\;r_2 \;\wedge\;
-\vdash v_2 : der\;c\;r_2\qquad$\\
- & $\hfill\longrightarrow
- inj\;r_2\;c\;v_1 \succ{r_2} \;inj\;r_2\;c\;v_2$\\
-\end{tabular}
-\end{center}
-
-\noindent
-The goal is
-
-\[
-(goal)\qquad
-inj\; (r_1 \cdot r_2)\;c\;v_1 \succ_{r_1 \cdot r_2}
-inj\; (r_1 \cdot r_2)\;c\;v_2
-\]
-
-\noindent
-If we analyse how (a) could have arisen (that is make a case
-distinction), then we will find four cases:
-
-\begin{center}
-\begin{tabular}{ll}
-LL & $v_1 = Left(w_1)$, $v_2 = Left(w_2)$\\
-LR & $v_1 = Left(w_1)$, $v_2 = Right(w_2)$\\
-RL & $v_1 = Right(w_1)$, $v_2 = Left(w_2)$\\
-RR & $v_1 = Right(w_1)$, $v_2 = Right(w_2)$\\
-\end{tabular}
-\end{center}
-
-
-\noindent
-We have to establish our goal in all four cases.
-
-
-\subsubsection*{Case LR}
-
-The corresponding rule (instantiated) is:
-
-\begin{center}
-\begin{tabular}{c}
-$len\,|w_1| \geq len\,|w_2|$\\
-\hline
-$Left(w_1) \succ_{(der\;c\;r_1) \cdot r_2 + der\;c\;r_2} Right(w_2)$
-\end{tabular}
-\end{center}
-
-\noindent
-This means we can also assume in this case
-
-\[
-(e)\quad len\,|w_1| \geq len\,|w_2|
-\]
-
-\noindent
-which is the premise of the rule above.
-Instantiating $v_1$ and $v_2$ in the assumptions (b) and (c)
-gives us
-
-\begin{center}
-\begin{tabular}{ll}
-(b*) & $\vdash Left(w_1) : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\
-(c*) & $\vdash Right(w_2) : (der\;c\;r_1) \cdot r_2 + der\;c\;r_2$\\
-\end{tabular}
-\end{center}
-
-\noindent Since these are assumptions, we can further analyse
-how they could have arisen according to the rules of $\vdash
-\_ : \_\,$. This gives us two new assumptions
-
-\begin{center}
-\begin{tabular}{ll}
-(b**) & $\vdash w_1 : (der\;c\;r_1) \cdot r_2$\\
-(c**) & $\vdash w_2 : der\;c\;r_2$\\
-\end{tabular}
-\end{center}
-
-\noindent
-Looking at (b**) we can further analyse how this
-judgement could have arisen. This tells us that $w_1$
-must have been a sequence, say $u_1\cdot u_2$, with
-
-\begin{center}
-\begin{tabular}{ll}
-(b***) & $\vdash u_1 : der\;c\;r_1$\\
- & $\vdash u_2 : r_2$\\
-\end{tabular}
-\end{center}
-
-\noindent
-Instantiating the goal means we need to prove
-
-\[
-inj\; (r_1 \cdot r_2)\;c\;(Left(u_1\cdot u_2)) \succ_{r_1 \cdot r_2}
-inj\; (r_1 \cdot r_2)\;c\;(Right(w_2))
-\]
-
-\noindent
-We can simplify this according to the rules of $inj$:
-
-\[
-(inj\; r_1\;c\;u_1)\cdot u_2 \succ_{r_1 \cdot r_2}
-(mkeps\;r_1) \cdot (inj\; r_2\;c\;w_2)
-\]
-
-\noindent
-This is what we need to prove. There are only two rules that
-can be used to prove this judgement:
-
-\begin{center}
-\begin{tabular}{cc}
-\begin{tabular}{c}
-$v_1 = v_1'$\qquad $v_2 \succ_{r_2} v_2'$\\
-\hline
-$v_1\cdot v_2 \succ_{r_1\cdot r_2} v_1'\cdot v_2'$
-\end{tabular} &
-\begin{tabular}{c}
-$v_1 \succ_{r_1} v_1'$\\
-\hline
-$v_1\cdot v_2 \succ_{r_1\cdot r_2} v_1'\cdot v_2'$
-\end{tabular}
-\end{tabular}
-\end{center}
-
-\noindent
-Using the left rule would mean we need to show that
-
-\[
-inj\; r_1\;c\;u_1 = mkeps\;r_1
-\]
-
-\noindent
-but this can never be the case.\footnote{Actually Isabelle
-found this out after analysing its argument. ;o)} Lets assume
-it would be true, then also if we flat each side, it must hold
-that
-
-\[
-|inj\; r_1\;c\;u_1| = |mkeps\;r_1|
-\]
-
-\noindent
-But this leads to a contradiction, because the right-hand side
-will be equal to the empty list, or empty string. This is
-because we assumed $nullable(r_1)$ and there is a lemma
-called \texttt{mkeps\_flat} which shows this. On the other
-side we know by assumption (b***) and lemma \texttt{v4} that
-the other side needs to be a string starting with $c$ (since
-we inject $c$ into $u_1$). The empty string can never be equal
-to something starting with $c$\ldots therefore there is a
-contradiction.
-
-That means we can only use the rule on the right-hand side to
-prove our goal. This implies we need to prove
-
-\[
-inj\; r_1\;c\;u_1 \succ_{r_1} mkeps\;r_1
-\]
-
-
-\subsubsection*{Case RL}
-
-The corresponding rule (instantiated) is:
-
-\begin{center}
-\begin{tabular}{c}
-$len\,|w_1| > len\,|w_2|$\\
-\hline
-$Right(w_1) \succ_{(der\;c\;r_1) \cdot r_2 + der\;c\;r_2} Left(w_2)$
-\end{tabular}
-\end{center}
-
-\subsection*{Test Proof}
-
-We want to prove that
-
-\[
-nullable(r) \;\text{implies}\; POSIX (mkeps\; r)\; r
-\]
-
-\noindent
-We prove this by induction on $r$. There are 5 subcases, and
-only the $r_1 + r_2$-case is interesting. In this case we know the
-induction hypotheses are
-
-\begin{center}
-\begin{tabular}{ll}
-(IMP1) & $nullable(r_1) \;\text{implies}\;
- POSIX (mkeps\; r_1)\; r_1$ \\
-(IMP2) & $nullable(r_2) \;\text{implies}\;
- POSIX (mkeps\; r_2)\; r_2$
-\end{tabular}
-\end{center}
-
-\noindent and know that $nullable(r_1 + r_2)$ holds. From this
-we know that either $nullable(r_1)$ holds or $nullable(r_2)$.
-Let us consider the first case where we know $nullable(r_1)$.
-
-
-\subsection*{Problems in the paper proof}
-
-I cannot verify\ldots
-
-
-
-\newpage
-\section*{Isabelle Cheat-Sheet}
-
-\begin{itemize}
-\item The main notion in Isabelle is a \emph{theorem}.
- Definitions, inductive predicates and recursive
- functions all have underlying theorems. If a definition
- is called \texttt{foo}, then the theorem will be called
- \texttt{foo\_def}. Take a recursive function, say
- \texttt{bar}, it will have a theorem that is called
- \texttt{bar.simps} and will be added to the simplifier.
- That means the simplifier will automatically
- Inductive predicates called \texttt{baz} will be called
- \texttt{baz.intros}. For inductive predicates, there are
- also theorems \texttt{baz.induct} and
- \texttt{baz.cases}.
-
-\item A \emph{goal-state} consists of one or more subgoals. If
- there are \texttt{No more subgoals!} then the theorem is
- proved. Each subgoal is of the form
-
- \[
- \llbracket \ldots{}premises\ldots \rrbracket \Longrightarrow
- conclusion
- \]
-
- \noindent
- where $premises$ and $conclusion$ are formulas of type
- \texttt{bool}.
-
-\item There are three low-level methods for applying one or
- more theorem to a subgoal, called \texttt{rule},
- \texttt{drule} and \texttt{erule}. The first applies a
- theorem to a conclusion of a goal. For example
-
- \[\texttt{apply}(\texttt{rule}\;thm)
- \]
-
- If the conclusion is of the form $\_ \wedge \_$,
- $\_ \longrightarrow \_$ and $\forall\,x. \_$ the
- $thm$ is called
-
- \begin{center}
- \begin{tabular}{lcl}
- $\_ \wedge \_$ & $\Rightarrow$ & $conjI$\\
- $\_ \longrightarrow \_$ & $\Rightarrow$ & $impI$\\
- $\forall\,x.\_$ & $\Rightarrow$ & $allI$
- \end{tabular}
- \end{center}
-
- Many of such rule are called intro-rules and end with
- an ``$I$'', or in case of inductive predicates $\_.intros$.
-
-
-\end{itemize}
-
-
-\end{document}
Binary file thys/paper.pdf has changed