thys/Lexer.thy
changeset 266 fff2e1b40dfc
parent 265 d36be1e356c0
child 268 6746f5e1f1f8
--- a/thys/Lexer.thy	Tue Jul 18 18:39:20 2017 +0100
+++ b/thys/Lexer.thy	Wed Jul 19 14:55:46 2017 +0100
@@ -1,349 +1,10 @@
    
 theory Lexer
-  imports Main 
+  imports Spec 
 begin
 
 
-section {* Sequential Composition of Languages *}
-
-definition
-  Sequ :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
-where 
-  "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
-
-text {* Two Simple Properties about Sequential Composition *}
-
-lemma Sequ_empty_string [simp]:
-  shows "A ;; {[]} = A"
-  and   "{[]} ;; A = A"
-by (simp_all add: Sequ_def)
-
-lemma Sequ_empty [simp]:
-  shows "A ;; {} = {}"
-  and   "{} ;; A = {}"
-by (simp_all add: Sequ_def)
-
-
-section {* Semantic Derivative (Left Quotient) of Languages *}
-
-definition
-  Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
-where
-  "Der c A \<equiv> {s. c # s \<in> A}"
-
-definition
-  Ders :: "string \<Rightarrow> string set \<Rightarrow> string set"
-where
-  "Ders s A \<equiv> {s'. s @ s' \<in> A}"
-
-lemma Der_null [simp]:
-  shows "Der c {} = {}"
-unfolding Der_def
-by auto
-
-lemma Der_empty [simp]:
-  shows "Der c {[]} = {}"
-unfolding Der_def
-by auto
-
-lemma Der_char [simp]:
-  shows "Der c {[d]} = (if c = d then {[]} else {})"
-unfolding Der_def
-by auto
-
-lemma Der_union [simp]:
-  shows "Der c (A \<union> B) = Der c A \<union> Der c B"
-unfolding Der_def
-by auto
-
-lemma Der_Sequ [simp]:
-  shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
-unfolding Der_def Sequ_def
-by (auto simp add: Cons_eq_append_conv)
-
-
-section {* Kleene Star for Languages *}
-
-inductive_set
-  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
-  for A :: "string set"
-where
-  start[intro]: "[] \<in> A\<star>"
-| step[intro]:  "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"
-
-(* Arden's lemma *)
-
-lemma Star_cases:
-  shows "A\<star> = {[]} \<union> A ;; A\<star>"
-unfolding Sequ_def
-by (auto) (metis Star.simps)
-
-lemma Star_decomp: 
-  assumes "c # x \<in> A\<star>" 
-  shows "\<exists>s1 s2. x = s1 @ s2 \<and> c # s1 \<in> A \<and> s2 \<in> A\<star>"
-using assms
-by (induct x\<equiv>"c # x" rule: Star.induct) 
-   (auto simp add: append_eq_Cons_conv)
-
-lemma Star_Der_Sequ: 
-  shows "Der c (A\<star>) \<subseteq> (Der c A) ;; A\<star>"
-unfolding Der_def
-apply(rule subsetI)
-apply(simp)
-unfolding Sequ_def
-apply(simp)
-by(auto simp add: Sequ_def Star_decomp)
-
-
-lemma Der_star [simp]:
-  shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
-proof -    
-  have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"  
-    by (simp only: Star_cases[symmetric])
-  also have "... = Der c (A ;; A\<star>)"
-    by (simp only: Der_union Der_empty) (simp)
-  also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
-    by simp
-  also have "... =  (Der c A) ;; A\<star>"
-    using Star_Der_Sequ by auto
-  finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
-qed
-
-
-section {* Regular Expressions *}
-
-datatype rexp =
-  ZERO
-| ONE
-| CHAR char
-| SEQ rexp rexp
-| ALT rexp rexp
-| STAR rexp
-
-section {* Semantics of Regular Expressions *}
- 
-fun
-  L :: "rexp \<Rightarrow> string set"
-where
-  "L (ZERO) = {}"
-| "L (ONE) = {[]}"
-| "L (CHAR c) = {[c]}"
-| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
-| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
-| "L (STAR r) = (L r)\<star>"
-
-
-section {* Nullable, Derivatives *}
-
-fun
- nullable :: "rexp \<Rightarrow> bool"
-where
-  "nullable (ZERO) = False"
-| "nullable (ONE) = True"
-| "nullable (CHAR c) = False"
-| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
-| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
-| "nullable (STAR r) = True"
-
-
-fun
- der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
-where
-  "der c (ZERO) = ZERO"
-| "der c (ONE) = ZERO"
-| "der c (CHAR d) = (if c = d then ONE else ZERO)"
-| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
-| "der c (SEQ r1 r2) = 
-     (if nullable r1
-      then ALT (SEQ (der c r1) r2) (der c r2)
-      else SEQ (der c r1) r2)"
-| "der c (STAR r) = SEQ (der c r) (STAR r)"
-
-fun 
- ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
-where
-  "ders [] r = r"
-| "ders (c # s) r = ders s (der c r)"
-
-
-lemma nullable_correctness:
-  shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
-by (induct r) (auto simp add: Sequ_def) 
-
-lemma der_correctness:
-  shows "L (der c r) = Der c (L r)"
-by (induct r) (simp_all add: nullable_correctness)
-
-lemma ders_correctness:
-  shows "L (ders s r) = Ders s (L r)"
-apply(induct s arbitrary: r)
-apply(simp_all add: Ders_def der_correctness Der_def)
-done
-
-
-section {* Lemmas about ders *}
-
-lemma ders_ZERO:
-  shows "ders s (ZERO) = ZERO"
-apply(induct s)
-apply(simp_all)
-done
-
-lemma ders_ONE:
-  shows "ders s (ONE) = (if s = [] then ONE else ZERO)"
-apply(induct s)
-apply(simp_all add: ders_ZERO)
-done
-
-lemma ders_CHAR:
-  shows "ders s (CHAR c) = 
-           (if s = [c] then ONE else 
-           (if s = [] then (CHAR c) else ZERO))"
-apply(induct s)
-apply(simp_all add: ders_ZERO ders_ONE)
-done
-
-lemma  ders_ALT:
-  shows "ders s (ALT r1 r2) = ALT (ders s r1) (ders s r2)"
-apply(induct s arbitrary: r1 r2)
-apply(simp_all)
-done
-
-section {* Values *}
-
-datatype val = 
-  Void
-| Char char
-| Seq val val
-| Right val
-| Left val
-| Stars "val list"
-
-
-section {* The string behind a value *}
-
-fun 
-  flat :: "val \<Rightarrow> string"
-where
-  "flat (Void) = []"
-| "flat (Char c) = [c]"
-| "flat (Left v) = flat v"
-| "flat (Right v) = flat v"
-| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
-| "flat (Stars []) = []"
-| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" 
-
-lemma flat_Stars [simp]:
- "flat (Stars vs) = concat (map flat vs)"
-by (induct vs) (auto)
-
-
-section {* Relation between values and regular expressions *}
-
-inductive 
-  Prf :: "val \<Rightarrow> rexp \<Rightarrow> bool" ("\<turnstile> _ : _" [100, 100] 100)
-where
- "\<lbrakk>\<turnstile> v1 : r1; \<turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<turnstile> Seq v1 v2 : SEQ r1 r2"
-| "\<turnstile> v1 : r1 \<Longrightarrow> \<turnstile> Left v1 : ALT r1 r2"
-| "\<turnstile> v2 : r2 \<Longrightarrow> \<turnstile> Right v2 : ALT r1 r2"
-| "\<turnstile> Void : ONE"
-| "\<turnstile> Char c : CHAR c"
-| "\<forall>v \<in> set vs. \<turnstile> v : r \<Longrightarrow> \<turnstile> Stars vs : STAR r"
-
-inductive_cases Prf_elims:
-  "\<turnstile> v : ZERO"
-  "\<turnstile> v : SEQ r1 r2"
-  "\<turnstile> v : ALT r1 r2"
-  "\<turnstile> v : ONE"
-  "\<turnstile> v : CHAR c"
-  "\<turnstile> vs : STAR r"
-
-lemma Star_concat:
-  assumes "\<forall>s \<in> set ss. s \<in> A"  
-  shows "concat ss \<in> A\<star>"
-using assms by (induct ss) (auto)
-
-
-lemma Prf_flat_L:
-  assumes "\<turnstile> v : r" shows "flat v \<in> L r"
-using assms
-by (induct v r rule: Prf.induct)
-   (auto simp add: Sequ_def Star_concat)
-
-lemma Prf_Stars_append:
-  assumes "\<turnstile> Stars vs1 : STAR r" "\<turnstile> Stars vs2 : STAR r"
-  shows "\<turnstile> Stars (vs1 @ vs2) : STAR r"
-using assms
-by (auto intro!: Prf.intros elim!: Prf_elims)
-
-lemma Star_string:
-  assumes "s \<in> A\<star>"
-  shows "\<exists>ss. concat ss = s \<and> (\<forall>s \<in> set ss. s \<in> A)"
-using assms
-apply(induct rule: Star.induct)
-apply(auto)
-apply(rule_tac x="[]" in exI)
-apply(simp)
-apply(rule_tac x="s1#ss" in exI)
-apply(simp)
-done
-
-lemma Star_val:
-  assumes "\<forall>s\<in>set ss. \<exists>v. s = flat v \<and> \<turnstile> v : r"
-  shows "\<exists>vs. concat (map flat vs) = concat ss \<and> (\<forall>v\<in>set vs. \<turnstile> v : r)"
-using assms
-apply(induct ss)
-apply(auto)
-apply (metis empty_iff list.set(1))
-by (metis concat.simps(2) list.simps(9) set_ConsD)
-
-
-lemma L_flat_Prf1:
-  assumes "\<turnstile> v : r" 
-  shows "flat v \<in> L r"
-using assms
-by (induct) (auto simp add: Sequ_def Star_concat)
-
-lemma L_flat_Prf2:
-  assumes "s \<in> L r" 
-  shows "\<exists>v. \<turnstile> v : r \<and> flat v = s"
-using assms
-proof(induct r arbitrary: s)
-  case (STAR r s)
-  have IH: "\<And>s. s \<in> L r \<Longrightarrow> \<exists>v. \<turnstile> v : r \<and> flat v = s" by fact
-  have "s \<in> L (STAR r)" by fact
-  then obtain ss where "concat ss = s" "\<forall>s \<in> set ss. s \<in> L r"
-  using Star_string by auto
-  then obtain vs where "concat (map flat vs) = s" "\<forall>v\<in>set vs. \<turnstile> v : r"
-  using IH Star_val by blast
-  then show "\<exists>v. \<turnstile> v : STAR r \<and> flat v = s"
-  using Prf.intros(6) flat_Stars by blast
-next 
-  case (SEQ r1 r2 s)
-  then show "\<exists>v. \<turnstile> v : SEQ r1 r2 \<and> flat v = s"
-  unfolding Sequ_def L.simps by (fastforce intro: Prf.intros)
-next
-  case (ALT r1 r2 s)
-  then show "\<exists>v. \<turnstile> v : ALT r1 r2 \<and> flat v = s"
-  unfolding L.simps by (fastforce intro: Prf.intros)
-qed (auto intro: Prf.intros)
-
-lemma L_flat_Prf:
-  "L(r) = {flat v | v. \<turnstile> v : r}"
-using L_flat_Prf1 L_flat_Prf2 by blast
-
-(*
-lemma Star_values_exists:
-  assumes "s \<in> (L r)\<star>"
-  shows "\<exists>vs. concat (map flat vs) = s \<and> \<turnstile> Stars vs : STAR r"
-using assms
-apply(drule_tac Star_string)
-apply(auto)
-by (metis L_flat_Prf2 Prf.intros(6) Star_val)
-*)
-
-
-section {* Sulzmann and Lu functions *}
+section {* The Lexer Functions by Sulzmann and Lu  *}
 
 fun 
   mkeps :: "rexp \<Rightarrow> val"
@@ -363,8 +24,17 @@
 | "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
 | "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" 
 
+fun 
+  lexer :: "rexp \<Rightarrow> string \<Rightarrow> val option"
+where
+  "lexer r [] = (if nullable r then Some(mkeps r) else None)"
+| "lexer r (c#s) = (case (lexer (der c r) s) of  
+                    None \<Rightarrow> None
+                  | Some(v) \<Rightarrow> Some(injval r c v))"
 
-section {* Mkeps, injval *}
+
+
+section {* Mkeps, Injval Properties *}
 
 lemma mkeps_nullable:
   assumes "nullable(r)" 
@@ -379,7 +49,6 @@
 using assms
 by (induct rule: nullable.induct) (auto)
 
-
 lemma Prf_injval:
   assumes "\<turnstile> v : der c r" 
   shows "\<turnstile> (injval r c v) : r"
@@ -396,49 +65,9 @@
 apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits)
 done
 
-
-
-section {* Our Alternative Posix definition *}
-
-inductive 
-  Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
-where
-  Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
-| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
-| Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
-| Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
-| Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
-    \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r1 \<and> s\<^sub>4 \<in> L r2)\<rbrakk> \<Longrightarrow> 
-    (s1 @ s2) \<in> (SEQ r1 r2) \<rightarrow> (Seq v1 v2)"
-| Posix_STAR1: "\<lbrakk>s1 \<in> r \<rightarrow> v; s2 \<in> STAR r \<rightarrow> Stars vs; flat v \<noteq> [];
-    \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))\<rbrakk>
-    \<Longrightarrow> (s1 @ s2) \<in> STAR r \<rightarrow> Stars (v # vs)"
-| Posix_STAR2: "[] \<in> STAR r \<rightarrow> Stars []"
-
-inductive_cases Posix_elims:
-  "s \<in> ZERO \<rightarrow> v"
-  "s \<in> ONE \<rightarrow> v"
-  "s \<in> CHAR c \<rightarrow> v"
-  "s \<in> ALT r1 r2 \<rightarrow> v"
-  "s \<in> SEQ r1 r2 \<rightarrow> v"
-  "s \<in> STAR r \<rightarrow> v"
-
-lemma Posix1:
-  assumes "s \<in> r \<rightarrow> v"
-  shows "s \<in> L r" "flat v = s"
-using assms
-by (induct s r v rule: Posix.induct)
-   (auto simp add: Sequ_def)
-
-
-lemma Posix1a:
-  assumes "s \<in> r \<rightarrow> v"
-  shows "\<turnstile> v : r"
-using assms
-apply(induct s r v rule: Posix.induct)
-apply(auto intro!: Prf.intros elim!: Prf_elims)
-done
-
+text {*
+  Mkeps and injval produce, or preserve, Posix values.
+*}
 
 lemma Posix_mkeps:
   assumes "nullable r"
@@ -451,74 +80,6 @@
 apply(auto)
 done
 
-
-lemma Posix_determ:
-  assumes "s \<in> r \<rightarrow> v1" "s \<in> r \<rightarrow> v2"
-  shows "v1 = v2"
-using assms
-proof (induct s r v1 arbitrary: v2 rule: Posix.induct)
-  case (Posix_ONE v2)
-  have "[] \<in> ONE \<rightarrow> v2" by fact
-  then show "Void = v2" by cases auto
-next 
-  case (Posix_CHAR c v2)
-  have "[c] \<in> CHAR c \<rightarrow> v2" by fact
-  then show "Char c = v2" by cases auto
-next 
-  case (Posix_ALT1 s r1 v r2 v2)
-  have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
-  moreover
-  have "s \<in> r1 \<rightarrow> v" by fact
-  then have "s \<in> L r1" by (simp add: Posix1)
-  ultimately obtain v' where eq: "v2 = Left v'" "s \<in> r1 \<rightarrow> v'" by cases auto 
-  moreover
-  have IH: "\<And>v2. s \<in> r1 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
-  ultimately have "v = v'" by simp
-  then show "Left v = v2" using eq by simp
-next 
-  case (Posix_ALT2 s r2 v r1 v2)
-  have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
-  moreover
-  have "s \<notin> L r1" by fact
-  ultimately obtain v' where eq: "v2 = Right v'" "s \<in> r2 \<rightarrow> v'" 
-    by cases (auto simp add: Posix1) 
-  moreover
-  have IH: "\<And>v2. s \<in> r2 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
-  ultimately have "v = v'" by simp
-  then show "Right v = v2" using eq by simp
-next
-  case (Posix_SEQ s1 r1 v1 s2 r2 v2 v')
-  have "(s1 @ s2) \<in> SEQ r1 r2 \<rightarrow> v'" 
-       "s1 \<in> r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2"
-       "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by fact+
-  then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \<in> r1 \<rightarrow> v1'" "s2 \<in> r2 \<rightarrow> v2'"
-  apply(cases) apply (auto simp add: append_eq_append_conv2)
-  using Posix1(1) by fastforce+
-  moreover
-  have IHs: "\<And>v1'. s1 \<in> r1 \<rightarrow> v1' \<Longrightarrow> v1 = v1'"
-            "\<And>v2'. s2 \<in> r2 \<rightarrow> v2' \<Longrightarrow> v2 = v2'" by fact+
-  ultimately show "Seq v1 v2 = v'" by simp
-next
-  case (Posix_STAR1 s1 r v s2 vs v2)
-  have "(s1 @ s2) \<in> STAR r \<rightarrow> v2" 
-       "s1 \<in> r \<rightarrow> v" "s2 \<in> STAR r \<rightarrow> Stars vs" "flat v \<noteq> []"
-       "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))" by fact+
-  then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \<in> r \<rightarrow> v'" "s2 \<in> (STAR r) \<rightarrow> (Stars vs')"
-  apply(cases) apply (auto simp add: append_eq_append_conv2)
-  using Posix1(1) apply fastforce
-  apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2)
-  using Posix1(2) by blast
-  moreover
-  have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
-            "\<And>v2. s2 \<in> STAR r \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
-  ultimately show "Stars (v # vs) = v2" by auto
-next
-  case (Posix_STAR2 r v2)
-  have "[] \<in> STAR r \<rightarrow> v2" by fact
-  then show "Stars [] = v2" by cases (auto simp add: Posix1)
-qed
-
-
 lemma Posix_injval:
   assumes "s \<in> (der c r) \<rightarrow> v"
   shows "(c # s) \<in> r \<rightarrow> (injval r c v)"
@@ -669,15 +230,7 @@
 qed
 
 
-section {* The Lexer by Sulzmann and Lu  *}
-
-fun 
-  lexer :: "rexp \<Rightarrow> string \<Rightarrow> val option"
-where
-  "lexer r [] = (if nullable r then Some(mkeps r) else None)"
-| "lexer r (c#s) = (case (lexer (der c r) s) of  
-                    None \<Rightarrow> None
-                  | Some(v) \<Rightarrow> Some(injval r c v))"
+section {* Lexer Correctness *}
 
 
 lemma lexer_correct_None: