updated Sizebound4
authorChristian Urban <christian.urban@kcl.ac.uk>
Thu, 27 Jan 2022 23:25:26 +0000
changeset 397 e1b74d618f1b
parent 396 cc8e231529fb
child 398 dac6d27c99c6
updated Sizebound4
thys/LexerExt.thy
thys/SpecExt.thy
thys2/Paper/Paper.thy
thys2/Paper/document/root.tex
thys2/SizeBound4.thy
thys2/paper.pdf
--- a/thys/LexerExt.thy	Tue Jan 25 13:12:50 2022 +0000
+++ b/thys/LexerExt.thy	Thu Jan 27 23:25:26 2022 +0000
@@ -17,10 +17,15 @@
 | "mkeps(NTIMES r n) = Stars (replicate n (mkeps r))"
 | "mkeps(FROMNTIMES r n) = Stars (replicate n (mkeps r))"
 | "mkeps(NMTIMES r n m) = Stars (replicate n (mkeps r))"
-  
+| "mkeps(NOT ZERO) = Nt Void"
+| "mkeps(NOT (CH _ )) = Nt Void"
+| "mkeps(NOT (SEQ r1 r2)) = Seq (mkeps (NOT r1)) (mkeps (NOT r1))"
+| "mkeps(NOT (ALT r1 r2)) = (if nullable(r1) then Right (mkeps (NOT r2)) else  (mkeps (NOT r1)))"
+
+
 fun injval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
 where
-  "injval (CHAR d) c Void = Char d"
+  "injval (CH d) c Void = Char d"
 | "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)"
 | "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)"
 | "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2"
@@ -49,9 +54,11 @@
   shows "flat (mkeps r) = []"
 using assms
   apply(induct rule: nullable.induct) 
-         apply(auto)
-  by presburger  
-  
+          apply(auto)
+  apply presburger  
+  apply(case_tac r)
+  apply(auto)
+  sorry
   
 lemma mkeps_nullable:
   assumes "nullable(r)" 
@@ -81,7 +88,7 @@
     apply (simp add: mkeps_flat)
    apply(simp)
   apply(simp)
-done
+  sorry
     
 
 lemma Prf_injval_flat:
@@ -90,7 +97,7 @@
 using assms
 apply(induct arbitrary: v rule: der.induct)
 apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits)
-done
+  sorry
 
 lemma Prf_injval:
   assumes "\<Turnstile> v : der c r" 
@@ -170,7 +177,8 @@
      apply(simp add: Prf_injval_flat)
      apply(simp)
    apply(simp)
-done
+  sorry
+
 
 
 
@@ -186,8 +194,10 @@
 apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def)
 apply(subst append.simps(1)[symmetric])
 apply(rule Posix.intros)
-      apply(auto)
-  done
+   apply(auto)
+  apply(case_tac r)
+  apply(auto)
+  sorry
     
 
 lemma Posix_injval:
@@ -207,22 +217,22 @@
   then have "False" by cases
   then show "(c # s) \<in> ONE \<rightarrow> (injval ONE c v)" by simp
 next 
-  case (CHAR d)
+  case (CH d)
   consider (eq) "c = d" | (ineq) "c \<noteq> d" by blast
-  then show "(c # s) \<in> (CHAR d) \<rightarrow> (injval (CHAR d) c v)"
+  then show "(c # s) \<in> (CH d) \<rightarrow> (injval (CH d) c v)"
   proof (cases)
     case eq
-    have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
+    have "s \<in> der c (CH d) \<rightarrow> v" by fact
     then have "s \<in> ONE \<rightarrow> v" using eq by simp
     then have eqs: "s = [] \<and> v = Void" by cases simp
-    show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" using eq eqs 
+    show "(c # s) \<in> CH d \<rightarrow> injval (CH d) c v" using eq eqs 
     by (auto intro: Posix.intros)
   next
     case ineq
-    have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
+    have "s \<in> der c (CH d) \<rightarrow> v" by fact
     then have "s \<in> ZERO \<rightarrow> v" using ineq by simp
     then have "False" by cases
-    then show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" by simp
+    then show "(c # s) \<in> CH d \<rightarrow> injval (CH d) c v" by simp
   qed
 next
   case (ALT r1 r2)
@@ -319,7 +329,6 @@
     apply(simp)
     apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
     apply(clarify)
-    apply(drule_tac x="v1" in meta_spec)
     apply(drule_tac x="vss" in meta_spec)
     apply(drule_tac x="s1" in meta_spec)
     apply(drule_tac x="s2" in meta_spec)
@@ -401,7 +410,6 @@
     apply(simp)
     apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
     apply(clarify)
-    apply(drule_tac x="v1" in meta_spec)
     apply(drule_tac x="vss" in meta_spec)
     apply(drule_tac x="s1" in meta_spec)
     apply(drule_tac x="s2" in meta_spec)
@@ -454,7 +462,6 @@
     apply(simp)
     apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
     apply(clarify)
-    apply(drule_tac x="v1" in meta_spec)
     apply(drule_tac x="vss" in meta_spec)
     apply(drule_tac x="s1" in meta_spec)
     apply(drule_tac x="s2" in meta_spec)
@@ -544,7 +551,6 @@
     apply(simp)
     apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
     apply(clarify)
-    apply(drule_tac x="v1" in meta_spec)
     apply(drule_tac x="vss" in meta_spec)
     apply(drule_tac x="s1" in meta_spec)
     apply(drule_tac x="s2" in meta_spec)
@@ -618,6 +624,9 @@
           apply(simp)
           done  
       qed    
+   next
+     case (NOT r s v)
+     then show ?case sorry
 qed
 
 section {* Lexer Correctness *}
--- a/thys/SpecExt.thy	Tue Jan 25 13:12:50 2022 +0000
+++ b/thys/SpecExt.thy	Thu Jan 27 23:25:26 2022 +0000
@@ -1,6 +1,6 @@
    
 theory SpecExt
-  imports Main (*"~~/src/HOL/Library/Sublist"*)
+  imports Main "HOL-Library.Sublist" (*"~~/src/HOL/Library/Sublist"*)
 begin
 
 section {* Sequential Composition of Languages *}
@@ -165,10 +165,6 @@
 apply(auto simp add: Sequ_def)
   done
 
-lemma
-  assumes "[] \<in> A" "n \<noteq> 0" "A \<noteq> {}"
-  shows "A \<up> (Suc n) = A \<up> n"
-
 lemma Der_Pow_0:
   shows "Der c (A \<up> 0) = {}"
 by(simp add: Der_def)
@@ -221,7 +217,7 @@
 datatype rexp =
   ZERO
 | ONE
-| CHAR char
+| CH char
 | SEQ rexp rexp
 | ALT rexp rexp
 | STAR rexp
@@ -238,7 +234,7 @@
 where
   "L (ZERO) = {}"
 | "L (ONE) = {[]}"
-| "L (CHAR c) = {[c]}"
+| "L (CH c) = {[c]}"
 | "L (SEQ r1 r2) = (L r1) ;; (L r2)"
 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
 | "L (STAR r) = (L r)\<star>"
@@ -255,7 +251,7 @@
 where
   "nullable (ZERO) = False"
 | "nullable (ONE) = True"
-| "nullable (CHAR c) = False"
+| "nullable (CH c) = False"
 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
 | "nullable (STAR r) = True"
@@ -270,7 +266,7 @@
 where
   "der c (ZERO) = ZERO"
 | "der c (ONE) = ZERO"
-| "der c (CHAR d) = (if c = d then ONE else ZERO)"
+| "der c (CH d) = (if c = d then ONE else ZERO)"
 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
 | "der c (SEQ r1 r2) = 
      (if nullable r1
@@ -340,48 +336,6 @@
   by (metis append.assoc)
   
 
-
-lemma
- "L(FROMNTIMES r n) = L(SEQ (NTIMES r n) (STAR r))"
-  apply(auto simp add: Sequ_def)
-  defer
-   apply(subgoal_tac "\<exists>m. s2 \<in> (L r) \<up> m")
-  prefer 2
-    apply (simp add: Star_Pow)
-  apply(auto)
-  apply(rule_tac x="n + m" in bexI)
-    apply (simp add: pow_add)
-   apply simp
-  apply(subgoal_tac "\<exists>m. m + n = xa")
-   apply(auto)
-   prefer 2
-  using le_add_diff_inverse2 apply auto[1]
-  by (smt Pow_Star Sequ_def add.commute mem_Collect_eq pow_add2)
-  
-lemma
-   "L (der c (FROMNTIMES r n)) = 
-     L (SEQ (der c r) (FROMNTIMES r (n - 1)))"
-  apply(auto simp add: Sequ_def)
-  using Star_Pow apply blast
-  using Pow_Star by blast
-  
-lemma
- "L (der c (UPNTIMES r n)) = 
-    L(if n = 0 then ZERO  else 
-      ALT (der c r) (SEQ (der c r) (UPNTIMES r (n - 1))))"
-  apply(auto simp add: Sequ_def)
-  using SpecExt.Pow_empty by blast 
-
-abbreviation "FROM \<equiv> FROMNTIMES"
-
-lemma
-  shows "L (der c (FROM r n)) = 
-         L (if n <= 0 then SEQ (der c r) (ALT ONE (FROM r 0))
-                      else SEQ (der c r) (ALT ZERO (FROM r (n -1))))"
-  apply(auto simp add: Sequ_def)
-  oops
-
-
 fun 
  ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
 where
@@ -454,8 +408,10 @@
 apply(auto simp add: Sequ_def Der_def Cons_eq_append_conv)[1]
 apply(rule_tac x="Suc xa" in bexI)
 apply(auto simp add: Sequ_def)[2]
-apply (metis append_Cons)
-apply (metis (no_types, hide_lams) Pow_decomp atMost_iff diff_Suc_eq_diff_pred diff_is_0_eq)
+     apply (metis append_Cons)
+ apply(rule_tac x="xa - 1" in bexI)
+     apply(auto simp add: Sequ_def)[2]
+  apply (metis One_nat_def Pow_decomp)
 apply(rule impI)+
 apply(subst Sequ_Union_in)
 apply(subst Der_Pow_Sequ[symmetric])
@@ -485,7 +441,7 @@
 | Right val
 | Left val
 | Stars "val list"
-
+| Nt val
 
 section {* The string behind a value *}
 
@@ -499,6 +455,7 @@
 | "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
 | "flat (Stars []) = []"
 | "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" 
+| "flat (Nt v) = flat v"
 
 abbreviation
   "flats vs \<equiv> concat (map flat vs)"
@@ -597,7 +554,7 @@
 | "\<Turnstile> v1 : r1 \<Longrightarrow> \<Turnstile> Left v1 : ALT r1 r2"
 | "\<Turnstile> v2 : r2 \<Longrightarrow> \<Turnstile> Right v2 : ALT r1 r2"
 | "\<Turnstile> Void : ONE"
-| "\<Turnstile> Char c : CHAR c"
+| "\<Turnstile> Char c : CH c"
 | "\<lbrakk>\<forall>v \<in> set vs. \<Turnstile> v : r \<and> flat v \<noteq> []\<rbrakk> \<Longrightarrow> \<Turnstile> Stars vs : STAR r"
 | "\<lbrakk>\<forall>v \<in> set vs. \<Turnstile> v : r \<and> flat v \<noteq> []; length vs \<le> n\<rbrakk> \<Longrightarrow> \<Turnstile> Stars vs : UPNTIMES r n"
 | "\<lbrakk>\<forall>v \<in> set vs1. \<Turnstile> v : r \<and> flat v \<noteq> []; 
@@ -622,7 +579,7 @@
   "\<Turnstile> v : SEQ r1 r2"
   "\<Turnstile> v : ALT r1 r2"
   "\<Turnstile> v : ONE"
-  "\<Turnstile> v : CHAR c"
+  "\<Turnstile> v : CH c"
   "\<Turnstile> vs : STAR r"
   "\<Turnstile> vs : UPNTIMES r n"
   "\<Turnstile> vs : NTIMES r n"
@@ -867,7 +824,10 @@
   then obtain vs where "flats vs = s" "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> flat v \<noteq> []" "length vs \<le> n"
   using IH flats_cval_nonempty by (smt order.trans) 
   then show "\<exists>v. \<Turnstile> v : UPNTIMES r n \<and> flat v = s"
-  using Prf.intros(7) flat_Stars by blast
+    using Prf.intros(7) flat_Stars by blast
+next 
+  case (NOT r)
+  then show ?case sorry
 qed (auto intro: Prf.intros)
 
 
@@ -917,7 +877,7 @@
 lemma LV_simps:
   shows "LV ZERO s = {}"
   and   "LV ONE s = (if s = [] then {Void} else {})"
-  and   "LV (CHAR c) s = (if s = [c] then {Char c} else {})"
+  and   "LV (CH c) s = (if s = [c] then {Char c} else {})"
   and   "LV (ALT r1 r2) s = Left ` LV r1 s \<union> Right ` LV r2 s"
 unfolding LV_def
 apply(auto intro: Prf.intros elim: Prf.cases)
@@ -1226,8 +1186,8 @@
   case (ONE s)
   show "finite (LV ONE s)" by (simp add: LV_simps)
 next
-  case (CHAR c s)
-  show "finite (LV (CHAR c) s)" by (simp add: LV_simps)
+  case (CH c s)
+  show "finite (LV (CH c) s)" by (simp add: LV_simps)
 next 
   case (ALT r1 r2 s)
   then show "finite (LV (ALT r1 r2) s)" by (simp add: LV_simps)
@@ -1270,7 +1230,10 @@
   case (NMTIMES r n m s)
   have "\<And>s. finite (LV r s)" by fact
   then show "finite (LV (NMTIMES r n m) s)"
-    by (simp add: LV_NMTIMES_6)         
+    by (simp add: LV_NMTIMES_6)    
+next 
+  case (NOT r s)
+  then show ?case sorry
 qed
 
 
@@ -1281,7 +1244,7 @@
   Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
 where
   Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
-| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
+| Posix_CHAR: "[c] \<in> (CH c) \<rightarrow> (Char c)"
 | Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
 | Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
 | Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
@@ -1320,7 +1283,7 @@
 inductive_cases Posix_elims:
   "s \<in> ZERO \<rightarrow> v"
   "s \<in> ONE \<rightarrow> v"
-  "s \<in> CHAR c \<rightarrow> v"
+  "s \<in> CH c \<rightarrow> v"
   "s \<in> ALT r1 r2 \<rightarrow> v"
   "s \<in> SEQ r1 r2 \<rightarrow> v"
   "s \<in> STAR r \<rightarrow> v"
@@ -1396,7 +1359,7 @@
   then show "Void = v2" by cases auto
 next 
   case (Posix_CHAR c v2)
-  have "[c] \<in> CHAR c \<rightarrow> v2" by fact
+  have "[c] \<in> CH c \<rightarrow> v2" by fact
   then show "Char c = v2" by cases auto
 next 
   case (Posix_ALT1 s r1 v r2 v2)
@@ -1503,14 +1466,9 @@
     using Posix1(1) Posix1(2) apply blast 
      apply(case_tac n)
       apply(simp)
-      apply(simp)
-    apply(drule_tac x="va" in meta_spec)
-    apply(drule_tac x="vs" in meta_spec)
-    apply(simp)
-     apply(drule meta_mp)
-    apply (metis L.simps(9) Posix1(1) UN_E append.right_neutral append_Nil diff_Suc_1 local.Posix_FROMNTIMES1(4) val.inject(5))
-    apply (metis L.simps(9) Posix1(1) UN_E append.right_neutral append_Nil)
-    by (metis One_nat_def Posix1(1) Posix_FROMNTIMES1.hyps(7) self_append_conv self_append_conv2)
+     apply(simp)
+    apply (smt (verit, ccfv_threshold) L.simps(9) Posix1(1) UN_E append_eq_append_conv2)
+    by (metis One_nat_def Posix1(1) Posix_FROMNTIMES1.hyps(7) append_Nil2 append_self_conv2)
   moreover
   have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
             "\<And>v2. s2 \<in> FROMNTIMES r (n - 1) \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
@@ -1555,18 +1513,8 @@
        apply(case_tac m)
       apply(simp)
      apply(simp)
-    apply(drule_tac x="va" in meta_spec)
-    apply(drule_tac x="vs" in meta_spec)
-    apply(simp)
-     apply(drule meta_mp)
-      apply(drule Posix1(1))
-      apply(drule Posix1(1))
-      apply(drule Posix1(1))
-      apply(frule Posix1(1))
-      apply(simp)
-    using Posix_NMTIMES1.hyps(4) apply force
-     apply (metis L.simps(10) Posix1(1) UN_E append_Nil2 append_self_conv2)
-    by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append.right_neutral append_Nil)      
+    apply (metis L.simps(10) Posix1(1) UN_E append.right_neutral append_Nil)
+    by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append_Nil self_append_conv)
   moreover
   have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
             "\<And>v2. s2 \<in> NMTIMES r (n - 1) (m - 1) \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
@@ -1616,7 +1564,6 @@
      apply(auto simp add: intro!: Prf.intros elim!: Prf_elims)[2]
   apply (metis (mono_tags, lifting) Prf.intros(9) append_Nil empty_iff flat_Stars flats_empty list.set(1) mem_Collect_eq)
      apply(simp)
-     apply(clarify)
      apply(case_tac n)
       apply(simp)
      apply(simp)
@@ -1632,7 +1579,6 @@
       apply(simp)
      apply(simp)
     apply(simp)
-   apply(clarify)
    apply(erule Prf_elims)
       apply(simp)
   apply(rule Prf.intros)  
@@ -1660,7 +1606,6 @@
   apply(simp)
   apply (metis Prf.intros(11) append_Nil empty_iff list.set(1))
   apply(simp)
-  apply(clarify)
   apply(rotate_tac 6)
   apply(erule Prf_elims)
    apply(simp)
@@ -1675,7 +1620,6 @@
   apply(simp)
   apply(simp)
   apply(simp)
-  apply(clarify)
   apply(rotate_tac 6)
   apply(erule Prf_elims)
    apply(simp)
--- a/thys2/Paper/Paper.thy	Tue Jan 25 13:12:50 2022 +0000
+++ b/thys2/Paper/Paper.thy	Thu Jan 27 23:25:26 2022 +0000
@@ -7,7 +7,164 @@
    "../SizeBound4" 
    "HOL-Library.LaTeXsugar"
 begin
+
+declare [[show_question_marks = false]]
+
+abbreviation 
+  "der_syn r c \<equiv> der c r"
+
+notation (latex output)
+  der_syn ("_\\_" [79, 1000] 76) and
+
+  ZERO ("\<^bold>0" 81) and 
+  ONE ("\<^bold>1" 81) and 
+  CH ("_" [1000] 80) and
+  ALT ("_ + _" [77,77] 78) and
+  SEQ ("_ \<cdot> _" [77,77] 78) and
+  STAR ("_\<^sup>\<star>" [79] 78) 
+
 (*>*)
+
+section {* Introduction *}
+
+text {*
+
+Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em
+derivative} @{term "der c r"} of a regular expression \<open>r\<close> w.r.t.\
+a character~\<open>c\<close>, and showed that it gave a simple solution to the
+problem of matching a string @{term s} with a regular expression @{term
+r}: if the derivative of @{term r} w.r.t.\ (in succession) all the
+characters of the string matches the empty string, then @{term r}
+matches @{term s} (and {\em vice versa}). The derivative has the
+property (which may almost be regarded as its specification) that, for
+every string @{term s} and regular expression @{term r} and character
+@{term c}, one has @{term "cs \<in> L(r)"} if and only if \mbox{@{term "s \<in> L(der c r)"}}. 
+The beauty of Brzozowski's derivatives is that
+they are neatly expressible in any functional language, and easily
+definable and reasoned about in theorem provers---the definitions just
+consist of inductive datatypes and simple recursive functions. A
+mechanised correctness proof of Brzozowski's matcher in for example HOL4
+has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in
+Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}.
+And another one in Coq is given by Coquand and Siles \cite{Coquand2012}.
+
+If a regular expression matches a string, then in general there is more
+than one way of how the string is matched. There are two commonly used
+disambiguation strategies to generate a unique answer: one is called
+GREEDY matching \cite{Frisch2004} and the other is POSIX
+matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}.
+For example consider the string @{term xy} and the regular expression
+\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be
+matched in two `iterations' by the single letter-regular expressions
+@{term x} and @{term y}, or directly in one iteration by @{term xy}. The
+first case corresponds to GREEDY matching, which first matches with the
+left-most symbol and only matches the next symbol in case of a mismatch
+(this is greedy in the sense of preferring instant gratification to
+delayed repletion). The second case is POSIX matching, which prefers the
+longest match.
+
+
+
+\begin{figure}[t]
+\begin{center}
+\begin{tikzpicture}[scale=2,node distance=1.3cm,
+                    every node/.style={minimum size=6mm}]
+\node (r1)  {@{term "r\<^sub>1"}};
+\node (r2) [right=of r1]{@{term "r\<^sub>2"}};
+\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
+\node (r3) [right=of r2]{@{term "r\<^sub>3"}};
+\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
+\node (r4) [right=of r3]{@{term "r\<^sub>4"}};
+\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
+\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
+\node (v4) [below=of r4]{@{term "v\<^sub>4"}};
+\draw[->,line width=1mm](r4) -- (v4);
+\node (v3) [left=of v4] {@{term "v\<^sub>3"}};
+\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
+\node (v2) [left=of v3]{@{term "v\<^sub>2"}};
+\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
+\node (v1) [left=of v2] {@{term "v\<^sub>1"}};
+\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
+\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
+\end{tikzpicture}
+\end{center}
+\mbox{}\\[-13mm]
+
+\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014},
+matching the string @{term "[a,b,c]"}. The first phase (the arrows from 
+left to right) is \Brz's matcher building successive derivatives. If the 
+last regular expression is @{term nullable}, then the functions of the 
+second phase are called (the top-down and right-to-left arrows): first 
+@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
+how the empty string has been recognised by @{term "r\<^sub>4"}. After
+that the function @{term inj} ``injects back'' the characters of the string into
+the values.
+\label{Sulz}}
+\end{figure} 
+
+
+*}
+
+section {* Background *}
+
+text {*
+  Sulzmann-Lu algorithm with inj. State that POSIX rules.
+  metion slg is correct.
+
+
+  \begin{figure}[t]
+  \begin{center}
+  \begin{tabular}{c}
+  @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \qquad
+  @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\medskip\\
+  @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\qquad
+  @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\
+  $\mprset{flushleft}
+   \inferrule
+   {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
+    @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
+    @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
+   {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\\
+  @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\medskip\\
+  $\mprset{flushleft}
+   \inferrule
+   {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
+    @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
+    @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
+    @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
+   {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>
+  \end{tabular}
+  \end{center}
+  \caption{Our inductive definition of POSIX values.}\label{POSIXrules}
+  \end{figure}
+
+
+*}
+
+section {* Bitcoded Derivatives *}
+
+text {*
+  bitcoded regexes / decoding / bmkeps
+  gets rid of the second phase (only single phase)   
+  correctness
+*}
+
+
+section {* Simplification *}
+
+text {*
+   not direct correspondence with PDERs, because of example
+   problem with retrieve 
+
+   correctness
+*}
+
+section {* Bound - NO *}
+
+section {* Bounded Regex / Not *}
+
+section {* Conclusion *}
+
 text {*
 
 \cite{AusafDyckhoffUrban2016}
--- a/thys2/Paper/document/root.tex	Tue Jan 25 13:12:50 2022 +0000
+++ b/thys2/Paper/document/root.tex	Thu Jan 27 23:25:26 2022 +0000
@@ -45,33 +45,50 @@
 \titlerunning{POSIX Lexing with Bitcoded Derivatives}
 \author{Chengsong Tan}{King's College London}{chengsong.tan@kcl.ac.uk}{}{}
 \author{Christian Urban}{King's College London}{christian.urban@kcl.ac.uk}{}{}
+\keywords{POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL}
+\category{}
+\ccsdesc[100]{Design and analysis of algorithms}
+\ccsdesc[100]{Formal languages and automata theory}
+\Copyright{\mbox{}}
+\nolinenumbers
 
 
 \begin{document}
 \maketitle
 
 \begin{abstract}
-Brzozowski introduced the notion of derivatives for regular
-expressions. They can be used for a very simple regular expression
-matching algorithm.  Sulzmann and Lu cleverly extended this algorithm
-in order to deal with POSIX matching, which is the underlying
-disambiguation strategy for regular expressions needed in lexers.
-Their algorithm generates POSIX values which encode the information of
-\emph{how} a regular expression matches a string---that is, which part
-of the string is matched by which part of the regular expression.  In
-this paper we give our inductive definition of what a POSIX value is
-and show $(i)$ that such a value is unique (for given regular
-expression and string being matched) and $(ii)$ that Sulzmann and Lu's
-algorithm always generates such a value (provided that the regular
-expression matches the string). We show that $(iii)$ our inductive
-definition of a POSIX value is equivalent to an alternative definition
-by Okui and Suzuki which identifies POSIX values as least elements
-according to an ordering of values.  We also prove the correctness of
-Sulzmann's bitcoded version of the POSIX matching algorithm and extend the
-results to additional constructors for regular expressions.  \smallskip
-
-{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions,
-Isabelle/HOL
+  Sulzmann and Lu described a lexing algorithm that calculates
+  Brzozowski derivatives using bit-sequences annotated to regular
+  expressions.  Their algorithm generates POSIX values which encode
+  the information of \emph{how} a regular expression matches a
+  string---that is, which part of the string is matched by which part
+  of the regular expression.  The purpose of the bit-sequences in
+  Sulzmann and Lu's algorithm is to keep the size of derivatives small
+  which is achieved by `aggressively' simplifying regular expressions.
+  In this paper we describe a slight variant of Sulzmann and Lu's
+  algorithm and \textit{(i)} prove that this algorithm generates
+  unique POSIX values; \textit{(ii)} we also establish a cubic bound
+  for the size of the derivatives---in earlier works, derivatives can
+  grow exponentially even after simplification.
+  
+%Brzozowski introduced the notion of derivatives for regular
+%expressions. They can be used for a very simple regular expression
+%matching algorithm.  Sulzmann and Lu cleverly extended this algorithm
+%in order to deal with POSIX matching, which is the underlying
+%disambiguation strategy for regular expressions needed in lexers.
+%Their algorithm generates POSIX values which encode the information of
+%\emph{how} a regular expression matches a string---that is, which part
+%of the string is matched by which part of the regular expression.  In
+%this paper we give our inductive definition of what a POSIX value is
+%and show $(i)$ that such a value is unique (for given regular
+%expression and string being matched) and $(ii)$ that Sulzmann and Lu's
+%algorithm always generates such a value (provided that the regular
+%expression matches the string). We show that $(iii)$ our inductive
+%definition of a POSIX value is equivalent to an alternative definition
+%by Okui and Suzuki which identifies POSIX values as least elements
+%according to an ordering of values.  We also prove the correctness of
+%Sulzmann's bitcoded version of the POSIX matching algorithm and extend the
+%results to additional constructors for regular expressions.  \smallskip
 \end{abstract}
 
 
--- a/thys2/SizeBound4.thy	Tue Jan 25 13:12:50 2022 +0000
+++ b/thys2/SizeBound4.thy	Thu Jan 27 23:25:26 2022 +0000
@@ -132,15 +132,6 @@
   apply(auto)
   done
 
-lemma fuse_Nil:
-  shows "fuse [] r = r"
-  by (induct r)(simp_all)
-
-(*
-lemma map_fuse_Nil:
-  shows "map (fuse []) rs = rs"
-  by (induct rs)(simp_all add: fuse_Nil)
-*)
 
 fun intern :: "rexp \<Rightarrow> arexp" where
   "intern ZERO = AZERO"
@@ -280,23 +271,23 @@
   shows "retrieve (fuse bs r) v = bs @ retrieve r v"
   using assms
   apply(induct r arbitrary: v bs)
-         apply(auto elim: Prf_elims)[4]
-   defer
-  using retrieve_encode_STARS
-   apply(auto elim!: Prf_elims)[1]
-   apply(case_tac vs)
-    apply(simp)
-   apply(simp)
-  (* AALTs  case *)
+  apply(auto elim: Prf_elims)[4]
+  apply(case_tac x2a)
+  apply(simp)
+  using Prf_elims(1) apply blast
+  apply(case_tac x2a)
+  apply(simp)
+  apply(simp)
+  apply(case_tac list)
   apply(simp)
-  apply(case_tac x2a)
-   apply(simp)
-   apply(auto elim!: Prf_elims)[1]
+  apply(simp)
+  apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5))
   apply(simp)
-   apply(case_tac list)
-   apply(simp)
-  apply(auto)
+  using retrieve_encode_STARS
   apply(auto elim!: Prf_elims)[1]
+  apply(case_tac vs)
+  apply(simp)
+  apply(simp)
   done
 
 lemma retrieve_fuse:
@@ -314,139 +305,92 @@
   apply(simp_all add: retrieve_fuse retrieve_encode_STARS)
   done
 
-(*
-lemma bnullable_Hdbmkeps_Hd:
-  assumes "bnullable a" 
-  shows  "bmkeps (AALTs bs (a # rs)) = bs @ (bmkeps a)"
-  using assms by simp
-*)
 
-
-lemma r2:
-  assumes "x \<in> set rs" "bnullable x"
-  shows "bnullable (AALTs bs rs)"
+lemma retrieve_AALTs_bnullable1:
+  assumes "bnullable r"
+  shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
+         = bs @ retrieve r (mkeps (erase r))"
   using assms
-  apply(induct rs)
-   apply(auto)
+  apply(case_tac rs)
+  apply(auto simp add: bnullable_correctness)
   done
 
-lemma  r3:
-  assumes "\<not> bnullable r" 
-          "bnullables rs"
-  shows "retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs))) =
-         retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))"
+lemma retrieve_AALTs_bnullable2:
+  assumes "\<not>bnullable r" "bnullables rs"
+  shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
+         = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
   using assms
   apply(induct rs arbitrary: r bs)
-   apply(auto)[1]
   apply(auto)
   using bnullable_correctness apply blast
-    apply(auto simp add: bnullable_correctness mkeps_nullable retrieve_fuse2)
-   apply(subst retrieve_fuse2[symmetric])
-  apply (smt bnullable.simps(4) bnullable_correctness erase.simps(5) erase.simps(6) insert_iff list.exhaust list.set(2) mkeps.simps(3) mkeps_nullable)
-   apply(simp)
-  apply(case_tac "bnullable a")
-  apply (smt append_Nil2 bnullable.simps(4) bnullable_correctness erase.simps(5) erase.simps(6) fuse.simps(4) insert_iff list.exhaust list.set(2) mkeps.simps(3) mkeps_nullable retrieve_fuse2)
-  apply(drule_tac x="a" in meta_spec)
-  apply(drule_tac x="bs" in meta_spec)
-  apply(drule meta_mp)
-   apply(simp)
-  apply(drule meta_mp)
-   apply(auto)
-  apply(subst retrieve_fuse2[symmetric])
   apply(case_tac rs)
-    apply(simp)
-   apply(auto)[1]
-      apply (simp add: bnullable_correctness)
-  
-  apply (metis append_Nil2 bnullable_correctness erase_fuse fuse.simps(4) list.set_intros(1) mkeps.simps(3) mkeps_nullable nullable.simps(4) r2)
-    apply (simp add: bnullable_correctness)
-  apply (metis append_Nil2 bnullable_correctness erase.simps(6) erase_fuse fuse.simps(4) list.set_intros(2) mkeps.simps(3) mkeps_nullable r2)
-  apply(simp)
+  apply(auto)
+  using bnullable_correctness apply blast
+  apply(case_tac rs)
+  apply(auto)
   done
 
-lemma t: 
+lemma bmkeps_retrieve_AALTs: 
   assumes "\<forall>r \<in> set rs. bnullable r \<longrightarrow> bmkeps r = retrieve r (mkeps (erase r))" 
           "bnullables rs"
   shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
  using assms
   apply(induct rs arbitrary: bs)
   apply(auto)
-  apply (metis (no_types, opaque_lifting) bmkepss.cases bnullable_correctness erase.simps(5) erase.simps(6) mkeps.simps(3) retrieve.simps(3) retrieve.simps(4))
-  apply (metis r3)
-  apply (metis (no_types, lifting) bmkepss.cases bnullable_correctness empty_iff erase.simps(6) list.set(1) mkeps.simps(3) retrieve.simps(4))
-  apply (metis r3)
-  done
-      
+  using retrieve_AALTs_bnullable1 apply presburger
+  apply (metis retrieve_AALTs_bnullable2)
+  apply (simp add: retrieve_AALTs_bnullable1)
+  by (metis retrieve_AALTs_bnullable2)
+
+    
 lemma bmkeps_retrieve:
   assumes "bnullable r"
   shows "bmkeps r = retrieve r (mkeps (erase r))"
   using assms
   apply(induct r)
-  apply(auto)
-  using t by auto
+  apply(auto)  
+  using bmkeps_retrieve_AALTs by auto
 
 lemma bder_retrieve:
   assumes "\<Turnstile> v : der c (erase r)"
   shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)"
   using assms  
   apply(induct r arbitrary: v rule: erase.induct)
-         apply(simp)
-         apply(erule Prf_elims)
-        apply(simp)
-        apply(erule Prf_elims) 
-        apply(simp)
-      apply(case_tac "c = ca")
-       apply(simp)
-       apply(erule Prf_elims)
-       apply(simp)
-      apply(simp)
-       apply(erule Prf_elims)
-  apply(simp)
-      apply(erule Prf_elims)
-     apply(simp)
-    apply(simp)
-  apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v)
-    apply(erule Prf_elims)
-     apply(simp)
-    apply(simp)
-    apply(case_tac rs)
-     apply(simp)
-    apply(simp)
-  apply (smt Prf_elims(3) injval.simps(2) injval.simps(3) retrieve.simps(4) retrieve.simps(5) same_append_eq)
-   apply(simp)
-   apply(case_tac "nullable (erase r1)")
-    apply(simp)
-  apply(erule Prf_elims)
-     apply(subgoal_tac "bnullable r1")
-  prefer 2
-  using bnullable_correctness apply blast
-    apply(simp)
-     apply(erule Prf_elims)
-     apply(simp)
-   apply(subgoal_tac "bnullable r1")
-  prefer 2
-  using bnullable_correctness apply blast
-    apply(simp)
-    apply(simp add: retrieve_fuse2)
-    apply(simp add: bmkeps_retrieve)
-   apply(simp)
-   apply(erule Prf_elims)
-   apply(simp)
-  using bnullable_correctness apply blast
-  apply(rename_tac bs r v)
+  using Prf_elims(1) apply auto[1]
+  using Prf_elims(1) apply auto[1]
+  apply(auto)[1]
+  apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2))
+  using Prf_elims(1) apply blast
+  (* AALTs case *)
   apply(simp)
   apply(erule Prf_elims)
-     apply(clarify)
+  apply(simp)
+  apply(simp)
+  apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v)
+  apply(erule Prf_elims)
+  apply(simp)
+  apply(simp)
+  apply(case_tac rs)
+  apply(simp)
+  apply(simp)
+  using Prf_elims(3) apply fastforce
+  (* ASEQ case *) 
+  apply(simp)
+  apply(case_tac "nullable (erase r1)")
+  apply(simp)
+  apply(erule Prf_elims)
+  using Prf_elims(2) bnullable_correctness apply force
+  apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2)
+  apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2)
+  using Prf_elims(2) apply force
+  (* ASTAR case *)  
+  apply(rename_tac bs r v)
+  apply(simp)  
   apply(erule Prf_elims)
   apply(clarify)
-  apply(subst injval.simps)
-  apply(simp del: retrieve.simps)
-  apply(subst retrieve.simps)
-  apply(subst retrieve.simps)
-  apply(simp)
-  apply(simp add: retrieve_fuse2)
-  done
-  
+  apply(erule Prf_elims)
+  apply(clarify)
+  by (simp add: retrieve_fuse2)
 
 
 lemma MAIN_decode:
@@ -581,79 +525,6 @@
   apply(simp_all)
   done
 
-lemma L_bsimp_ASEQ:
-  "L (erase (ASEQ bs r1 r2)) = L (erase (bsimp_ASEQ bs r1 r2))"
-  apply(induct bs r1 r2 rule: bsimp_ASEQ.induct)
-  apply(simp_all)
-  by (metis erase_fuse fuse.simps(4))
-
-lemma L_bsimp_AALTs:
-  "L (erase (AALTs bs rs)) = L (erase (bsimp_AALTs bs rs))"
-  apply(induct bs rs rule: bsimp_AALTs.induct)
-  apply(simp_all add: erase_fuse)
-  done
-
-lemma L_erase_AALTs:
-  shows "L (erase (AALTs bs rs)) = \<Union> (L ` erase ` (set rs))"
-  apply(induct rs)
-   apply(simp)
-  apply(simp)
-  apply(case_tac rs)
-   apply(simp)
-  apply(simp)
-  done
-
-lemma L_erase_flts:
-  shows "\<Union> (L ` erase ` (set (flts rs))) = \<Union> (L ` erase ` (set rs))"
-  apply(induct rs rule: flts.induct)
-  apply(simp_all)
-  apply(auto)
-  using L_erase_AALTs erase_fuse apply auto[1]
-  by (simp add: L_erase_AALTs erase_fuse)
-
-lemma L_erase_dB_acc:
-  shows "(\<Union> (L ` acc) \<union> (\<Union> (L ` erase ` (set (distinctBy rs erase acc))))) 
-            = \<Union> (L ` acc) \<union>  \<Union> (L ` erase ` (set rs))"
-  apply(induction rs arbitrary: acc)
-  apply simp_all
-  by (smt (z3) SUP_absorb UN_insert sup_assoc sup_commute)
-
-
-lemma L_erase_dB:
-  shows "(\<Union> (L ` erase ` (set (distinctBy rs erase {})))) = \<Union> (L ` erase ` (set rs))"
-  by (metis L_erase_dB_acc Un_commute Union_image_empty)
-
-lemma L_bsimp_erase:
-  shows "L (erase r) = L (erase (bsimp r))"
-  apply(induct r)
-  apply(simp)
-  apply(simp)
-  apply(simp)
-  apply(auto simp add: Sequ_def)[1]
-  apply(subst L_bsimp_ASEQ[symmetric])
-  apply(auto simp add: Sequ_def)[1]
-  apply(subst (asm)  L_bsimp_ASEQ[symmetric])
-  apply(auto simp add: Sequ_def)[1]
-  apply(simp)
-  apply(subst L_bsimp_AALTs[symmetric])
-  defer
-  apply(simp)
-  apply(subst (2)L_erase_AALTs)  
-  apply(subst L_erase_dB)
-  apply(subst L_erase_flts)
-  apply (simp add: L_erase_AALTs)
-  done
-
-lemma L_bders_simp:
-  shows "L (erase (bders_simp r s)) = L (erase (bders r s))"
-  apply(induct s arbitrary: r rule: rev_induct)
-  apply(simp)
-  apply(simp)
-  apply(simp add: ders_append)
-  apply(simp add: bders_simp_append)
-  apply(simp add: L_bsimp_erase[symmetric])
-  by (simp add: der_correctness)
-
 
 lemma bmkeps_fuse:
   assumes "bnullable r"
@@ -668,17 +539,6 @@
   apply(auto simp add: bmkeps_fuse bnullable_fuse)
   done
 
-
-lemma b4:
-  shows "bnullable (bders_simp r s) = bnullable (bders r s)"
-proof -
-  have "L (erase (bders_simp r s)) = L (erase (bders r s))"
-    using L_bders_simp by force
-  then show "bnullable (bders_simp r s) = bnullable (bders r s)"
-    using bnullable_correctness nullable_correctness by blast
-qed
-
-
 lemma bder_fuse:
   shows "bder c (fuse bs a) = fuse bs  (bder c a)"
   apply(induct a arbitrary: bs c)
@@ -860,7 +720,7 @@
     apply(rule rrewrite_srewrite.ss6[simplified])
     apply(simp add: erase_fuse)
     done
- qed (auto intro: rrewrite_srewrite.intros)
+qed (auto intro: rrewrite_srewrite.intros)
 
 
 lemma rewrites_fuse:  
@@ -1173,7 +1033,7 @@
 theorem main_blexer_simp: 
   shows "blexer r s = blexer_simp r s"
   unfolding blexer_def blexer_simp_def
-  using b4 main_aux by simp
+  by (metis central main_aux rewrites_bnullable_eq)
 
 
 theorem blexersimp_correctness: 
Binary file thys2/paper.pdf has changed