updated some of the text and cardinality proof
authorChristian Urban <christian.urban@kcl.ac.uk>
Wed, 02 Feb 2022 14:52:41 +0000
changeset 405 3cfea5bb5e23
parent 404 1500f96707b0
child 406 4511cc1bf1f0
child 407 d73b722efe0e
updated some of the text and cardinality proof
thys2/PDerivs.thy
thys2/Paper/Paper.thy
thys2/Paper/document/root.tex
thys2/SizeBound4.thy
thys2/paper.pdf
--- a/thys2/PDerivs.thy	Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/PDerivs.thy	Wed Feb 02 14:52:41 2022 +0000
@@ -12,7 +12,7 @@
   "SEQs rs r = (\<lambda>r'. SEQ r' r) ` rs"
   by auto
 
-primrec
+fun
   pder :: "char \<Rightarrow> rexp \<Rightarrow> rexp set"
 where
   "pder c ZERO = {}"
@@ -23,7 +23,7 @@
     (if nullable r1 then SEQs (pder c r1) r2 \<union> pder c r2 else SEQs (pder c r1) r2)"
 | "pder c (STAR r) = SEQs (pder c r) (STAR r)"
 
-primrec
+fun
   pders :: "char list \<Rightarrow> rexp \<Rightarrow> rexp set"
 where
   "pders [] r = {r}"
--- a/thys2/Paper/Paper.thy	Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/Paper/Paper.thy	Wed Feb 02 14:52:41 2022 +0000
@@ -45,17 +45,19 @@
   set ("_" [73] 73) and
 
   AZERO ("ZERO" 81) and 
-  AONE ("ONE _" [79] 81) and 
+  AONE ("ONE _" [79] 78) and 
   ACHAR ("CHAR _ _" [79, 79] 80) and
   AALTs ("ALTs _ _" [77,77] 78) and
-  ASEQ ("SEQ _ _ _" [79, 77,77] 78) and
+  ASEQ ("SEQ _ _ _" [79, 79,79] 78) and
   ASTAR ("STAR _ _" [79, 79] 78) and
 
   code ("code _" [79] 74) and
   intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
   erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
   bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
-  bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80)
+  bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
+
+  srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) 
 
 
 lemma better_retrieve:
@@ -244,10 +246,32 @@
 section {* Bitcoded Regular Expressions and Derivatives *}
 
 text {*
-  bitcoded regexes / decoding / bmkeps
-  gets rid of the second phase (only single phase)   
-  correctness
+
+  Sulzmann and Lu describe another algorithm that generates POSIX
+  values but dispences with the second phase where characters are
+  injected ``back'' into values. For this they annotate bitcodes to
+  regular expressions, which we define in Isabelle/HOL as the datatype
 
+  \begin{center}
+  \begin{tabular}{lcl}
+  @{term breg} & $::=$ & @{term "AZERO"}\\
+               & $\mid$ & @{term "AONE bs"}\\
+               & $\mid$ & @{term "ACHAR bs c"}\\
+               & $\mid$ & @{term "AALTs bs rs"}\\
+               & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
+               & $\mid$ & @{term "ASTAR bs r"}
+  \end{tabular}
+  \end{center}
+
+  \noindent where @{text bs} stands for a bitsequences; @{text r},
+  @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for annotated regular
+  expressions; and @{text rs} for a list of annotated regular
+  expressions.  In contrast to Sulzmann and Lu we generalise the
+  alternative regular expressions to lists, instead of just having
+  binary regular expressions.  The idea with annotated regular
+  expressions is to incrementally generate the value information by
+  recording bitsequences. Sulzmann and Lu then  
+  define a coding function for how values can be coded into bitsequences.
 
   \begin{center}
   \begin{tabular}{lcl}
@@ -261,6 +285,26 @@
   \end{tabular}
   \end{center}
 
+  There is also a corresponding decoding function that takes a bitsequence
+  and generates back a value. However, since the bitsequences are a ``lossy''
+  coding (@{term Seq}s are not coded) the decoding function depends also
+  on a regular expression in order to decode values. 
+
+  \begin{center}
+  \begin{tabular}{lcll}
+  %@{thm (lhs) decode'.simps(1)} & $\dn$ & @{thm (rhs) decode'.simps(1)}\\
+  @{thm (lhs) decode'.simps(2)} & $\dn$ & @{thm (rhs) decode'.simps(2)}\\
+  @{thm (lhs) decode'.simps(3)} & $\dn$ & @{thm (rhs) decode'.simps(3)}\\
+  @{thm (lhs) decode'.simps(4)} & $\dn$ & @{thm (rhs) decode'.simps(4)}\\
+  @{thm (lhs) decode'.simps(5)} & $\dn$ & @{thm (rhs) decode'.simps(5)}\\
+  @{thm (lhs) decode'.simps(6)} & $\dn$ & @{thm (rhs) decode'.simps(6)}\\
+  @{thm (lhs) decode'.simps(7)} & $\dn$ & @{thm (rhs) decode'.simps(7)}\\
+  @{thm (lhs) decode'.simps(8)} & $\dn$ & @{thm (rhs) decode'.simps(8)}\\
+  @{thm (lhs) decode'.simps(9)} & $\dn$ & @{thm (rhs) decode'.simps(9)}\\
+  @{thm (lhs) decode'.simps(10)} & $\dn$ & @{thm (rhs) decode'.simps(10)} & fix
+  \end{tabular}
+  \end{center}
+
 
   The idea of the bitcodes is to annotate them to regular expressions and generate values
   incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value.
@@ -296,6 +340,11 @@
   \begin{theorem}
   @{thm blexer_correctness} 
   \end{theorem}
+
+
+  bitcoded regexes / decoding / bmkeps
+  gets rid of the second phase (only single phase)   
+  correctness
 *}
 
 
--- a/thys2/Paper/document/root.tex	Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/Paper/document/root.tex	Wed Feb 02 14:52:41 2022 +0000
@@ -65,8 +65,8 @@
   string---that is, which part of the string is matched by which part
   of the regular expression.  The purpose of the bitcodes is to generate POSIX values incrementally while
   derivatives are calculated. They also help with designing
-  an `aggressive' simplification function that keeps the size of
-  derivatives small. Without simplification derivatives can grow
+  an ``aggressive'' simplification function that keeps the size of
+  derivatives small. Without simplification the size derivatives can grow
   exponentially resulting in an extremely slow lexing algorithm.  In this
   paper we describe a variant of Sulzmann and Lu's algorithm: Our
   algorithm is a recursive functional program, whereas Sulzmann
--- a/thys2/SizeBound4.thy	Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/SizeBound4.thy	Wed Feb 02 14:52:41 2022 +0000
@@ -1,6 +1,6 @@
 
 theory SizeBound4
-  imports "Lexer" 
+  imports "Lexer" "PDerivs"
 begin
 
 section \<open>Bit-Encodings\<close>
@@ -26,26 +26,26 @@
 function
   decode' :: "bit list \<Rightarrow> rexp \<Rightarrow> (val * bit list)"
 where
-  "decode' ds ZERO = (Void, [])"
-| "decode' ds ONE = (Void, ds)"
-| "decode' ds (CH d) = (Char d, ds)"
+  "decode' bs ZERO = (undefined, bs)"
+| "decode' bs ONE = (Void, bs)"
+| "decode' bs (CH d) = (Char d, bs)"
 | "decode' [] (ALT r1 r2) = (Void, [])"
-| "decode' (Z # ds) (ALT r1 r2) = (let (v, ds') = decode' ds r1 in (Left v, ds'))"
-| "decode' (S # ds) (ALT r1 r2) = (let (v, ds') = decode' ds r2 in (Right v, ds'))"
-| "decode' ds (SEQ r1 r2) = (let (v1, ds') = decode' ds r1 in
-                             let (v2, ds'') = decode' ds' r2 in (Seq v1 v2, ds''))"
+| "decode' (Z # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r1 in (Left v, bs'))"
+| "decode' (S # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r2 in (Right v, bs'))"
+| "decode' bs (SEQ r1 r2) = (let (v1, bs') = decode' bs r1 in
+                             let (v2, bs'') = decode' bs' r2 in (Seq v1 v2, bs''))"
 | "decode' [] (STAR r) = (Void, [])"
-| "decode' (S # ds) (STAR r) = (Stars [], ds)"
-| "decode' (Z # ds) (STAR r) = (let (v, ds') = decode' ds r in
-                                    let (vs, ds'') = decode' ds' (STAR r) 
-                                    in (Stars_add v vs, ds''))"
+| "decode' (S # bs) (STAR r) = (Stars [], bs)"
+| "decode' (Z # bs) (STAR r) = (let (v, bs') = decode' bs r in
+                                    let (vs, bs'') = decode' bs' (STAR r) 
+                                    in (Stars_add v vs, bs''))"
 by pat_completeness auto
 
 lemma decode'_smaller:
-  assumes "decode'_dom (ds, r)"
-  shows "length (snd (decode' ds r)) \<le> length ds"
+  assumes "decode'_dom (bs, r)"
+  shows "length (snd (decode' bs r)) \<le> length bs"
 using assms
-apply(induct ds r)
+apply(induct bs r)
 apply(auto simp add: decode'.psimps split: prod.split)
 using dual_order.trans apply blast
 by (meson dual_order.trans le_SucI)
@@ -1032,6 +1032,152 @@
   using blexer_correctness main_blexer_simp by simp
 
 
+(* some tests *)
+
+fun awidth :: "arexp \<Rightarrow> nat" where
+  "awidth AZERO = 1"
+| "awidth (AONE cs) = 1" 
+| "awidth (ACHAR cs c) = 1"
+| "awidth (AALTs cs rs) = (sum_list (map awidth rs))"
+| "awidth (ASEQ cs r1 r2) = (awidth r1 + awidth r2)"
+| "awidth (ASTAR cs r) = (awidth r)"
+
+
+
+lemma 
+  shows "s \<notin> L r \<Longrightarrow> blexer_simp r s = None"
+  by (simp add: blexersimp_correctness lexer_correct_None)
+
+lemma g1:
+  "bders_simp AZERO s = AZERO"
+ apply(induct s)
+ apply(simp)
+ apply(simp)
+  done
+
+lemma g2:
+  "s \<noteq> Nil \<Longrightarrow> bders_simp (AONE bs) s = AZERO"
+ apply(induct s)
+ apply(simp)
+  apply(simp)
+  apply(case_tac s)
+  apply(simp)
+       apply(simp)
+  done
+
+lemma finite_pder:
+  shows "finite (pder c r)"
+  apply(induct c r rule: pder.induct)
+  apply(auto)
+  done
+
+lemma asize_fuse:
+  shows "asize (fuse bs r) = asize r"
+  apply(induct r arbitrary: bs)
+  apply(auto)
+  done
+
+lemma awidth_fuse:
+  shows "awidth (fuse bs r) = awidth r"
+  apply(induct r arbitrary: bs)
+  apply(auto)
+  done
+
+lemma pders_SEQs:
+  assumes "finite A"
+  shows "card (SEQs A (STAR r)) \<le> card A"
+  using assms
+  by (simp add: SEQs_eq_image card_image_le)
+
+lemma binullable_intern:
+  shows "bnullable (intern r) = nullable r"
+  apply(induct r)
+  apply(auto simp add: bnullable_fuse)
+  done
+
+lemma
+  "asize (bsimp (bder c (intern r))) \<le> Suc ((Suc (card (pder c r))) * (size r) * (size r))"
+  oops
+
+lemma
+  "card (pder c r) \<le> awidth (bsimp (bder c (intern r)))"
+ apply(induct c r rule: pder.induct)
+  apply(simp)
+      apply(simp)
+     apply(simp)
+    apply(simp)
+  oops
+
+lemma
+  "card (pder c r) \<le> awidth (bder c (intern r))"
+  apply(induct c r rule: pder.induct)
+  apply(simp)
+      apply(simp)
+     apply(simp)
+    apply(simp)
+  apply(rule order_trans)
+     apply(rule card_Un_le)
+  apply (simp add: awidth_fuse bder_fuse)
+   defer
+   apply(simp)
+  apply(rule order_trans)
+    apply(rule pders_SEQs)
+  using finite_pder apply presburger
+  apply (simp add: awidth_fuse)
+  apply(auto)
+     apply(rule order_trans)
+      apply(rule card_Un_le)
+     apply(simp add: awidth_fuse)
+     defer
+  using binullable_intern apply blast
+  using binullable_intern apply blast
+  apply (smt (verit, best) SEQs_eq_image add.commute add_Suc_right card_image_le dual_order.trans finite_pder trans_le_add2)
+  apply(subgoal_tac "card (SEQs (pder c r1) r2) \<le> card (pder c r1)")
+  apply(linarith)
+    by (simp add: UNION_singleton_eq_range card_image_le finite_pder)
+  
+lemma
+  "card (pder c r) \<le> asize (bder c (intern r))"
+  apply(induct c r rule: pder.induct)
+       apply(simp)
+  apply(simp)
+     apply(simp)
+    apply(simp)
+  apply (metis add_mono_thms_linordered_semiring(1) asize_fuse bder_fuse card_Un_le le_Suc_eq order_trans)
+  defer 
+   apply(simp)
+   apply(rule order_trans)
+    apply(rule pders_SEQs)
+  using finite_pder apply presburger
+  apply (simp add: asize_fuse)
+  apply(simp)
+  apply(auto)
+  apply(rule order_trans)
+      apply(rule card_Un_le)
+  apply (smt (z3) SEQs_eq_image add.commute add_Suc_right add_mono_thms_linordered_semiring(1) asize_fuse card_image_le dual_order.trans finite_pder le_add1)
+    apply(rule order_trans)
+     apply(rule card_Un_le)
+  using binullable_intern apply blast
+  using binullable_intern apply blast
+  by (smt (verit, best) SEQs_eq_image add.commute add_Suc_right card_image_le dual_order.trans finite_pder trans_le_add2)
+  
+lemma
+  "card (pder c r) \<le> asize (bsimp (bder c (intern r)))"
+  apply(induct c r rule: pder.induct)
+       apply(simp)
+      apply(simp)
+     apply(simp)
+    apply(simp)
+  apply(rule order_trans)
+     apply(rule card_Un_le)
+    prefer 3
+    apply(simp)
+  apply(rule order_trans)
+     apply(rule pders_SEQs)
+  using finite_pder apply blast
+  oops
+  
+
 (* below is the idempotency of bsimp *)
 
 lemma bsimp_ASEQ_fuse:
Binary file thys2/paper.pdf has changed