--- a/thys2/PDerivs.thy Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/PDerivs.thy Wed Feb 02 14:52:41 2022 +0000
@@ -12,7 +12,7 @@
"SEQs rs r = (\<lambda>r'. SEQ r' r) ` rs"
by auto
-primrec
+fun
pder :: "char \<Rightarrow> rexp \<Rightarrow> rexp set"
where
"pder c ZERO = {}"
@@ -23,7 +23,7 @@
(if nullable r1 then SEQs (pder c r1) r2 \<union> pder c r2 else SEQs (pder c r1) r2)"
| "pder c (STAR r) = SEQs (pder c r) (STAR r)"
-primrec
+fun
pders :: "char list \<Rightarrow> rexp \<Rightarrow> rexp set"
where
"pders [] r = {r}"
--- a/thys2/Paper/Paper.thy Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/Paper/Paper.thy Wed Feb 02 14:52:41 2022 +0000
@@ -45,17 +45,19 @@
set ("_" [73] 73) and
AZERO ("ZERO" 81) and
- AONE ("ONE _" [79] 81) and
+ AONE ("ONE _" [79] 78) and
ACHAR ("CHAR _ _" [79, 79] 80) and
AALTs ("ALTs _ _" [77,77] 78) and
- ASEQ ("SEQ _ _ _" [79, 77,77] 78) and
+ ASEQ ("SEQ _ _ _" [79, 79,79] 78) and
ASTAR ("STAR _ _" [79, 79] 78) and
code ("code _" [79] 74) and
intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
- bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80)
+ bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
+
+ srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80)
lemma better_retrieve:
@@ -244,10 +246,32 @@
section {* Bitcoded Regular Expressions and Derivatives *}
text {*
- bitcoded regexes / decoding / bmkeps
- gets rid of the second phase (only single phase)
- correctness
+
+ Sulzmann and Lu describe another algorithm that generates POSIX
+ values but dispences with the second phase where characters are
+ injected ``back'' into values. For this they annotate bitcodes to
+ regular expressions, which we define in Isabelle/HOL as the datatype
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{term breg} & $::=$ & @{term "AZERO"}\\
+ & $\mid$ & @{term "AONE bs"}\\
+ & $\mid$ & @{term "ACHAR bs c"}\\
+ & $\mid$ & @{term "AALTs bs rs"}\\
+ & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
+ & $\mid$ & @{term "ASTAR bs r"}
+ \end{tabular}
+ \end{center}
+
+ \noindent where @{text bs} stands for a bitsequences; @{text r},
+ @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for annotated regular
+ expressions; and @{text rs} for a list of annotated regular
+ expressions. In contrast to Sulzmann and Lu we generalise the
+ alternative regular expressions to lists, instead of just having
+ binary regular expressions. The idea with annotated regular
+ expressions is to incrementally generate the value information by
+ recording bitsequences. Sulzmann and Lu then
+ define a coding function for how values can be coded into bitsequences.
\begin{center}
\begin{tabular}{lcl}
@@ -261,6 +285,26 @@
\end{tabular}
\end{center}
+ There is also a corresponding decoding function that takes a bitsequence
+ and generates back a value. However, since the bitsequences are a ``lossy''
+ coding (@{term Seq}s are not coded) the decoding function depends also
+ on a regular expression in order to decode values.
+
+ \begin{center}
+ \begin{tabular}{lcll}
+ %@{thm (lhs) decode'.simps(1)} & $\dn$ & @{thm (rhs) decode'.simps(1)}\\
+ @{thm (lhs) decode'.simps(2)} & $\dn$ & @{thm (rhs) decode'.simps(2)}\\
+ @{thm (lhs) decode'.simps(3)} & $\dn$ & @{thm (rhs) decode'.simps(3)}\\
+ @{thm (lhs) decode'.simps(4)} & $\dn$ & @{thm (rhs) decode'.simps(4)}\\
+ @{thm (lhs) decode'.simps(5)} & $\dn$ & @{thm (rhs) decode'.simps(5)}\\
+ @{thm (lhs) decode'.simps(6)} & $\dn$ & @{thm (rhs) decode'.simps(6)}\\
+ @{thm (lhs) decode'.simps(7)} & $\dn$ & @{thm (rhs) decode'.simps(7)}\\
+ @{thm (lhs) decode'.simps(8)} & $\dn$ & @{thm (rhs) decode'.simps(8)}\\
+ @{thm (lhs) decode'.simps(9)} & $\dn$ & @{thm (rhs) decode'.simps(9)}\\
+ @{thm (lhs) decode'.simps(10)} & $\dn$ & @{thm (rhs) decode'.simps(10)} & fix
+ \end{tabular}
+ \end{center}
+
The idea of the bitcodes is to annotate them to regular expressions and generate values
incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value.
@@ -296,6 +340,11 @@
\begin{theorem}
@{thm blexer_correctness}
\end{theorem}
+
+
+ bitcoded regexes / decoding / bmkeps
+ gets rid of the second phase (only single phase)
+ correctness
*}
--- a/thys2/Paper/document/root.tex Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/Paper/document/root.tex Wed Feb 02 14:52:41 2022 +0000
@@ -65,8 +65,8 @@
string---that is, which part of the string is matched by which part
of the regular expression. The purpose of the bitcodes is to generate POSIX values incrementally while
derivatives are calculated. They also help with designing
- an `aggressive' simplification function that keeps the size of
- derivatives small. Without simplification derivatives can grow
+ an ``aggressive'' simplification function that keeps the size of
+ derivatives small. Without simplification the size derivatives can grow
exponentially resulting in an extremely slow lexing algorithm. In this
paper we describe a variant of Sulzmann and Lu's algorithm: Our
algorithm is a recursive functional program, whereas Sulzmann
--- a/thys2/SizeBound4.thy Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/SizeBound4.thy Wed Feb 02 14:52:41 2022 +0000
@@ -1,6 +1,6 @@
theory SizeBound4
- imports "Lexer"
+ imports "Lexer" "PDerivs"
begin
section \<open>Bit-Encodings\<close>
@@ -26,26 +26,26 @@
function
decode' :: "bit list \<Rightarrow> rexp \<Rightarrow> (val * bit list)"
where
- "decode' ds ZERO = (Void, [])"
-| "decode' ds ONE = (Void, ds)"
-| "decode' ds (CH d) = (Char d, ds)"
+ "decode' bs ZERO = (undefined, bs)"
+| "decode' bs ONE = (Void, bs)"
+| "decode' bs (CH d) = (Char d, bs)"
| "decode' [] (ALT r1 r2) = (Void, [])"
-| "decode' (Z # ds) (ALT r1 r2) = (let (v, ds') = decode' ds r1 in (Left v, ds'))"
-| "decode' (S # ds) (ALT r1 r2) = (let (v, ds') = decode' ds r2 in (Right v, ds'))"
-| "decode' ds (SEQ r1 r2) = (let (v1, ds') = decode' ds r1 in
- let (v2, ds'') = decode' ds' r2 in (Seq v1 v2, ds''))"
+| "decode' (Z # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r1 in (Left v, bs'))"
+| "decode' (S # bs) (ALT r1 r2) = (let (v, bs') = decode' bs r2 in (Right v, bs'))"
+| "decode' bs (SEQ r1 r2) = (let (v1, bs') = decode' bs r1 in
+ let (v2, bs'') = decode' bs' r2 in (Seq v1 v2, bs''))"
| "decode' [] (STAR r) = (Void, [])"
-| "decode' (S # ds) (STAR r) = (Stars [], ds)"
-| "decode' (Z # ds) (STAR r) = (let (v, ds') = decode' ds r in
- let (vs, ds'') = decode' ds' (STAR r)
- in (Stars_add v vs, ds''))"
+| "decode' (S # bs) (STAR r) = (Stars [], bs)"
+| "decode' (Z # bs) (STAR r) = (let (v, bs') = decode' bs r in
+ let (vs, bs'') = decode' bs' (STAR r)
+ in (Stars_add v vs, bs''))"
by pat_completeness auto
lemma decode'_smaller:
- assumes "decode'_dom (ds, r)"
- shows "length (snd (decode' ds r)) \<le> length ds"
+ assumes "decode'_dom (bs, r)"
+ shows "length (snd (decode' bs r)) \<le> length bs"
using assms
-apply(induct ds r)
+apply(induct bs r)
apply(auto simp add: decode'.psimps split: prod.split)
using dual_order.trans apply blast
by (meson dual_order.trans le_SucI)
@@ -1032,6 +1032,152 @@
using blexer_correctness main_blexer_simp by simp
+(* some tests *)
+
+fun awidth :: "arexp \<Rightarrow> nat" where
+ "awidth AZERO = 1"
+| "awidth (AONE cs) = 1"
+| "awidth (ACHAR cs c) = 1"
+| "awidth (AALTs cs rs) = (sum_list (map awidth rs))"
+| "awidth (ASEQ cs r1 r2) = (awidth r1 + awidth r2)"
+| "awidth (ASTAR cs r) = (awidth r)"
+
+
+
+lemma
+ shows "s \<notin> L r \<Longrightarrow> blexer_simp r s = None"
+ by (simp add: blexersimp_correctness lexer_correct_None)
+
+lemma g1:
+ "bders_simp AZERO s = AZERO"
+ apply(induct s)
+ apply(simp)
+ apply(simp)
+ done
+
+lemma g2:
+ "s \<noteq> Nil \<Longrightarrow> bders_simp (AONE bs) s = AZERO"
+ apply(induct s)
+ apply(simp)
+ apply(simp)
+ apply(case_tac s)
+ apply(simp)
+ apply(simp)
+ done
+
+lemma finite_pder:
+ shows "finite (pder c r)"
+ apply(induct c r rule: pder.induct)
+ apply(auto)
+ done
+
+lemma asize_fuse:
+ shows "asize (fuse bs r) = asize r"
+ apply(induct r arbitrary: bs)
+ apply(auto)
+ done
+
+lemma awidth_fuse:
+ shows "awidth (fuse bs r) = awidth r"
+ apply(induct r arbitrary: bs)
+ apply(auto)
+ done
+
+lemma pders_SEQs:
+ assumes "finite A"
+ shows "card (SEQs A (STAR r)) \<le> card A"
+ using assms
+ by (simp add: SEQs_eq_image card_image_le)
+
+lemma binullable_intern:
+ shows "bnullable (intern r) = nullable r"
+ apply(induct r)
+ apply(auto simp add: bnullable_fuse)
+ done
+
+lemma
+ "asize (bsimp (bder c (intern r))) \<le> Suc ((Suc (card (pder c r))) * (size r) * (size r))"
+ oops
+
+lemma
+ "card (pder c r) \<le> awidth (bsimp (bder c (intern r)))"
+ apply(induct c r rule: pder.induct)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ oops
+
+lemma
+ "card (pder c r) \<le> awidth (bder c (intern r))"
+ apply(induct c r rule: pder.induct)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply(rule order_trans)
+ apply(rule card_Un_le)
+ apply (simp add: awidth_fuse bder_fuse)
+ defer
+ apply(simp)
+ apply(rule order_trans)
+ apply(rule pders_SEQs)
+ using finite_pder apply presburger
+ apply (simp add: awidth_fuse)
+ apply(auto)
+ apply(rule order_trans)
+ apply(rule card_Un_le)
+ apply(simp add: awidth_fuse)
+ defer
+ using binullable_intern apply blast
+ using binullable_intern apply blast
+ apply (smt (verit, best) SEQs_eq_image add.commute add_Suc_right card_image_le dual_order.trans finite_pder trans_le_add2)
+ apply(subgoal_tac "card (SEQs (pder c r1) r2) \<le> card (pder c r1)")
+ apply(linarith)
+ by (simp add: UNION_singleton_eq_range card_image_le finite_pder)
+
+lemma
+ "card (pder c r) \<le> asize (bder c (intern r))"
+ apply(induct c r rule: pder.induct)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply (metis add_mono_thms_linordered_semiring(1) asize_fuse bder_fuse card_Un_le le_Suc_eq order_trans)
+ defer
+ apply(simp)
+ apply(rule order_trans)
+ apply(rule pders_SEQs)
+ using finite_pder apply presburger
+ apply (simp add: asize_fuse)
+ apply(simp)
+ apply(auto)
+ apply(rule order_trans)
+ apply(rule card_Un_le)
+ apply (smt (z3) SEQs_eq_image add.commute add_Suc_right add_mono_thms_linordered_semiring(1) asize_fuse card_image_le dual_order.trans finite_pder le_add1)
+ apply(rule order_trans)
+ apply(rule card_Un_le)
+ using binullable_intern apply blast
+ using binullable_intern apply blast
+ by (smt (verit, best) SEQs_eq_image add.commute add_Suc_right card_image_le dual_order.trans finite_pder trans_le_add2)
+
+lemma
+ "card (pder c r) \<le> asize (bsimp (bder c (intern r)))"
+ apply(induct c r rule: pder.induct)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply(simp)
+ apply(rule order_trans)
+ apply(rule card_Un_le)
+ prefer 3
+ apply(simp)
+ apply(rule order_trans)
+ apply(rule pders_SEQs)
+ using finite_pder apply blast
+ oops
+
+
(* below is the idempotency of bsimp *)
lemma bsimp_ASEQ_fuse:
Binary file thys2/paper.pdf has changed