thys2/Paper/Paper.thy
changeset 405 3cfea5bb5e23
parent 402 1612f2a77bf6
child 410 9261d980225d
--- a/thys2/Paper/Paper.thy	Sun Jan 30 23:37:29 2022 +0000
+++ b/thys2/Paper/Paper.thy	Wed Feb 02 14:52:41 2022 +0000
@@ -45,17 +45,19 @@
   set ("_" [73] 73) and
 
   AZERO ("ZERO" 81) and 
-  AONE ("ONE _" [79] 81) and 
+  AONE ("ONE _" [79] 78) and 
   ACHAR ("CHAR _ _" [79, 79] 80) and
   AALTs ("ALTs _ _" [77,77] 78) and
-  ASEQ ("SEQ _ _ _" [79, 77,77] 78) and
+  ASEQ ("SEQ _ _ _" [79, 79,79] 78) and
   ASTAR ("STAR _ _" [79, 79] 78) and
 
   code ("code _" [79] 74) and
   intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
   erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
   bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
-  bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80)
+  bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
+
+  srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) 
 
 
 lemma better_retrieve:
@@ -244,10 +246,32 @@
 section {* Bitcoded Regular Expressions and Derivatives *}
 
 text {*
-  bitcoded regexes / decoding / bmkeps
-  gets rid of the second phase (only single phase)   
-  correctness
+
+  Sulzmann and Lu describe another algorithm that generates POSIX
+  values but dispences with the second phase where characters are
+  injected ``back'' into values. For this they annotate bitcodes to
+  regular expressions, which we define in Isabelle/HOL as the datatype
 
+  \begin{center}
+  \begin{tabular}{lcl}
+  @{term breg} & $::=$ & @{term "AZERO"}\\
+               & $\mid$ & @{term "AONE bs"}\\
+               & $\mid$ & @{term "ACHAR bs c"}\\
+               & $\mid$ & @{term "AALTs bs rs"}\\
+               & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
+               & $\mid$ & @{term "ASTAR bs r"}
+  \end{tabular}
+  \end{center}
+
+  \noindent where @{text bs} stands for a bitsequences; @{text r},
+  @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for annotated regular
+  expressions; and @{text rs} for a list of annotated regular
+  expressions.  In contrast to Sulzmann and Lu we generalise the
+  alternative regular expressions to lists, instead of just having
+  binary regular expressions.  The idea with annotated regular
+  expressions is to incrementally generate the value information by
+  recording bitsequences. Sulzmann and Lu then  
+  define a coding function for how values can be coded into bitsequences.
 
   \begin{center}
   \begin{tabular}{lcl}
@@ -261,6 +285,26 @@
   \end{tabular}
   \end{center}
 
+  There is also a corresponding decoding function that takes a bitsequence
+  and generates back a value. However, since the bitsequences are a ``lossy''
+  coding (@{term Seq}s are not coded) the decoding function depends also
+  on a regular expression in order to decode values. 
+
+  \begin{center}
+  \begin{tabular}{lcll}
+  %@{thm (lhs) decode'.simps(1)} & $\dn$ & @{thm (rhs) decode'.simps(1)}\\
+  @{thm (lhs) decode'.simps(2)} & $\dn$ & @{thm (rhs) decode'.simps(2)}\\
+  @{thm (lhs) decode'.simps(3)} & $\dn$ & @{thm (rhs) decode'.simps(3)}\\
+  @{thm (lhs) decode'.simps(4)} & $\dn$ & @{thm (rhs) decode'.simps(4)}\\
+  @{thm (lhs) decode'.simps(5)} & $\dn$ & @{thm (rhs) decode'.simps(5)}\\
+  @{thm (lhs) decode'.simps(6)} & $\dn$ & @{thm (rhs) decode'.simps(6)}\\
+  @{thm (lhs) decode'.simps(7)} & $\dn$ & @{thm (rhs) decode'.simps(7)}\\
+  @{thm (lhs) decode'.simps(8)} & $\dn$ & @{thm (rhs) decode'.simps(8)}\\
+  @{thm (lhs) decode'.simps(9)} & $\dn$ & @{thm (rhs) decode'.simps(9)}\\
+  @{thm (lhs) decode'.simps(10)} & $\dn$ & @{thm (rhs) decode'.simps(10)} & fix
+  \end{tabular}
+  \end{center}
+
 
   The idea of the bitcodes is to annotate them to regular expressions and generate values
   incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value.
@@ -296,6 +340,11 @@
   \begin{theorem}
   @{thm blexer_correctness} 
   \end{theorem}
+
+
+  bitcoded regexes / decoding / bmkeps
+  gets rid of the second phase (only single phase)   
+  correctness
 *}