diff -r 1500f96707b0 -r 3cfea5bb5e23 thys2/Paper/Paper.thy --- a/thys2/Paper/Paper.thy Sun Jan 30 23:37:29 2022 +0000 +++ b/thys2/Paper/Paper.thy Wed Feb 02 14:52:41 2022 +0000 @@ -45,17 +45,19 @@ set ("_" [73] 73) and AZERO ("ZERO" 81) and - AONE ("ONE _" [79] 81) and + AONE ("ONE _" [79] 78) and ACHAR ("CHAR _ _" [79, 79] 80) and AALTs ("ALTs _ _" [77,77] 78) and - ASEQ ("SEQ _ _ _" [79, 77,77] 78) and + ASEQ ("SEQ _ _ _" [79, 79,79] 78) and ASTAR ("STAR _ _" [79, 79] 78) and code ("code _" [79] 74) and intern ("_\<^latex>\\\mbox{$^\\uparrow$}\" [900] 80) and erase ("_\<^latex>\\\mbox{$^\\downarrow$}\" [1000] 74) and bnullable ("nullable\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and - bmkeps ("mkeps\<^latex>\\\mbox{$_b$}\ _" [1000] 80) + bmkeps ("mkeps\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and + + srewrite ("_\<^latex>\\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\ _" [71, 71] 80) lemma better_retrieve: @@ -244,10 +246,32 @@ section {* Bitcoded Regular Expressions and Derivatives *} text {* - bitcoded regexes / decoding / bmkeps - gets rid of the second phase (only single phase) - correctness + + Sulzmann and Lu describe another algorithm that generates POSIX + values but dispences with the second phase where characters are + injected ``back'' into values. For this they annotate bitcodes to + regular expressions, which we define in Isabelle/HOL as the datatype + \begin{center} + \begin{tabular}{lcl} + @{term breg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALTs bs rs"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} + \end{tabular} + \end{center} + + \noindent where @{text bs} stands for a bitsequences; @{text r}, + @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for annotated regular + expressions; and @{text rs} for a list of annotated regular + expressions. In contrast to Sulzmann and Lu we generalise the + alternative regular expressions to lists, instead of just having + binary regular expressions. The idea with annotated regular + expressions is to incrementally generate the value information by + recording bitsequences. Sulzmann and Lu then + define a coding function for how values can be coded into bitsequences. \begin{center} \begin{tabular}{lcl} @@ -261,6 +285,26 @@ \end{tabular} \end{center} + There is also a corresponding decoding function that takes a bitsequence + and generates back a value. However, since the bitsequences are a ``lossy'' + coding (@{term Seq}s are not coded) the decoding function depends also + on a regular expression in order to decode values. + + \begin{center} + \begin{tabular}{lcll} + %@{thm (lhs) decode'.simps(1)} & $\dn$ & @{thm (rhs) decode'.simps(1)}\\ + @{thm (lhs) decode'.simps(2)} & $\dn$ & @{thm (rhs) decode'.simps(2)}\\ + @{thm (lhs) decode'.simps(3)} & $\dn$ & @{thm (rhs) decode'.simps(3)}\\ + @{thm (lhs) decode'.simps(4)} & $\dn$ & @{thm (rhs) decode'.simps(4)}\\ + @{thm (lhs) decode'.simps(5)} & $\dn$ & @{thm (rhs) decode'.simps(5)}\\ + @{thm (lhs) decode'.simps(6)} & $\dn$ & @{thm (rhs) decode'.simps(6)}\\ + @{thm (lhs) decode'.simps(7)} & $\dn$ & @{thm (rhs) decode'.simps(7)}\\ + @{thm (lhs) decode'.simps(8)} & $\dn$ & @{thm (rhs) decode'.simps(8)}\\ + @{thm (lhs) decode'.simps(9)} & $\dn$ & @{thm (rhs) decode'.simps(9)}\\ + @{thm (lhs) decode'.simps(10)} & $\dn$ & @{thm (rhs) decode'.simps(10)} & fix + \end{tabular} + \end{center} + The idea of the bitcodes is to annotate them to regular expressions and generate values incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value. @@ -296,6 +340,11 @@ \begin{theorem} @{thm blexer_correctness} \end{theorem} + + + bitcoded regexes / decoding / bmkeps + gets rid of the second phase (only single phase) + correctness *}