# HG changeset patch # User Christian Urban # Date 1662845422 -3600 # Node ID a5f66641010183d4f0034b4e52a6c24c1cb6e48b # Parent 2c9a3aba8ebc4139a8aade51fb281132733f2dca updated paper diff -r 2c9a3aba8ebc -r a5f666410101 thys3/Paper.thy --- a/thys3/Paper.thy Sat Sep 10 15:04:35 2022 +0100 +++ b/thys3/Paper.thy Sat Sep 10 22:30:22 2022 +0100 @@ -263,7 +263,7 @@ extend straightforwardly to them too. The importance of the bounded regular expressions is that they are often used in practical applications, such as Snort (a system for detecting network - intrusion) and also in XML Schema definitions. According to Bj\"{o}rklund et + intrusions) and also in XML Schema definitions. According to Bj\"{o}rklund et al~\cite{BjorklundMartensTimm2015}, bounded regular expressions occur frequently in the latter and can have counters of up to ten million. The problem is that tools based on the classic notion @@ -718,8 +718,8 @@ \begin{figure} \begin{center} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} - \multicolumn{3}{@ {}l}{$\textit{decode}'\,bs\,(\ONE)$ $\;\dn\;$ $(\Empty, bs)$ \quad\qquad - $\textit{decode}'\,bs\,(c)$ $\;\dn\;$ $(\Char\,c, bs)$}\\ + $\textit{decode}'\,bs\,(\ONE)$ & $\;\dn\;$ & $(\Empty, bs)$\\ + $\textit{decode}'\,bs\,(c)$ & $\;\dn\;$ & $(\Char\,c, bs)$\\ $\textit{decode}'\,(\Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ & $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\; (\Left\,v, bs_1)$\\ @@ -735,15 +735,11 @@ $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\ & & $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$ \hspace{2mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\\ - $\textit{decode}'\,(\S\!::\!bs)\,(r^{\{n\}})$ & $\dn$ & $(\Stars\,[], bs)$\\ - $\textit{decode}'\,(\Z\!::\!bs)\,(r^{\{n\}})$ & $\dn$ & - $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\ - & & $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^{\{n - 1\}}$ - \hspace{2mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\medskip\\ - \multicolumn{3}{@ {}l}{$\textit{decode}\,bs\,r$ $\dn$ - $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$ - $\;\,\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\; - \textit{else}\;\textit{None}$}\\[-4mm] + $\textit{decode}'\,bs\,(r^{\{n\}})$ & $\dn$ & $\textit{decode}'\,bs\,r^*$\smallskip\medskip\\ + + $\textit{decode}\,bs\,r$ & $\dn$ & + $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\ + & & $\;\;\;\,\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;\textit{else}\;\textit{None}$\\[-4mm] \end{tabular} \end{center} \caption{Two functions, called $\textit{decode}'$ and \textit{decode}, for decoding a value from a bitsequence with the help of a regular expression.\\[-5mm]}\label{decode} @@ -1037,7 +1033,7 @@ \begin{theorem}\label{thmone} -$\textit{lexer}\,r\,s = \textit{blexer}\,r\,s$ +$\textit{blexer}\,r\,s = \textit{lexer}\,r\,s$ \end{theorem} @@ -1101,7 +1097,7 @@ de-nest, or spill out, @{text ALTs} as follows % \[ - @{term "ALTs bs\<^sub>1 ((ALTs bs\<^sub>2 rs\<^sub>2) # rs\<^sub>1)"} + @{term "ALTs bs\<^sub>1 (((ALTs bs\<^sub>2 rs\<^sub>2)) # rs\<^sub>1)"} \quad\xrightarrow{bsimp}\quad @{term "ALTs bs\<^sub>1 ((map (fuse bs\<^sub>2) rs\<^sub>2) @ rs\<^sub>1)"} \] @@ -1377,10 +1373,10 @@ \noindent With these lemmas in place we can finally establish that @{term "blexer_simp"} and @{term "blexer"} generate the same value, and using Theorem~\ref{thmone} from the previous section that this value - is indeed the POSIX value. + is indeed the POSIX value as generated by \textit{lexer}. \begin{theorem} - @{thm[mode=IfThen] main_blexer_simp} + @{thm[mode=IfThen] main_blexer_simp[symmetric]} \; (@{text "= lexer r s"}\; by Thm.~\ref{thmone}) \end{theorem} %\begin{proof} @@ -1543,7 +1539,8 @@ %point-of-view it is really important to have the formal proofs of %the corresponding properties at hand. - We can of course only make a claim about the correctness and the sizes of the + With the results reported here, we can of course only make a claim about the correctness + of the algorithm and the sizes of the derivatives, not about the efficiency or runtime of our version of Sulzman and Lu's algorithm. But we found the size is an important first indicator about efficiency: clearly if the derivatives can