diff -r f65444d29e74 -r 78cc255e286f thys2/Journal/Paper.thy --- a/thys2/Journal/Paper.thy Tue Nov 02 13:57:59 2021 +0000 +++ b/thys2/Journal/Paper.thy Tue Nov 02 19:42:52 2021 +0000 @@ -155,12 +155,42 @@ However what is not certain is whether we can add simplification to the bit-coded algorithm, without breaking the correct lexing output. + + +The reason that we do need to add a simplification phase +after each derivative step of $\textit{blexer}$ is +because it produces intermediate +regular expressions that can grow exponentially. +For example, the regular expression $(a+aa)^*$ after taking +derivative against just 10 $a$s will have size 8192. + +%TODO: add figure for this? + +\begin{lemma} +@{thm blexer_simp_def} +\end{lemma} + + +\begin{center} + \begin{tabular}{lcl} + @{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) bsimp.simps(2)} & $\dn$ & @{thm (rhs) bsimp.simps(2)}\\ + @{thm (lhs) bsimp.simps(3)} & $\dn$ & @{thm (rhs) bsimp.simps(3)}\\ + @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ +\end{tabular} +\end{center} + + This might sound trivial in the case of producing a YES/NO answer, but once we require a lexing output to be produced (which is required in applications like compiler front-end, malicious attack domain extraction, etc.), it is not straightforward if we still extract what is needed according to the POSIX standard. + + + + By simplification, we mean specifically the following rules: \begin{center} @@ -169,10 +199,10 @@ @{thm[mode=Axiom] rrewrite.intros(2)[of "bs" "r\<^sub>1"]}\\ @{thm[mode=Axiom] rrewrite.intros(3)[of "bs" "bs\<^sub>1" "r\<^sub>1"]}\\ @{thm[mode=Rule] rrewrite.intros(4)[of "r\<^sub>1" "r\<^sub>2" "bs" "r\<^sub>3"]}\\ - @{thm[mode=Rule] rrewrite.intros(5)[of "bs" "r\<^sub>2"]}\\ - @{thm[mode=Rule] rrewrite.intros(6)[of "bs" "r\<^sub>1"]}\\ - @{thm[mode=Axiom] rrewrite.intros(7)[of "bs" "r\<^sub>2"]}\\ - @{thm[mode=Axiom] rrewrite.intros(8)[of "bs" "r\<^sub>1"]}\\ + @{thm[mode=Rule] rrewrite.intros(5)[of "r\<^sub>3" "r\<^sub>4" "bs" "r\<^sub>1"]}\\ + @{thm[mode=Rule] rrewrite.intros(6)[of "r" "r'" "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Axiom] rrewrite.intros(7)[of "bs" "rs\<^sub>a" "rs\<^sub>b"]}\\ + @{thm[mode=Axiom] rrewrite.intros(8)[of "bs" "rs\<^sub>a" "bs\<^sub>1" "rs\<^sub>1" "rs\<^sub>b"]}\\ @{thm[mode=Axiom] rrewrite.intros(9)[of "bs" "r\<^sub>2"]}\\ @{thm[mode=Axiom] rrewrite.intros(10)[of "bs" "r\<^sub>1"]}\\ @{thm[mode=Axiom] rrewrite.intros(11)[of "bs" ]}\\ @@ -185,15 +215,6 @@ And these can be made compact by the following simplification function: -\begin{center} - \begin{tabular}{lcl} - @{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) bsimp.simps(2)} & $\dn$ & @{thm (rhs) bsimp.simps(2)}\\ - @{thm (lhs) bsimp.simps(3)} & $\dn$ & @{thm (rhs) bsimp.simps(3)}\\ - @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ -\end{tabular} -\end{center} - where the function $\mathit{bsimp_AALTs}$ The core idea of the proof is that two regular expressions, @@ -209,7 +230,7 @@ correctness result: if two (nullable) regular expressions are "rewritable" in many steps from one another, -then a call to function $textit{bmkeps}$ gives the same +then a call to function $\textit{bmkeps}$ gives the same bit-sequence : \begin{lemma} @{thm [mode=IfThen] rewrites_bmkeps}