153 @{thm [mode=IfThen] blexer_correctness} |
153 @{thm [mode=IfThen] blexer_correctness} |
154 \end{lemma} |
154 \end{lemma} |
155 |
155 |
156 However what is not certain is whether we can add simplification |
156 However what is not certain is whether we can add simplification |
157 to the bit-coded algorithm, without breaking the correct lexing output. |
157 to the bit-coded algorithm, without breaking the correct lexing output. |
158 This might sound trivial in the case of producing a YES/NO answer, |
158 |
159 but once we require a lexing output to be produced (which is required |
159 |
160 in applications like compiler front-end, malicious attack domain extraction, |
160 The reason that we do need to add a simplification phase |
161 etc.), it is not straightforward if we still extract what is needed according |
161 after each derivative step of $\textit{blexer}$ is |
162 to the POSIX standard. |
162 because it produces intermediate |
163 |
163 regular expressions that can grow exponentially. |
164 By simplification, we mean specifically the following rules: |
164 For example, the regular expression $(a+aa)^*$ after taking |
165 |
165 derivative against just 10 $a$s will have size 8192. |
166 \begin{center} |
166 |
167 \begin{tabular}{lcl} |
167 %TODO: add figure for this? |
168 @{thm[mode=Axiom] rrewrite.intros(1)[of "bs" "r\<^sub>2"]}\\ |
168 |
169 @{thm[mode=Axiom] rrewrite.intros(2)[of "bs" "r\<^sub>1"]}\\ |
169 \begin{lemma} |
170 @{thm[mode=Axiom] rrewrite.intros(3)[of "bs" "bs\<^sub>1" "r\<^sub>1"]}\\ |
170 @{thm blexer_simp_def} |
171 @{thm[mode=Rule] rrewrite.intros(4)[of "r\<^sub>1" "r\<^sub>2" "bs" "r\<^sub>3"]}\\ |
171 \end{lemma} |
172 @{thm[mode=Rule] rrewrite.intros(5)[of "bs" "r\<^sub>2"]}\\ |
172 |
173 @{thm[mode=Rule] rrewrite.intros(6)[of "bs" "r\<^sub>1"]}\\ |
|
174 @{thm[mode=Axiom] rrewrite.intros(7)[of "bs" "r\<^sub>2"]}\\ |
|
175 @{thm[mode=Axiom] rrewrite.intros(8)[of "bs" "r\<^sub>1"]}\\ |
|
176 @{thm[mode=Axiom] rrewrite.intros(9)[of "bs" "r\<^sub>2"]}\\ |
|
177 @{thm[mode=Axiom] rrewrite.intros(10)[of "bs" "r\<^sub>1"]}\\ |
|
178 @{thm[mode=Axiom] rrewrite.intros(11)[of "bs" ]}\\ |
|
179 @{thm[mode=Axiom] rrewrite.intros(12)[of "bs" "r\<^sub>1"]}\\ |
|
180 @{thm[mode=Rule] rrewrite.intros(13)[of "a\<^sub>1" "a\<^sub>2" "bs" "rs\<^sub>a" "rs\<^sub>b" "rs\<^sub>c"]}\\ |
|
181 |
|
182 \end{tabular} |
|
183 \end{center} |
|
184 |
|
185 |
|
186 And these can be made compact by the following simplification function: |
|
187 |
173 |
188 \begin{center} |
174 \begin{center} |
189 \begin{tabular}{lcl} |
175 \begin{tabular}{lcl} |
190 @{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\ |
176 @{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\ |
191 @{thm (lhs) bsimp.simps(2)} & $\dn$ & @{thm (rhs) bsimp.simps(2)}\\ |
177 @{thm (lhs) bsimp.simps(2)} & $\dn$ & @{thm (rhs) bsimp.simps(2)}\\ |
192 @{thm (lhs) bsimp.simps(3)} & $\dn$ & @{thm (rhs) bsimp.simps(3)}\\ |
178 @{thm (lhs) bsimp.simps(3)} & $\dn$ & @{thm (rhs) bsimp.simps(3)}\\ |
193 @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
179 @{thm (lhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) intern.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
194 \end{tabular} |
180 \end{tabular} |
195 \end{center} |
181 \end{center} |
|
182 |
|
183 |
|
184 This might sound trivial in the case of producing a YES/NO answer, |
|
185 but once we require a lexing output to be produced (which is required |
|
186 in applications like compiler front-end, malicious attack domain extraction, |
|
187 etc.), it is not straightforward if we still extract what is needed according |
|
188 to the POSIX standard. |
|
189 |
|
190 |
|
191 |
|
192 |
|
193 |
|
194 By simplification, we mean specifically the following rules: |
|
195 |
|
196 \begin{center} |
|
197 \begin{tabular}{lcl} |
|
198 @{thm[mode=Axiom] rrewrite.intros(1)[of "bs" "r\<^sub>2"]}\\ |
|
199 @{thm[mode=Axiom] rrewrite.intros(2)[of "bs" "r\<^sub>1"]}\\ |
|
200 @{thm[mode=Axiom] rrewrite.intros(3)[of "bs" "bs\<^sub>1" "r\<^sub>1"]}\\ |
|
201 @{thm[mode=Rule] rrewrite.intros(4)[of "r\<^sub>1" "r\<^sub>2" "bs" "r\<^sub>3"]}\\ |
|
202 @{thm[mode=Rule] rrewrite.intros(5)[of "r\<^sub>3" "r\<^sub>4" "bs" "r\<^sub>1"]}\\ |
|
203 @{thm[mode=Rule] rrewrite.intros(6)[of "r" "r'" "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\ |
|
204 @{thm[mode=Axiom] rrewrite.intros(7)[of "bs" "rs\<^sub>a" "rs\<^sub>b"]}\\ |
|
205 @{thm[mode=Axiom] rrewrite.intros(8)[of "bs" "rs\<^sub>a" "bs\<^sub>1" "rs\<^sub>1" "rs\<^sub>b"]}\\ |
|
206 @{thm[mode=Axiom] rrewrite.intros(9)[of "bs" "r\<^sub>2"]}\\ |
|
207 @{thm[mode=Axiom] rrewrite.intros(10)[of "bs" "r\<^sub>1"]}\\ |
|
208 @{thm[mode=Axiom] rrewrite.intros(11)[of "bs" ]}\\ |
|
209 @{thm[mode=Axiom] rrewrite.intros(12)[of "bs" "r\<^sub>1"]}\\ |
|
210 @{thm[mode=Rule] rrewrite.intros(13)[of "a\<^sub>1" "a\<^sub>2" "bs" "rs\<^sub>a" "rs\<^sub>b" "rs\<^sub>c"]}\\ |
|
211 |
|
212 \end{tabular} |
|
213 \end{center} |
|
214 |
|
215 |
|
216 And these can be made compact by the following simplification function: |
196 |
217 |
197 where the function $\mathit{bsimp_AALTs}$ |
218 where the function $\mathit{bsimp_AALTs}$ |
198 |
219 |
199 The core idea of the proof is that two regular expressions, |
220 The core idea of the proof is that two regular expressions, |
200 if "isomorphic" up to a finite number of rewrite steps, will |
221 if "isomorphic" up to a finite number of rewrite steps, will |