43 mkeps ("mkeps _" [79] 76) and |
43 mkeps ("mkeps _" [79] 76) and |
44 length ("len _" [73] 73) and |
44 length ("len _" [73] 73) and |
45 set ("_" [73] 73) and |
45 set ("_" [73] 73) and |
46 |
46 |
47 AZERO ("ZERO" 81) and |
47 AZERO ("ZERO" 81) and |
48 AONE ("ONE _" [79] 81) and |
48 AONE ("ONE _" [79] 78) and |
49 ACHAR ("CHAR _ _" [79, 79] 80) and |
49 ACHAR ("CHAR _ _" [79, 79] 80) and |
50 AALTs ("ALTs _ _" [77,77] 78) and |
50 AALTs ("ALTs _ _" [77,77] 78) and |
51 ASEQ ("SEQ _ _ _" [79, 77,77] 78) and |
51 ASEQ ("SEQ _ _ _" [79, 79,79] 78) and |
52 ASTAR ("STAR _ _" [79, 79] 78) and |
52 ASTAR ("STAR _ _" [79, 79] 78) and |
53 |
53 |
54 code ("code _" [79] 74) and |
54 code ("code _" [79] 74) and |
55 intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and |
55 intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and |
56 erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and |
56 erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and |
57 bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and |
57 bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and |
58 bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) |
58 bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and |
|
59 |
|
60 srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) |
59 |
61 |
60 |
62 |
61 lemma better_retrieve: |
63 lemma better_retrieve: |
62 shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" |
64 shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" |
63 and "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" |
65 and "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" |
242 *} |
244 *} |
243 |
245 |
244 section {* Bitcoded Regular Expressions and Derivatives *} |
246 section {* Bitcoded Regular Expressions and Derivatives *} |
245 |
247 |
246 text {* |
248 text {* |
247 bitcoded regexes / decoding / bmkeps |
249 |
248 gets rid of the second phase (only single phase) |
250 Sulzmann and Lu describe another algorithm that generates POSIX |
249 correctness |
251 values but dispences with the second phase where characters are |
250 |
252 injected ``back'' into values. For this they annotate bitcodes to |
|
253 regular expressions, which we define in Isabelle/HOL as the datatype |
|
254 |
|
255 \begin{center} |
|
256 \begin{tabular}{lcl} |
|
257 @{term breg} & $::=$ & @{term "AZERO"}\\ |
|
258 & $\mid$ & @{term "AONE bs"}\\ |
|
259 & $\mid$ & @{term "ACHAR bs c"}\\ |
|
260 & $\mid$ & @{term "AALTs bs rs"}\\ |
|
261 & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ |
|
262 & $\mid$ & @{term "ASTAR bs r"} |
|
263 \end{tabular} |
|
264 \end{center} |
|
265 |
|
266 \noindent where @{text bs} stands for a bitsequences; @{text r}, |
|
267 @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for annotated regular |
|
268 expressions; and @{text rs} for a list of annotated regular |
|
269 expressions. In contrast to Sulzmann and Lu we generalise the |
|
270 alternative regular expressions to lists, instead of just having |
|
271 binary regular expressions. The idea with annotated regular |
|
272 expressions is to incrementally generate the value information by |
|
273 recording bitsequences. Sulzmann and Lu then |
|
274 define a coding function for how values can be coded into bitsequences. |
251 |
275 |
252 \begin{center} |
276 \begin{center} |
253 \begin{tabular}{lcl} |
277 \begin{tabular}{lcl} |
254 @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ |
278 @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ |
255 @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ |
279 @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ |
256 @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ |
280 @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ |
257 @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ |
281 @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ |
258 @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ |
282 @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ |
259 @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ |
283 @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ |
260 @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} |
284 @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} |
|
285 \end{tabular} |
|
286 \end{center} |
|
287 |
|
288 There is also a corresponding decoding function that takes a bitsequence |
|
289 and generates back a value. However, since the bitsequences are a ``lossy'' |
|
290 coding (@{term Seq}s are not coded) the decoding function depends also |
|
291 on a regular expression in order to decode values. |
|
292 |
|
293 \begin{center} |
|
294 \begin{tabular}{lcll} |
|
295 %@{thm (lhs) decode'.simps(1)} & $\dn$ & @{thm (rhs) decode'.simps(1)}\\ |
|
296 @{thm (lhs) decode'.simps(2)} & $\dn$ & @{thm (rhs) decode'.simps(2)}\\ |
|
297 @{thm (lhs) decode'.simps(3)} & $\dn$ & @{thm (rhs) decode'.simps(3)}\\ |
|
298 @{thm (lhs) decode'.simps(4)} & $\dn$ & @{thm (rhs) decode'.simps(4)}\\ |
|
299 @{thm (lhs) decode'.simps(5)} & $\dn$ & @{thm (rhs) decode'.simps(5)}\\ |
|
300 @{thm (lhs) decode'.simps(6)} & $\dn$ & @{thm (rhs) decode'.simps(6)}\\ |
|
301 @{thm (lhs) decode'.simps(7)} & $\dn$ & @{thm (rhs) decode'.simps(7)}\\ |
|
302 @{thm (lhs) decode'.simps(8)} & $\dn$ & @{thm (rhs) decode'.simps(8)}\\ |
|
303 @{thm (lhs) decode'.simps(9)} & $\dn$ & @{thm (rhs) decode'.simps(9)}\\ |
|
304 @{thm (lhs) decode'.simps(10)} & $\dn$ & @{thm (rhs) decode'.simps(10)} & fix |
261 \end{tabular} |
305 \end{tabular} |
262 \end{center} |
306 \end{center} |
263 |
307 |
264 |
308 |
265 The idea of the bitcodes is to annotate them to regular expressions and generate values |
309 The idea of the bitcodes is to annotate them to regular expressions and generate values |