# HG changeset patch # User Christian Urban # Date 1643577684 0 # Node ID 1612f2a77bf63d9c5335cc5b99abd97b3662cb93 # Parent 8bbe2468fedcca78a1147cecc10ec70ee4434156 more definitions in the paper diff -r 8bbe2468fedc -r 1612f2a77bf6 thys2/Paper/Paper.thy --- a/thys2/Paper/Paper.thy Sun Jan 30 01:03:26 2022 +0000 +++ b/thys2/Paper/Paper.thy Sun Jan 30 21:21:24 2022 +0000 @@ -26,7 +26,43 @@ CH ("_" [1000] 80) and ALT ("_ + _" [77,77] 78) and SEQ ("_ \ _" [77,77] 78) and - STAR ("_\<^sup>\" [79] 78) + STAR ("_\<^sup>\" [79] 78) and + + val.Void ("Empty" 78) and + val.Char ("Char _" [1000] 78) and + val.Left ("Left _" [79] 78) and + val.Right ("Right _" [1000] 78) and + val.Seq ("Seq _ _" [79,79] 78) and + val.Stars ("Stars _" [79] 78) and + + Posix ("'(_, _') \ _" [63,75,75] 75) and + + flat ("|_|" [75] 74) and + flats ("|_|" [72] 74) and + injval ("inj _ _ _" [79,77,79] 76) and + mkeps ("mkeps _" [79] 76) and + length ("len _" [73] 73) and + set ("_" [73] 73) and + + AZERO ("ZERO" 81) and + AONE ("ONE _" [79] 81) and + ACHAR ("CHAR _ _" [79, 79] 80) and + AALTs ("ALTs _ _" [77,77] 78) and + ASEQ ("SEQ _ _ _" [79, 77,77] 78) and + ASTAR ("STAR _ _" [79, 79] 78) and + + code ("code _" [79] 74) and + intern ("_\<^latex>\\\mbox{$^\\uparrow$}\" [900] 80) and + erase ("_\<^latex>\\\mbox{$^\\downarrow$}\" [1000] 74) and + bnullable ("nullable\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and + bmkeps ("mkeps\<^latex>\\\mbox{$_b$}\ _" [1000] 80) + + +lemma better_retrieve: + shows "rs \ Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" + and "rs \ Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" + apply (metis list.exhaust retrieve.simps(4)) + by (metis list.exhaust retrieve.simps(5)) (*>*) @@ -76,6 +112,31 @@ longest match. +\begin{center} +\begin{tabular}{cc} + \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} + @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ + @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ + @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ + @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\ + & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\ + & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\ + % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} + \end{tabular} + & + \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} + @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ + @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ + @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ + @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ + \end{tabular} +\end{tabular} +\end{center} + \begin{figure}[t] \begin{center} @@ -151,14 +212,90 @@ \end{figure} + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ + @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ + \end{tabular} + \end{center} + + \begin{center} + \begin{tabular}{l@ {\hspace{5mm}}lcl} + \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ + \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & + @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ + \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & + @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ + & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ + \end{tabular} + \end{center} + *} -section {* Bitcoded Derivatives *} +section {* Bitcoded Regular Expressions and Derivatives *} text {* bitcoded regexes / decoding / bmkeps gets rid of the second phase (only single phase) correctness + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ + @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ + @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ + @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ + @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ + @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} + \end{tabular} + \end{center} + + + The idea of the bitcodes is to annotate them to regular expressions and generate values + incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value. + + \begin{center} + \begin{tabular}{lcl} + @{term breg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALTs bs rs"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} + \end{tabular} + \end{center} + + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\ + @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\ + @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\ + @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\ + @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\ + @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]} + & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\ + @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)} + \end{tabular} + \end{center} + + + \begin{theorem} + @{thm blexer_correctness} + \end{theorem} *} @@ -166,31 +303,34 @@ text {* Sulzmann \& Lu apply simplification via a fixpoint operation; also does not use erase to filter out duplicates. - + not direct correspondence with PDERs, because of example problem with retrieve correctness + + + \begin{figure}[t] \begin{center} \begin{tabular}{c} - @{thm[mode=Axiom] bs1}\qquad - @{thm[mode=Axiom] bs2}\qquad - @{thm[mode=Axiom] bs3}\\ - @{thm[mode=Rule] bs4}\qquad - @{thm[mode=Rule] bs5}\\ - @{thm[mode=Rule] bs6}\qquad - @{thm[mode=Rule] bs7}\\ - @{thm[mode=Rule] bs8}\\ + @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}\qquad + @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}\qquad + @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}\\ + @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}\qquad + @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}\\ + @{thm[mode=Axiom] bs6}\qquad + @{thm[mode=Axiom] bs7}\\ + @{thm[mode=Rule] bs8[of "rs\<^sub>1" "rs\<^sub>2"]}\\ @{thm[mode=Axiom] ss1}\qquad - @{thm[mode=Rule] ss2}\qquad - @{thm[mode=Rule] ss3}\\ + @{thm[mode=Rule] ss2[of "rs\<^sub>1" "rs\<^sub>2"]}\qquad + @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}\\ @{thm[mode=Axiom] ss4}\qquad - @{thm[mode=Axiom] ss5}\qquad - @{thm[mode=Rule] ss6}\\ + @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Rule] ss6[of "r\<^sub>1" "r\<^sub>2" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}\\ \end{tabular} \end{center} \caption{???}\label{SimpRewrites} diff -r 8bbe2468fedc -r 1612f2a77bf6 thys2/Paper/document/root.tex --- a/thys2/Paper/document/root.tex Sun Jan 30 01:03:26 2022 +0000 +++ b/thys2/Paper/document/root.tex Sun Jan 30 21:21:24 2022 +0000 @@ -63,20 +63,19 @@ expressions. Their algorithm generates POSIX values which encode the information of \emph{how} a regular expression matches a string---that is, which part of the string is matched by which part - of the regular expression. The purpose of the bitcodes in Sulzmann - and Lu's algorithm is to generate POSIX values incrementally while - derivatives are calculated. However they also help with designing - `aggressive' simplification functions that keep the size of + of the regular expression. The purpose of the bitcodes is to generate POSIX values incrementally while + derivatives are calculated. They also help with designing + an `aggressive' simplification function that keeps the size of derivatives small. Without simplification derivatives can grow exponentially resulting in an extremely slow lexing algorithm. In this paper we describe a variant of Sulzmann and Lu's algorithm: Our - algorithm is a small, recursive functional program, whereas Sulzmann + algorithm is a recursive functional program, whereas Sulzmann and Lu's version involves a fixpoint construction. We \textit{(i)} prove in Isabelle/HOL that our program is correct and generates unique POSIX values; we also \textit{(ii)} establish a polynomial bound for the size of the derivatives. The size can be seen as a - proxy measure for the efficiency of the lexing algorithm---that means - because of the polynomial bound our algorithm does not suffer from + proxy measure for the efficiency of the lexing algorithm: because of + the polynomial bound our algorithm does not suffer from the exponential blowup in earlier works. % Brzozowski introduced the notion of derivatives for regular diff -r 8bbe2468fedc -r 1612f2a77bf6 thys2/SizeBound4.thy --- a/thys2/SizeBound4.thy Sun Jan 30 01:03:26 2022 +0000 +++ b/thys2/SizeBound4.thy Sun Jan 30 21:21:24 2022 +0000 @@ -146,7 +146,7 @@ fun retrieve :: "arexp \ val \ bit list" where "retrieve (AONE bs) Void = bs" | "retrieve (ACHAR bs c) (Char d) = bs" -| "retrieve (AALTs bs [r]) v = bs @ retrieve r v" +| "retrieve (AALTs bs [r]) v = bs @ retrieve r v" | "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" | "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" | "retrieve (ASEQ bs r1 r2) (Seq v1 v2) = bs @ retrieve r1 v1 @ retrieve r2 v2" @@ -457,7 +457,7 @@ (if (f x) \ acc then distinctBy xs f acc else x # (distinctBy xs f ({f x} \ acc)))" - + fun flts :: "arexp list \ arexp list" where diff -r 8bbe2468fedc -r 1612f2a77bf6 thys2/paper.pdf Binary file thys2/paper.pdf has changed