--- a/thys2/Paper/Paper.thy Sun Jan 30 23:36:31 2022 +0000
+++ b/thys2/Paper/Paper.thy Sun Jan 30 23:37:29 2022 +0000
@@ -7,7 +7,342 @@
"../SizeBound4"
"HOL-Library.LaTeXsugar"
begin
+
+declare [[show_question_marks = false]]
+
+notation (latex output)
+ If ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and
+ Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73)
+
+
+abbreviation
+ "der_syn r c \<equiv> der c r"
+
+notation (latex output)
+ der_syn ("_\\_" [79, 1000] 76) and
+
+ ZERO ("\<^bold>0" 81) and
+ ONE ("\<^bold>1" 81) and
+ CH ("_" [1000] 80) and
+ ALT ("_ + _" [77,77] 78) and
+ SEQ ("_ \<cdot> _" [77,77] 78) and
+ STAR ("_\<^sup>\<star>" [79] 78) and
+
+ val.Void ("Empty" 78) and
+ val.Char ("Char _" [1000] 78) and
+ val.Left ("Left _" [79] 78) and
+ val.Right ("Right _" [1000] 78) and
+ val.Seq ("Seq _ _" [79,79] 78) and
+ val.Stars ("Stars _" [79] 78) and
+
+ Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
+
+ flat ("|_|" [75] 74) and
+ flats ("|_|" [72] 74) and
+ injval ("inj _ _ _" [79,77,79] 76) and
+ mkeps ("mkeps _" [79] 76) and
+ length ("len _" [73] 73) and
+ set ("_" [73] 73) and
+
+ AZERO ("ZERO" 81) and
+ AONE ("ONE _" [79] 81) and
+ ACHAR ("CHAR _ _" [79, 79] 80) and
+ AALTs ("ALTs _ _" [77,77] 78) and
+ ASEQ ("SEQ _ _ _" [79, 77,77] 78) and
+ ASTAR ("STAR _ _" [79, 79] 78) and
+
+ code ("code _" [79] 74) and
+ intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
+ erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
+ bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
+ bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80)
+
+
+lemma better_retrieve:
+ shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
+ and "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
+ apply (metis list.exhaust retrieve.simps(4))
+ by (metis list.exhaust retrieve.simps(5))
+
(*>*)
+
+section {* Introduction *}
+
+text {*
+
+In the last fifteen or so years, Brzozowski's derivatives of regular
+expressions have sparked quite a bit of interest in the functional
+programming and theorem prover communities. The beauty of
+Brzozowski's derivatives \cite{Brzozowski1964} is that they are neatly
+expressible in any functional language, and easily definable and
+reasoned about in theorem provers---the definitions just consist of
+inductive datatypes and simple recursive functions. A mechanised
+correctness proof of Brzozowski's matcher in for example HOL4 has been
+mentioned by Owens and Slind~\cite{Owens2008}. Another one in
+Isabelle/HOL is part of the work by Krauss and Nipkow
+\cite{Krauss2011}. And another one in Coq is given by Coquand and
+Siles \cite{Coquand2012}.
+
+
+The notion of derivatives
+\cite{Brzozowski1964}, written @{term "der c r"}, of a regular
+expression give a simple solution to the problem of matching a string
+@{term s} with a regular expression @{term r}: if the derivative of
+@{term r} w.r.t.\ (in succession) all the characters of the string
+matches the empty string, then @{term r} matches @{term s} (and {\em
+vice versa}). The derivative has the property (which may almost be
+regarded as its specification) that, for every string @{term s} and
+regular expression @{term r} and character @{term c}, one has @{term
+"cs \<in> L(r)"} if and only if \mbox{@{term "s \<in> L(der c r)"}}.
+
+
+If a regular expression matches a string, then in general there is more
+than one way of how the string is matched. There are two commonly used
+disambiguation strategies to generate a unique answer: one is called
+GREEDY matching \cite{Frisch2004} and the other is POSIX
+matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}.
+For example consider the string @{term xy} and the regular expression
+\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be
+matched in two `iterations' by the single letter-regular expressions
+@{term x} and @{term y}, or directly in one iteration by @{term xy}. The
+first case corresponds to GREEDY matching, which first matches with the
+left-most symbol and only matches the next symbol in case of a mismatch
+(this is greedy in the sense of preferring instant gratification to
+delayed repletion). The second case is POSIX matching, which prefers the
+longest match.
+
+
+\begin{center}
+\begin{tabular}{cc}
+ \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
+ @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\
+ @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\
+ @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\
+ @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\
+ & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\
+ & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\
+ % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}
+ \end{tabular}
+ &
+ \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
+ @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\
+ @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\
+ @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\
+ @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\
+ \end{tabular}
+\end{tabular}
+\end{center}
+
+
+\begin{figure}[t]
+\begin{center}
+\begin{tikzpicture}[scale=2,node distance=1.3cm,
+ every node/.style={minimum size=6mm}]
+\node (r1) {@{term "r\<^sub>1"}};
+\node (r2) [right=of r1]{@{term "r\<^sub>2"}};
+\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
+\node (r3) [right=of r2]{@{term "r\<^sub>3"}};
+\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
+\node (r4) [right=of r3]{@{term "r\<^sub>4"}};
+\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
+\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
+\node (v4) [below=of r4]{@{term "v\<^sub>4"}};
+\draw[->,line width=1mm](r4) -- (v4);
+\node (v3) [left=of v4] {@{term "v\<^sub>3"}};
+\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
+\node (v2) [left=of v3]{@{term "v\<^sub>2"}};
+\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
+\node (v1) [left=of v2] {@{term "v\<^sub>1"}};
+\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
+\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
+\end{tikzpicture}
+\end{center}
+\mbox{}\\[-13mm]
+
+\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014},
+matching the string @{term "[a,b,c]"}. The first phase (the arrows from
+left to right) is \Brz's matcher building successive derivatives. If the
+last regular expression is @{term nullable}, then the functions of the
+second phase are called (the top-down and right-to-left arrows): first
+@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
+how the empty string has been recognised by @{term "r\<^sub>4"}. After
+that the function @{term inj} ``injects back'' the characters of the string into
+the values.
+\label{Sulz}}
+\end{figure}
+
+
+*}
+
+section {* Background *}
+
+text {*
+ Sulzmann-Lu algorithm with inj. State that POSIX rules.
+ metion slg is correct.
+
+
+ \begin{figure}[t]
+ \begin{center}
+ \begin{tabular}{c}
+ @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \qquad
+ @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\medskip\\
+ @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\qquad
+ @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\
+ $\mprset{flushleft}
+ \inferrule
+ {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
+ @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
+ @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
+ {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\\
+ @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\medskip\\
+ $\mprset{flushleft}
+ \inferrule
+ {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
+ @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
+ @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
+ @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
+ {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>
+ \end{tabular}
+ \end{center}
+ \caption{Our inductive definition of POSIX values.}\label{POSIXrules}
+ \end{figure}
+
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
+ @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\
+ \end{tabular}
+ \end{center}
+
+ \begin{center}
+ \begin{tabular}{l@ {\hspace{5mm}}lcl}
+ \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\
+ \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ &
+ @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\
+ \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ &
+ @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
+ \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$
+ & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
+ \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$
+ & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
+ \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$
+ & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
+ \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$
+ & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\
+ \end{tabular}
+ \end{center}
+
+*}
+
+section {* Bitcoded Regular Expressions and Derivatives *}
+
+text {*
+ bitcoded regexes / decoding / bmkeps
+ gets rid of the second phase (only single phase)
+ correctness
+
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\
+ @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\
+ @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\
+ @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\
+ @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\
+ @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\
+ @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)}
+ \end{tabular}
+ \end{center}
+
+
+ The idea of the bitcodes is to annotate them to regular expressions and generate values
+ incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value.
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{term breg} & $::=$ & @{term "AZERO"}\\
+ & $\mid$ & @{term "AONE bs"}\\
+ & $\mid$ & @{term "ACHAR bs c"}\\
+ & $\mid$ & @{term "AALTs bs rs"}\\
+ & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
+ & $\mid$ & @{term "ASTAR bs r"}
+ \end{tabular}
+ \end{center}
+
+
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\
+ @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\
+ @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\
+ @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\
+ @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\
+ @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}
+ & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\
+ @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\
+ @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}
+ \end{tabular}
+ \end{center}
+
+
+ \begin{theorem}
+ @{thm blexer_correctness}
+ \end{theorem}
+*}
+
+
+section {* Simplification *}
+
+text {*
+ Sulzmann \& Lu apply simplification via a fixpoint operation; also does not use erase to filter out duplicates.
+
+ not direct correspondence with PDERs, because of example
+ problem with retrieve
+
+ correctness
+
+
+
+
+
+
+ \begin{figure}[t]
+ \begin{center}
+ \begin{tabular}{c}
+ @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}\qquad
+ @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}\qquad
+ @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}\\
+ @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}\qquad
+ @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}\\
+ @{thm[mode=Axiom] bs6}\qquad
+ @{thm[mode=Axiom] bs7}\\
+ @{thm[mode=Rule] bs8[of "rs\<^sub>1" "rs\<^sub>2"]}\\
+ @{thm[mode=Axiom] ss1}\qquad
+ @{thm[mode=Rule] ss2[of "rs\<^sub>1" "rs\<^sub>2"]}\qquad
+ @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm[mode=Axiom] ss4}\qquad
+ @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\
+ @{thm[mode=Rule] ss6[of "r\<^sub>1" "r\<^sub>2" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}\\
+ \end{tabular}
+ \end{center}
+ \caption{???}\label{SimpRewrites}
+ \end{figure}
+*}
+
+section {* Bound - NO *}
+
+section {* Bounded Regex / Not *}
+
+section {* Conclusion *}
+
text {*
\cite{AusafDyckhoffUrban2016}