diff -r 6291181fad07 -r 1500f96707b0 thys2/Paper/Paper.thy --- a/thys2/Paper/Paper.thy Sun Jan 30 23:36:31 2022 +0000 +++ b/thys2/Paper/Paper.thy Sun Jan 30 23:37:29 2022 +0000 @@ -7,7 +7,342 @@ "../SizeBound4" "HOL-Library.LaTeXsugar" begin + +declare [[show_question_marks = false]] + +notation (latex output) + If ("(\<^latex>\\\textrm{\if\<^latex>\}\ (_)/ \<^latex>\\\textrm{\then\<^latex>\}\ (_)/ \<^latex>\\\textrm{\else\<^latex>\}\ (_))" 10) and + Cons ("_\<^latex>\\\mbox{$\\,$}\::\<^latex>\\\mbox{$\\,$}\_" [75,73] 73) + + +abbreviation + "der_syn r c \ der c r" + +notation (latex output) + der_syn ("_\\_" [79, 1000] 76) and + + ZERO ("\<^bold>0" 81) and + ONE ("\<^bold>1" 81) and + CH ("_" [1000] 80) and + ALT ("_ + _" [77,77] 78) and + SEQ ("_ \ _" [77,77] 78) and + STAR ("_\<^sup>\" [79] 78) and + + val.Void ("Empty" 78) and + val.Char ("Char _" [1000] 78) and + val.Left ("Left _" [79] 78) and + val.Right ("Right _" [1000] 78) and + val.Seq ("Seq _ _" [79,79] 78) and + val.Stars ("Stars _" [79] 78) and + + Posix ("'(_, _') \ _" [63,75,75] 75) and + + flat ("|_|" [75] 74) and + flats ("|_|" [72] 74) and + injval ("inj _ _ _" [79,77,79] 76) and + mkeps ("mkeps _" [79] 76) and + length ("len _" [73] 73) and + set ("_" [73] 73) and + + AZERO ("ZERO" 81) and + AONE ("ONE _" [79] 81) and + ACHAR ("CHAR _ _" [79, 79] 80) and + AALTs ("ALTs _ _" [77,77] 78) and + ASEQ ("SEQ _ _ _" [79, 77,77] 78) and + ASTAR ("STAR _ _" [79, 79] 78) and + + code ("code _" [79] 74) and + intern ("_\<^latex>\\\mbox{$^\\uparrow$}\" [900] 80) and + erase ("_\<^latex>\\\mbox{$^\\downarrow$}\" [1000] 74) and + bnullable ("nullable\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and + bmkeps ("mkeps\<^latex>\\\mbox{$_b$}\ _" [1000] 80) + + +lemma better_retrieve: + shows "rs \ Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" + and "rs \ Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" + apply (metis list.exhaust retrieve.simps(4)) + by (metis list.exhaust retrieve.simps(5)) + (*>*) + +section {* Introduction *} + +text {* + +In the last fifteen or so years, Brzozowski's derivatives of regular +expressions have sparked quite a bit of interest in the functional +programming and theorem prover communities. The beauty of +Brzozowski's derivatives \cite{Brzozowski1964} is that they are neatly +expressible in any functional language, and easily definable and +reasoned about in theorem provers---the definitions just consist of +inductive datatypes and simple recursive functions. A mechanised +correctness proof of Brzozowski's matcher in for example HOL4 has been +mentioned by Owens and Slind~\cite{Owens2008}. Another one in +Isabelle/HOL is part of the work by Krauss and Nipkow +\cite{Krauss2011}. And another one in Coq is given by Coquand and +Siles \cite{Coquand2012}. + + +The notion of derivatives +\cite{Brzozowski1964}, written @{term "der c r"}, of a regular +expression give a simple solution to the problem of matching a string +@{term s} with a regular expression @{term r}: if the derivative of +@{term r} w.r.t.\ (in succession) all the characters of the string +matches the empty string, then @{term r} matches @{term s} (and {\em +vice versa}). The derivative has the property (which may almost be +regarded as its specification) that, for every string @{term s} and +regular expression @{term r} and character @{term c}, one has @{term +"cs \ L(r)"} if and only if \mbox{@{term "s \ L(der c r)"}}. + + +If a regular expression matches a string, then in general there is more +than one way of how the string is matched. There are two commonly used +disambiguation strategies to generate a unique answer: one is called +GREEDY matching \cite{Frisch2004} and the other is POSIX +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. +For example consider the string @{term xy} and the regular expression +\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be +matched in two `iterations' by the single letter-regular expressions +@{term x} and @{term y}, or directly in one iteration by @{term xy}. The +first case corresponds to GREEDY matching, which first matches with the +left-most symbol and only matches the next symbol in case of a mismatch +(this is greedy in the sense of preferring instant gratification to +delayed repletion). The second case is POSIX matching, which prefers the +longest match. + + +\begin{center} +\begin{tabular}{cc} + \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} + @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ + @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ + @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ + @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\ + & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\ + & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\ + % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} + \end{tabular} + & + \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} + @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ + @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ + @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ + @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ + \end{tabular} +\end{tabular} +\end{center} + + +\begin{figure}[t] +\begin{center} +\begin{tikzpicture}[scale=2,node distance=1.3cm, + every node/.style={minimum size=6mm}] +\node (r1) {@{term "r\<^sub>1"}}; +\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; +\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; +\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; +\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; +\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; +\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; +\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; +\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; +\draw[->,line width=1mm](r4) -- (v4); +\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; +\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\inj r\<^sub>3 c\}; +\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; +\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\inj r\<^sub>2 b\}; +\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; +\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\inj r\<^sub>1 a\}; +\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; +\end{tikzpicture} +\end{center} +\mbox{}\\[-13mm] + +\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, +matching the string @{term "[a,b,c]"}. The first phase (the arrows from +left to right) is \Brz's matcher building successive derivatives. If the +last regular expression is @{term nullable}, then the functions of the +second phase are called (the top-down and right-to-left arrows): first +@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing +how the empty string has been recognised by @{term "r\<^sub>4"}. After +that the function @{term inj} ``injects back'' the characters of the string into +the values. +\label{Sulz}} +\end{figure} + + +*} + +section {* Background *} + +text {* + Sulzmann-Lu algorithm with inj. State that POSIX rules. + metion slg is correct. + + + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + @{thm[mode=Axiom] Posix.intros(1)}\P\@{term "ONE"} \qquad + @{thm[mode=Axiom] Posix.intros(2)}\P\@{term "c"}\medskip\\ + @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\P+L\\qquad + @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\P+R\\medskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad + @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ + @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} + {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\PS\\\ + @{thm[mode=Axiom] Posix.intros(7)}\P[]\\medskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ + @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} + {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\P\\ + \end{tabular} + \end{center} + \caption{Our inductive definition of POSIX values.}\label{POSIXrules} + \end{figure} + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ + @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ + \end{tabular} + \end{center} + + \begin{center} + \begin{tabular}{l@ {\hspace{5mm}}lcl} + \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ + \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & + @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ + \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & + @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ + & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ + \end{tabular} + \end{center} + +*} + +section {* Bitcoded Regular Expressions and Derivatives *} + +text {* + bitcoded regexes / decoding / bmkeps + gets rid of the second phase (only single phase) + correctness + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ + @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ + @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ + @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ + @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ + @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} + \end{tabular} + \end{center} + + + The idea of the bitcodes is to annotate them to regular expressions and generate values + incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value. + + \begin{center} + \begin{tabular}{lcl} + @{term breg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALTs bs rs"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} + \end{tabular} + \end{center} + + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\ + @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\ + @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\ + @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\ + @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\ + @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]} + & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\ + @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)} + \end{tabular} + \end{center} + + + \begin{theorem} + @{thm blexer_correctness} + \end{theorem} +*} + + +section {* Simplification *} + +text {* + Sulzmann \& Lu apply simplification via a fixpoint operation; also does not use erase to filter out duplicates. + + not direct correspondence with PDERs, because of example + problem with retrieve + + correctness + + + + + + + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}\qquad + @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}\qquad + @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}\\ + @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}\qquad + @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}\\ + @{thm[mode=Axiom] bs6}\qquad + @{thm[mode=Axiom] bs7}\\ + @{thm[mode=Rule] bs8[of "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Axiom] ss1}\qquad + @{thm[mode=Rule] ss2[of "rs\<^sub>1" "rs\<^sub>2"]}\qquad + @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm[mode=Axiom] ss4}\qquad + @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Rule] ss6[of "r\<^sub>1" "r\<^sub>2" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}\\ + \end{tabular} + \end{center} + \caption{???}\label{SimpRewrites} + \end{figure} +*} + +section {* Bound - NO *} + +section {* Bounded Regex / Not *} + +section {* Conclusion *} + text {* \cite{AusafDyckhoffUrban2016}