(*<*)theory Paperimports "../Lexer" "../Simplifying" "../Positions" "../SizeBound4" "HOL-Library.LaTeXsugar"begindeclare [[show_question_marks = false]]notation (latex output) If ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) abbreviation "der_syn r c \<equiv> der c r"notation (latex output) der_syn ("_\\_" [79, 1000] 76) and ZERO ("\<^bold>0" 81) and ONE ("\<^bold>1" 81) and CH ("_" [1000] 80) and ALT ("_ + _" [77,77] 78) and SEQ ("_ \<cdot> _" [77,77] 78) and STAR ("_\<^sup>\<star>" [79] 78) (*>*)section {* Introduction *}text {*Brzozowski \cite{Brzozowski1964} introduced the notion of the {\emderivative} @{term "der c r"} of a regular expression \<open>r\<close> w.r.t.\a character~\<open>c\<close>, and showed that it gave a simple solution to theproblem of matching a string @{term s} with a regular expression @{termr}: if the derivative of @{term r} w.r.t.\ (in succession) all thecharacters of the string matches the empty string, then @{term r}matches @{term s} (and {\em vice versa}). The derivative has theproperty (which may almost be regarded as its specification) that, forevery string @{term s} and regular expression @{term r} and character@{term c}, one has @{term "cs \<in> L(r)"} if and only if \mbox{@{term "s \<in> L(der c r)"}}. The beauty of Brzozowski's derivatives is thatthey are neatly expressible in any functional language, and easilydefinable and reasoned about in theorem provers---the definitions justconsist of inductive datatypes and simple recursive functions. Amechanised correctness proof of Brzozowski's matcher in for example HOL4has been mentioned by Owens and Slind~\cite{Owens2008}. Another one inIsabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}.And another one in Coq is given by Coquand and Siles \cite{Coquand2012}.If a regular expression matches a string, then in general there is morethan one way of how the string is matched. There are two commonly useddisambiguation strategies to generate a unique answer: one is calledGREEDY matching \cite{Frisch2004} and the other is POSIXmatching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}.For example consider the string @{term xy} and the regular expression\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can bematched in two `iterations' by the single letter-regular expressions@{term x} and @{term y}, or directly in one iteration by @{term xy}. Thefirst case corresponds to GREEDY matching, which first matches with theleft-most symbol and only matches the next symbol in case of a mismatch(this is greedy in the sense of preferring instant gratification todelayed repletion). The second case is POSIX matching, which prefers thelongest match.\begin{figure}[t]\begin{center}\begin{tikzpicture}[scale=2,node distance=1.3cm, every node/.style={minimum size=6mm}]\node (r1) {@{term "r\<^sub>1"}};\node (r2) [right=of r1]{@{term "r\<^sub>2"}};\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};\node (r3) [right=of r2]{@{term "r\<^sub>3"}};\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};\node (r4) [right=of r3]{@{term "r\<^sub>4"}};\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};\node (v4) [below=of r4]{@{term "v\<^sub>4"}};\draw[->,line width=1mm](r4) -- (v4);\node (v3) [left=of v4] {@{term "v\<^sub>3"}};\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};\node (v2) [left=of v3]{@{term "v\<^sub>2"}};\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};\node (v1) [left=of v2] {@{term "v\<^sub>1"}};\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};\end{tikzpicture}\end{center}\mbox{}\\[-13mm]\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014},matching the string @{term "[a,b,c]"}. The first phase (the arrows from left to right) is \Brz's matcher building successive derivatives. If the last regular expression is @{term nullable}, then the functions of the second phase are called (the top-down and right-to-left arrows): first @{term mkeps} calculates a value @{term "v\<^sub>4"} witnessinghow the empty string has been recognised by @{term "r\<^sub>4"}. Afterthat the function @{term inj} ``injects back'' the characters of the string intothe values.\label{Sulz}}\end{figure} *}section {* Background *}text {* Sulzmann-Lu algorithm with inj. State that POSIX rules. metion slg is correct. \begin{figure}[t] \begin{center} \begin{tabular}{c} @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \qquad @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\medskip\\ @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\qquad @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\ $\mprset{flushleft} \inferrule {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\\ @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\medskip\\ $\mprset{flushleft} \inferrule {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close> \end{tabular} \end{center} \caption{Our inductive definition of POSIX values.}\label{POSIXrules} \end{figure}*}section {* Bitcoded Derivatives *}text {* bitcoded regexes / decoding / bmkeps gets rid of the second phase (only single phase) correctness*}section {* Simplification *}text {* not direct correspondence with PDERs, because of example problem with retrieve correctness \begin{figure}[t] \begin{center} \begin{tabular}{c} @{thm[mode=Axiom] bs1}\qquad @{thm[mode=Axiom] bs2}\qquad @{thm[mode=Axiom] bs3}\\ @{thm[mode=Rule] bs4}\qquad @{thm[mode=Rule] bs5}\\ @{thm[mode=Rule] bs6}\qquad @{thm[mode=Rule] bs7}\\ @{thm[mode=Rule] bs8}\\ @{thm[mode=Axiom] ss1}\qquad @{thm[mode=Rule] ss2}\qquad @{thm[mode=Rule] ss3}\\ @{thm[mode=Axiom] ss4}\qquad @{thm[mode=Axiom] ss5}\qquad @{thm[mode=Rule] ss6}\\ \end{tabular} \end{center} \caption{???}\label{SimpRewrites} \end{figure}*}section {* Bound - NO *}section {* Bounded Regex / Not *}section {* Conclusion *}text {*\cite{AusafDyckhoffUrban2016}%%\bibliographystyle{plain}\bibliography{root}*}(*<*)end(*>*)