diff -r 660cf698eb26 -r 3831621d7b14 ChengsongTanPhdThesis/Chapters/Finite.tex --- a/ChengsongTanPhdThesis/Chapters/Finite.tex Tue Jul 25 17:28:29 2023 +0100 +++ b/ChengsongTanPhdThesis/Chapters/Finite.tex Wed Aug 23 03:02:31 2023 +0100 @@ -1,6 +1,6 @@ % Chapter Template -\chapter{Finiteness Bound} % Main chapter title +\chapter{A Formal Proof That $\textit{Blexer}\_\textit{simp}$ will not Grow Unbounded} % Main chapter title \label{Finite} % In Chapter 4 \ref{Chapter4} we give the second guarantee @@ -8,15 +8,18 @@ %regex's derivatives. %(this is cahpter 5 now) -In this chapter we give a bound in terms of the size of +In this chapter we prove a bound in terms of the size of the calculated derivatives: -given an annotated regular expression $a$, for any string $s$ +given an annotated regular expression $a$, there exists +a constant integer $N$, such that for any string $s$ our algorithm $\blexersimp$'s derivatives -are finitely bounded -by a constant that only depends on $a$. -Formally we show that there exists a constant integer $N_a$ such that +are bounded +by $N$. %a constant that only depends on $a$. +Formally this can be expresssed +as +%we show that there exists a constant integer $N_a$ such that \begin{center} - $\llbracket \bderssimp{a}{s} \rrbracket \leq N_a$ + $\llbracket \bderssimp{a}{s} \rrbracket \leq N$ \end{center} \noindent where the size ($\llbracket \_ \rrbracket$) of @@ -36,7 +39,7 @@ save other possibilities on a stack when exploring one possible path of matching. Catastrophic backtracking typically occurs when the number of steps increase exponentially with respect - to input. In other words, the runtime is $O((c_r)^n)$ of the input + to input. In other words, the complexity is $O((c_r)^n)$ of the input string length $n$, where the base of the exponent is determined by the regular expression $r$. %so that they @@ -107,7 +110,7 @@ $\llbracket \ZERO \rrbracket$ & $\dn$ & $1$ \\ $\llbracket _{bs} r_1 \cdot r_2 \rrbracket$ & $\dn$ & $\llbracket r_1 \rrbracket + \llbracket r_2 \rrbracket + 1$\\ $\llbracket _{bs}\mathbf{c} \rrbracket $ & $\dn$ & $1$\\ - $\llbracket _{bs}\sum as \rrbracket $ & $\dn$ & $\map \; (\llbracket \_ \rrbracket)\; as + 1$\\ + $\llbracket _{bs}\sum as \rrbracket $ & $\dn$ & $(\textit{sum}\; (\map \; (\llbracket \_ \rrbracket)\; as) ) + 1$\\ $\llbracket _{bs} a^* \rrbracket $ & $\dn$ & $\llbracket a \rrbracket + 1$. \end{tabular} \end{center} @@ -152,40 +155,40 @@ Our proof states that all the blue nodes stay below a size bound $N_a$ determined by the input $a$. -\noindent Sulzmann and Lu's assumed a similar picture of their algorithm, -though in fact their algorithm's size might be better depicted by the following graph: -\begin{figure}[H] - \begin{tikzpicture}[scale=2, - every node/.style={minimum size=11mm}, - ->,>=stealth',shorten >=1pt,auto,thick - ] - \node (r0) [rectangle, draw=black, thick, minimum size = 5mm, draw=blue] {$a$}; - \node (r1) [rectangle, draw=black, thick, right=of r0, minimum size = 7mm]{$a_1$}; - \draw[->,line width=0.2mm](r0)--(r1) node[above,midway] {$\backslash c_1$}; - - \node (r1s) [rectangle, draw=blue, thick, right=of r1, minimum size=7mm]{$a_{1s}$}; - \draw[->, line width=0.2mm](r1)--(r1s) node[above, midway] {$\simp'$}; - - \node (r2) [rectangle, draw=black, thick, right=of r1s, minimum size = 17mm]{$a_2$}; - \draw[->,line width=0.2mm](r1s)--(r2) node[above,midway] {$\backslash c_2$}; - - \node (r2s) [rectangle, draw = blue, thick, right=of r2,minimum size=14mm]{$a_{2s}$}; - \draw[->,line width=0.2mm](r2)--(r2s) node[above,midway] {$\simp'$}; - - \node (r3) [rectangle, draw = black, thick, right= of r2s, minimum size = 22mm]{$a_3$}; - \draw[->,line width=0.2mm](r2s)--(r3) node[above,midway] {$\backslash c_3$}; - - \node (rns) [right = of r3, draw=blue, minimum size = 20mm]{$a_{3s}$}; - \draw[->,line width=0.2mm] (r3)--(rns) node [above, midway] {$\simp'$}; - - \node (rnn) [right = of rns, minimum size = 1mm]{}; - \draw[->, dashed] (rns)--(rnn) node [above, midway] {$\ldots$}; - - \end{tikzpicture} - \caption{Regular expression size change during our $\blexersimp$ algorithm}\label{sulzShrinks} -\end{figure} -\noindent +even though it did not work as they expected. +%though in fact their algorithm's size might be better depicted by the following graph: +%\begin{figure}[H] +% \begin{tikzpicture}[scale=2, +% every node/.style={minimum size=11mm}, +% ->,>=stealth',shorten >=1pt,auto,thick +% ] +% \node (r0) [rectangle, draw=black, thick, minimum size = 5mm, draw=blue] {$a$}; +% \node (r1) [rectangle, draw=black, thick, right=of r0, minimum size = 7mm]{$a_1$}; +% \draw[->,line width=0.2mm](r0)--(r1) node[above,midway] {$\backslash c_1$}; +% +% \node (r1s) [rectangle, draw=blue, thick, right=of r1, minimum size=7mm]{$a_{1s}$}; +% \draw[->, line width=0.2mm](r1)--(r1s) node[above, midway] {$\simp'$}; +% +% \node (r2) [rectangle, draw=black, thick, right=of r1s, minimum size = 17mm]{$a_2$}; +% \draw[->,line width=0.2mm](r1s)--(r2) node[above,midway] {$\backslash c_2$}; +% +% \node (r2s) [rectangle, draw = blue, thick, right=of r2,minimum size=14mm]{$a_{2s}$}; +% \draw[->,line width=0.2mm](r2)--(r2s) node[above,midway] {$\simp'$}; +% +% \node (r3) [rectangle, draw = black, thick, right= of r2s, minimum size = 22mm]{$a_3$}; +% \draw[->,line width=0.2mm](r2s)--(r3) node[above,midway] {$\backslash c_3$}; +% +% \node (rns) [right = of r3, draw=blue, minimum size = 20mm]{$a_{3s}$}; +% \draw[->,line width=0.2mm] (r3)--(rns) node [above, midway] {$\simp'$}; +% +% \node (rnn) [right = of rns, minimum size = 1mm]{}; +% \draw[->, dashed] (rns)--(rnn) node [above, midway] {$\ldots$}; +% +% \end{tikzpicture} +% \caption{Regular expression size change during our $\blexersimp$ algorithm}\label{sulzShrinks} +%\end{figure} +%\noindent The picture means that in some cases their lexer (where they use $\simpsulz$ as the simplification function) will have a size explosion, causing the running time @@ -765,10 +768,13 @@ we have to somehow get a more concrete form to begin. We call such more concrete representations the ``closed forms'' of string derivatives as opposed to their original definitions. -The terminology ``closed form'' is borrowed from mathematics, -which usually describe expressions that are solely comprised of finitely many -well-known and easy-to-compute operations such as -additions, multiplications, and exponential functions. +The name ``closed from'' was inspired by closed forms in math, +and the similarity with closed forms here is that they make +estimating the same term easier. +%The terminology ``closed form'' is borrowed from mathematics, +%which usually describe expressions that are solely comprised of finitely many +%well-known and easy-to-compute operations such as +%additions, multiplications, and exponential functions. We start by proving some basic identities involving the simplification functions for r-regular expressions.