# HG changeset patch # User Christian Urban # Date 1472031460 -3600 # Node ID 02568e85a394bba2d038df98d0eabdcbfca6eb1c # Parent 599b2bfcebf674145d7894a67eea94430964adf9 updated diff -r 599b2bfcebf6 -r 02568e85a394 Slides/slides04.pdf Binary file Slides/slides04.pdf has changed diff -r 599b2bfcebf6 -r 02568e85a394 Slides/slides04.tex --- a/Slides/slides04.tex Sat Aug 20 13:39:19 2016 +0100 +++ b/Slides/slides04.tex Wed Aug 24 10:37:40 2016 +0100 @@ -21,32 +21,7 @@ {\end{minipage}\egroup;% \end{tikzpicture}\bigskip} -\newcommand\grid[1]{% -\begin{tikzpicture}[baseline=(char.base)] - \path[use as bounding box] - (0,0) rectangle (1em,1em); - \draw[red!50, fill=red!20] - (0,0) rectangle (1em,1em); - \node[inner sep=1pt,anchor=base west] - (char) at (0em,\gridraiseamount) {#1}; -\end{tikzpicture}} -\newcommand\gridraiseamount{0.12em} - -\makeatletter -\newcommand\Grid[1]{% - \@tfor\z:=#1\do{\grid{\z}}} -\makeatother - -\newcommand\Vspace[1][.3em]{% - \mbox{\kern.06em\vrule height.3ex}% - \vbox{\hrule width#1}% - \hbox{\vrule height.3ex}} - -\def\VS{\Vspace[0.6em]} - - -\title[POSIX Lexing with Derivatives of Regexes] - {\bf POSIX Lexing with\\ +\title{\bf POSIX Lexing with\\ \bf Derivatives of Regular Expressions\\ \bf (Proof Pearl)} \author{Fahad Ausaf, Roy Dyckhoff and Christian Urban} @@ -125,6 +100,18 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \begin{frame} +% \frametitle{Sulzmann and Lu's Matcher} +% \begin{itemize} +% \item Sulzmann \& Lu came up with a beautiful idea for how +% to extend the simple regular expression matcher to POSIX +% matching/lexing (FLOPS 2014) +% \item The idea: define an inverse operation to the derivatives +% \end{itemize} +% \end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame} \frametitle{Sulzmann and Lu's Matcher} @@ -177,39 +164,6 @@ \alert{\textbf{how}}: POSIX, GREEDY, \ldots \end{frame} - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - \begin{frame}{Regular Expressions and Values} - Regular expressions and their corresponding values (for how a - regular expression matched a string):\bigskip - - \begin{columns}[c] % the "c" option specifies center vertical alignment - \column{.4\textwidth} % column designated by a command - \begin{tabular}{ l l l } - \smath{r} & ::= & \smath{\Zero} \\ - & $\mid$ & \smath{\One} \\ - & $\mid$ & \smath{c} \\ - & $\mid$ & \smath{r_1 \cdot r_2} \\ - & $\mid$ & \smath{r_1 + r_2} \\ \\ - & $\mid$ & \smath{r^*} \\ - \end{tabular} - \column{.4\textwidth} - \begin{tabular}{ l l l } - - \smath{v} & ::= & \\ - & $\mid$ & \smath{Empty} \\ - & $\mid$ & \smath{Char(c)} \\ - & $\mid$ & \smath{Seq(v_1\cdot v_2)} \\ - & $\mid$ & \smath{Left(v)} \\ - & $\mid$ & \smath{Right(v)} \\ - & $\mid$ & \smath{[v_1,...,v_n]} - \end{tabular} - \end{columns} - \end{frame} - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -230,13 +184,15 @@ token. \end{bubble} - For example: \smath{r_{keywords} + r_{identifiers}}:\bigskip + For example: \smath{r_{keywords} + r_{identifiers}} (fix graphics below) - \begin{itemize} - \item \smath{\texttt{\Grid{iffoo\VS bla}}} - - \item \smath{\texttt{\Grid{if\VS bla}}} - \end{itemize} + \begin{center} + \texttt{iffoo\_bla} + \end{center} + + \begin{center} + \texttt{if\_bla} + \end{center} \end{frame} @@ -266,46 +222,47 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \begin{frame} +% \frametitle{Correctness} +% \begin{itemize} +% \item Sulzmann \& Lu came up with a beautiful idea for how +% to extend the simple regular expression matcher to POSIX +% matching/lexing (FLOPS 2014) +% \item The idea: define an inverse operation to the derivatives +% \end{itemize} +% \end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame} - \frametitle{``Correctness'' by Sulzmann and Lu} - \begin{itemize} - \item Sulzmann \& Lu's idea is to order all possible - answer such that they can prove the correct answer is - the maximum - \item The idea is taken from a GREEDY algorithm (and it - works there)\bigskip\pause - - \item {\bf But} we made no progress in formalising - Sulzmann \& Lu's idea, because - - \begin{itemize} - \item transitivity, existence of maxima etc all fail to - turn into real proofs - \item the reason: the ordering works only if .... - \item though we did find mistakes: -\begin{center} - \small - ``How could I miss this? Well, I was rather careless when - stating this Lemma :)\smallskip - - Great example how formal machine checked proofs (and - proof assistants) can help to spot flawed reasoning steps.'' - - \end{center}\pause - - \begin{center} - \small - ``Well, I don't think there's any flaw. The issue is how to - come up with a mechanical proof. In my world mathematical - proof $=$ mechanical proof doesn't necessarily hold.'' - - \end{center}\pause - \end{itemize} - - \end{itemize} + \frametitle{Regular Expressions and Values} + Regular expressions and their corresponding values (for how a + regular expression matched string): + \begin{columns}[c] % the "c" option specifies center vertical alignment + \column{.5\textwidth} % column designated by a command + \begin{tabular}{ l l l } + $r$ & ::= & $\varnothing$ \\ + & $\mid$ & $\epsilon$ \\ + & $\mid$ & $c$ \\ + & $\mid$ & $r_1 \cdot r_2$ \\ + & $\mid$ & $r_1 + r_2$ \\ + & $\mid$ & $r^*$ \\ + \end{tabular} + \column{.5\textwidth} + \begin{tabular}{ l l l } + $v$ & ::= & $\varnothing$ \\ + & $\mid$ & $Empty$ \\ + & $\mid$ & $Char$($c$) \\ + & $\mid$ & $Seq$($v_1.v_2$) \\ + & $\mid$ & $Left$($v$) \\ + & $\mid$ & $Right$($v$) \\ + & $\mid$ & [] \\ + & $\mid$ & [$v_1,...,v_n$] + \end{tabular} + \end{columns} + There is also a notion of a string behind a value $\mid v \mid$ \end{frame} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} @@ -324,7 +281,8 @@ \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {$der\,c$};\pause \draw (r4) node[anchor=west] {\;\raisebox{3mm}{$\;\;nullable?$}};\pause \node (v4) [below=of r4] {$v_4$}; - \draw[->,line width=1mm] (r4) -- (v4);\pause + \draw[->,line width=1mm] (r4) -- (v4); + \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{$mkeps$}};\pause \node (v3) [left=of v4] {$v_3$}; \draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {$inj\,c$};\pause \node (v2) [left=of v3] {$v_2$}; @@ -334,7 +292,6 @@ \draw[->,line width=0.5mm] (r3) -- (v3); \draw[->,line width=0.5mm] (r2) -- (v2); \draw[->,line width=0.5mm] (r1) -- (v1); - \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{$mkeps$}}; \end{tikzpicture} \end{center} @@ -342,6 +299,96 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame} + \frametitle{Nullable, Mkeps and Injection Functions} + + \textbf{Nullable Function} + \begin{tabular}{ l l l } + \textit{nullable} (\textbf{0}) & $\dn$ & \textit{False}\\ + \textit{nullable} (\textbf{1}) & $\dn$ & \textit{True}\\ + \textit{nullable} (c) & $\dn$ & \textit{False}\\ + \textit{nullable} ($r_1 + r_2$) & $\dn$ & \textit{nullable} $r_1$ $\lor$ \textit{nullable} $r_2$\\ + \textit{nullable} ($r_1 \cdot r_2$) & $\dn$ & \textit{nullable} $r_1$ $\land$ \textit{nullable} $r_2$\\ + \textit{nullable} ($r^*$) & $\dn$ & \textit{True}\\ + \end{tabular}\\ + \vspace{3 mm} + \textbf{Mkeps Function} + \begin{tabular}{ l l l } + \textit{mkeps} (\textbf{1}) & $\dn$ & () \\ + \textit{mkeps} ($r_1 \cdot r_2$) & $\dn$ & \textit{Seq} (\textit{mkeps} $r_1$) (\textit{mkeps} $r_2$)\\ + \textit{mkeps} ($r_1 + r_2$) & $\dn$ & \textit{if nullable $r_1$ then Left} (\textit{mkeps $r_1$})\\ + & & \textit{else Right} (\textit{mkeps $r_2$})\\ + \textit{mkeps} ($r^*$) & $\dn$ & \textit{Stars} [] \\ + \end{tabular}\\ + + \end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + \begin{frame} + \frametitle{Nullable, Mkeps and Injection Functions} + + \textbf{Injection Function} + \begin{tabular}{ l l l } + \textit{inj d c} () & $\dn$ & \textit{Char d}\\ + \textit{inj} ($r_1 + r_2$) \textit{c} (\textit{Left $v_1$}) & $\dn$ & \textit{Left} (\textit{inj $r_1$ c $v_1$})\\ + \textit{inj} ($r_1 + r_2$) \textit{c} (\textit{Right $v_2$}) & $\dn$ & \textit{Right} (\textit{inj $r_2$ c $v_2$})\\ + \textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Seq $v_1$ $v_2$}) & $\dn$ & \textit{Seq} (\textit{inj $r_1$ c $v_1$}) $v_2$\\ + \textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Left} (\textit{Seq $v_1$ $v_2$})) & $\dn$ & \textit{Seq} (\textit{inj $r_1$ c $v_1$}) $v_2$\\ + \textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Right $v_2$}) & $\dn$ & \textit{Seq} (\textit{mkeps $r_1$}) (\textit{inj $r_2$ c $v_2$})\\ + \textit{inj} ($r^*$) \textit{c} (\textit{Seq v} (\textit{Stars vs})) & $\dn$ & \textit{Stars} (\textit{inj r c v}::\textit{vs}) + \end{tabular} + + \end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + \begin{frame} + \frametitle{POSIX Ordering Relation by Sulzmann \& Lu} + + \begin{itemize} + \item Introduce an inductive defined ordering relation + $v \succ_r v'$ which captures the idea of POSIX matching. + + \item The algorithm returns the maximum of all possible values + that are possible for a regular expression. + + \item The idea is from a paper by Frisch \& Cardelli about + GREEDY matching (GREEDY = preferring instant gratification + to delayed repletion): + + \item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\ + \begin{center} + \begin{tabular}{ll} + GREEDY: & $[Left(a), Right(Left(b))]$\\ + POSIX: & $[Right(Right(Seq(a, b)))]$ + \end{tabular} + \end{center} + + \end{itemize} + \end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + \begin{frame} + \frametitle{POSIX Ordering Relation by Sulzmann \& Lu} + + \begin{center} + $\infer{\vdash Empty : \epsilon}{}$\hspace{15mm} + $\infer{\vdash Char(c): c}{}$\bigskip + + $\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$ + \bigskip + + $\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm} + $\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip + + $\infer{\vdash [] : r^*}{}$\hspace{15mm} + $\infer{\vdash [v_1,\ldots, v_n] : r^*} + {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$ + \end{center} + + \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -350,12 +397,12 @@ \begin{itemize} \item Sulzmann: \ldots Let's assume $v$ is not a $POSIX$ value, - then there must be another one \ldots contradiction.\pause + then there must be another one \ldots contradiction. \item Exists ? \begin{center} $L(r) \not= \varnothing \;\Rightarrow\; \exists v.\;POSIX(v, r)$ - \end{center}\pause + \end{center} \item In the sequence case $Seq(v_1,v_2)\succ_{r_1\cdot r_2} Seq(v_1', v_2')$, @@ -364,7 +411,7 @@ \begin{center} $|v_1| @ |v_2| = |v_1'| @ |v_2'|$ - \end{center}\pause + \end{center} \item Although one begins with the assumption that the two values have the same flattening, this cannot be maintained @@ -376,32 +423,64 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} + \frametitle{Problems} + + \begin{itemize} + \item I have no doubt the algorithm is correct --- + the problem is I do not believe their proof. + + \begin{center} + \small + ``How could I miss this? Well, I was rather careless when + stating this Lemma :)\smallskip + + Great example how formal machine checked proofs (and + proof assistants) can help to spot flawed reasoning steps.'' + + \end{center} + + \begin{center} + \small + ``Well, I don't think there's any flaw. The issue is how to + come up with a mechanical proof. In my world mathematical + proof $=$ mechanical proof doesn't necessarily hold.'' + + \end{center} + + \end{itemize} + + \end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + \begin{frame} \frametitle{Our Solution} \begin{itemize} \item A direct definition of what a POSIX value is, using the - relation \smath{s \in r \to v} (our specification)\bigskip + relation $s \in r \to v$ (specification) \begin{center} - \smath{\infer{[] \in \epsilon \to Empty}{}}\hspace{15mm} - \smath{\infer{[c] \in c \to Char(c)}{}}\bigskip\medskip - - \smath{\infer{s \in r_1 + r_2 \to Left(v)} - {s \in r_1 \to v}}\hspace{10mm} - \smath{\infer{s \in r_1 + r_2 \to Right(v)} - {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip - - \smath{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)} - {\small\begin{array}{l} - s_1 \in r_1 \to v_1 \\ - s_2 \in r_2 \to v_2 \\ - \neg(\exists s_3\,s_4.\; s_3 \not= [] - \wedge s_3 @ s_4 = s_2 \wedge - s_1 @ s_3 \in L(r_1) \wedge - s_4 \in L(r_2)) - \end{array}}} - - {\ldots} + $\infer{[] \in \epsilon \to Empty}{}$\hspace{15mm} + $\infer{c \in c \to Char(c)}{}$\bigskip\medskip + + $\infer{s \in r_1 + r_2 \to Left(v)} + {s \in r_1 \to v}$\hspace{10mm} + $\infer{s \in r_1 + r_2 \to Right(v)} + {s \in r_2 \to v & s \not\in L(r_1)}$\bigskip\medskip + + $\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)} + {\small\begin{array}{l} + s_1 \in r_1 \to v_1 \\ + s_2 \in r_2 \to v_2 \\ + \neg(\exists s_3\,s_4.\; s_3 \not= [] + \wedge s_3 @ s_4 = s_2 \wedge + s_1 @ s_3 \in L(r_1) \wedge + s_4 \in L(r_2)) + \end{array}}$ + \\ + \bigskip + ... \end{center} \end{itemize} @@ -438,5 +517,31 @@ (biggest value for $s$ and $r$) \end{frame} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + \begin{frame} + \frametitle{Conclusions} + + \begin{itemize} + + \item We replaced the POSIX definition of Sulzmann \& Lu by a + new definition (ours is inspired by work of Vansummeren, + 2006)\medskip + + \item Their proof contained small gaps (acknowledged) but had + also fundamental flaws\medskip + + \item Now, its a nice exercise for theorem proving\medskip + + \item Some optimisations need to be applied to the algorithm + in order to become fast enough\medskip + + \item Can be used for lexing, is a small beautiful functional + program + + \end{itemize} + \end{frame} + + \end{document}