# HG changeset patch # User Christian Urban # Date 1472034646 -3600 # Node ID c592ea0661aeeeb273c25628560c32b0a34459af # Parent 02568e85a394bba2d038df98d0eabdcbfca6eb1c updated diff -r 02568e85a394 -r c592ea0661ae Slides/slides04.pdf Binary file Slides/slides04.pdf has changed diff -r 02568e85a394 -r c592ea0661ae Slides/slides04.tex --- a/Slides/slides04.tex Wed Aug 24 10:37:40 2016 +0100 +++ b/Slides/slides04.tex Wed Aug 24 11:30:46 2016 +0100 @@ -21,12 +21,38 @@ {\end{minipage}\egroup;% \end{tikzpicture}\bigskip} -\title{\bf POSIX Lexing with\\ - \bf Derivatives of Regular Expressions\\ - \bf (Proof Pearl)} +\newcommand\grid[1]{% + \begin{tikzpicture}[baseline=(char.base)] + \path[use as bounding box] + (0,0) rectangle (1em,1em); + \draw[red!50, fill=red!20] + (0,0) rectangle (1em,1em); + \node[inner sep=1pt,anchor=base west] + (char) at (0em,\gridraiseamount) {#1}; + \end{tikzpicture}} +\newcommand\gridraiseamount{0.12em} + +\makeatletter +\newcommand\Grid[1]{% + \@tfor\z:=#1\do{\grid{\z}}} +\makeatother + +\newcommand\Vspace[1][.3em]{% + \mbox{\kern.06em\vrule height.3ex}% + \vbox{\hrule width#1}% + \hbox{\vrule height.3ex}} + +\def\VS{\Vspace[0.6em]} + + +\title[POSIX Lexing with Derivatives of Regexes] +{\bf POSIX Lexing with\\ + \bf Derivatives of Regular Expressions\\ + \bf (Proof Pearl)} \author{Fahad Ausaf, Roy Dyckhoff and Christian Urban} \date{King's College London, University of St Andrews} + \begin{document} \maketitle @@ -184,15 +210,13 @@ token. \end{bubble} - For example: \smath{r_{keywords} + r_{identifiers}} (fix graphics below) + For example: \smath{r_{keywords} + r_{identifiers}} - \begin{center} - \texttt{iffoo\_bla} - \end{center} - - \begin{center} - \texttt{if\_bla} - \end{center} + \begin{itemize} + \item \smath{\texttt{\Grid{iffoo\VS bla}}} + + \item \smath{\texttt{\Grid{if\VS bla}}} + \end{itemize} \end{frame} @@ -212,7 +236,7 @@ \end{bubble}\bigskip Also Kuklewicz maintains a unit-test repository for POSIX - matching, which indicates that most POSIX mathcers are buggy. + matching, which indicates that most POSIX matchcers are buggy. \begin{center} \url{http://www.haskell.org/haskellwiki/Regex_Posix} @@ -232,36 +256,7 @@ % \end{itemize} % \end{frame} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \begin{frame} - \frametitle{Regular Expressions and Values} - Regular expressions and their corresponding values (for how a - regular expression matched string): - \begin{columns}[c] % the "c" option specifies center vertical alignment - \column{.5\textwidth} % column designated by a command - \begin{tabular}{ l l l } - $r$ & ::= & $\varnothing$ \\ - & $\mid$ & $\epsilon$ \\ - & $\mid$ & $c$ \\ - & $\mid$ & $r_1 \cdot r_2$ \\ - & $\mid$ & $r_1 + r_2$ \\ - & $\mid$ & $r^*$ \\ - \end{tabular} - \column{.5\textwidth} - \begin{tabular}{ l l l } - $v$ & ::= & $\varnothing$ \\ - & $\mid$ & $Empty$ \\ - & $\mid$ & $Char$($c$) \\ - & $\mid$ & $Seq$($v_1.v_2$) \\ - & $\mid$ & $Left$($v$) \\ - & $\mid$ & $Right$($v$) \\ - & $\mid$ & [] \\ - & $\mid$ & [$v_1,...,v_n$] - \end{tabular} - \end{columns} - There is also a notion of a string behind a value $\mid v \mid$ - \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -271,14 +266,14 @@ We want to match the string $abc$ using $r_1$\\ \begin{center} - \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}] + \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}] \node (r1) {$r_1$}; \node (r2) [right=of r1] {$r_2$}; - \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {$der\,a$};\pause + \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {$der\,a$}; \node (r3) [right=of r2] {$r_3$}; - \draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {$der\,b$};\pause + \draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {$der\,b$}; \node (r4) [right=of r3] {$r_4$}; - \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {$der\,c$};\pause + \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {$der\,c$}; \draw (r4) node[anchor=west] {\;\raisebox{3mm}{$\;\;nullable?$}};\pause \node (v4) [below=of r4] {$v_4$}; \draw[->,line width=1mm] (r4) -- (v4); @@ -289,10 +284,11 @@ \draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {$inj\,b$};\pause \node (v1) [left=of v2] {$v_1$}; \draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {$inj\,a$};\pause - \draw[->,line width=0.5mm] (r3) -- (v3); - \draw[->,line width=0.5mm] (r2) -- (v2); - \draw[->,line width=0.5mm] (r1) -- (v1); - \end{tikzpicture} + %\draw[->,line width=0.5mm] (r3) -- (v3); + %\draw[->,line width=0.5mm] (r2) -- (v2); + %\draw[->,line width=0.5mm] (r1) -- (v1); + \onslide<1-> + \end{tikzpicture} \end{center} \end{frame} @@ -300,21 +296,44 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} - \frametitle{Nullable, Mkeps and Injection Functions} + \frametitle{Regular Expressions and Values} + Regular expressions and their corresponding values (for how a + regular expression matched string): + \vspace{5 mm} + \begin{columns}[c] % the "c" option specifies center vertical alignment + \column{.4\textwidth} % column designated by a command + \begin{tabular}{ l l l } + \smath{r} & ::= & \smath{\Zero} \\ + & $\mid$ & \smath{\One} \\ + & $\mid$ & \smath{c} \\ + & $\mid$ & \smath{r_1 \cdot r_2} \\ + & $\mid$ & \smath{r_1 + r_2} \\ \\ + & $\mid$ & \smath{r^*} \\ + \end{tabular} + \column{.4\textwidth} + \begin{tabular}{ l l l } + + \smath{v} & ::= & \\ + & $\mid$ & \smath{Empty} \\ + & $\mid$ & \smath{Char(c)} \\ + & $\mid$ & \smath{Seq(v_1\cdot v_2)} \\ + & $\mid$ & \smath{Left(v)} \\ + & $\mid$ & \smath{Right(v)} \\ + & $\mid$ & \smath{[v_1,...,v_n]} + \end{tabular} + \end{columns} + \vspace{5 mm} + There is also a notion of a string behind a value $\mid v \mid$ + \end{frame} - \textbf{Nullable Function} - \begin{tabular}{ l l l } - \textit{nullable} (\textbf{0}) & $\dn$ & \textit{False}\\ - \textit{nullable} (\textbf{1}) & $\dn$ & \textit{True}\\ - \textit{nullable} (c) & $\dn$ & \textit{False}\\ - \textit{nullable} ($r_1 + r_2$) & $\dn$ & \textit{nullable} $r_1$ $\lor$ \textit{nullable} $r_2$\\ - \textit{nullable} ($r_1 \cdot r_2$) & $\dn$ & \textit{nullable} $r_1$ $\land$ \textit{nullable} $r_2$\\ - \textit{nullable} ($r^*$) & $\dn$ & \textit{True}\\ - \end{tabular}\\ - \vspace{3 mm} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + \begin{frame} + \frametitle{Mkeps and Injection Functions} + \textbf{Mkeps Function} \begin{tabular}{ l l l } - \textit{mkeps} (\textbf{1}) & $\dn$ & () \\ + \textit{mkeps} (\textbf{1}) & $\dn$ & \textit{Empty} \\ \textit{mkeps} ($r_1 \cdot r_2$) & $\dn$ & \textit{Seq} (\textit{mkeps} $r_1$) (\textit{mkeps} $r_2$)\\ \textit{mkeps} ($r_1 + r_2$) & $\dn$ & \textit{if nullable $r_1$ then Left} (\textit{mkeps $r_1$})\\ & & \textit{else Right} (\textit{mkeps $r_2$})\\ @@ -326,7 +345,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} - \frametitle{Nullable, Mkeps and Injection Functions} + \frametitle{Mkeps and Injection Functions} \textbf{Injection Function} \begin{tabular}{ l l l } @@ -355,40 +374,40 @@ \item The idea is from a paper by Frisch \& Cardelli about GREEDY matching (GREEDY = preferring instant gratification - to delayed repletion): + to delayed repletion) - \item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\ - \begin{center} - \begin{tabular}{ll} - GREEDY: & $[Left(a), Right(Left(b))]$\\ - POSIX: & $[Right(Right(Seq(a, b)))]$ - \end{tabular} - \end{center} + %\item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\ + %\begin{center} + %\begin{tabular}{ll} + %GREEDY: & $[Left(a), Right(Left(b))]$\\ + %POSIX: & $[Right(Right(Seq(a, b)))]$ + %\end{tabular} + %\end{center} \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \begin{frame} - \frametitle{POSIX Ordering Relation by Sulzmann \& Lu} - - \begin{center} - $\infer{\vdash Empty : \epsilon}{}$\hspace{15mm} - $\infer{\vdash Char(c): c}{}$\bigskip - - $\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$ - \bigskip - - $\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm} - $\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip - - $\infer{\vdash [] : r^*}{}$\hspace{15mm} - $\infer{\vdash [v_1,\ldots, v_n] : r^*} - {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$ - \end{center} - - \end{frame} +% \begin{frame} +% \frametitle{POSIX Ordering Relation by Sulzmann \& Lu} +% +% \begin{center} +% $\infer{\vdash Empty : \epsilon}{}$\hspace{15mm} +% $\infer{\vdash Char(c): c}{}$\bigskip +% +% $\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$ +% \bigskip +% +% $\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm} +% $\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip +% +% $\infer{\vdash [] : r^*}{}$\hspace{15mm} +% $\infer{\vdash [v_1,\ldots, v_n] : r^*} +% {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$ +% \end{center} +% +% \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -422,68 +441,67 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \begin{frame} - \frametitle{Problems} - - \begin{itemize} - \item I have no doubt the algorithm is correct --- - the problem is I do not believe their proof. - - \begin{center} - \small - ``How could I miss this? Well, I was rather careless when - stating this Lemma :)\smallskip - - Great example how formal machine checked proofs (and - proof assistants) can help to spot flawed reasoning steps.'' - - \end{center} - - \begin{center} - \small - ``Well, I don't think there's any flaw. The issue is how to - come up with a mechanical proof. In my world mathematical - proof $=$ mechanical proof doesn't necessarily hold.'' - - \end{center} - - \end{itemize} - - \end{frame} +% \begin{frame} +% \frametitle{Problems} +% +% \begin{itemize} +% \item I have no doubt the algorithm is correct --- +% the problem is I do not believe their proof. +% +% \begin{center} +% \small +% ``How could I miss this? Well, I was rather careless when +% stating this Lemma :)\smallskip +% +% Great example how formal machine checked proofs (and +% proof assistants) can help to spot flawed reasoning steps.'' +% +% \end{center} +% +% \begin{center} +% \small +% ``Well, I don't think there's any flaw. The issue is how to +% come up with a mechanical proof. In my world mathematical +% proof $=$ mechanical proof doesn't necessarily hold.'' +% +% \end{center} +% +% \end{itemize} +% +% \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} - \frametitle{Our Solution} - + \frametitle{Our Solution} + \begin{itemize} - \item A direct definition of what a POSIX value is, using the - relation $s \in r \to v$ (specification) - + \item A direct definition of what a POSIX value is, using the + relation \smath{s \in r \to v} (our specification)\bigskip + \begin{center} - $\infer{[] \in \epsilon \to Empty}{}$\hspace{15mm} - $\infer{c \in c \to Char(c)}{}$\bigskip\medskip + \smath{\infer{[] \in \One \to Empty}{}}\hspace{15mm} + \smath{\infer{[c] \in c \to Char(c)}{}}\bigskip\medskip - $\infer{s \in r_1 + r_2 \to Left(v)} - {s \in r_1 \to v}$\hspace{10mm} - $\infer{s \in r_1 + r_2 \to Right(v)} - {s \in r_2 \to v & s \not\in L(r_1)}$\bigskip\medskip + \smath{\infer{s \in r_1 + r_2 \to Left(v)} + {s \in r_1 \to v}}\hspace{10mm} + \smath{\infer{s \in r_1 + r_2 \to Right(v)} + {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip - $\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)} - {\small\begin{array}{l} - s_1 \in r_1 \to v_1 \\ - s_2 \in r_2 \to v_2 \\ - \neg(\exists s_3\,s_4.\; s_3 \not= [] - \wedge s_3 @ s_4 = s_2 \wedge - s_1 @ s_3 \in L(r_1) \wedge - s_4 \in L(r_2)) - \end{array}}$ - \\ - \bigskip - ... - \end{center} - - \end{itemize} + \smath{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)} + {\small\begin{array}{l} + s_1 \in r_1 \to v_1 \\ + s_2 \in r_2 \to v_2 \\ + \neg(\exists s_3\,s_4.\; s_3 \not= [] + \wedge s_3 @ s_4 = s_2 \wedge + s_1 @ s_3 \in L(r_1) \wedge + s_4 \in L(r_2)) + \end{array}}} + + {\ldots} + \end{center} + + \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -507,41 +525,41 @@ You can now start to implement optimisations and derive - correctness proofs for them. But we still do not know whether - - \begin{center} - $s \in r \to v$ - \end{center} - - is a POSIX value according to Sulzmann \& Lu's definition - (biggest value for $s$ and $r$) + correctness proofs for them.% But we still do not know whether +% +% \begin{center} +% $s \in r \to v$ +% \end{center} +% +% is a POSIX value according to Sulzmann \& Lu's definition +% (biggest value for $s$ and $r$) \end{frame} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - \begin{frame} - \frametitle{Conclusions} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \begin{itemize} - - \item We replaced the POSIX definition of Sulzmann \& Lu by a - new definition (ours is inspired by work of Vansummeren, - 2006)\medskip + \begin{frame} + \frametitle{Conclusions} - \item Their proof contained small gaps (acknowledged) but had - also fundamental flaws\medskip - - \item Now, its a nice exercise for theorem proving\medskip - - \item Some optimisations need to be applied to the algorithm - in order to become fast enough\medskip - - \item Can be used for lexing, is a small beautiful functional - program - - \end{itemize} - \end{frame} - + \begin{itemize} + + \item Sulzmann and Lu's informal proof contained small gaps (acknowledged) but we believe it had + also fundamental flaws\medskip + + \item We replaced the POSIX definition of Sulzmann \& Lu by a + new definition (ours is inspired by work of Vansummeren, + 2006)\medskip + + \item Now, its a nice exercise for theorem proving\medskip + + \item Some optimisations need to be applied to the algorithm + in order to become fast enough\medskip + + \item Can be used for lexing, is a small beautiful functional + program + + \end{itemize} + \end{frame} + \end{document}