--- a/Slides/slides04.tex Sat Aug 20 13:39:19 2016 +0100
+++ b/Slides/slides04.tex Wed Aug 24 10:37:40 2016 +0100
@@ -21,32 +21,7 @@
{\end{minipage}\egroup;%
\end{tikzpicture}\bigskip}
-\newcommand\grid[1]{%
-\begin{tikzpicture}[baseline=(char.base)]
- \path[use as bounding box]
- (0,0) rectangle (1em,1em);
- \draw[red!50, fill=red!20]
- (0,0) rectangle (1em,1em);
- \node[inner sep=1pt,anchor=base west]
- (char) at (0em,\gridraiseamount) {#1};
-\end{tikzpicture}}
-\newcommand\gridraiseamount{0.12em}
-
-\makeatletter
-\newcommand\Grid[1]{%
- \@tfor\z:=#1\do{\grid{\z}}}
-\makeatother
-
-\newcommand\Vspace[1][.3em]{%
- \mbox{\kern.06em\vrule height.3ex}%
- \vbox{\hrule width#1}%
- \hbox{\vrule height.3ex}}
-
-\def\VS{\Vspace[0.6em]}
-
-
-\title[POSIX Lexing with Derivatives of Regexes]
- {\bf POSIX Lexing with\\
+\title{\bf POSIX Lexing with\\
\bf Derivatives of Regular Expressions\\
\bf (Proof Pearl)}
\author{Fahad Ausaf, Roy Dyckhoff and Christian Urban}
@@ -125,6 +100,18 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% \begin{frame}
+% \frametitle{Sulzmann and Lu's Matcher}
+% \begin{itemize}
+% \item Sulzmann \& Lu came up with a beautiful idea for how
+% to extend the simple regular expression matcher to POSIX
+% matching/lexing (FLOPS 2014)
+% \item The idea: define an inverse operation to the derivatives
+% \end{itemize}
+% \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
\begin{frame}
\frametitle{Sulzmann and Lu's Matcher}
@@ -177,39 +164,6 @@
\alert{\textbf{how}}: POSIX, GREEDY, \ldots
\end{frame}
-
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
- \begin{frame}{Regular Expressions and Values}
- Regular expressions and their corresponding values (for how a
- regular expression matched a string):\bigskip
-
- \begin{columns}[c] % the "c" option specifies center vertical alignment
- \column{.4\textwidth} % column designated by a command
- \begin{tabular}{ l l l }
- \smath{r} & ::= & \smath{\Zero} \\
- & $\mid$ & \smath{\One} \\
- & $\mid$ & \smath{c} \\
- & $\mid$ & \smath{r_1 \cdot r_2} \\
- & $\mid$ & \smath{r_1 + r_2} \\ \\
- & $\mid$ & \smath{r^*} \\
- \end{tabular}
- \column{.4\textwidth}
- \begin{tabular}{ l l l }
-
- \smath{v} & ::= & \\
- & $\mid$ & \smath{Empty} \\
- & $\mid$ & \smath{Char(c)} \\
- & $\mid$ & \smath{Seq(v_1\cdot v_2)} \\
- & $\mid$ & \smath{Left(v)} \\
- & $\mid$ & \smath{Right(v)} \\
- & $\mid$ & \smath{[v_1,...,v_n]}
- \end{tabular}
- \end{columns}
- \end{frame}
-
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -230,13 +184,15 @@
token.
\end{bubble}
- For example: \smath{r_{keywords} + r_{identifiers}}:\bigskip
+ For example: \smath{r_{keywords} + r_{identifiers}} (fix graphics below)
- \begin{itemize}
- \item \smath{\texttt{\Grid{iffoo\VS bla}}}
-
- \item \smath{\texttt{\Grid{if\VS bla}}}
- \end{itemize}
+ \begin{center}
+ \texttt{iffoo\_bla}
+ \end{center}
+
+ \begin{center}
+ \texttt{if\_bla}
+ \end{center}
\end{frame}
@@ -266,46 +222,47 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% \begin{frame}
+% \frametitle{Correctness}
+% \begin{itemize}
+% \item Sulzmann \& Lu came up with a beautiful idea for how
+% to extend the simple regular expression matcher to POSIX
+% matching/lexing (FLOPS 2014)
+% \item The idea: define an inverse operation to the derivatives
+% \end{itemize}
+% \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
\begin{frame}
- \frametitle{``Correctness'' by Sulzmann and Lu}
- \begin{itemize}
- \item Sulzmann \& Lu's idea is to order all possible
- answer such that they can prove the correct answer is
- the maximum
- \item The idea is taken from a GREEDY algorithm (and it
- works there)\bigskip\pause
-
- \item {\bf But} we made no progress in formalising
- Sulzmann \& Lu's idea, because
-
- \begin{itemize}
- \item transitivity, existence of maxima etc all fail to
- turn into real proofs
- \item the reason: the ordering works only if ....
- \item though we did find mistakes:
-\begin{center}
- \small
- ``How could I miss this? Well, I was rather careless when
- stating this Lemma :)\smallskip
-
- Great example how formal machine checked proofs (and
- proof assistants) can help to spot flawed reasoning steps.''
-
- \end{center}\pause
-
- \begin{center}
- \small
- ``Well, I don't think there's any flaw. The issue is how to
- come up with a mechanical proof. In my world mathematical
- proof $=$ mechanical proof doesn't necessarily hold.''
-
- \end{center}\pause
- \end{itemize}
-
- \end{itemize}
+ \frametitle{Regular Expressions and Values}
+ Regular expressions and their corresponding values (for how a
+ regular expression matched string):
+ \begin{columns}[c] % the "c" option specifies center vertical alignment
+ \column{.5\textwidth} % column designated by a command
+ \begin{tabular}{ l l l }
+ $r$ & ::= & $\varnothing$ \\
+ & $\mid$ & $\epsilon$ \\
+ & $\mid$ & $c$ \\
+ & $\mid$ & $r_1 \cdot r_2$ \\
+ & $\mid$ & $r_1 + r_2$ \\
+ & $\mid$ & $r^*$ \\
+ \end{tabular}
+ \column{.5\textwidth}
+ \begin{tabular}{ l l l }
+ $v$ & ::= & $\varnothing$ \\
+ & $\mid$ & $Empty$ \\
+ & $\mid$ & $Char$($c$) \\
+ & $\mid$ & $Seq$($v_1.v_2$) \\
+ & $\mid$ & $Left$($v$) \\
+ & $\mid$ & $Right$($v$) \\
+ & $\mid$ & [] \\
+ & $\mid$ & [$v_1,...,v_n$]
+ \end{tabular}
+ \end{columns}
+ There is also a notion of a string behind a value $\mid v \mid$
\end{frame}
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
@@ -324,7 +281,8 @@
\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {$der\,c$};\pause
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{$\;\;nullable?$}};\pause
\node (v4) [below=of r4] {$v_4$};
- \draw[->,line width=1mm] (r4) -- (v4);\pause
+ \draw[->,line width=1mm] (r4) -- (v4);
+ \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{$mkeps$}};\pause
\node (v3) [left=of v4] {$v_3$};
\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {$inj\,c$};\pause
\node (v2) [left=of v3] {$v_2$};
@@ -334,7 +292,6 @@
\draw[->,line width=0.5mm] (r3) -- (v3);
\draw[->,line width=0.5mm] (r2) -- (v2);
\draw[->,line width=0.5mm] (r1) -- (v1);
- \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{$mkeps$}};
\end{tikzpicture}
\end{center}
@@ -342,6 +299,96 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ \begin{frame}
+ \frametitle{Nullable, Mkeps and Injection Functions}
+
+ \textbf{Nullable Function}
+ \begin{tabular}{ l l l }
+ \textit{nullable} (\textbf{0}) & $\dn$ & \textit{False}\\
+ \textit{nullable} (\textbf{1}) & $\dn$ & \textit{True}\\
+ \textit{nullable} (c) & $\dn$ & \textit{False}\\
+ \textit{nullable} ($r_1 + r_2$) & $\dn$ & \textit{nullable} $r_1$ $\lor$ \textit{nullable} $r_2$\\
+ \textit{nullable} ($r_1 \cdot r_2$) & $\dn$ & \textit{nullable} $r_1$ $\land$ \textit{nullable} $r_2$\\
+ \textit{nullable} ($r^*$) & $\dn$ & \textit{True}\\
+ \end{tabular}\\
+ \vspace{3 mm}
+ \textbf{Mkeps Function}
+ \begin{tabular}{ l l l }
+ \textit{mkeps} (\textbf{1}) & $\dn$ & () \\
+ \textit{mkeps} ($r_1 \cdot r_2$) & $\dn$ & \textit{Seq} (\textit{mkeps} $r_1$) (\textit{mkeps} $r_2$)\\
+ \textit{mkeps} ($r_1 + r_2$) & $\dn$ & \textit{if nullable $r_1$ then Left} (\textit{mkeps $r_1$})\\
+ & & \textit{else Right} (\textit{mkeps $r_2$})\\
+ \textit{mkeps} ($r^*$) & $\dn$ & \textit{Stars} [] \\
+ \end{tabular}\\
+
+ \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ \begin{frame}
+ \frametitle{Nullable, Mkeps and Injection Functions}
+
+ \textbf{Injection Function}
+ \begin{tabular}{ l l l }
+ \textit{inj d c} () & $\dn$ & \textit{Char d}\\
+ \textit{inj} ($r_1 + r_2$) \textit{c} (\textit{Left $v_1$}) & $\dn$ & \textit{Left} (\textit{inj $r_1$ c $v_1$})\\
+ \textit{inj} ($r_1 + r_2$) \textit{c} (\textit{Right $v_2$}) & $\dn$ & \textit{Right} (\textit{inj $r_2$ c $v_2$})\\
+ \textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Seq $v_1$ $v_2$}) & $\dn$ & \textit{Seq} (\textit{inj $r_1$ c $v_1$}) $v_2$\\
+ \textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Left} (\textit{Seq $v_1$ $v_2$})) & $\dn$ & \textit{Seq} (\textit{inj $r_1$ c $v_1$}) $v_2$\\
+ \textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Right $v_2$}) & $\dn$ & \textit{Seq} (\textit{mkeps $r_1$}) (\textit{inj $r_2$ c $v_2$})\\
+ \textit{inj} ($r^*$) \textit{c} (\textit{Seq v} (\textit{Stars vs})) & $\dn$ & \textit{Stars} (\textit{inj r c v}::\textit{vs})
+ \end{tabular}
+
+ \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ \begin{frame}
+ \frametitle{POSIX Ordering Relation by Sulzmann \& Lu}
+
+ \begin{itemize}
+ \item Introduce an inductive defined ordering relation
+ $v \succ_r v'$ which captures the idea of POSIX matching.
+
+ \item The algorithm returns the maximum of all possible values
+ that are possible for a regular expression.
+
+ \item The idea is from a paper by Frisch \& Cardelli about
+ GREEDY matching (GREEDY = preferring instant gratification
+ to delayed repletion):
+
+ \item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\
+ \begin{center}
+ \begin{tabular}{ll}
+ GREEDY: & $[Left(a), Right(Left(b))]$\\
+ POSIX: & $[Right(Right(Seq(a, b)))]$
+ \end{tabular}
+ \end{center}
+
+ \end{itemize}
+ \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ \begin{frame}
+ \frametitle{POSIX Ordering Relation by Sulzmann \& Lu}
+
+ \begin{center}
+ $\infer{\vdash Empty : \epsilon}{}$\hspace{15mm}
+ $\infer{\vdash Char(c): c}{}$\bigskip
+
+ $\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$
+ \bigskip
+
+ $\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm}
+ $\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip
+
+ $\infer{\vdash [] : r^*}{}$\hspace{15mm}
+ $\infer{\vdash [v_1,\ldots, v_n] : r^*}
+ {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$
+ \end{center}
+
+ \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -350,12 +397,12 @@
\begin{itemize}
\item Sulzmann: \ldots Let's assume $v$ is not a $POSIX$ value,
- then there must be another one \ldots contradiction.\pause
+ then there must be another one \ldots contradiction.
\item Exists ?
\begin{center}
$L(r) \not= \varnothing \;\Rightarrow\; \exists v.\;POSIX(v, r)$
- \end{center}\pause
+ \end{center}
\item In the sequence case
$Seq(v_1,v_2)\succ_{r_1\cdot r_2} Seq(v_1', v_2')$,
@@ -364,7 +411,7 @@
\begin{center}
$|v_1| @ |v_2| = |v_1'| @ |v_2'|$
- \end{center}\pause
+ \end{center}
\item Although one begins with the assumption that the two
values have the same flattening, this cannot be maintained
@@ -376,32 +423,64 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
+ \frametitle{Problems}
+
+ \begin{itemize}
+ \item I have no doubt the algorithm is correct ---
+ the problem is I do not believe their proof.
+
+ \begin{center}
+ \small
+ ``How could I miss this? Well, I was rather careless when
+ stating this Lemma :)\smallskip
+
+ Great example how formal machine checked proofs (and
+ proof assistants) can help to spot flawed reasoning steps.''
+
+ \end{center}
+
+ \begin{center}
+ \small
+ ``Well, I don't think there's any flaw. The issue is how to
+ come up with a mechanical proof. In my world mathematical
+ proof $=$ mechanical proof doesn't necessarily hold.''
+
+ \end{center}
+
+ \end{itemize}
+
+ \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ \begin{frame}
\frametitle{Our Solution}
\begin{itemize}
\item A direct definition of what a POSIX value is, using the
- relation \smath{s \in r \to v} (our specification)\bigskip
+ relation $s \in r \to v$ (specification)
\begin{center}
- \smath{\infer{[] \in \epsilon \to Empty}{}}\hspace{15mm}
- \smath{\infer{[c] \in c \to Char(c)}{}}\bigskip\medskip
-
- \smath{\infer{s \in r_1 + r_2 \to Left(v)}
- {s \in r_1 \to v}}\hspace{10mm}
- \smath{\infer{s \in r_1 + r_2 \to Right(v)}
- {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip
-
- \smath{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
- {\small\begin{array}{l}
- s_1 \in r_1 \to v_1 \\
- s_2 \in r_2 \to v_2 \\
- \neg(\exists s_3\,s_4.\; s_3 \not= []
- \wedge s_3 @ s_4 = s_2 \wedge
- s_1 @ s_3 \in L(r_1) \wedge
- s_4 \in L(r_2))
- \end{array}}}
-
- {\ldots}
+ $\infer{[] \in \epsilon \to Empty}{}$\hspace{15mm}
+ $\infer{c \in c \to Char(c)}{}$\bigskip\medskip
+
+ $\infer{s \in r_1 + r_2 \to Left(v)}
+ {s \in r_1 \to v}$\hspace{10mm}
+ $\infer{s \in r_1 + r_2 \to Right(v)}
+ {s \in r_2 \to v & s \not\in L(r_1)}$\bigskip\medskip
+
+ $\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
+ {\small\begin{array}{l}
+ s_1 \in r_1 \to v_1 \\
+ s_2 \in r_2 \to v_2 \\
+ \neg(\exists s_3\,s_4.\; s_3 \not= []
+ \wedge s_3 @ s_4 = s_2 \wedge
+ s_1 @ s_3 \in L(r_1) \wedge
+ s_4 \in L(r_2))
+ \end{array}}$
+ \\
+ \bigskip
+ ...
\end{center}
\end{itemize}
@@ -438,5 +517,31 @@
(biggest value for $s$ and $r$)
\end{frame}
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ \begin{frame}
+ \frametitle{Conclusions}
+
+ \begin{itemize}
+
+ \item We replaced the POSIX definition of Sulzmann \& Lu by a
+ new definition (ours is inspired by work of Vansummeren,
+ 2006)\medskip
+
+ \item Their proof contained small gaps (acknowledged) but had
+ also fundamental flaws\medskip
+
+ \item Now, its a nice exercise for theorem proving\medskip
+
+ \item Some optimisations need to be applied to the algorithm
+ in order to become fast enough\medskip
+
+ \item Can be used for lexing, is a small beautiful functional
+ program
+
+ \end{itemize}
+ \end{frame}
+
+
\end{document}