--- a/Slides/slides04.tex Wed Aug 24 10:37:40 2016 +0100
+++ b/Slides/slides04.tex Wed Aug 24 11:30:46 2016 +0100
@@ -21,12 +21,38 @@
{\end{minipage}\egroup;%
\end{tikzpicture}\bigskip}
-\title{\bf POSIX Lexing with\\
- \bf Derivatives of Regular Expressions\\
- \bf (Proof Pearl)}
+\newcommand\grid[1]{%
+ \begin{tikzpicture}[baseline=(char.base)]
+ \path[use as bounding box]
+ (0,0) rectangle (1em,1em);
+ \draw[red!50, fill=red!20]
+ (0,0) rectangle (1em,1em);
+ \node[inner sep=1pt,anchor=base west]
+ (char) at (0em,\gridraiseamount) {#1};
+ \end{tikzpicture}}
+\newcommand\gridraiseamount{0.12em}
+
+\makeatletter
+\newcommand\Grid[1]{%
+ \@tfor\z:=#1\do{\grid{\z}}}
+\makeatother
+
+\newcommand\Vspace[1][.3em]{%
+ \mbox{\kern.06em\vrule height.3ex}%
+ \vbox{\hrule width#1}%
+ \hbox{\vrule height.3ex}}
+
+\def\VS{\Vspace[0.6em]}
+
+
+\title[POSIX Lexing with Derivatives of Regexes]
+{\bf POSIX Lexing with\\
+ \bf Derivatives of Regular Expressions\\
+ \bf (Proof Pearl)}
\author{Fahad Ausaf, Roy Dyckhoff and Christian Urban}
\date{King's College London, University of St Andrews}
+
\begin{document}
\maketitle
@@ -184,15 +210,13 @@
token.
\end{bubble}
- For example: \smath{r_{keywords} + r_{identifiers}} (fix graphics below)
+ For example: \smath{r_{keywords} + r_{identifiers}}
- \begin{center}
- \texttt{iffoo\_bla}
- \end{center}
-
- \begin{center}
- \texttt{if\_bla}
- \end{center}
+ \begin{itemize}
+ \item \smath{\texttt{\Grid{iffoo\VS bla}}}
+
+ \item \smath{\texttt{\Grid{if\VS bla}}}
+ \end{itemize}
\end{frame}
@@ -212,7 +236,7 @@
\end{bubble}\bigskip
Also Kuklewicz maintains a unit-test repository for POSIX
- matching, which indicates that most POSIX mathcers are buggy.
+ matching, which indicates that most POSIX matchcers are buggy.
\begin{center}
\url{http://www.haskell.org/haskellwiki/Regex_Posix}
@@ -232,36 +256,7 @@
% \end{itemize}
% \end{frame}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \begin{frame}
- \frametitle{Regular Expressions and Values}
- Regular expressions and their corresponding values (for how a
- regular expression matched string):
- \begin{columns}[c] % the "c" option specifies center vertical alignment
- \column{.5\textwidth} % column designated by a command
- \begin{tabular}{ l l l }
- $r$ & ::= & $\varnothing$ \\
- & $\mid$ & $\epsilon$ \\
- & $\mid$ & $c$ \\
- & $\mid$ & $r_1 \cdot r_2$ \\
- & $\mid$ & $r_1 + r_2$ \\
- & $\mid$ & $r^*$ \\
- \end{tabular}
- \column{.5\textwidth}
- \begin{tabular}{ l l l }
- $v$ & ::= & $\varnothing$ \\
- & $\mid$ & $Empty$ \\
- & $\mid$ & $Char$($c$) \\
- & $\mid$ & $Seq$($v_1.v_2$) \\
- & $\mid$ & $Left$($v$) \\
- & $\mid$ & $Right$($v$) \\
- & $\mid$ & [] \\
- & $\mid$ & [$v_1,...,v_n$]
- \end{tabular}
- \end{columns}
- There is also a notion of a string behind a value $\mid v \mid$
- \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -271,14 +266,14 @@
We want to match the string $abc$ using $r_1$\\
\begin{center}
- \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
+ \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
\node (r1) {$r_1$};
\node (r2) [right=of r1] {$r_2$};
- \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {$der\,a$};\pause
+ \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {$der\,a$};
\node (r3) [right=of r2] {$r_3$};
- \draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {$der\,b$};\pause
+ \draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {$der\,b$};
\node (r4) [right=of r3] {$r_4$};
- \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {$der\,c$};\pause
+ \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {$der\,c$};
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{$\;\;nullable?$}};\pause
\node (v4) [below=of r4] {$v_4$};
\draw[->,line width=1mm] (r4) -- (v4);
@@ -289,10 +284,11 @@
\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {$inj\,b$};\pause
\node (v1) [left=of v2] {$v_1$};
\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {$inj\,a$};\pause
- \draw[->,line width=0.5mm] (r3) -- (v3);
- \draw[->,line width=0.5mm] (r2) -- (v2);
- \draw[->,line width=0.5mm] (r1) -- (v1);
- \end{tikzpicture}
+ %\draw[->,line width=0.5mm] (r3) -- (v3);
+ %\draw[->,line width=0.5mm] (r2) -- (v2);
+ %\draw[->,line width=0.5mm] (r1) -- (v1);
+ \onslide<1->
+ \end{tikzpicture}
\end{center}
\end{frame}
@@ -300,21 +296,44 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
- \frametitle{Nullable, Mkeps and Injection Functions}
+ \frametitle{Regular Expressions and Values}
+ Regular expressions and their corresponding values (for how a
+ regular expression matched string):
+ \vspace{5 mm}
+ \begin{columns}[c] % the "c" option specifies center vertical alignment
+ \column{.4\textwidth} % column designated by a command
+ \begin{tabular}{ l l l }
+ \smath{r} & ::= & \smath{\Zero} \\
+ & $\mid$ & \smath{\One} \\
+ & $\mid$ & \smath{c} \\
+ & $\mid$ & \smath{r_1 \cdot r_2} \\
+ & $\mid$ & \smath{r_1 + r_2} \\ \\
+ & $\mid$ & \smath{r^*} \\
+ \end{tabular}
+ \column{.4\textwidth}
+ \begin{tabular}{ l l l }
+
+ \smath{v} & ::= & \\
+ & $\mid$ & \smath{Empty} \\
+ & $\mid$ & \smath{Char(c)} \\
+ & $\mid$ & \smath{Seq(v_1\cdot v_2)} \\
+ & $\mid$ & \smath{Left(v)} \\
+ & $\mid$ & \smath{Right(v)} \\
+ & $\mid$ & \smath{[v_1,...,v_n]}
+ \end{tabular}
+ \end{columns}
+ \vspace{5 mm}
+ There is also a notion of a string behind a value $\mid v \mid$
+ \end{frame}
- \textbf{Nullable Function}
- \begin{tabular}{ l l l }
- \textit{nullable} (\textbf{0}) & $\dn$ & \textit{False}\\
- \textit{nullable} (\textbf{1}) & $\dn$ & \textit{True}\\
- \textit{nullable} (c) & $\dn$ & \textit{False}\\
- \textit{nullable} ($r_1 + r_2$) & $\dn$ & \textit{nullable} $r_1$ $\lor$ \textit{nullable} $r_2$\\
- \textit{nullable} ($r_1 \cdot r_2$) & $\dn$ & \textit{nullable} $r_1$ $\land$ \textit{nullable} $r_2$\\
- \textit{nullable} ($r^*$) & $\dn$ & \textit{True}\\
- \end{tabular}\\
- \vspace{3 mm}
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ \begin{frame}
+ \frametitle{Mkeps and Injection Functions}
+
\textbf{Mkeps Function}
\begin{tabular}{ l l l }
- \textit{mkeps} (\textbf{1}) & $\dn$ & () \\
+ \textit{mkeps} (\textbf{1}) & $\dn$ & \textit{Empty} \\
\textit{mkeps} ($r_1 \cdot r_2$) & $\dn$ & \textit{Seq} (\textit{mkeps} $r_1$) (\textit{mkeps} $r_2$)\\
\textit{mkeps} ($r_1 + r_2$) & $\dn$ & \textit{if nullable $r_1$ then Left} (\textit{mkeps $r_1$})\\
& & \textit{else Right} (\textit{mkeps $r_2$})\\
@@ -326,7 +345,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
- \frametitle{Nullable, Mkeps and Injection Functions}
+ \frametitle{Mkeps and Injection Functions}
\textbf{Injection Function}
\begin{tabular}{ l l l }
@@ -355,40 +374,40 @@
\item The idea is from a paper by Frisch \& Cardelli about
GREEDY matching (GREEDY = preferring instant gratification
- to delayed repletion):
+ to delayed repletion)
- \item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\
- \begin{center}
- \begin{tabular}{ll}
- GREEDY: & $[Left(a), Right(Left(b))]$\\
- POSIX: & $[Right(Right(Seq(a, b)))]$
- \end{tabular}
- \end{center}
+ %\item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\
+ %\begin{center}
+ %\begin{tabular}{ll}
+ %GREEDY: & $[Left(a), Right(Left(b))]$\\
+ %POSIX: & $[Right(Right(Seq(a, b)))]$
+ %\end{tabular}
+ %\end{center}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \begin{frame}
- \frametitle{POSIX Ordering Relation by Sulzmann \& Lu}
-
- \begin{center}
- $\infer{\vdash Empty : \epsilon}{}$\hspace{15mm}
- $\infer{\vdash Char(c): c}{}$\bigskip
-
- $\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$
- \bigskip
-
- $\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm}
- $\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip
-
- $\infer{\vdash [] : r^*}{}$\hspace{15mm}
- $\infer{\vdash [v_1,\ldots, v_n] : r^*}
- {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$
- \end{center}
-
- \end{frame}
+% \begin{frame}
+% \frametitle{POSIX Ordering Relation by Sulzmann \& Lu}
+%
+% \begin{center}
+% $\infer{\vdash Empty : \epsilon}{}$\hspace{15mm}
+% $\infer{\vdash Char(c): c}{}$\bigskip
+%
+% $\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$
+% \bigskip
+%
+% $\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm}
+% $\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip
+%
+% $\infer{\vdash [] : r^*}{}$\hspace{15mm}
+% $\infer{\vdash [v_1,\ldots, v_n] : r^*}
+% {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$
+% \end{center}
+%
+% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -422,68 +441,67 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \begin{frame}
- \frametitle{Problems}
-
- \begin{itemize}
- \item I have no doubt the algorithm is correct ---
- the problem is I do not believe their proof.
-
- \begin{center}
- \small
- ``How could I miss this? Well, I was rather careless when
- stating this Lemma :)\smallskip
-
- Great example how formal machine checked proofs (and
- proof assistants) can help to spot flawed reasoning steps.''
-
- \end{center}
-
- \begin{center}
- \small
- ``Well, I don't think there's any flaw. The issue is how to
- come up with a mechanical proof. In my world mathematical
- proof $=$ mechanical proof doesn't necessarily hold.''
-
- \end{center}
-
- \end{itemize}
-
- \end{frame}
+% \begin{frame}
+% \frametitle{Problems}
+%
+% \begin{itemize}
+% \item I have no doubt the algorithm is correct ---
+% the problem is I do not believe their proof.
+%
+% \begin{center}
+% \small
+% ``How could I miss this? Well, I was rather careless when
+% stating this Lemma :)\smallskip
+%
+% Great example how formal machine checked proofs (and
+% proof assistants) can help to spot flawed reasoning steps.''
+%
+% \end{center}
+%
+% \begin{center}
+% \small
+% ``Well, I don't think there's any flaw. The issue is how to
+% come up with a mechanical proof. In my world mathematical
+% proof $=$ mechanical proof doesn't necessarily hold.''
+%
+% \end{center}
+%
+% \end{itemize}
+%
+% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
- \frametitle{Our Solution}
-
+ \frametitle{Our Solution}
+
\begin{itemize}
- \item A direct definition of what a POSIX value is, using the
- relation $s \in r \to v$ (specification)
-
+ \item A direct definition of what a POSIX value is, using the
+ relation \smath{s \in r \to v} (our specification)\bigskip
+
\begin{center}
- $\infer{[] \in \epsilon \to Empty}{}$\hspace{15mm}
- $\infer{c \in c \to Char(c)}{}$\bigskip\medskip
+ \smath{\infer{[] \in \One \to Empty}{}}\hspace{15mm}
+ \smath{\infer{[c] \in c \to Char(c)}{}}\bigskip\medskip
- $\infer{s \in r_1 + r_2 \to Left(v)}
- {s \in r_1 \to v}$\hspace{10mm}
- $\infer{s \in r_1 + r_2 \to Right(v)}
- {s \in r_2 \to v & s \not\in L(r_1)}$\bigskip\medskip
+ \smath{\infer{s \in r_1 + r_2 \to Left(v)}
+ {s \in r_1 \to v}}\hspace{10mm}
+ \smath{\infer{s \in r_1 + r_2 \to Right(v)}
+ {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip
- $\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
- {\small\begin{array}{l}
- s_1 \in r_1 \to v_1 \\
- s_2 \in r_2 \to v_2 \\
- \neg(\exists s_3\,s_4.\; s_3 \not= []
- \wedge s_3 @ s_4 = s_2 \wedge
- s_1 @ s_3 \in L(r_1) \wedge
- s_4 \in L(r_2))
- \end{array}}$
- \\
- \bigskip
- ...
- \end{center}
-
- \end{itemize}
+ \smath{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
+ {\small\begin{array}{l}
+ s_1 \in r_1 \to v_1 \\
+ s_2 \in r_2 \to v_2 \\
+ \neg(\exists s_3\,s_4.\; s_3 \not= []
+ \wedge s_3 @ s_4 = s_2 \wedge
+ s_1 @ s_3 \in L(r_1) \wedge
+ s_4 \in L(r_2))
+ \end{array}}}
+
+ {\ldots}
+ \end{center}
+
+ \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -507,41 +525,41 @@
You can now start to implement optimisations and derive
- correctness proofs for them. But we still do not know whether
-
- \begin{center}
- $s \in r \to v$
- \end{center}
-
- is a POSIX value according to Sulzmann \& Lu's definition
- (biggest value for $s$ and $r$)
+ correctness proofs for them.% But we still do not know whether
+%
+% \begin{center}
+% $s \in r \to v$
+% \end{center}
+%
+% is a POSIX value according to Sulzmann \& Lu's definition
+% (biggest value for $s$ and $r$)
\end{frame}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
- \begin{frame}
- \frametitle{Conclusions}
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \begin{itemize}
-
- \item We replaced the POSIX definition of Sulzmann \& Lu by a
- new definition (ours is inspired by work of Vansummeren,
- 2006)\medskip
+ \begin{frame}
+ \frametitle{Conclusions}
- \item Their proof contained small gaps (acknowledged) but had
- also fundamental flaws\medskip
-
- \item Now, its a nice exercise for theorem proving\medskip
-
- \item Some optimisations need to be applied to the algorithm
- in order to become fast enough\medskip
-
- \item Can be used for lexing, is a small beautiful functional
- program
-
- \end{itemize}
- \end{frame}
-
+ \begin{itemize}
+
+ \item Sulzmann and Lu's informal proof contained small gaps (acknowledged) but we believe it had
+ also fundamental flaws\medskip
+
+ \item We replaced the POSIX definition of Sulzmann \& Lu by a
+ new definition (ours is inspired by work of Vansummeren,
+ 2006)\medskip
+
+ \item Now, its a nice exercise for theorem proving\medskip
+
+ \item Some optimisations need to be applied to the algorithm
+ in order to become fast enough\medskip
+
+ \item Can be used for lexing, is a small beautiful functional
+ program
+
+ \end{itemize}
+ \end{frame}
+
\end{document}