--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Slides/slides03.tex Tue Jun 14 12:37:46 2016 +0100
@@ -0,0 +1,494 @@
+\documentclass[dvipsnames,14pt,t]{beamer}
+\usepackage{slides}
+\usepackage{langs}
+\usepackage{graph}
+\usepackage{data}
+\usepackage{proof}
+
+% beamer stuff
+\renewcommand{\slidecaption}{ITP ????}
+\newcommand{\bl}[1]{\textcolor{blue}{#1}}
+
+
+\begin{document}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{%
+ \begin{tabular}{@ {}c@ {}}
+ \\
+ \Large POSIX Lexing with Derivatives\\[-1.5mm]
+ \Large of Regular Expressions\\
+ \Large (Proof Pearl)\\[-1mm]
+ \end{tabular}}\bigskip\bigskip\bigskip
+
+ \normalsize
+ \begin{center}
+ \begin{tabular}{c}
+ \small Fahad Ausaf\\
+ \small King's College London\\
+ \\
+ \small joint work with Roy Dyckhoff and Christian Urban
+ \end{tabular}
+ \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Regular Expressions}
+
+
+\begin{textblock}{6}(2,5)
+ \begin{tabular}{rrl@ {\hspace{13mm}}l}
+ \bl{$r$} & \bl{$::=$} & \bl{$\varnothing$} & null\\
+ & \bl{$\mid$} & \bl{$\epsilon$} & empty string\\
+ & \bl{$\mid$} & \bl{$c$} & character\\
+ & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
+ & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\
+ & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\
+ \end{tabular}
+ \end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{The Derivative of a Rexp}
+
+\large
+If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular
+expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip
+
+\small
+\bl{$der\,c\,r$} gives the answer, Brzozowski (1964), Owens (2005)
+``\ldots have been lost in the sands of time\ldots''
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Correctness}
+
+It is a relative easy exercise in a theorem prover:
+
+\begin{center}
+\bl{$matches(r, s)$} if and only if \bl{$s \in L(r)$}
+\end{center}\bigskip
+
+\small
+where \bl{$matches(r, s) \dn nullable(ders(r, s))$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{POSIX Regex Matching}
+
+Two rules:
+
+\begin{itemize}
+\item Longest match rule (``maximal munch rule''): The
+longest initial substring matched by any regular expression
+is taken as the next token.
+
+\begin{center}
+\bl{$\texttt{\Grid{iffoo\VS bla}}$}
+\end{center}\medskip
+
+\item Rule priority:
+For a particular longest initial substring, the first regular
+expression that can match determines the token.
+
+\begin{center}
+\bl{$\texttt{\Grid{if\VS bla}}$}
+\end{center}
+\end{itemize}\bigskip\pause
+
+\small
+\hfill Kuklewicz: most POSIX matchers are buggy\\
+\footnotesize
+\hfill \url{http://www.haskell.org/haskellwiki/Regex_Posix}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{POSIX Regex Matching}
+
+\begin{itemize}
+
+\item Sulzmann \& Lu came up with a beautiful
+idea for how to extend the simple regular expression
+matcher to POSIX matching/lexing (FLOPS 2014)\bigskip\bigskip
+
+\begin{tabular}{@{\hspace{4cm}}c@{}}
+ \includegraphics[scale=0.20]{pics/sulzmann.jpg}\\[-2mm]
+ \hspace{0cm}\footnotesize Martin Sulzmann
+\end{tabular}\bigskip\bigskip
+
+\item the idea: define an inverse operation to the derivatives
+\end{itemize}
+
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Regexes and Values}
+
+Regular expressions and their corresponding values
+(for \emph{how} a regular expression matched a string):
+
+\begin{center}
+\begin{columns}
+\begin{column}{3cm}
+\begin{tabular}{@{}rrl@{}}
+ \bl{$r$} & \bl{$::=$} & \bl{$\varnothing$}\\
+ & \bl{$\mid$} & \bl{$\epsilon$} \\
+ & \bl{$\mid$} & \bl{$c$} \\
+ & \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
+ & \bl{$\mid$} & \bl{$r_1 + r_2$} \\
+ \\
+ & \bl{$\mid$} & \bl{$r^*$} \\
+ \\
+ \end{tabular}
+\end{column}
+\begin{column}{3cm}
+\begin{tabular}{@{\hspace{-7mm}}rrl@{}}
+ \bl{$v$} & \bl{$::=$} & \\
+ & & \bl{$Empty$} \\
+ & \bl{$\mid$} & \bl{$Char(c)$} \\
+ & \bl{$\mid$} & \bl{$Seq(v_1,v_2)$}\\
+ & \bl{$\mid$} & \bl{$Left(v)$} \\
+ & \bl{$\mid$} & \bl{$Right(v)$} \\
+ & \bl{$\mid$} & \bl{$[]$} \\
+ & \bl{$\mid$} & \bl{$[v_1,\ldots\,v_n]$} \\
+ \end{tabular}
+\end{column}
+\end{columns}
+\end{center}\pause
+
+There is also a notion of a string behind a value: \bl{$|v|$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Sulzmann \& Lu Matcher}
+
+We want to match the string \bl{$abc$} using \bl{$r_1$}:
+
+\begin{center}
+\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
+\node (r1) {\bl{$r_1$}};
+\node (r2) [right=of r1] {\bl{$r_2$}};
+\draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};\pause
+\node (r3) [right=of r2] {\bl{$r_3$}};
+\draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};\pause
+\node (r4) [right=of r3] {\bl{$r_4$}};
+\draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};\pause
+\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$\;\;nullable?$}}};\pause
+\node (v4) [below=of r4] {\bl{$v_4$}};
+\draw[->,line width=1mm] (r4) -- (v4);\pause
+\node (v3) [left=of v4] {\bl{$v_3$}};
+\draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};\pause
+\node (v2) [left=of v3] {\bl{$v_2$}};
+\draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};\pause
+\node (v1) [left=of v2] {\bl{$v_1$}};
+\draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};\pause
+\draw[->,line width=0.5mm] (r3) -- (v3);
+\draw[->,line width=0.5mm] (r2) -- (v2);
+\draw[->,line width=0.5mm] (r1) -- (v1);
+\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
+\end{tikzpicture}
+\end{center}
+
+\only<10>{
+The original ideas of Sulzmann and Lu are the \bl{\textit{mkeps}}
+and \bl{\textit{inj}} functions (ommitted here).}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t,squeeze]
+\frametitle{Sulzmann \& Lu Paper}
+
+\begin{itemize}
+\item I have no doubt the algorithm is correct ---
+ the problem is I do not believe their proof.
+
+ \begin{center}
+ \begin{bubble}[10cm]\small
+ ``How could I miss this? Well, I was rather careless when
+ stating this Lemma :)\smallskip
+
+ Great example how formal machine checked proofs (and
+ proof assistants) can help to spot flawed reasoning steps.''
+ \end{bubble}
+ \end{center}\pause
+
+ \begin{center}
+ \begin{bubble}[10cm]\small
+ ``Well, I don't think there's any flaw. The issue is how to
+ come up with a mechanical proof. In my world mathematical
+ proof $=$ mechanical proof doesn't necessarily hold.''
+ \end{bubble}
+ \end{center}\pause
+
+\end{itemize}
+
+ \only<3>{%
+ \begin{textblock}{11}(1,4.4)
+ \begin{center}
+ \begin{bubble}[10.9cm]\small\centering
+ \includegraphics[scale=0.37]{pics/msbug.png}
+ \end{bubble}
+ \end{center}
+ \end{textblock}}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}The Proof Idea\\[-1mm] by Sulzmann \& Lu
+\end{tabular}}
+
+\begin{itemize}
+\item introduce an inductively defined ordering relation
+\bl{$v \succ_r v'$} which captures the idea of POSIX matching
+
+\item the algorithm returns the maximum of all possible
+ values that are possible for a regular expression.\pause
+ \bigskip\small
+
+\item the idea is from a paper by Cardelli \& Frisch about
+GREEDY matching (GREEDY $=$ preferring instant gratification to delayed
+repletion):
+
+\item e.g.~given \bl{$(a + (b + ab))^*$} and string \bl{$ab$}
+
+\begin{center}
+\begin{tabular}{ll}
+GREEDY: & \bl{$[Left(a), Right(Left(b)]$}\\
+POSIX: & \bl{$[Right(Right(Seq(a, b))))]$}
+\end{tabular}
+\end{center}
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{}
+\centering
+
+
+\bl{\infer{\vdash Empty : \epsilon}{}}\hspace{15mm}
+\bl{\infer{\vdash Char(c): c}{}}\bigskip
+
+\bl{\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}}
+\bigskip
+
+\bl{\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}}\hspace{15mm}
+\bl{\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}}\bigskip
+
+\bl{\infer{\vdash [] : r^*}{}}\hspace{15mm}
+\bl{\infer{\vdash [v_1,\ldots, v_n] : r^*}
+ {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}<1>[c]
+\frametitle{}
+\small
+
+%\begin{tabular}{@{}lll@{}}
+%\bl{$POSIX(v, r)$} & \bl{$\dn$} & \bl{$\vdash v : r$}\\
+% & & \bl{$\wedge \;\;(\forall v'.\;\; \vdash v' : r \,\wedge\, |v'| = |v|
+% \Rightarrow v \succ_{\alert<2>{r}} v')$}
+%\end{tabular}\bigskip\bigskip\bigskip
+
+
+\centering
+
+%\bl{\infer{Seq(v_1, v_2) \succ_{\alert<2>{r_1\cdot r_2}} Seq(v'_1, v'_2)}
+% {v_1 = v'_1 \quad v_2 \succ_{\alert<2>{r_2}} v'_2}}\hspace{3mm}
+%\bl{\infer{Seq(v_1, v_2) \succ_{\alert<2>{r_1\cdot r_2}} Seq(v'_1, v'_2)}
+% {v_1 \not= v'_1 \quad v_1 \succ_{\alert<2>{r_1}} v'_1}}
+%\bigskip
+
+%\bl{\infer{Left(v) \succ_{\alert<2>{r_1 + r_2}} Left(v')}
+% {v \succ_{\alert<2>{r_1}} v'}}\hspace{15mm}
+%\bl{\infer{Right(v) \succ_{\alert<2>{r_1 + r_2}} Right(v')}
+% {v \succ_{\alert<2>{r_2}} v'}}\bigskip\medskip
+
+%\bl{\infer{Left(v) \succ_{\alert<2>{r_1 + r_2}} Right(v')}
+% {length |v| \ge length |v'|}}\hspace{15mm}
+%\bl{\infer{Right(v) \succ_{\alert<2>{r_1 + r_2}} Left(v')}
+% {length |v| > length |v'|}}\bigskip
+
+%\bl{$\big\ldots$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Problems}
+
+\begin{itemize}
+\item Sulzmann: \ldots Let's assume \bl{$v$} is not
+ a $POSIX$ value, then there must be another one
+ \ldots contradiction.\bigskip\pause
+
+\item Exists?
+
+\begin{center}
+\bl{$L(r) \not= \varnothing \;\Rightarrow\; \exists v.\;POSIX(v, r)$}
+\end{center}\bigskip\bigskip\pause
+
+\item in the sequence case
+\bl{$Seq(v_1, v_2)\succ_{r_1\cdot r_2} Seq(v_1', v_2')$},
+the induction hypotheses require
+\bl{$|v_1| = |v'_1|$} and \bl{$|v_2| = |v'_2|$},
+but you only know
+
+\begin{center}
+\bl{$|v_1| \;@\; |v_2| = |v'_1| \;@\; |v'_2|$}
+\end{center}\pause\small
+
+\item although one begins with the assumption that the two
+values have the same flattening, this cannot be maintained
+as one descends into the induction (alternative, sequence)
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Our Solution}
+
+\begin{itemize}
+\item a direct definition of what a POSIX value is, using
+the relation \bl{$s \in r \to v$} (specification):\medskip
+
+\begin{center}
+\bl{\infer{[] \in \epsilon \to Empty}{}}\hspace{15mm}
+\bl{\infer{c \in c \to Char(c)}{}}\bigskip\medskip
+
+\bl{\infer{s \in r_1 + r_2 \to Left(v)}
+ {s \in r_1 \to v}}\hspace{10mm}
+\bl{\infer{s \in r_1 + r_2 \to Right(v)}
+ {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip
+
+\bl{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
+ {\small\begin{array}{l}
+ s_1 \in r_1 \to v_1 \\
+ s_2 \in r_2 \to v_2 \\
+ \neg(\exists s_3\,s_4.\; s_3 \not= []
+ \wedge s_3 @ s_4 = s_2 \wedge
+ s_1 @ s_3 \in L(r_1) \wedge
+ s_4 \in L(r_2))
+ \end{array}}}
+
+\bl{\ldots}
+\end{center}
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Properties}
+
+It is almost trival to prove:
+
+\begin{itemize}
+\item Uniqueness
+\begin{center}
+If \bl{$s \in r \to v_1$} and \bl{$s \in r \to v_2$} then
+\bl{$v_1 = v_2$}.
+\end{center}\bigskip
+
+\item Correctness
+\begin{center}
+\bl{$lexer(r, s) = v$} if and only if \bl{$s \in r \to v$}
+\end{center}
+\end{itemize}\bigskip\bigskip\pause
+
+
+You can now start to implement optimisations and derive
+correctness proofs for them. But we still do not know whether
+
+\begin{center}
+\bl{$s \in r \to v$}
+\end{center}
+
+is a POSIX value according to Sulzmann \& Lu's definition
+(biggest value for \bl{$s$} and \bl{$r$})
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Conclusion}
+
+\begin{itemize}
+
+\item we replaced the POSIX definition of Sulzmann \& Lu by a
+ new definition (ours is inspired by work of Vansummeren,
+ 2006)\medskip
+
+\item their proof contained small gaps (acknowledged) but had
+ also fundamental flaws\medskip
+
+\item now, its a nice exercise for theorem proving\medskip
+
+\item some optimisations need to be applied to the algorithm
+ in order to become fast enough\medskip
+
+\item can be used for lexing, is a small beautiful functional
+ program
+
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ \begin{frame}[b]
+ \frametitle{
+ \begin{tabular}{c}
+ \mbox{}\\[13mm]
+ \alert{\LARGE Questions?}
+ \end{tabular}}
+
+ \end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\end{document}
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End:
+