diff -r 10c096d59218 -r 2585e2a7a7ab Slides/slides03.tex
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Slides/slides03.tex	Tue Jun 14 12:37:46 2016 +0100
@@ -0,0 +1,494 @@
+\documentclass[dvipsnames,14pt,t]{beamer}
+\usepackage{slides}
+\usepackage{langs}
+\usepackage{graph}
+\usepackage{data}
+\usepackage{proof}
+
+% beamer stuff 
+\renewcommand{\slidecaption}{ITP ????}
+\newcommand{\bl}[1]{\textcolor{blue}{#1}}
+
+
+\begin{document}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{%
+  \begin{tabular}{@ {}c@ {}}
+  \\
+  \Large POSIX Lexing with Derivatives\\[-1.5mm] 
+  \Large of Regular Expressions\\
+  \Large (Proof Pearl)\\[-1mm] 
+  \end{tabular}}\bigskip\bigskip\bigskip
+
+  \normalsize
+  \begin{center}
+  \begin{tabular}{c}
+  \small Fahad Ausaf\\
+  \small King's College London\\
+  \\
+  \small joint work with Roy Dyckhoff and Christian Urban
+  \end{tabular}
+  \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Regular Expressions}
+
+
+\begin{textblock}{6}(2,5)
+  \begin{tabular}{rrl@ {\hspace{13mm}}l}
+  \bl{$r$} & \bl{$::=$}  & \bl{$\varnothing$}   & null\\
+           & \bl{$\mid$} & \bl{$\epsilon$}      & empty string\\
+           & \bl{$\mid$} & \bl{$c$}             & character\\
+           & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
+           & \bl{$\mid$} & \bl{$r_1 + r_2$}     & alternative / choice\\
+           & \bl{$\mid$} & \bl{$r^*$}           & star (zero or more)\\
+  \end{tabular}
+  \end{textblock}
+  
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{The Derivative of a Rexp}
+
+\large
+If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular 
+expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip
+
+\small
+\bl{$der\,c\,r$} gives the answer, Brzozowski (1964), Owens (2005)
+``\ldots have been lost in the sands of time\ldots''
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Correctness}
+
+It is a relative easy exercise in a theorem prover:
+
+\begin{center}
+\bl{$matches(r, s)$}  if and only if  \bl{$s \in L(r)$} 
+\end{center}\bigskip
+
+\small
+where \bl{$matches(r, s) \dn nullable(ders(r, s))$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{POSIX Regex Matching}
+
+Two rules:
+
+\begin{itemize}
+\item Longest match rule (``maximal munch rule''): The 
+longest initial substring matched by any regular expression 
+is taken as the next token.
+
+\begin{center}
+\bl{$\texttt{\Grid{iffoo\VS bla}}$}
+\end{center}\medskip
+
+\item Rule priority:
+For a particular longest initial substring, the first regular
+expression that can match determines the token.
+
+\begin{center}
+\bl{$\texttt{\Grid{if\VS bla}}$}
+\end{center}
+\end{itemize}\bigskip\pause
+
+\small
+\hfill Kuklewicz: most POSIX matchers are buggy\\
+\footnotesize
+\hfill \url{http://www.haskell.org/haskellwiki/Regex_Posix}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{POSIX Regex Matching}
+
+\begin{itemize}
+
+\item Sulzmann \& Lu came up with a beautiful
+idea for how to extend the simple regular expression 
+matcher to POSIX matching/lexing (FLOPS 2014)\bigskip\bigskip
+
+\begin{tabular}{@{\hspace{4cm}}c@{}}
+  \includegraphics[scale=0.20]{pics/sulzmann.jpg}\\[-2mm]
+  \hspace{0cm}\footnotesize Martin Sulzmann
+\end{tabular}\bigskip\bigskip
+
+\item the idea: define an inverse operation to the derivatives
+\end{itemize}
+
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Regexes and Values}
+
+Regular expressions and their corresponding values
+(for \emph{how} a regular expression matched a string):
+
+\begin{center}
+\begin{columns}
+\begin{column}{3cm}
+\begin{tabular}{@{}rrl@{}}
+  \bl{$r$} & \bl{$::=$}  & \bl{$\varnothing$}\\
+           & \bl{$\mid$} & \bl{$\epsilon$}   \\
+           & \bl{$\mid$} & \bl{$c$}          \\
+           & \bl{$\mid$} & \bl{$r_1 \cdot r_2$}\\
+           & \bl{$\mid$} & \bl{$r_1 + r_2$}   \\
+  \\
+           & \bl{$\mid$} & \bl{$r^*$}         \\
+  \\
+  \end{tabular}
+\end{column}
+\begin{column}{3cm}
+\begin{tabular}{@{\hspace{-7mm}}rrl@{}}
+  \bl{$v$} & \bl{$::=$}  & \\
+           &             & \bl{$Empty$}   \\
+           & \bl{$\mid$} & \bl{$Char(c)$}          \\
+           & \bl{$\mid$} & \bl{$Seq(v_1,v_2)$}\\
+           & \bl{$\mid$} & \bl{$Left(v)$}   \\
+           & \bl{$\mid$} & \bl{$Right(v)$}  \\
+           & \bl{$\mid$} & \bl{$[]$}      \\
+           & \bl{$\mid$} & \bl{$[v_1,\ldots\,v_n]$} \\
+  \end{tabular}
+\end{column}
+\end{columns}
+\end{center}\pause
+
+There is also a notion of a string behind a value: \bl{$|v|$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Sulzmann \& Lu Matcher}
+
+We want to match the string \bl{$abc$} using \bl{$r_1$}:
+
+\begin{center}
+\begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}]
+\node (r1)  {\bl{$r_1$}};
+\node (r2) [right=of r1] {\bl{$r_2$}};
+\draw[->,line width=1mm]  (r1) -- (r2) node[above,midway] {\bl{$der\,a$}};\pause
+\node (r3) [right=of r2] {\bl{$r_3$}};
+\draw[->,line width=1mm]  (r2) -- (r3) node[above,midway] {\bl{$der\,b$}};\pause
+\node (r4) [right=of r3] {\bl{$r_4$}};
+\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {\bl{$der\,c$}};\pause
+\draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$\;\;nullable?$}}};\pause
+\node (v4) [below=of r4] {\bl{$v_4$}};
+\draw[->,line width=1mm]  (r4) -- (v4);\pause
+\node (v3) [left=of v4] {\bl{$v_3$}};
+\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}};\pause
+\node (v2) [left=of v3] {\bl{$v_2$}};
+\draw[->,line width=1mm]  (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}};\pause
+\node (v1) [left=of v2] {\bl{$v_1$}};
+\draw[->,line width=1mm]  (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}};\pause
+\draw[->,line width=0.5mm]  (r3) -- (v3);
+\draw[->,line width=0.5mm]  (r2) -- (v2);
+\draw[->,line width=0.5mm]  (r1) -- (v1);
+\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}};
+\end{tikzpicture}
+\end{center}
+
+\only<10>{
+The original ideas of Sulzmann and Lu are the \bl{\textit{mkeps}} 
+and \bl{\textit{inj}} functions (ommitted here).}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t,squeeze]
+\frametitle{Sulzmann \& Lu Paper}
+
+\begin{itemize}
+\item I have no doubt the algorithm is correct --- 
+  the problem is I do not believe their proof.
+
+  \begin{center}
+  \begin{bubble}[10cm]\small
+  ``How could I miss this? Well, I was rather careless when 
+  stating this Lemma :)\smallskip
+ 
+  Great example how formal machine checked proofs (and 
+  proof assistants) can help to spot flawed reasoning steps.''
+  \end{bubble}
+  \end{center}\pause
+  
+  \begin{center}
+  \begin{bubble}[10cm]\small
+  ``Well, I don't think there's any flaw. The issue is how to 
+  come up with a mechanical proof. In my world mathematical 
+  proof $=$ mechanical proof doesn't necessarily hold.''
+  \end{bubble}
+  \end{center}\pause
+  
+\end{itemize}
+
+  \only<3>{%
+  \begin{textblock}{11}(1,4.4)
+  \begin{center}
+  \begin{bubble}[10.9cm]\small\centering
+  \includegraphics[scale=0.37]{pics/msbug.png}
+  \end{bubble}
+  \end{center}
+  \end{textblock}}
+  
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}The Proof Idea\\[-1mm] by Sulzmann \& Lu
+\end{tabular}}
+
+\begin{itemize}
+\item introduce an inductively defined ordering relation 
+\bl{$v \succ_r v'$} which captures the idea of POSIX matching
+
+\item the algorithm returns the maximum of all possible
+ values that are possible for a regular expression.\pause
+ \bigskip\small
+ 
+\item the idea is from a paper by Cardelli \& Frisch about 
+GREEDY matching (GREEDY $=$ preferring instant gratification to delayed
+repletion):
+
+\item e.g.~given \bl{$(a + (b + ab))^*$} and string \bl{$ab$}
+
+\begin{center}
+\begin{tabular}{ll}
+GREEDY: & \bl{$[Left(a), Right(Left(b)]$}\\
+POSIX:  & \bl{$[Right(Right(Seq(a, b))))]$}  
+\end{tabular}
+\end{center} 
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{}
+\centering
+
+
+\bl{\infer{\vdash Empty : \epsilon}{}}\hspace{15mm}
+\bl{\infer{\vdash Char(c): c}{}}\bigskip
+
+\bl{\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}}
+\bigskip
+
+\bl{\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}}\hspace{15mm}
+\bl{\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}}\bigskip
+
+\bl{\infer{\vdash [] : r^*}{}}\hspace{15mm}
+\bl{\infer{\vdash [v_1,\ldots, v_n] : r^*}
+          {\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}<1>[c]
+\frametitle{}
+\small
+
+%\begin{tabular}{@{}lll@{}}
+%\bl{$POSIX(v, r)$} & \bl{$\dn$} & \bl{$\vdash v : r$}\\ 
+% & &   \bl{$\wedge \;\;(\forall v'.\;\; \vdash v' : r \,\wedge\, |v'| = |v| 
+%     \Rightarrow v \succ_{\alert<2>{r}} v')$}
+%\end{tabular}\bigskip\bigskip\bigskip
+
+
+\centering
+
+%\bl{\infer{Seq(v_1, v_2) \succ_{\alert<2>{r_1\cdot r_2}} Seq(v'_1, v'_2)}
+%   {v_1 = v'_1 \quad v_2 \succ_{\alert<2>{r_2}} v'_2}}\hspace{3mm}
+%\bl{\infer{Seq(v_1, v_2) \succ_{\alert<2>{r_1\cdot r_2}} Seq(v'_1, v'_2)}
+%   {v_1 \not= v'_1 \quad v_1 \succ_{\alert<2>{r_1}} v'_1}}
+%\bigskip
+
+%\bl{\infer{Left(v) \succ_{\alert<2>{r_1 + r_2}} Left(v')}
+%          {v \succ_{\alert<2>{r_1}} v'}}\hspace{15mm}
+%\bl{\infer{Right(v) \succ_{\alert<2>{r_1 + r_2}} Right(v')}
+%          {v \succ_{\alert<2>{r_2}} v'}}\bigskip\medskip
+
+%\bl{\infer{Left(v) \succ_{\alert<2>{r_1 + r_2}} Right(v')}
+%          {length |v|  \ge length |v'|}}\hspace{15mm}
+%\bl{\infer{Right(v) \succ_{\alert<2>{r_1 + r_2}} Left(v')}
+%          {length |v| >  length |v'|}}\bigskip
+
+%\bl{$\big\ldots$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Problems}
+
+\begin{itemize}
+\item Sulzmann: \ldots Let's assume \bl{$v$} is not 
+    a $POSIX$ value, then there must be another one
+    \ldots contradiction.\bigskip\pause
+
+\item Exists?
+
+\begin{center}
+\bl{$L(r) \not= \varnothing \;\Rightarrow\; \exists v.\;POSIX(v, r)$}
+\end{center}\bigskip\bigskip\pause
+
+\item in the sequence case 
+\bl{$Seq(v_1, v_2)\succ_{r_1\cdot r_2} Seq(v_1', v_2')$}, 
+the induction hypotheses require
+\bl{$|v_1| = |v'_1|$} and \bl{$|v_2| = |v'_2|$}, 
+but you only know
+
+\begin{center}
+\bl{$|v_1| \;@\; |v_2| = |v'_1| \;@\; |v'_2|$}
+\end{center}\pause\small
+
+\item although one begins with the assumption that the two 
+values have the same flattening, this cannot be maintained 
+as one descends into the induction (alternative, sequence)
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Our Solution}
+
+\begin{itemize}
+\item a direct definition of what a POSIX value is, using
+the relation \bl{$s \in r \to v$} (specification):\medskip
+
+\begin{center}
+\bl{\infer{[] \in \epsilon \to Empty}{}}\hspace{15mm}
+\bl{\infer{c \in c \to Char(c)}{}}\bigskip\medskip
+
+\bl{\infer{s \in r_1 + r_2 \to Left(v)}
+          {s \in r_1 \to v}}\hspace{10mm}
+\bl{\infer{s \in r_1 + r_2 \to Right(v)}
+          {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip
+
+\bl{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
+          {\small\begin{array}{l}
+           s_1 \in r_1 \to v_1 \\
+           s_2 \in r_2 \to v_2 \\
+           \neg(\exists s_3\,s_4.\; s_3 \not= []
+           \wedge s_3 @ s_4 = s_2 \wedge
+           s_1 @ s_3 \in L(r_1) \wedge
+           s_4 \in L(r_2))
+           \end{array}}}
+           
+\bl{\ldots}           
+\end{center}
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Properties}
+
+It is almost trival to prove:
+
+\begin{itemize}
+\item Uniqueness
+\begin{center}
+If \bl{$s \in r \to v_1$} and \bl{$s \in r \to v_2$} then
+\bl{$v_1 = v_2$}.
+\end{center}\bigskip
+
+\item Correctness
+\begin{center}
+\bl{$lexer(r, s) = v$} if and only if \bl{$s \in r \to v$}
+\end{center}
+\end{itemize}\bigskip\bigskip\pause
+
+
+You can now start to implement optimisations and derive
+correctness proofs for them. But we still do not know whether
+
+\begin{center}
+\bl{$s \in r \to v$} 
+\end{center}
+
+is a POSIX value according to Sulzmann \& Lu's definition
+(biggest value for \bl{$s$} and \bl{$r$})
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Conclusion}
+
+\begin{itemize}
+
+\item we replaced the POSIX definition of Sulzmann \& Lu by a
+      new definition (ours is inspired by work of Vansummeren,
+      2006)\medskip
+  
+\item their proof contained small gaps (acknowledged) but had
+      also fundamental flaws\medskip
+
+\item now, its a nice exercise for theorem proving\medskip
+
+\item some optimisations need to be applied to the algorithm
+      in order to become fast enough\medskip
+
+\item can be used for lexing, is a small beautiful functional
+      program
+ 
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  \begin{frame}[b]
+  \frametitle{
+  \begin{tabular}{c}
+  \mbox{}\\[13mm]
+  \alert{\LARGE Questions?}
+  \end{tabular}}
+
+  \end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
+
+\end{document}
+
+
+%%% Local Variables:  
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
+