lexing: changeset 208:02568e85a394

Binary file Slides/slides04.pdf has changed

--- a/Slides/slides04.tex	Sat Aug 20 13:39:19 2016 +0100
+++ b/Slides/slides04.tex	Wed Aug 24 10:37:40 2016 +0100
@@ -21,32 +21,7 @@
 {\end{minipage}\egroup;%
 \end{tikzpicture}\bigskip}
 
-\newcommand\grid[1]{%
-\begin{tikzpicture}[baseline=(char.base)]
-  \path[use as bounding box]
-    (0,0) rectangle (1em,1em);
-  \draw[red!50, fill=red!20]
-    (0,0) rectangle (1em,1em);
-  \node[inner sep=1pt,anchor=base west]
-    (char) at (0em,\gridraiseamount) {#1};
-\end{tikzpicture}}
-\newcommand\gridraiseamount{0.12em}
-
-\makeatletter
-\newcommand\Grid[1]{%
-  \@tfor\z:=#1\do{\grid{\z}}}
-\makeatother	
-
-\newcommand\Vspace[1][.3em]{%
-  \mbox{\kern.06em\vrule height.3ex}%
-  \vbox{\hrule width#1}%
-  \hbox{\vrule height.3ex}}
-
-\def\VS{\Vspace[0.6em]}
-
-
-\title[POSIX Lexing with Derivatives of Regexes]
-      {\bf POSIX Lexing with\\ 
+\title{\bf POSIX Lexing with\\ 
        \bf Derivatives of Regular Expressions\\
        \bf (Proof Pearl)}
 \author{Fahad Ausaf, Roy Dyckhoff and Christian Urban}
@@ -125,6 +100,18 @@
 	
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
+%	\begin{frame}
+%		\frametitle{Sulzmann and Lu's Matcher}
+%		\begin{itemize}
+%			\item Sulzmann \& Lu came up with a beautiful idea for how 
+%			to extend the simple regular expression matcher to POSIX 
+%			matching/lexing (FLOPS 2014)
+%			\item The idea: define an inverse operation to the derivatives
+%		\end{itemize}
+%	\end{frame}
+	
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
 	\begin{frame}
 	\frametitle{Sulzmann and Lu's Matcher}
 
@@ -177,39 +164,6 @@
    \alert{\textbf{how}}: POSIX, GREEDY, \ldots
 
    \end{frame}
-   
-   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-	
-	\begin{frame}{Regular Expressions and Values}
-	Regular expressions and their corresponding values (for how a 
-	regular expression matched a string):\bigskip 
-	
-    \begin{columns}[c] % the "c" option specifies center vertical alignment
-    \column{.4\textwidth} % column designated by a command
-     	\begin{tabular}{ l l l }
-			\smath{r} & ::= 		& \smath{\Zero} 	\\
-			    & $\mid$	& \smath{\One} 		\\
-			    & $\mid$	& \smath{c} 				\\
-			    & $\mid$	& \smath{r_1 \cdot r_2}	\\
-			    & $\mid$	& \smath{r_1 + r_2}		\\ \\
-			    & $\mid$	& \smath{r^*}				\\
-		\end{tabular}
-    \column{.4\textwidth}
-     	\begin{tabular}{ l l l }
-     	
-			\smath{v} & ::= 		& \\
-			    & $\mid$	& \smath{Empty} 			\\
-			    & $\mid$	& \smath{Char(c)} 		\\
-			    & $\mid$	& \smath{Seq(v_1\cdot v_2)}	\\
-			    & $\mid$	& \smath{Left(v)}		\\
-			    & $\mid$	& \smath{Right(v)}		\\
-			    & $\mid$	& \smath{[v_1,...,v_n]}
-		\end{tabular}
-    \end{columns}
-	\end{frame}
-	
-	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-	
 
 
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -230,13 +184,15 @@
 	token.
     \end{bubble}
   
-	For example: \smath{r_{keywords} + r_{identifiers}}:\bigskip
+	For example: \smath{r_{keywords} + r_{identifiers}} (fix graphics below)
 
-		\begin{itemize}
-		\item \smath{\texttt{\Grid{iffoo\VS bla}}} 
-
-		\item \smath{\texttt{\Grid{if\VS bla}}}
-		\end{itemize}	
+		\begin{center}
+		\texttt{iffoo\_bla} 
+		\end{center}
+		
+		\begin{center}
+		\texttt{if\_bla} 
+		\end{center}	
 	
 	\end{frame}
 	
@@ -266,46 +222,47 @@
 	
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
+%	\begin{frame}
+%	\frametitle{Correctness}
+%	\begin{itemize}
+%		\item Sulzmann \& Lu came up with a beautiful idea for how 
+%		to extend the simple regular expression matcher to POSIX 
+%		matching/lexing (FLOPS 2014)
+%		\item The idea: define an inverse operation to the derivatives
+%	\end{itemize}
+%	\end{frame}
+
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
 	\begin{frame}
-	\frametitle{``Correctness'' by Sulzmann and Lu}
-	\begin{itemize}
-		\item Sulzmann \& Lu's idea is to order all possible
-		answer such that they can prove the correct answer is
-		the maximum
-		\item The idea is taken from a GREEDY algorithm (and it 
-		works there)\bigskip\pause
-		
-		\item {\bf But} we made no progress in formalising 
-		Sulzmann \& Lu's idea, because
-		
-      \begin{itemize}
-      \item transitivity, existence of maxima etc all fail to
-      turn into real proofs
-      \item the reason: the ordering works only if ....
-      \item though we did find mistakes:
-\begin{center}
-	\small
-	``How could I miss this? Well, I was rather careless when 
-	stating this Lemma :)\smallskip
-	
-	Great example how formal machine checked proofs (and 
-	proof assistants) can help to spot flawed reasoning steps.''
-	
-	\end{center}\pause
-	  
-	\begin{center}
-	\small
-	``Well, I don't think there's any flaw. The issue is how to 
-	come up with a mechanical proof. In my world mathematical 
-	proof $=$ mechanical proof doesn't necessarily hold.''
-	
-	\end{center}\pause
-	  \end{itemize}		
-   
-	\end{itemize}
+	\frametitle{Regular Expressions and Values}
+	Regular expressions and their corresponding values (for how a 
+	regular expression matched string): 
+    \begin{columns}[c] % the "c" option specifies center vertical alignment
+    \column{.5\textwidth} % column designated by a command
+     	\begin{tabular}{ l l l }
+			$r$ & ::= 		& $\varnothing$ 	\\
+			    & $\mid$	& $\epsilon$ 		\\
+			    & $\mid$	& $c$ 				\\
+			    & $\mid$	& $r_1 \cdot r_2$	\\
+			    & $\mid$	& $r_1 + r_2$		\\
+			    & $\mid$	& $r^*$				\\
+		\end{tabular}
+    \column{.5\textwidth}
+     	\begin{tabular}{ l l l }
+			$v$ & ::= 		& $\varnothing$ 	\\
+			    & $\mid$	& $Empty$ 			\\
+			    & $\mid$	& $Char$($c$) 		\\
+			    & $\mid$	& $Seq$($v_1.v_2$)	\\
+			    & $\mid$	& $Left$($v$)		\\
+			    & $\mid$	& $Right$($v$)		\\
+			    & $\mid$	& []				\\
+			    & $\mid$	& [$v_1,...,v_n$]
+		\end{tabular}
+    \end{columns}
+    There is also a notion of a string behind a value $\mid v \mid$
 	\end{frame}
 	
-
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
 	\begin{frame}
@@ -324,7 +281,8 @@
 		\draw[->,line width=1mm]  (r3) -- (r4) node[above,midway] {$der\,c$};\pause
 		\draw (r4) node[anchor=west] {\;\raisebox{3mm}{$\;\;nullable?$}};\pause
 		\node (v4) [below=of r4] {$v_4$};
-		\draw[->,line width=1mm]  (r4) -- (v4);\pause
+		\draw[->,line width=1mm]  (r4) -- (v4);
+		\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{$mkeps$}};\pause
 		\node (v3) [left=of v4] {$v_3$};
 		\draw[->,line width=1mm]  (v4) -- (v3) node[below,midway] {$inj\,c$};\pause
 		\node (v2) [left=of v3] {$v_2$};
@@ -334,7 +292,6 @@
 		\draw[->,line width=0.5mm]  (r3) -- (v3);
 		\draw[->,line width=0.5mm]  (r2) -- (v2);
 		\draw[->,line width=0.5mm]  (r1) -- (v1);
-		\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{$mkeps$}};
 	\end{tikzpicture}
 	\end{center}
 	
@@ -342,6 +299,96 @@
 	
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
+	\begin{frame}
+	\frametitle{Nullable, Mkeps and Injection Functions}
+	
+	\textbf{Nullable Function}
+	\begin{tabular}{ l l l }
+		\textit{nullable} (\textbf{0}) & $\dn$ & \textit{False}\\
+		\textit{nullable} (\textbf{1}) & $\dn$ & \textit{True}\\
+		\textit{nullable} (c) & $\dn$ & \textit{False}\\
+		\textit{nullable} ($r_1 + r_2$) & $\dn$ & \textit{nullable} $r_1$ $\lor$ \textit{nullable} $r_2$\\
+		\textit{nullable} ($r_1 \cdot r_2$) & $\dn$ & \textit{nullable} $r_1$ $\land$ \textit{nullable} $r_2$\\
+		\textit{nullable} ($r^*$) & $\dn$ & \textit{True}\\
+	\end{tabular}\\	
+	\vspace{3 mm}	
+	\textbf{Mkeps Function}
+	\begin{tabular}{ l l l }
+		\textit{mkeps} (\textbf{1}) 		& $\dn$ & () 	\\
+		\textit{mkeps} ($r_1 \cdot r_2$) 	& $\dn$	& \textit{Seq} (\textit{mkeps} $r_1$) (\textit{mkeps} $r_2$)\\
+		\textit{mkeps} ($r_1 + r_2$) 		& $\dn$	& \textit{if nullable $r_1$ then Left} (\textit{mkeps $r_1$})\\
+											& 		& \textit{else Right} (\textit{mkeps $r_2$})\\
+		\textit{mkeps} ($r^*$) 				& $\dn$	& \textit{Stars} []	\\
+	\end{tabular}\\
+				
+	\end{frame}
+
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
+	\begin{frame}
+	\frametitle{Nullable, Mkeps and Injection Functions}
+	
+	\textbf{Injection Function}
+	\begin{tabular}{ l l l }
+		\textit{inj d c} () & $\dn$ & \textit{Char d}\\
+		\textit{inj} ($r_1 + r_2$) \textit{c} (\textit{Left $v_1$}) & $\dn$ & \textit{Left} (\textit{inj $r_1$ c $v_1$})\\
+		\textit{inj} ($r_1 + r_2$) \textit{c} (\textit{Right $v_2$}) & $\dn$ & \textit{Right} (\textit{inj $r_2$ c $v_2$})\\
+		\textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Seq $v_1$ $v_2$}) & $\dn$ & \textit{Seq} (\textit{inj $r_1$ c $v_1$}) $v_2$\\
+		\textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Left} (\textit{Seq $v_1$ $v_2$})) & $\dn$ & \textit{Seq} (\textit{inj $r_1$ c $v_1$}) $v_2$\\
+		\textit{inj} ($r_1 \cdot r_2$) \textit{c} (\textit{Right $v_2$}) & $\dn$ & \textit{Seq} (\textit{mkeps $r_1$}) (\textit{inj $r_2$ c $v_2$})\\
+		\textit{inj} ($r^*$) \textit{c} (\textit{Seq v} (\textit{Stars vs})) & $\dn$ & \textit{Stars} (\textit{inj r c v}::\textit{vs})
+	\end{tabular}
+	
+	\end{frame}	
+		
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
+	\begin{frame}
+	\frametitle{POSIX Ordering Relation by Sulzmann \& Lu}
+	
+	\begin{itemize}
+	\item Introduce an inductive defined ordering relation 
+	$v \succ_r v'$ which captures the idea of POSIX matching.
+	
+	\item The algorithm returns the maximum of all possible values 
+	that are possible for a regular expression.
+	
+	\item The idea is from a paper by Frisch \& Cardelli about 
+	GREEDY matching (GREEDY = preferring instant gratification 
+	to delayed repletion):
+	
+	\item e.g. given $(a + (b + ab))^{*}$ and string $ab$\\
+	\begin{center}	
+	\begin{tabular}{ll}
+	GREEDY: & $[Left(a), Right(Left(b))]$\\
+	POSIX: 	& $[Right(Right(Seq(a, b)))]$
+	\end{tabular}
+	\end{center}
+	
+	\end{itemize}
+	\end{frame}
+	
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
+	\begin{frame}
+	\frametitle{POSIX Ordering Relation by Sulzmann \& Lu}
+	
+	\begin{center}
+		$\infer{\vdash Empty : \epsilon}{}$\hspace{15mm}	
+		$\infer{\vdash Char(c): c}{}$\bigskip
+		
+		$\infer{\vdash Seq(v_1, v_2) : r_1\cdot r_2}{\vdash v_1 : r_1 \quad \vdash v_2 : r_2}$
+		\bigskip
+		
+		$\infer{\vdash Left(v) : r_1 + r_2}{\vdash v : r_1}$\hspace{15mm}
+		$\infer{\vdash Right(v): r_1 + r_2}{\vdash v : r_2}$\bigskip
+		
+		$\infer{\vdash [] : r^*}{}$\hspace{15mm}
+		$\infer{\vdash [v_1,\ldots, v_n] : r^*}
+		{\vdash v_1 : r \quad\ldots\quad \vdash v_n : r}$
+	\end{center}
+	
+	\end{frame}
 	
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
@@ -350,12 +397,12 @@
 	\begin{itemize}
 	
 	\item Sulzmann: \ldots Let's assume $v$ is not a $POSIX$ value, 
-	then there must be another one \ldots contradiction.\pause
+	then there must be another one \ldots contradiction.
 	
 	\item Exists ?
 	\begin{center}
 	$L(r) \not= \varnothing \;\Rightarrow\; \exists v.\;POSIX(v, r)$
-	\end{center}\pause
+	\end{center}
 	
 	\item In the sequence case 
 	$Seq(v_1,v_2)\succ_{r_1\cdot r_2} Seq(v_1', v_2')$, 
@@ -364,7 +411,7 @@
 	
 	\begin{center}
 	$|v_1| @ |v_2| = |v_1'| @ |v_2'|$
-	\end{center}\pause
+	\end{center}
 	
 	\item Although one begins with the assumption that the two 
 	values have the same flattening, this cannot be maintained 
@@ -376,32 +423,64 @@
 	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 	
 	\begin{frame}
+		\frametitle{Problems}
+		
+		\begin{itemize}
+			\item I have no doubt the algorithm is correct --- 
+			the problem is I do not believe their proof.
+			
+			\begin{center}
+				\small
+				``How could I miss this? Well, I was rather careless when 
+				stating this Lemma :)\smallskip
+				
+				Great example how formal machine checked proofs (and 
+				proof assistants) can help to spot flawed reasoning steps.''
+				
+			\end{center}
+			
+			\begin{center}
+				\small
+				``Well, I don't think there's any flaw. The issue is how to 
+				come up with a mechanical proof. In my world mathematical 
+				proof $=$ mechanical proof doesn't necessarily hold.''
+				
+			\end{center}
+			
+		\end{itemize}	
+		
+	\end{frame}
+	
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
+	\begin{frame}
 	\frametitle{Our Solution}
 
 	\begin{itemize}
 	\item A direct definition of what a POSIX value is, using the 
-	relation \smath{s \in r \to v} (our specification)\bigskip
+	relation $s \in r \to v$ (specification)
 	
 	\begin{center}
-	\smath{\infer{[] \in \epsilon \to Empty}{}}\hspace{15mm}
-	\smath{\infer{[c] \in c \to Char(c)}{}}\bigskip\medskip
-	
-	\smath{\infer{s \in r_1 + r_2 \to Left(v)}
-	      {s \in r_1 \to v}}\hspace{10mm}
-	\smath{\infer{s \in r_1 + r_2 \to Right(v)}
-	      {s \in r_2 \to v & s \not\in L(r_1)}}\bigskip\medskip
-	
-	\smath{\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
-	          {\small\begin{array}{l}
-	           s_1 \in r_1 \to v_1 \\
-	           s_2 \in r_2 \to v_2 \\
-	           \neg(\exists s_3\,s_4.\; s_3 \not= []
-	           \wedge s_3 @ s_4 = s_2 \wedge
-	           s_1 @ s_3 \in L(r_1) \wedge
-	           s_4 \in L(r_2))
-	           \end{array}}}
-	           
-	{\ldots}           
+		$\infer{[] \in \epsilon \to Empty}{}$\hspace{15mm}
+		$\infer{c \in c \to Char(c)}{}$\bigskip\medskip
+		
+		$\infer{s \in r_1 + r_2 \to Left(v)}
+		{s \in r_1 \to v}$\hspace{10mm}
+		$\infer{s \in r_1 + r_2 \to Right(v)}
+		{s \in r_2 \to v & s \not\in L(r_1)}$\bigskip\medskip
+		
+		$\infer{s_1 @ s_2 \in r_1 \cdot r_2 \to Seq(v_1, v_2)}
+		{\small\begin{array}{l}
+			s_1 \in r_1 \to v_1 \\
+			s_2 \in r_2 \to v_2 \\
+			\neg(\exists s_3\,s_4.\; s_3 \not= []
+			\wedge s_3 @ s_4 = s_2 \wedge
+			s_1 @ s_3 \in L(r_1) \wedge
+			s_4 \in L(r_2))
+			\end{array}}$
+		\\
+		\bigskip
+		...
 	\end{center}	
 	
 	\end{itemize}
@@ -438,5 +517,31 @@
 	(biggest value for $s$ and $r$)
 	\end{frame}
 	
+	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	
+	\begin{frame}
+		\frametitle{Conclusions}
+		
+		\begin{itemize}
+			
+			\item We replaced the POSIX definition of Sulzmann \& Lu by a
+			new definition (ours is inspired by work of Vansummeren,
+			2006)\medskip
+			
+			\item Their proof contained small gaps (acknowledged) but had
+			also fundamental flaws\medskip
+			
+			\item Now, its a nice exercise for theorem proving\medskip
+			
+			\item Some optimisations need to be applied to the algorithm
+			in order to become fast enough\medskip
+			
+			\item Can be used for lexing, is a small beautiful functional
+			program
+			
+		\end{itemize}
+	\end{frame}
+	
+	
 
 \end{document}

author	Christian Urban <christian dot urban at kcl dot ac dot uk>
	Wed, 24 Aug 2016 10:37:40 +0100
changeset 208	02568e85a394
parent 207	599b2bfcebf6
child 209	c592ea0661ae

Slides/slides04.pdf		file \| annotate \| diff \| comparison \| revisions
Slides/slides04.tex		file \| annotate \| diff \| comparison \| revisions