slides/ssy.tex
changeset 913 eef6a56c185a
parent 906 2bf1516d730f
child 960 c7009356ddd8
equal deleted inserted replaced
912:e32802acf952 913:eef6a56c185a
       
     1 % !TEX program = xelatex
       
     2 \documentclass[dvipsnames,14pt,t,xelatex,xcolor={table}]{beamer}
       
     3 \usepackage{../slides}
       
     4 \usepackage{../graphicss}
       
     5 \usepackage{../langs}
       
     6 \usepackage{../data}
       
     7 \usetikzlibrary{cd}
       
     8 
       
     9 
       
    10 \usepackage{tcolorbox}
       
    11 \newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
       
    12 \newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
       
    13 \newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
       
    14 
       
    15 
       
    16 
       
    17 \hfuzz=220pt 
       
    18 
       
    19 \lstset{language=Scala,
       
    20         style=mystyle,
       
    21         numbersep=0pt,
       
    22         numbers=none,
       
    23         xleftmargin=0mm}
       
    24 
       
    25 \pgfplotsset{compat=1.17}      
       
    26 
       
    27 \newcommand{\bl}[1]{\textcolor{blue}{#1}}     
       
    28  
       
    29 % beamer stuff 
       
    30 \renewcommand{\slidecaption}{SSY, King's College London}
       
    31 
       
    32 
       
    33 
       
    34 \begin{document}
       
    35 
       
    36 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
    37 \begin{frame}[t]
       
    38 \frametitle{%  
       
    39   \begin{tabular}{@ {}c@ {}}
       
    40     \\[8mm]
       
    41     \LARGE Derivative-Based\\
       
    42     \LARGE Regular Expression Matching\\[5mm]
       
    43     \normalsize\textcolor{gray}{Christian Urban, SSY Group}
       
    44   \end{tabular}}
       
    45 
       
    46 \end{frame}
       
    47 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
    48 
       
    49 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
    50 \begin{frame}[c]
       
    51 \frametitle{A Bit of Background}
       
    52 
       
    53 \begin{textblock}{10}(2,2.5)
       
    54 \includegraphics[scale=0.4]{../pics/phd-1.jpg}
       
    55 \end{textblock}
       
    56 
       
    57 \begin{textblock}{11}(1.7,10)
       
    58 \begin{bubble}[9.6cm]
       
    59   PhD thesis on a particular term-rewriting system:\medskip\\
       
    60   Proved that all rewriting sequences are strongly normalising (terminate). 
       
    61 \end{bubble}
       
    62 \end{textblock}
       
    63 
       
    64 \only<2->{%
       
    65 \begin{textblock}{3}(10,2.5)
       
    66 \begin{bubble}[3cm]
       
    67 \begin{tabular}{rcl}
       
    68   \bl{$t$} & \bl{::=} & \bl{$x$}\\
       
    69          & \bl{|} & \bl{$\lambda x.\,t$}\\
       
    70          & \bl{|} & \bl{$t_1\;t_2$}\\
       
    71          & \bl{|} & \bl{...}
       
    72 \end{tabular}  
       
    73 \end{bubble}  
       
    74 \end{textblock}}
       
    75 
       
    76 \end{frame}
       
    77 
       
    78 
       
    79 
       
    80 
       
    81 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
    82 
       
    83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
    84 \begin{frame}[t]
       
    85 \frametitle{Quiz 1}
       
    86 \begin{bubble}[10cm]\it
       
    87   There are many, many regular expression libraries.\bigskip\\
       
    88   
       
    89   \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
       
    90   difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
       
    91 \end{bubble}
       
    92 
       
    93 \begin{textblock}{12}(2,9)
       
    94 \begin{tikzpicture}
       
    95   \draw(0,0)  node[rotate=20]{linear};
       
    96   \draw(3,0)  node[rotate=-20]{\bl{$O(n \log n)$}};
       
    97   \draw(3,-2) node[rotate=10]{quadratic};
       
    98   \draw(6,0) node[rotate=20]{P};
       
    99   \draw(7,0) node[rotate=-20]{NP};
       
   100   \draw(7,-1) node[rotate=0]{\bl{$O(2^n)$}};
       
   101 \end{tikzpicture}
       
   102 \end{textblock}
       
   103 
       
   104 \end{frame}
       
   105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   106 
       
   107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   108 \begin{frame}[t]
       
   109 \frametitle{Quiz 2}
       
   110 
       
   111 \begin{textblock}{12}(2,2.5)
       
   112 A regular expression for (some) email addresses:
       
   113 \begin{center}
       
   114   ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\_\!\!\_\,.-]^+}}_{\textrm{name}}$)\bl{$\,@\,$}
       
   115   ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\,-]^+}}_{\textrm{domain}}$) \bl{$\,.\,$}
       
   116   ($\underbrace{\bl{[a\mbox{-}z\,.]^{\{2..5\}}}}_{\textrm{top-level domain}}$)
       
   117 \end{center}  
       
   118 \end{textblock}
       
   119 
       
   120 \only<2>{
       
   121 \begin{textblock}{10}(4,8)
       
   122 bounded regular expressions:\medskip\\
       
   123 \begin{tabular}{ll}    
       
   124   \bl{$r^{\{n\}}$}     & exactly n-times \bl{$r$}\\
       
   125   \bl{$r^{\{n..m\}}$}  & between n and m-times\\
       
   126   \bl{$r^{\{n..\}}$}   & from n-times\\
       
   127   \bl{$r^{\{..m\}}$}   & upto m-times\\
       
   128 \end{tabular}\smallskip\\
       
   129 \footnotesize\hfill ${}^*$ in some applications the counters can be in the millions
       
   130 \end{textblock}}
       
   131  
       
   132 \end{frame}
       
   133 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   134 
       
   135 
       
   136 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   137 \begin{frame}[t]
       
   138 \frametitle{Quiz 2}
       
   139 
       
   140 
       
   141 \begin{textblock}{12}(2,2.5)
       
   142   \onslide<1->{Thompson construction: By recursion we are given two NFAs for $r_1$ and $r_2$.\medskip\\
       
   143     \onslide<2->{For $r_1\cdot r_2$:}\bigskip}
       
   144 \end{textblock}
       
   145 
       
   146 \begin{textblock}{12}(2,6)
       
   147 \onslide<1>{
       
   148 \begin{tikzpicture}[node distance=3mm,
       
   149     >=stealth',very thick,
       
   150     every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}]
       
   151 \node[state, initial]  (Q_0)  {$\mbox{}$};
       
   152 \node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};  
       
   153 \node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};  
       
   154 \node (R_1)  [right=of Q_0] {$\ldots$};
       
   155 \node[state, accepting]  (T_1)  [right=of R_1] {$\mbox{}$};
       
   156 \node[state, accepting]  (T_2)  [above=of T_1] {$\mbox{}$};
       
   157 \node[state, accepting]  (T_3)  [below=of T_1] {$\mbox{}$};
       
   158 
       
   159 \node (A_0)  [right=2.5cm of T_1] {$\mbox{}$};
       
   160 \node[state, initial]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
       
   161 \node[state, initial]  (A_02)  [below=1mm of A_0] {$\mbox{}$};
       
   162 
       
   163 \node (b_1)  [right=of A_0] {$\ldots$};
       
   164 \node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
       
   165 \node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
       
   166 \node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
       
   167 \begin{pgfonlayer}{background}
       
   168   \node (1) [rounded corners, inner sep=1mm, thick,
       
   169     draw=black!60, fill=black!20, fit= (Q_0) (R_1) (T_1) (T_2) (T_3)] {};
       
   170   \node (2) [rounded corners, inner sep=1mm, thick,
       
   171     draw=black!60, fill=black!20, fit= (A_0) (b_1) (c_1) (c_2) (c_3)] {};
       
   172 \node [yshift=2mm] at (1.north) {$r_1$};
       
   173 \node [yshift=2mm] at (2.north) {$r_2$};
       
   174 \end{pgfonlayer}
       
   175 \end{tikzpicture}}
       
   176 \end{textblock}
       
   177 
       
   178 \begin{textblock}{12}(2,6)
       
   179 \onslide<2>{
       
   180 \begin{tikzpicture}[node distance=3mm,
       
   181     >=stealth',very thick,
       
   182     every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
       
   183 \node[state, initial]  (Q_0)  {$\mbox{}$};
       
   184 \node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};  
       
   185 \node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};  
       
   186 \node (r_1)  [right=of Q_0] {$\ldots$};
       
   187 \node[state]  (t_1)  [right=of r_1] {$\mbox{}$};
       
   188 \node[state]  (t_2)  [above=of t_1] {$\mbox{}$};
       
   189 \node[state]  (t_3)  [below=of t_1] {$\mbox{}$};
       
   190 
       
   191 \node  (A_0)  [right=2.5cm of t_1] {$\mbox{}$};
       
   192 \node[state]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
       
   193 \node[state]  (A_02)  [below=1mm of A_0] {$\mbox{}$};
       
   194 
       
   195 \node (b_1)  [right=of A_0] {$\ldots$};
       
   196 \node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
       
   197 \node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
       
   198 \node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
       
   199 \path[->] (t_1) edge (A_01);
       
   200 \path[->] (t_2) edge node [above]  {$\varepsilon$\footnotesize{}s} (A_01);
       
   201 \path[->] (t_3) edge (A_01);
       
   202 \path[->] (t_1) edge (A_02);
       
   203 \path[->] (t_2) edge (A_02);
       
   204 \path[->] (t_3) edge node [below]  {$\varepsilon$\footnotesize{}s} (A_02);
       
   205  
       
   206 \begin{pgfonlayer}{background}
       
   207   \node (3) [rounded corners, inner sep=1mm, thick,
       
   208     draw=black!60, fill=black!20, fit= (Q_0) (c_1) (c_2) (c_3)] {};
       
   209 \node [yshift=2mm] at (3.north) {$r_1\cdot r_2$};
       
   210 \end{pgfonlayer}
       
   211 \end{tikzpicture}}
       
   212 \end{textblock}
       
   213 
       
   214 
       
   215 \end{frame}
       
   216 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   217 
       
   218 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   219 \begin{frame}[t]
       
   220 \frametitle{Quiz 2}
       
   221 
       
   222 \begin{textblock}{12}(2,2.5)
       
   223   \onslide<1->{Thompson construction: By recursion we have a NFA for $r$.\medskip\\
       
   224   \onslide<2->{For $r^*$:\bigskip}}
       
   225 \end{textblock}
       
   226 
       
   227 \begin{textblock}{12}(4,6)
       
   228 \onslide<1>{  
       
   229 \begin{tikzpicture}[node distance=3mm,
       
   230     >=stealth',very thick,
       
   231     every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
       
   232     baseline=(current bounding box.north)]
       
   233 \node (2)  {$\mbox{}$};
       
   234 \node[state, initial]  (4)  [above=1mm of 2] {$\mbox{}$};
       
   235 \node[state, initial]  (5)  [below=1mm of 2] {$\mbox{}$};  
       
   236 \node (a)  [right=of 2] {$\ldots$};
       
   237 \node[state, accepting]  (a1)  [right=of a] {$\mbox{}$};
       
   238 \node[state, accepting]  (a2)  [above=of a1] {$\mbox{}$};
       
   239 \node[state, accepting]  (a3)  [below=of a1] {$\mbox{}$};
       
   240 \begin{pgfonlayer}{background}
       
   241 \node (1) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (2) (a1) (a2) (a3)] {};
       
   242 \node [yshift=3mm] at (1.north) {$r$};
       
   243 \end{pgfonlayer}
       
   244 \end{tikzpicture}}
       
   245 \end{textblock}
       
   246 
       
   247 \begin{textblock}{12}(2,6)
       
   248 \onslide<2->{  
       
   249 \begin{tikzpicture}[node distance=3mm,
       
   250     >=stealth',very thick,
       
   251     every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
       
   252     baseline=(current bounding box.north)]
       
   253 \node at (0,0) [state, initial,accepting]  (1)  {$\mbox{}$};
       
   254 \node (2)  [right=16mm of 1] {$\mbox{}$};
       
   255 \node[state]  (4)  [above=1mm of 2] {$\mbox{}$};
       
   256 \node[state]  (5)  [below=1mm of 2] {$\mbox{}$};  
       
   257 \node (a)  [right=of 2] {$\ldots$};
       
   258 \node[state]  (a1)  [right=of a] {$\mbox{}$};
       
   259 \node[state]  (a2)  [above=of a1] {$\mbox{}$};
       
   260 \node[state]  (a3)  [below=of a1] {$\mbox{}$};
       
   261 \path[->] (1) edge node [below]  {$\varepsilon$} (4);
       
   262 \path[->] (1) edge node [below]  {$\varepsilon$} (5);
       
   263 \path[->] (a1) edge [bend left=45] node [below]  {$\varepsilon$} (1);
       
   264 \path[->] (a2) edge [bend right] node [below]  {$\varepsilon$} (1);
       
   265 \path[->] (a3) edge [bend left=45] node [below]  {$\varepsilon$} (1);
       
   266 \begin{pgfonlayer}{background}
       
   267 \node (2) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3)] {};
       
   268 \node [yshift=3mm] at (2.north) {$r^*$};
       
   269 \end{pgfonlayer}
       
   270 \end{tikzpicture}}
       
   271 \end{textblock}
       
   272 
       
   273 \begin{textblock}{12}(2,12)
       
   274 \onslide<3->{ 
       
   275   \begin{bubble}[9.5cm]\it
       
   276   Quiz:\smallskip\\  
       
   277   \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
       
   278 \end{bubble}}
       
   279 \end{textblock}
       
   280 
       
   281 \end{frame}
       
   282 
       
   283 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   284 
       
   285 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   286 \begin{frame}[t]
       
   287   \frametitle{Quiz 3}
       
   288 
       
   289 Hierarchy of Languages:  
       
   290 
       
   291 \begin{center}
       
   292 \begin{tikzpicture}
       
   293 [rect/.style={draw=black!50, 
       
   294               top color=white,
       
   295               bottom color=black!20, 
       
   296               rectangle, 
       
   297               very thick, 
       
   298               rounded corners}, scale=1.2]
       
   299 
       
   300 \draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
       
   301 \draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
       
   302 \draw (0,-0.85) node [rect, text depth=17mm] {\;\;context sensitive languages\;\;};
       
   303 \draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
       
   304 \draw (0,-1.4) node [rect] {regular languages};
       
   305 \end{tikzpicture}
       
   306 \end{center}  
       
   307 
       
   308 \begin{textblock}{12}(2,11.5)
       
   309 \onslide<2->{ 
       
   310   \begin{bubble}[9.5cm]\it
       
   311     Quiz:\smallskip\\
       
   312     
       
   313     \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
       
   314     Say CYK, LL, LR(k), PEG, \ldots
       
   315 \end{bubble}}
       
   316 \end{textblock}
       
   317 
       
   318 \end{frame}
       
   319 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   320 
       
   321 \defverbatim{\foo}{\footnotesize%
       
   322 \begin{verbatim}
       
   323 (?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
       
   324 null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
       
   325 | -|~|!|{}|\|\||\+)*.*(?:.*=.*)))  
       
   326 \end{verbatim}
       
   327 }
       
   328 
       
   329 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   330 \begin{frame}[t,fragile]
       
   331 \frametitle{Quiz 4}
       
   332 
       
   333 \begin{textblock}{12}(2,2.5)
       
   334 \begin{bubble}[9.5cm]\it
       
   335   Quiz:\smallskip\\    
       
   336   Do regular expressions have any security relevance\,?
       
   337 \end{bubble}
       
   338 \end{textblock}
       
   339 
       
   340 \only<2->{
       
   341 \begin{textblock}{2}(1,6)
       
   342 \includegraphics[scale=0.8]{../pics/zeek.png}
       
   343 \includegraphics[scale=0.17]{../pics/snort.png}
       
   344 \end{textblock}}
       
   345 
       
   346 \only<3->{
       
   347 \begin{textblock}{7}(7,5.6)
       
   348   \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
       
   349     \footnotesize
       
   350     It serves more web traffic than Twitter, Amazon, Apple,
       
   351     Instagram, Bing \& Wikipedia combined.\medskip\\
       
   352     Web Application Firewall filters out
       
   353     SQL injection attacks, XSS attacks etc
       
   354 \end{textblock}}
       
   355 
       
   356 \only<3->{
       
   357   \begin{textblock}{13}(4,12.3)
       
   358   \footnotesize
       
   359   \hspace{1.5cm}a global outage on 2 July 2019 (first one for six years)
       
   360   \color{blue}\foo\color{black}
       
   361    \end{textblock}
       
   362 }
       
   363 
       
   364     
       
   365 \end{frame}
       
   366 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   367 
       
   368 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   369 \begin{frame}[t]
       
   370   \frametitle{Quiz 3}
       
   371 
       
   372 \begin{textblock}{12}(2,2) 
       
   373   \begin{bubble}[9.5cm]\it
       
   374     Quiz:\smallskip\\
       
   375     
       
   376     \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
       
   377     Say CYK, LL, LR(k), PEG, \ldots
       
   378 \end{bubble}
       
   379 \end{textblock}
       
   380 
       
   381 
       
   382 \begin{textblock}{12}(1,7)
       
   383 \textbf{POSIX lexing}
       
   384 
       
   385 \begin{tabular}{@ {}ll}
       
   386   Assume you have: & \bl{$r_{key} \dn \texttt{if} + \texttt{then} + \texttt{while} \ldots$}\\
       
   387                    & \bl{$r_{id\;} \,\dn \texttt{[a-z]} \cdot \texttt{[a-z0-9\_]}^*$}\medskip\\
       
   388 
       
   389   Lexing a string with  & \bl{$(r_{key} + r_{id})^*$}\medskip\\
       
   390 
       
   391   What should be & \\
       
   392   the result for:  & \bl{\texttt{"iffoo"}} \;\;\; \bl{\texttt{"if"}}\\
       
   393 \end{tabular}  
       
   394 \end{textblock}
       
   395 
       
   396 \only<2->{
       
   397 \begin{textblock}{13.5}(1.5,4) 
       
   398   \begin{mybox3}{POSIX rules}
       
   399     \begin{itemize}
       
   400     \item \textbf{The Longest Match Rule:} The longest initial
       
   401       substring matched by any regular expression is taken as next token.
       
   402     \item \textbf{Priority Rule:}  For a particular longest initial
       
   403       substring, the first (leftmost) regular expression that can match
       
   404       determines the token.
       
   405     \item \ldots  
       
   406     \end{itemize}
       
   407     \begin{center}
       
   408     \bl{$(a + ab) \cdot (bc + c)$} \;\;and\;\; \bl{\texttt{"}$abc$\texttt{"}}
       
   409     \end{center}  
       
   410   \end{mybox3}
       
   411 \end{textblock}}
       
   412 
       
   413 \end{frame}
       
   414 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   415 
       
   416 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   417 \begin{frame}[t]
       
   418 \frametitle{Quiz 1}
       
   419 \begin{bubble}[10cm]\it
       
   420   There are many, many regular expression libraries.\bigskip\\
       
   421   
       
   422   \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
       
   423   difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
       
   424 \end{bubble}
       
   425 
       
   426 \begin{textblock}{12}(1,9)
       
   427   \begin{tabular}{p{4cm}|p{4cm}|p{2cm}}
       
   428     atrociously slow (s't) & pretty lazy (s't) & \textcolor{gray}{OTBT}\medskip\\
       
   429     Python, Ruby, Swift, Dart, JavaScript & Rust, Go, RE2 & \textcolor{gray}{Snort, .Net7}\\
       
   430   \end{tabular}
       
   431 \end{textblock}
       
   432 
       
   433 \end{frame}
       
   434 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   435 
       
   436 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   437 \begin{frame}[t]
       
   438 
       
   439 \mbox{}\\[10mm]  
       
   440 
       
   441 \begin{columns}[t,onlytextwidth]  
       
   442 \begin{column}{.2\textwidth}
       
   443 \raisebox{-10mm}{    
       
   444 \begin{tikzpicture}
       
   445   \begin{axis}[
       
   446     xlabel={\bl{$n$} \pcode{a}s},
       
   447     %%x label style={at={(1.05,0.0)}},
       
   448     ylabel={time in secs},
       
   449     enlargelimits=false,
       
   450     xtick={0,10,...,30},
       
   451     xmax=35,
       
   452     ymax=35,
       
   453     ytick={0,5,...,30},
       
   454     scaled ticks=false,
       
   455     axis lines=left,
       
   456     width=5cm,
       
   457     height=5cm, 
       
   458     legend entries={Python,JavaScript,Swift,Dart},
       
   459     legend style={font=\small,at={(0.5,-0.39)},anchor=north},
       
   460     legend cell align=left]
       
   461 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
       
   462 \addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
       
   463 \addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
       
   464 \addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
       
   465 \end{axis}
       
   466 \end{tikzpicture}}
       
   467 \end{column}
       
   468 
       
   469 \begin{column}{.2\textwidth}
       
   470 \begin{tikzpicture}
       
   471   \begin{axis}[
       
   472     xlabel={\bl{$n$} \pcode{a}s},
       
   473     %ylabel={time in secs},
       
   474     enlargelimits=false,
       
   475     xtick={0,10,...,30},
       
   476     xmax=35,
       
   477     ymax=35,
       
   478     ytick={0,5,...,30},
       
   479     scaled ticks=false,
       
   480     axis lines=left,
       
   481     width=5cm,
       
   482     height=5cm, 
       
   483     legend entries={Python,Ruby},
       
   484     legend style={font=\small,at={(0.5,-0.39)},anchor=north},
       
   485     legend cell align=left]
       
   486 \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
       
   487 \addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};  
       
   488 \end{axis}
       
   489 \end{tikzpicture}  
       
   490 \end{column}  
       
   491 \end{columns}
       
   492 
       
   493 \begin{textblock}{4}(9,1.7)
       
   494 \textbf{\bl{\texttt{(a?)\boldmath$^{\{n\}}\cdot$(a)\boldmath$^{\{n\}}$}}}
       
   495 \end{textblock}
       
   496 
       
   497 \begin{textblock}{4}(4,1.7)  
       
   498 \textbf{\bl{\texttt{(a*)*$\cdot$b}}}
       
   499 \end{textblock}
       
   500 
       
   501 \begin{textblock}{3.4}(0.3,10)
       
   502 \small{}\textbf{matching with strings}
       
   503 \textbf{\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}}  
       
   504 \end{textblock}
       
   505 
       
   506 \end{frame}
       
   507 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   508 
       
   509 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      
       
   510 \begin{frame}[t]
       
   511 \frametitle{Quiz 1}
       
   512 \begin{bubble}[10cm]\it
       
   513   \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
       
   514   difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
       
   515 \end{bubble}
       
   516 
       
   517 \begin{textblock}{12}(1.7,7)
       
   518 For \textbf{Perl-style} regular expression matchers (say Python, JavaScript, etc), the
       
   519 answer is:
       
   520 \only<2->{\begin{center}
       
   521 \fontspec{Hoefler Text Black}\textcolor{ProcessBlue}{\huge{}NP}
       
   522 \end{center}}  
       
   523 \end{textblock}
       
   524 
       
   525 \begin{textblock}{12}(1.7,12)
       
   526 \only<3->{Backreferences: \bl{(\ldots)$\,\ldots{}\backslash{}n$}}
       
   527 \end{textblock}  
       
   528 \end{frame}
       
   529 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   530 
       
   531 
       
   532 
       
   533 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   534 \begin{frame}
       
   535 \frametitle{Quiz 2}
       
   536 
       
   537 \begin{textblock}{12}(2,2)
       
   538 \onslide<1->{ 
       
   539   \begin{bubble}[9.5cm]\it
       
   540   Quiz:\smallskip\\  
       
   541   \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
       
   542 \end{bubble}}
       
   543 \end{textblock}
       
   544 
       
   545 \only<2->{
       
   546 \begin{textblock}{12}(1,6)
       
   547 \begin{tabular}{ll}
       
   548   Google's RE2 lib & $\Rightarrow$ \bl{$a^{\{1001\}}$} is too big\\
       
   549   \onslide<3->{Rust Regex lib}    & \onslide<5->{$\Rightarrow$ \bl{$a^{\{1000\}\{100\}\{5\}}$} too big}\\
       
   550                     & \onslide<6->{$\Rightarrow$ \bl{$a^{\{0\}\{4294967295\}}$} ?}
       
   551 \end{tabular}
       
   552 \end{textblock}}
       
   553 
       
   554 \only<4>{
       
   555 \begin{textblock}{13.5}(1.5,4) 
       
   556   \begin{mybox3}{From Rust's Regex Description}\it 
       
   557     ``\ldots [the] syntax is similar to Perl-style regular expressions, but lacks a few features like look
       
   558     around and backreferences. In exchange, all searches execute in linear time with respect
       
   559     to the size of the regular expression and search text. \ldots''
       
   560   \end{mybox3}
       
   561 \end{textblock}}
       
   562 
       
   563 
       
   564 \end{frame}  
       
   565 
       
   566 
       
   567 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   568 \begin{frame}[t]
       
   569 \frametitle{Regular Expressions}
       
   570 
       
   571 \begin{textblock}{6}(2,5)
       
   572   \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
       
   573   \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
       
   574          & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} \\
       
   575          & \bl{$\mid$} & \bl{$c, d, \ldots$}                         & characters\\
       
   576          & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
       
   577          & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
       
   578          & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
       
   579   \end{tabular}
       
   580   \end{textblock}
       
   581   
       
   582 \end{frame}
       
   583 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   584 
       
   585 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   586 \begin{frame}[c]
       
   587 \frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}
       
   588 
       
   589 \large
       
   590 If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular 
       
   591 expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip
       
   592 
       
   593 \small
       
   594 \bl{$\der\,c\,r$} gives the answer, Brzozowski 1964\medskip\\
       
   595 (lost in the sands of time, re-appeared in 2009)
       
   596 \end{frame}
       
   597 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   598 
       
   599 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   600 \begin{frame}[c]
       
   601 \frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}
       
   602 
       
   603 \begin{center}
       
   604 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
       
   605   \bl{$\der\, c\, (\ZERO)$}      & \bl{$\dn$} & \bl{$\ZERO$} & \\
       
   606   \bl{$\der\, c\, (\ONE)$}           & \bl{$\dn$} & \bl{$\ZERO$} & \\
       
   607   \bl{$\der\, c\, (d)$}                     & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\
       
   608   \bl{$\der\, c\, (r_1 + r_2)$}        & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\
       
   609   \bl{$\der\, c\, (r_1 \cdot r_2)$}  & \bl{$\dn$}  & \bl{if $nullable (r_1)$}\\
       
   610   & & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\ 
       
   611   & & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\
       
   612   \bl{$\der\, c\, (r^*)$}  & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\medskip\bigskip\\
       
   613   \end{tabular}
       
   614 \end{center}
       
   615 
       
   616 \end{frame}
       
   617 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   618 
       
   619 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   620 \begin{frame}[t]
       
   621 \frametitle{\begin{tabular}{l}Brzozowski matcher\end{tabular}}
       
   622 
       
   623 Does \bl{$r_1$} match \bl{"$abc$"}?
       
   624 \begin{center}
       
   625 \begin{tabular}{@{}rl@{}l@{}}
       
   626 Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\
       
   627 Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\
       
   628 Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\
       
   629 Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\
       
   630         & test whether \bl{$r_4$} can recognise\\
       
   631         & the empty string\medskip\\
       
   632 Output: & result of the test\\
       
   633         & $\Rightarrow \bl{\textit{true}} \,\text{or}\, 
       
   634                        \bl{\textit{false}}$\\        
       
   635 \end{tabular}
       
   636 \end{center}
       
   637 
       
   638 
       
   639 \end{frame}
       
   640 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   641 
       
   642 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   643 \begin{frame}[c]
       
   644 \frametitle{\mbox{Nullable}}
       
   645 
       
   646 
       
   647 \ldots{}whether a regular expression can match the empty string:
       
   648 \begin{center}
       
   649 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   650 \bl{$nullable(\ZERO)$}    & \bl{$\dn$} & \bl{\textit{false}}\\
       
   651 \bl{$nullable(\ONE)$}       & \bl{$\dn$} & \bl{\textit{true}}\\
       
   652 \bl{$nullable (c)$}             & \bl{$\dn$} & \bl{\textit{false}}\\
       
   653 \bl{$nullable (r_1 + r_2)$}     & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\ 
       
   654 \bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\
       
   655 \bl{$nullable (r^*)$}           & \bl{$\dn$} & \bl{\textit{true}}\\
       
   656 \end{tabular}
       
   657 \end{center}
       
   658 
       
   659 \end{frame}
       
   660 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   661 
       
   662 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   663 \begin{frame}[t]
       
   664 \frametitle{\begin{tabular}{l}Extensions\end{tabular}}
       
   665 
       
   666 \begin{center}
       
   667 \begin{tabular}{@ {\hspace{-4mm}}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
       
   668   \bl{$\der\, c\, (r^{\{n\}})$}      & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
       
   669   \bl{$\der\, c\, (r^{\{..n\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
       
   670   \bl{$\der\, c\, (r^{\{n..\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;(der\,c\,r)\cdot r^*$} \\
       
   671                                     &            & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
       
   672   \bl{$\der\, c\, (r^{\{n..m\}})$}   & \bl{$\dn$} & \bl{$if\;n=0 \wedge m=0\;then\;\ZERO\;else$} & \\
       
   673                                      &           & \bl{$if \;n=0 \wedge 0<m\; then\;(der\,c\,r)\cdot r^{\{..m-1\}}$} & \\
       
   674                                      &           & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1..m-1\}}$} & \\
       
   675   \bl{$\der\, c\, (\neg{}r)$}            & \bl{$\dn$}  & \bl{$\neg(der\,c\,r)$}\\
       
   676   \end{tabular}
       
   677 \end{center}
       
   678 
       
   679 also works for lookarounds, various anchors, etc, but \textbf{not} for backreferences
       
   680 
       
   681 \begin{textblock}{3}(11,13)
       
   682 \onslide<2>{ 
       
   683   \begin{bubble}[3cm]
       
   684   \bl{$a^{\{0\}\{4294967295\}}$}
       
   685 \end{bubble}}
       
   686 \end{textblock}
       
   687 
       
   688 \end{frame}
       
   689 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   690 
       
   691 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   692 \begin{frame}[t]
       
   693 \frametitle{\mbox{Problems with Ders}}
       
   694 
       
   695 \textcolor{blue}{
       
   696 \small
       
   697 \def\ll{\stackrel{der\,a}{\longrightarrow}}
       
   698 \begin{center}
       
   699 \begin{tabular}{@{\hspace{-5mm}}rll}
       
   700 $(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
       
   701 & $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
       
   702 & $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
       
   703 & & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
       
   704 & $\ll$ & \ldots\\ \hspace{15mm}
       
   705 \end{tabular}
       
   706 \end{center}}
       
   707 
       
   708 \begin{textblock}{13.5}(1.5,12) 
       
   709 (regular expressions of sizes 98, 169, 283, 468, 767, \ldots)
       
   710 \end{textblock}
       
   711 
       
   712 \end{frame}
       
   713 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   714 
       
   715 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   716 \begin{frame}[c]
       
   717 \frametitle{\mbox{Conclusion}}
       
   718 
       
   719 \begin{itemize}
       
   720 \item The beauty is that this only involves functional programs that can be conveniently reasoned about in theorem provers 
       
   721 \item POSIX lexing can be done via an extension by Sulzmann and Lu, 2014 (our current work)\medskip\pause
       
   722 \item How surprising that one can still do new work on regular expressions.  
       
   723 \end{itemize} 
       
   724 
       
   725 \end{frame}
       
   726 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   727 
       
   728 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   729 
       
   730 \begin{frame}<1-10>
       
   731 \end{frame}  
       
   732 
       
   733 
       
   734 
       
   735 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   736 \end{document}
       
   737 
       
   738 %%% Local Variables:  
       
   739 %%% mode: latex
       
   740 %%% TeX-master: t
       
   741 %%% End: 
       
   742