| 
876
 | 
     1  | 
% !TEX program = xelatex
  | 
| 
913
 | 
     2  | 
\documentclass[dvipsnames,14pt,t,xelatex,xcolor={table}]{beamer}
 | 
| 
876
 | 
     3  | 
\usepackage{../slides}
 | 
| 
 | 
     4  | 
\usepackage{../graphicss}
 | 
| 
 | 
     5  | 
\usepackage{../langs}
 | 
| 
 | 
     6  | 
\usepackage{../data}
 | 
| 
 | 
     7  | 
\usetikzlibrary{cd}
 | 
| 
 | 
     8  | 
  | 
| 
 | 
     9  | 
  | 
| 
 | 
    10  | 
\usepackage{tcolorbox}
 | 
| 
 | 
    11  | 
\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
 | 
| 
 | 
    12  | 
\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
 | 
| 
 | 
    13  | 
\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
 | 
| 
 | 
    14  | 
  | 
| 
 | 
    15  | 
  | 
| 
959
 | 
    16  | 
\setbeamersize{text margin left=2mm} % <- like this
 | 
| 
 | 
    17  | 
\hfuzz=220pt
  | 
| 
876
 | 
    18  | 
  | 
| 
 | 
    19  | 
\lstset{language=Scala,
 | 
| 
 | 
    20  | 
        style=mystyle,
  | 
| 
 | 
    21  | 
        numbersep=0pt,
  | 
| 
 | 
    22  | 
        numbers=none,
  | 
| 
 | 
    23  | 
        xleftmargin=0mm}
  | 
| 
 | 
    24  | 
  | 
| 
959
 | 
    25  | 
\pgfplotsset{compat=1.17}
 | 
| 
913
 | 
    26  | 
  | 
| 
959
 | 
    27  | 
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
 | 
| 
 | 
    28  | 
  | 
| 
 | 
    29  | 
% beamer stuff
  | 
| 
 | 
    30  | 
\renewcommand{\slidecaption}{Christian Urban, SSY, King's College London}
 | 
| 
876
 | 
    31  | 
  | 
| 
 | 
    32  | 
  | 
| 
 | 
    33  | 
  | 
| 
913
 | 
    34  | 
\begin{document}
 | 
| 
876
 | 
    35  | 
  | 
| 
959
 | 
    36  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
    37  | 
\begin{frame}[t]
 | 
| 
 | 
    38  | 
\frametitle{\begin{tabular}{@ {\hspace{8mm}}c@ {}}Fast Regular Expression Matching\end{tabular}}
 | 
| 
 | 
    39  | 
\mbox{}\\[1mm]
 | 
| 
 | 
    40  | 
  | 
| 
 | 
    41  | 
\begin{columns}[t,onlytextwidth]
 | 
| 
 | 
    42  | 
\begin{column}{.2\textwidth}
 | 
| 
 | 
    43  | 
\raisebox{-10mm}{
 | 
| 
 | 
    44  | 
\begin{tikzpicture}
 | 
| 
 | 
    45  | 
  \begin{axis}[
 | 
| 
 | 
    46  | 
    xlabel={size of strings},
 | 
| 
 | 
    47  | 
    x label style={at={(0.45,-0.16)}},
 | 
| 
 | 
    48  | 
    ylabel={time in secs},
 | 
| 
 | 
    49  | 
    enlargelimits=false,
  | 
| 
 | 
    50  | 
    xtick={0,10,...,30},
 | 
| 
 | 
    51  | 
    xmax=35,
  | 
| 
 | 
    52  | 
    ymax=35,
  | 
| 
 | 
    53  | 
    ytick={0,10,...,30},
 | 
| 
 | 
    54  | 
    scaled ticks=false,
  | 
| 
 | 
    55  | 
    axis lines=left,
  | 
| 
 | 
    56  | 
    width=5cm,
  | 
| 
 | 
    57  | 
    height=4.5cm,
  | 
| 
 | 
    58  | 
    legend entries={Python,JavaScript,Swift,Dart},
 | 
| 
 | 
    59  | 
    legend style={font=\footnotesize,at={(0.45,-0.48)},anchor=north},
 | 
| 
 | 
    60  | 
    legend cell align=left]
  | 
| 
 | 
    61  | 
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
 | 
| 
 | 
    62  | 
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
 | 
| 
 | 
    63  | 
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
 | 
| 
 | 
    64  | 
\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
 | 
| 
 | 
    65  | 
\end{axis}
 | 
| 
 | 
    66  | 
\end{tikzpicture}}
 | 
| 
 | 
    67  | 
\end{column}
 | 
| 
 | 
    68  | 
  | 
| 
 | 
    69  | 
\end{columns}
 | 
| 
 | 
    70  | 
  | 
| 
 | 
    71  | 
\begin{textblock}{4}(3.5,3.2)
 | 
| 
 | 
    72  | 
{\bl{\texttt{(a*)*$\cdot$b}}}
 | 
| 
 | 
    73  | 
\end{textblock}
 | 
| 
 | 
    74  | 
  | 
| 
 | 
    75  | 
\begin{textblock}{7.5}(8,3)
 | 
| 
 | 
    76  | 
\begin{itemize}
 | 
| 
 | 
    77  | 
\item use \textbf{Brzozowski derivatives} for regex matching rather\\ than NFAs/DFAs% and lexing
 | 
| 
 | 
    78  | 
\item based on work by \textbf{Christian Urban} and \textbf{Roy Dyckhoff}\bigskip
 | 
| 
 | 
    79  | 
\item applications in network security (traffic filtering)
  | 
| 
 | 
    80  | 
\item formal verification of correctness and speed (\textbf{Isabelle} theorem prover)
 | 
| 
 | 
    81  | 
\end{itemize}
 | 
| 
 | 
    82  | 
\end{textblock}
 | 
| 
 | 
    83  | 
  | 
| 
 | 
    84  | 
\end{frame}
 | 
| 
 | 
    85  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
    86  | 
  | 
| 
 | 
    87  | 
  | 
| 
 | 
    88  | 
\end{document}
 | 
| 
 | 
    89  | 
  | 
| 
876
 | 
    90  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
    91  | 
\begin{frame}[t]
 | 
| 
959
 | 
    92  | 
\frametitle{%
 | 
| 
876
 | 
    93  | 
  \begin{tabular}{@ {}c@ {}}
 | 
| 
913
 | 
    94  | 
    \\[8mm]
  | 
| 
 | 
    95  | 
    \LARGE Derivative-Based\\
  | 
| 
 | 
    96  | 
    \LARGE Regular Expression Matching\\[5mm]
  | 
| 
 | 
    97  | 
    \normalsize\textcolor{gray}{Christian Urban, SSY Group}
 | 
| 
876
 | 
    98  | 
  \end{tabular}}
 | 
| 
 | 
    99  | 
  | 
| 
 | 
   100  | 
\end{frame}
 | 
| 
 | 
   101  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   102  | 
  | 
| 
959
 | 
   103  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   104  | 
\begin{frame}[c]
 | 
| 
 | 
   105  | 
\frametitle{A Bit of Background}
 | 
| 
876
 | 
   106  | 
  | 
| 
913
 | 
   107  | 
\begin{textblock}{10}(2,2.5)
 | 
| 
 | 
   108  | 
\includegraphics[scale=0.4]{../pics/phd-1.jpg}
 | 
| 
 | 
   109  | 
\end{textblock}
 | 
| 
876
 | 
   110  | 
  | 
| 
913
 | 
   111  | 
\begin{textblock}{11}(1.7,10)
 | 
| 
 | 
   112  | 
\begin{bubble}[9.6cm]
 | 
| 
 | 
   113  | 
  PhD thesis on a particular term-rewriting system:\medskip\\
  | 
| 
959
 | 
   114  | 
  Proved that all rewriting sequences are strongly normalising (terminate).
  | 
| 
876
 | 
   115  | 
\end{bubble}
 | 
| 
913
 | 
   116  | 
\end{textblock}
 | 
| 
876
 | 
   117  | 
  | 
| 
913
 | 
   118  | 
\only<2->{%
 | 
| 
 | 
   119  | 
\begin{textblock}{3}(10,2.5)
 | 
| 
 | 
   120  | 
\begin{bubble}[3cm]
 | 
| 
 | 
   121  | 
\begin{tabular}{rcl}
 | 
| 
 | 
   122  | 
  \bl{$t$} & \bl{::=} & \bl{$x$}\\
 | 
| 
 | 
   123  | 
         & \bl{|} & \bl{$\lambda x.\,t$}\\
 | 
| 
 | 
   124  | 
         & \bl{|} & \bl{$t_1\;t_2$}\\
 | 
| 
 | 
   125  | 
         & \bl{|} & \bl{...}
 | 
| 
959
 | 
   126  | 
\end{tabular}
 | 
| 
 | 
   127  | 
\end{bubble}
 | 
| 
876
 | 
   128  | 
\end{textblock}}
 | 
| 
 | 
   129  | 
  | 
| 
 | 
   130  | 
\end{frame}
 | 
| 
879
 | 
   131  | 
  | 
| 
 | 
   132  | 
  | 
| 
 | 
   133  | 
  | 
| 
876
 | 
   134  | 
  | 
| 
959
 | 
   135  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   136  | 
  | 
| 
959
 | 
   137  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   138  | 
\begin{frame}[t]
 | 
| 
 | 
   139  | 
\frametitle{Quiz 1}
 | 
| 
 | 
   140  | 
\begin{bubble}[10cm]\it
 | 
| 
 | 
   141  | 
  There are many, many regular expression libraries.\bigskip\\
  | 
| 
959
 | 
   142  | 
  | 
| 
913
 | 
   143  | 
  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
 | 
| 
 | 
   144  | 
  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
 | 
| 
 | 
   145  | 
\end{bubble}
 | 
| 
 | 
   146  | 
  | 
| 
 | 
   147  | 
\begin{textblock}{12}(2,9)
 | 
| 
 | 
   148  | 
\begin{tikzpicture}
 | 
| 
 | 
   149  | 
  \draw(0,0)  node[rotate=20]{linear};
 | 
| 
 | 
   150  | 
  \draw(3,0)  node[rotate=-20]{\bl{$O(n \log n)$}};
 | 
| 
 | 
   151  | 
  \draw(3,-2) node[rotate=10]{quadratic};
 | 
| 
 | 
   152  | 
  \draw(6,0) node[rotate=20]{P};
 | 
| 
 | 
   153  | 
  \draw(7,0) node[rotate=-20]{NP};
 | 
| 
 | 
   154  | 
  \draw(7,-1) node[rotate=0]{\bl{$O(2^n)$}};
 | 
| 
 | 
   155  | 
\end{tikzpicture}
 | 
| 
 | 
   156  | 
\end{textblock}
 | 
| 
 | 
   157  | 
  | 
| 
 | 
   158  | 
\end{frame}
 | 
| 
959
 | 
   159  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   160  | 
  | 
| 
959
 | 
   161  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   162  | 
\begin{frame}[t]
 | 
| 
 | 
   163  | 
\frametitle{Quiz 2}
 | 
| 
 | 
   164  | 
  | 
| 
 | 
   165  | 
\begin{textblock}{12}(2,2.5)
 | 
| 
 | 
   166  | 
A regular expression for (some) email addresses:
  | 
| 
 | 
   167  | 
\begin{center}
 | 
| 
 | 
   168  | 
  ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\_\!\!\_\,.-]^+}}_{\textrm{name}}$)\bl{$\,@\,$}
 | 
| 
 | 
   169  | 
  ($\underbrace{\bl{[a\mbox{-}z0\mbox{-}9\,-]^+}}_{\textrm{domain}}$) \bl{$\,.\,$}
 | 
| 
 | 
   170  | 
  ($\underbrace{\bl{[a\mbox{-}z\,.]^{\{2..5\}}}}_{\textrm{top-level domain}}$)
 | 
| 
959
 | 
   171  | 
\end{center}
 | 
| 
913
 | 
   172  | 
\end{textblock}
 | 
| 
 | 
   173  | 
  | 
| 
 | 
   174  | 
\only<2>{
 | 
| 
 | 
   175  | 
\begin{textblock}{10}(4,8)
 | 
| 
 | 
   176  | 
bounded regular expressions:\medskip\\
  | 
| 
959
 | 
   177  | 
\begin{tabular}{ll}
 | 
| 
913
 | 
   178  | 
  \bl{$r^{\{n\}}$}     & exactly n-times \bl{$r$}\\
 | 
| 
 | 
   179  | 
  \bl{$r^{\{n..m\}}$}  & between n and m-times\\
 | 
| 
 | 
   180  | 
  \bl{$r^{\{n..\}}$}   & from n-times\\
 | 
| 
 | 
   181  | 
  \bl{$r^{\{..m\}}$}   & upto m-times\\
 | 
| 
 | 
   182  | 
\end{tabular}\smallskip\\
 | 
| 
 | 
   183  | 
\footnotesize\hfill ${}^*$ in some applications the counters can be in the millions
 | 
| 
 | 
   184  | 
\end{textblock}}
 | 
| 
959
 | 
   185  | 
  | 
| 
913
 | 
   186  | 
\end{frame}
 | 
| 
959
 | 
   187  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   188  | 
  | 
| 
 | 
   189  | 
  | 
| 
959
 | 
   190  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   191  | 
\begin{frame}[t]
 | 
| 
 | 
   192  | 
\frametitle{Quiz 2}
 | 
| 
 | 
   193  | 
  | 
| 
 | 
   194  | 
  | 
| 
 | 
   195  | 
\begin{textblock}{12}(2,2.5)
 | 
| 
 | 
   196  | 
  \onslide<1->{Thompson construction: By recursion we are given two NFAs for $r_1$ and $r_2$.\medskip\\
 | 
| 
 | 
   197  | 
    \onslide<2->{For $r_1\cdot r_2$:}\bigskip}
 | 
| 
 | 
   198  | 
\end{textblock}
 | 
| 
 | 
   199  | 
  | 
| 
 | 
   200  | 
\begin{textblock}{12}(2,6)
 | 
| 
 | 
   201  | 
\onslide<1>{
 | 
| 
 | 
   202  | 
\begin{tikzpicture}[node distance=3mm,
 | 
| 
 | 
   203  | 
    >=stealth',very thick,
  | 
| 
 | 
   204  | 
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20}]
 | 
| 
 | 
   205  | 
\node[state, initial]  (Q_0)  {$\mbox{}$};
 | 
| 
959
 | 
   206  | 
\node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};
 | 
| 
 | 
   207  | 
\node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};
 | 
| 
913
 | 
   208  | 
\node (R_1)  [right=of Q_0] {$\ldots$};
 | 
| 
 | 
   209  | 
\node[state, accepting]  (T_1)  [right=of R_1] {$\mbox{}$};
 | 
| 
 | 
   210  | 
\node[state, accepting]  (T_2)  [above=of T_1] {$\mbox{}$};
 | 
| 
 | 
   211  | 
\node[state, accepting]  (T_3)  [below=of T_1] {$\mbox{}$};
 | 
| 
 | 
   212  | 
  | 
| 
 | 
   213  | 
\node (A_0)  [right=2.5cm of T_1] {$\mbox{}$};
 | 
| 
 | 
   214  | 
\node[state, initial]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
 | 
| 
 | 
   215  | 
\node[state, initial]  (A_02)  [below=1mm of A_0] {$\mbox{}$};
 | 
| 
 | 
   216  | 
  | 
| 
 | 
   217  | 
\node (b_1)  [right=of A_0] {$\ldots$};
 | 
| 
 | 
   218  | 
\node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
 | 
| 
 | 
   219  | 
\node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
 | 
| 
 | 
   220  | 
\node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
 | 
| 
 | 
   221  | 
\begin{pgfonlayer}{background}
 | 
| 
 | 
   222  | 
  \node (1) [rounded corners, inner sep=1mm, thick,
  | 
| 
 | 
   223  | 
    draw=black!60, fill=black!20, fit= (Q_0) (R_1) (T_1) (T_2) (T_3)] {};
 | 
| 
 | 
   224  | 
  \node (2) [rounded corners, inner sep=1mm, thick,
  | 
| 
 | 
   225  | 
    draw=black!60, fill=black!20, fit= (A_0) (b_1) (c_1) (c_2) (c_3)] {};
 | 
| 
 | 
   226  | 
\node [yshift=2mm] at (1.north) {$r_1$};
 | 
| 
 | 
   227  | 
\node [yshift=2mm] at (2.north) {$r_2$};
 | 
| 
 | 
   228  | 
\end{pgfonlayer}
 | 
| 
 | 
   229  | 
\end{tikzpicture}}
 | 
| 
 | 
   230  | 
\end{textblock}
 | 
| 
 | 
   231  | 
  | 
| 
 | 
   232  | 
\begin{textblock}{12}(2,6)
 | 
| 
 | 
   233  | 
\onslide<2>{
 | 
| 
 | 
   234  | 
\begin{tikzpicture}[node distance=3mm,
 | 
| 
 | 
   235  | 
    >=stealth',very thick,
  | 
| 
 | 
   236  | 
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
 | 
| 
 | 
   237  | 
\node[state, initial]  (Q_0)  {$\mbox{}$};
 | 
| 
959
 | 
   238  | 
\node[state, initial]  (Q_01) [below=1mm of Q_0] {$\mbox{}$};
 | 
| 
 | 
   239  | 
\node[state, initial]  (Q_02) [above=1mm of Q_0] {$\mbox{}$};
 | 
| 
913
 | 
   240  | 
\node (r_1)  [right=of Q_0] {$\ldots$};
 | 
| 
 | 
   241  | 
\node[state]  (t_1)  [right=of r_1] {$\mbox{}$};
 | 
| 
 | 
   242  | 
\node[state]  (t_2)  [above=of t_1] {$\mbox{}$};
 | 
| 
 | 
   243  | 
\node[state]  (t_3)  [below=of t_1] {$\mbox{}$};
 | 
| 
 | 
   244  | 
  | 
| 
 | 
   245  | 
\node  (A_0)  [right=2.5cm of t_1] {$\mbox{}$};
 | 
| 
 | 
   246  | 
\node[state]  (A_01)  [above=1mm of A_0] {$\mbox{}$};
 | 
| 
 | 
   247  | 
\node[state]  (A_02)  [below=1mm of A_0] {$\mbox{}$};
 | 
| 
 | 
   248  | 
  | 
| 
 | 
   249  | 
\node (b_1)  [right=of A_0] {$\ldots$};
 | 
| 
 | 
   250  | 
\node[state, accepting]  (c_1)  [right=of b_1] {$\mbox{}$};
 | 
| 
 | 
   251  | 
\node[state, accepting]  (c_2)  [above=of c_1] {$\mbox{}$};
 | 
| 
 | 
   252  | 
\node[state, accepting]  (c_3)  [below=of c_1] {$\mbox{}$};
 | 
| 
 | 
   253  | 
\path[->] (t_1) edge (A_01);
  | 
| 
 | 
   254  | 
\path[->] (t_2) edge node [above]  {$\varepsilon$\footnotesize{}s} (A_01);
 | 
| 
 | 
   255  | 
\path[->] (t_3) edge (A_01);
  | 
| 
 | 
   256  | 
\path[->] (t_1) edge (A_02);
  | 
| 
 | 
   257  | 
\path[->] (t_2) edge (A_02);
  | 
| 
 | 
   258  | 
\path[->] (t_3) edge node [below]  {$\varepsilon$\footnotesize{}s} (A_02);
 | 
| 
959
 | 
   259  | 
  | 
| 
913
 | 
   260  | 
\begin{pgfonlayer}{background}
 | 
| 
 | 
   261  | 
  \node (3) [rounded corners, inner sep=1mm, thick,
  | 
| 
 | 
   262  | 
    draw=black!60, fill=black!20, fit= (Q_0) (c_1) (c_2) (c_3)] {};
 | 
| 
 | 
   263  | 
\node [yshift=2mm] at (3.north) {$r_1\cdot r_2$};
 | 
| 
 | 
   264  | 
\end{pgfonlayer}
 | 
| 
 | 
   265  | 
\end{tikzpicture}}
 | 
| 
 | 
   266  | 
\end{textblock}
 | 
| 
 | 
   267  | 
  | 
| 
 | 
   268  | 
  | 
| 
 | 
   269  | 
\end{frame}
 | 
| 
959
 | 
   270  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   271  | 
  | 
| 
959
 | 
   272  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   273  | 
\begin{frame}[t]
 | 
| 
 | 
   274  | 
\frametitle{Quiz 2}
 | 
| 
 | 
   275  | 
  | 
| 
 | 
   276  | 
\begin{textblock}{12}(2,2.5)
 | 
| 
 | 
   277  | 
  \onslide<1->{Thompson construction: By recursion we have a NFA for $r$.\medskip\\
 | 
| 
 | 
   278  | 
  \onslide<2->{For $r^*$:\bigskip}}
 | 
| 
 | 
   279  | 
\end{textblock}
 | 
| 
 | 
   280  | 
  | 
| 
 | 
   281  | 
\begin{textblock}{12}(4,6)
 | 
| 
959
 | 
   282  | 
\onslide<1>{
 | 
| 
913
 | 
   283  | 
\begin{tikzpicture}[node distance=3mm,
 | 
| 
 | 
   284  | 
    >=stealth',very thick,
  | 
| 
 | 
   285  | 
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
 | 
| 
 | 
   286  | 
    baseline=(current bounding box.north)]
  | 
| 
 | 
   287  | 
\node (2)  {$\mbox{}$};
 | 
| 
 | 
   288  | 
\node[state, initial]  (4)  [above=1mm of 2] {$\mbox{}$};
 | 
| 
959
 | 
   289  | 
\node[state, initial]  (5)  [below=1mm of 2] {$\mbox{}$};
 | 
| 
913
 | 
   290  | 
\node (a)  [right=of 2] {$\ldots$};
 | 
| 
 | 
   291  | 
\node[state, accepting]  (a1)  [right=of a] {$\mbox{}$};
 | 
| 
 | 
   292  | 
\node[state, accepting]  (a2)  [above=of a1] {$\mbox{}$};
 | 
| 
 | 
   293  | 
\node[state, accepting]  (a3)  [below=of a1] {$\mbox{}$};
 | 
| 
 | 
   294  | 
\begin{pgfonlayer}{background}
 | 
| 
 | 
   295  | 
\node (1) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (2) (a1) (a2) (a3)] {};
 | 
| 
 | 
   296  | 
\node [yshift=3mm] at (1.north) {$r$};
 | 
| 
 | 
   297  | 
\end{pgfonlayer}
 | 
| 
 | 
   298  | 
\end{tikzpicture}}
 | 
| 
 | 
   299  | 
\end{textblock}
 | 
| 
876
 | 
   300  | 
  | 
| 
913
 | 
   301  | 
\begin{textblock}{12}(2,6)
 | 
| 
959
 | 
   302  | 
\onslide<2->{
 | 
| 
913
 | 
   303  | 
\begin{tikzpicture}[node distance=3mm,
 | 
| 
 | 
   304  | 
    >=stealth',very thick,
  | 
| 
 | 
   305  | 
    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},
 | 
| 
 | 
   306  | 
    baseline=(current bounding box.north)]
  | 
| 
 | 
   307  | 
\node at (0,0) [state, initial,accepting]  (1)  {$\mbox{}$};
 | 
| 
 | 
   308  | 
\node (2)  [right=16mm of 1] {$\mbox{}$};
 | 
| 
 | 
   309  | 
\node[state]  (4)  [above=1mm of 2] {$\mbox{}$};
 | 
| 
959
 | 
   310  | 
\node[state]  (5)  [below=1mm of 2] {$\mbox{}$};
 | 
| 
913
 | 
   311  | 
\node (a)  [right=of 2] {$\ldots$};
 | 
| 
 | 
   312  | 
\node[state]  (a1)  [right=of a] {$\mbox{}$};
 | 
| 
 | 
   313  | 
\node[state]  (a2)  [above=of a1] {$\mbox{}$};
 | 
| 
 | 
   314  | 
\node[state]  (a3)  [below=of a1] {$\mbox{}$};
 | 
| 
 | 
   315  | 
\path[->] (1) edge node [below]  {$\varepsilon$} (4);
 | 
| 
 | 
   316  | 
\path[->] (1) edge node [below]  {$\varepsilon$} (5);
 | 
| 
 | 
   317  | 
\path[->] (a1) edge [bend left=45] node [below]  {$\varepsilon$} (1);
 | 
| 
 | 
   318  | 
\path[->] (a2) edge [bend right] node [below]  {$\varepsilon$} (1);
 | 
| 
 | 
   319  | 
\path[->] (a3) edge [bend left=45] node [below]  {$\varepsilon$} (1);
 | 
| 
 | 
   320  | 
\begin{pgfonlayer}{background}
 | 
| 
 | 
   321  | 
\node (2) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3)] {};
 | 
| 
 | 
   322  | 
\node [yshift=3mm] at (2.north) {$r^*$};
 | 
| 
 | 
   323  | 
\end{pgfonlayer}
 | 
| 
 | 
   324  | 
\end{tikzpicture}}
 | 
| 
 | 
   325  | 
\end{textblock}
 | 
| 
 | 
   326  | 
  | 
| 
 | 
   327  | 
\begin{textblock}{12}(2,12)
 | 
| 
959
 | 
   328  | 
\onslide<3->{
 | 
| 
913
 | 
   329  | 
  \begin{bubble}[9.5cm]\it
 | 
| 
959
 | 
   330  | 
  Quiz:\smallskip\\
  | 
| 
913
 | 
   331  | 
  \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
 | 
| 
 | 
   332  | 
\end{bubble}}
 | 
| 
 | 
   333  | 
\end{textblock}
 | 
| 
 | 
   334  | 
  | 
| 
 | 
   335  | 
\end{frame}
 | 
| 
 | 
   336  | 
  | 
| 
959
 | 
   337  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   338  | 
  | 
| 
959
 | 
   339  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   340  | 
\begin{frame}[t]
 | 
| 
 | 
   341  | 
  \frametitle{Quiz 3}
 | 
| 
 | 
   342  | 
  | 
| 
959
 | 
   343  | 
Hierarchy of Languages:
  | 
| 
913
 | 
   344  | 
  | 
| 
 | 
   345  | 
\begin{center}
 | 
| 
 | 
   346  | 
\begin{tikzpicture}
 | 
| 
959
 | 
   347  | 
[rect/.style={draw=black!50,
 | 
| 
913
 | 
   348  | 
              top color=white,
  | 
| 
959
 | 
   349  | 
              bottom color=black!20,
  | 
| 
 | 
   350  | 
              rectangle,
  | 
| 
 | 
   351  | 
              very thick,
  | 
| 
913
 | 
   352  | 
              rounded corners}, scale=1.2]
  | 
| 
 | 
   353  | 
  | 
| 
 | 
   354  | 
\draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages};
 | 
| 
 | 
   355  | 
\draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages};
 | 
| 
 | 
   356  | 
\draw (0,-0.85) node [rect, text depth=17mm] {\;\;context sensitive languages\;\;};
 | 
| 
 | 
   357  | 
\draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages};
 | 
| 
 | 
   358  | 
\draw (0,-1.4) node [rect] {regular languages};
 | 
| 
 | 
   359  | 
\end{tikzpicture}
 | 
| 
959
 | 
   360  | 
\end{center}
 | 
| 
913
 | 
   361  | 
  | 
| 
 | 
   362  | 
\begin{textblock}{12}(2,11.5)
 | 
| 
959
 | 
   363  | 
\onslide<2->{
 | 
| 
913
 | 
   364  | 
  \begin{bubble}[9.5cm]\it
 | 
| 
 | 
   365  | 
    Quiz:\smallskip\\
  | 
| 
959
 | 
   366  | 
  | 
| 
913
 | 
   367  | 
    \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
 | 
| 
 | 
   368  | 
    Say CYK, LL, LR(k), PEG, \ldots
  | 
| 
 | 
   369  | 
\end{bubble}}
 | 
| 
 | 
   370  | 
\end{textblock}
 | 
| 
 | 
   371  | 
  | 
| 
 | 
   372  | 
\end{frame}
 | 
| 
959
 | 
   373  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   374  | 
  | 
| 
 | 
   375  | 
\defverbatim{\foo}{\footnotesize%
 | 
| 
 | 
   376  | 
\begin{verbatim}
 | 
| 
 | 
   377  | 
(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
  | 
| 
 | 
   378  | 
null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
  | 
| 
959
 | 
   379  | 
| -|~|!|{}|\|\||\+)*.*(?:.*=.*)))
 | 
| 
913
 | 
   380  | 
\end{verbatim}
 | 
| 
 | 
   381  | 
}
  | 
| 
 | 
   382  | 
  | 
| 
959
 | 
   383  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   384  | 
\begin{frame}[t,fragile]
 | 
| 
 | 
   385  | 
\frametitle{Quiz 4}
 | 
| 
 | 
   386  | 
  | 
| 
 | 
   387  | 
\begin{textblock}{12}(2,2.5)
 | 
| 
 | 
   388  | 
\begin{bubble}[9.5cm]\it
 | 
| 
959
 | 
   389  | 
  Quiz:\smallskip\\
  | 
| 
913
 | 
   390  | 
  Do regular expressions have any security relevance\,?
  | 
| 
 | 
   391  | 
\end{bubble}
 | 
| 
 | 
   392  | 
\end{textblock}
 | 
| 
 | 
   393  | 
  | 
| 
 | 
   394  | 
\only<2->{
 | 
| 
 | 
   395  | 
\begin{textblock}{2}(1,6)
 | 
| 
 | 
   396  | 
\includegraphics[scale=0.8]{../pics/zeek.png}
 | 
| 
 | 
   397  | 
\includegraphics[scale=0.17]{../pics/snort.png}
 | 
| 
 | 
   398  | 
\end{textblock}}
 | 
| 
 | 
   399  | 
  | 
| 
 | 
   400  | 
\only<3->{
 | 
| 
 | 
   401  | 
\begin{textblock}{7}(7,5.6)
 | 
| 
 | 
   402  | 
  \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
 | 
| 
 | 
   403  | 
    \footnotesize
  | 
| 
 | 
   404  | 
    It serves more web traffic than Twitter, Amazon, Apple,
  | 
| 
 | 
   405  | 
    Instagram, Bing \& Wikipedia combined.\medskip\\
  | 
| 
 | 
   406  | 
    Web Application Firewall filters out
  | 
| 
 | 
   407  | 
    SQL injection attacks, XSS attacks etc
  | 
| 
 | 
   408  | 
\end{textblock}}
 | 
| 
 | 
   409  | 
  | 
| 
 | 
   410  | 
\only<3->{
 | 
| 
 | 
   411  | 
  \begin{textblock}{13}(4,12.3)
 | 
| 
 | 
   412  | 
  \footnotesize
  | 
| 
 | 
   413  | 
  \hspace{1.5cm}a global outage on 2 July 2019 (first one for six years)
 | 
| 
 | 
   414  | 
  \color{blue}\foo\color{black}
 | 
| 
 | 
   415  | 
   \end{textblock}
 | 
| 
 | 
   416  | 
}
  | 
| 
 | 
   417  | 
  | 
| 
959
 | 
   418  | 
  | 
| 
913
 | 
   419  | 
\end{frame}
 | 
| 
 | 
   420  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   421  | 
  | 
| 
959
 | 
   422  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   423  | 
\begin{frame}[t]
 | 
| 
 | 
   424  | 
  \frametitle{Quiz 3}
 | 
| 
 | 
   425  | 
  | 
| 
959
 | 
   426  | 
\begin{textblock}{12}(2,2)
 | 
| 
913
 | 
   427  | 
  \begin{bubble}[9.5cm]\it
 | 
| 
 | 
   428  | 
    Quiz:\smallskip\\
  | 
| 
959
 | 
   429  | 
  | 
| 
913
 | 
   430  | 
    \textbf{Can we use standard parsing algorithms for matching / lexing\,?}
 | 
| 
 | 
   431  | 
    Say CYK, LL, LR(k), PEG, \ldots
  | 
| 
 | 
   432  | 
\end{bubble}
 | 
| 
 | 
   433  | 
\end{textblock}
 | 
| 
 | 
   434  | 
  | 
| 
 | 
   435  | 
  | 
| 
 | 
   436  | 
\begin{textblock}{12}(1,7)
 | 
| 
 | 
   437  | 
\textbf{POSIX lexing}
 | 
| 
 | 
   438  | 
  | 
| 
 | 
   439  | 
\begin{tabular}{@ {}ll}
 | 
| 
 | 
   440  | 
  Assume you have: & \bl{$r_{key} \dn \texttt{if} + \texttt{then} + \texttt{while} \ldots$}\\
 | 
| 
 | 
   441  | 
                   & \bl{$r_{id\;} \,\dn \texttt{[a-z]} \cdot \texttt{[a-z0-9\_]}^*$}\medskip\\
 | 
| 
 | 
   442  | 
  | 
| 
 | 
   443  | 
  Lexing a string with  & \bl{$(r_{key} + r_{id})^*$}\medskip\\
 | 
| 
 | 
   444  | 
  | 
| 
 | 
   445  | 
  What should be & \\
  | 
| 
 | 
   446  | 
  the result for:  & \bl{\texttt{"iffoo"}} \;\;\; \bl{\texttt{"if"}}\\
 | 
| 
959
 | 
   447  | 
\end{tabular}
 | 
| 
913
 | 
   448  | 
\end{textblock}
 | 
| 
 | 
   449  | 
  | 
| 
 | 
   450  | 
\only<2->{
 | 
| 
959
 | 
   451  | 
\begin{textblock}{13.5}(1.5,4)
 | 
| 
913
 | 
   452  | 
  \begin{mybox3}{POSIX rules}
 | 
| 
 | 
   453  | 
    \begin{itemize}
 | 
| 
 | 
   454  | 
    \item \textbf{The Longest Match Rule:} The longest initial
 | 
| 
 | 
   455  | 
      substring matched by any regular expression is taken as next token.
  | 
| 
 | 
   456  | 
    \item \textbf{Priority Rule:}  For a particular longest initial
 | 
| 
 | 
   457  | 
      substring, the first (leftmost) regular expression that can match
  | 
| 
 | 
   458  | 
      determines the token.
  | 
| 
959
 | 
   459  | 
    \item \ldots
  | 
| 
913
 | 
   460  | 
    \end{itemize}
 | 
| 
 | 
   461  | 
    \begin{center}
 | 
| 
 | 
   462  | 
    \bl{$(a + ab) \cdot (bc + c)$} \;\;and\;\; \bl{\texttt{"}$abc$\texttt{"}}
 | 
| 
959
 | 
   463  | 
    \end{center}
 | 
| 
913
 | 
   464  | 
  \end{mybox3}
 | 
| 
876
 | 
   465  | 
\end{textblock}}
 | 
| 
 | 
   466  | 
  | 
| 
 | 
   467  | 
\end{frame}
 | 
| 
 | 
   468  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   469  | 
  | 
| 
959
 | 
   470  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   471  | 
\begin{frame}[t]
 | 
| 
 | 
   472  | 
\frametitle{Quiz 1}
 | 
| 
 | 
   473  | 
\begin{bubble}[10cm]\it
 | 
| 
 | 
   474  | 
  There are many, many regular expression libraries.\bigskip\\
  | 
| 
959
 | 
   475  | 
  | 
| 
913
 | 
   476  | 
  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
 | 
| 
 | 
   477  | 
  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
 | 
| 
 | 
   478  | 
\end{bubble}
 | 
| 
 | 
   479  | 
  | 
| 
 | 
   480  | 
\begin{textblock}{12}(1,9)
 | 
| 
 | 
   481  | 
  \begin{tabular}{p{4cm}|p{4cm}|p{2cm}}
 | 
| 
 | 
   482  | 
    atrociously slow (s't) & pretty lazy (s't) & \textcolor{gray}{OTBT}\medskip\\
 | 
| 
 | 
   483  | 
    Python, Ruby, Swift, Dart, JavaScript & Rust, Go, RE2 & \textcolor{gray}{Snort, .Net7}\\
 | 
| 
 | 
   484  | 
  \end{tabular}
 | 
| 
 | 
   485  | 
\end{textblock}
 | 
| 
 | 
   486  | 
  | 
| 
 | 
   487  | 
\end{frame}
 | 
| 
959
 | 
   488  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   489  | 
  | 
| 
959
 | 
   490  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   491  | 
\begin{frame}[t]
 | 
| 
 | 
   492  | 
  | 
| 
959
 | 
   493  | 
\mbox{}\\[10mm]
 | 
| 
913
 | 
   494  | 
  | 
| 
959
 | 
   495  | 
\begin{columns}[t,onlytextwidth]
 | 
| 
913
 | 
   496  | 
\begin{column}{.2\textwidth}
 | 
| 
959
 | 
   497  | 
\raisebox{-10mm}{
 | 
| 
913
 | 
   498  | 
\begin{tikzpicture}
 | 
| 
 | 
   499  | 
  \begin{axis}[
 | 
| 
 | 
   500  | 
    xlabel={\bl{$n$} \pcode{a}s},
 | 
| 
 | 
   501  | 
    %%x label style={at={(1.05,0.0)}},
 | 
| 
 | 
   502  | 
    ylabel={time in secs},
 | 
| 
 | 
   503  | 
    enlargelimits=false,
  | 
| 
 | 
   504  | 
    xtick={0,10,...,30},
 | 
| 
 | 
   505  | 
    xmax=35,
  | 
| 
 | 
   506  | 
    ymax=35,
  | 
| 
 | 
   507  | 
    ytick={0,5,...,30},
 | 
| 
 | 
   508  | 
    scaled ticks=false,
  | 
| 
 | 
   509  | 
    axis lines=left,
  | 
| 
 | 
   510  | 
    width=5cm,
  | 
| 
959
 | 
   511  | 
    height=5cm,
  | 
| 
913
 | 
   512  | 
    legend entries={Python,JavaScript,Swift,Dart},
 | 
| 
 | 
   513  | 
    legend style={font=\small,at={(0.5,-0.39)},anchor=north},
 | 
| 
 | 
   514  | 
    legend cell align=left]
  | 
| 
 | 
   515  | 
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
 | 
| 
 | 
   516  | 
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
 | 
| 
 | 
   517  | 
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
 | 
| 
 | 
   518  | 
\addplot[brown,mark=*, mark options={fill=white}] table {re-dart.data};
 | 
| 
 | 
   519  | 
\end{axis}
 | 
| 
 | 
   520  | 
\end{tikzpicture}}
 | 
| 
 | 
   521  | 
\end{column}
 | 
| 
 | 
   522  | 
  | 
| 
 | 
   523  | 
\begin{column}{.2\textwidth}
 | 
| 
 | 
   524  | 
\begin{tikzpicture}
 | 
| 
 | 
   525  | 
  \begin{axis}[
 | 
| 
 | 
   526  | 
    xlabel={\bl{$n$} \pcode{a}s},
 | 
| 
 | 
   527  | 
    %ylabel={time in secs},
 | 
| 
 | 
   528  | 
    enlargelimits=false,
  | 
| 
 | 
   529  | 
    xtick={0,10,...,30},
 | 
| 
 | 
   530  | 
    xmax=35,
  | 
| 
 | 
   531  | 
    ymax=35,
  | 
| 
 | 
   532  | 
    ytick={0,5,...,30},
 | 
| 
 | 
   533  | 
    scaled ticks=false,
  | 
| 
 | 
   534  | 
    axis lines=left,
  | 
| 
 | 
   535  | 
    width=5cm,
  | 
| 
959
 | 
   536  | 
    height=5cm,
  | 
| 
913
 | 
   537  | 
    legend entries={Python,Ruby},
 | 
| 
 | 
   538  | 
    legend style={font=\small,at={(0.5,-0.39)},anchor=north},
 | 
| 
 | 
   539  | 
    legend cell align=left]
  | 
| 
 | 
   540  | 
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
 | 
| 
959
 | 
   541  | 
\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ruby.data};
 | 
| 
913
 | 
   542  | 
\end{axis}
 | 
| 
959
 | 
   543  | 
\end{tikzpicture}
 | 
| 
 | 
   544  | 
\end{column}
 | 
| 
913
 | 
   545  | 
\end{columns}
 | 
| 
 | 
   546  | 
  | 
| 
 | 
   547  | 
\begin{textblock}{4}(9,1.7)
 | 
| 
 | 
   548  | 
\textbf{\bl{\texttt{(a?)\boldmath$^{\{n\}}\cdot$(a)\boldmath$^{\{n\}}$}}}
 | 
| 
 | 
   549  | 
\end{textblock}
 | 
| 
 | 
   550  | 
  | 
| 
959
 | 
   551  | 
\begin{textblock}{4}(4,1.7)
 | 
| 
913
 | 
   552  | 
\textbf{\bl{\texttt{(a*)*$\cdot$b}}}
 | 
| 
 | 
   553  | 
\end{textblock}
 | 
| 
 | 
   554  | 
  | 
| 
 | 
   555  | 
\begin{textblock}{3.4}(0.3,10)
 | 
| 
 | 
   556  | 
\small{}\textbf{matching with strings}
 | 
| 
959
 | 
   557  | 
\textbf{\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}}
 | 
| 
913
 | 
   558  | 
\end{textblock}
 | 
| 
 | 
   559  | 
  | 
| 
 | 
   560  | 
\end{frame}
 | 
| 
 | 
   561  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   562  | 
  | 
| 
959
 | 
   563  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   564  | 
\begin{frame}[t]
 | 
| 
 | 
   565  | 
\frametitle{Quiz 1}
 | 
| 
 | 
   566  | 
\begin{bubble}[10cm]\it
 | 
| 
 | 
   567  | 
  \textbf{Given a regular expression \bl{r} and a string \bl{s}, what is the
 | 
| 
 | 
   568  | 
  difficulty / complexity of the problem deciding whether \bl{r} matches \bl{s}?}
 | 
| 
 | 
   569  | 
\end{bubble}
 | 
| 
 | 
   570  | 
  | 
| 
 | 
   571  | 
\begin{textblock}{12}(1.7,7)
 | 
| 
 | 
   572  | 
For \textbf{Perl-style} regular expression matchers (say Python, JavaScript, etc), the
 | 
| 
 | 
   573  | 
answer is:
  | 
| 
 | 
   574  | 
\only<2->{\begin{center}
 | 
| 
 | 
   575  | 
\fontspec{Hoefler Text Black}\textcolor{ProcessBlue}{\huge{}NP}
 | 
| 
959
 | 
   576  | 
\end{center}}
 | 
| 
913
 | 
   577  | 
\end{textblock}
 | 
| 
 | 
   578  | 
  | 
| 
 | 
   579  | 
\begin{textblock}{12}(1.7,12)
 | 
| 
 | 
   580  | 
\only<3->{Backreferences: \bl{(\ldots)$\,\ldots{}\backslash{}n$}}
 | 
| 
959
 | 
   581  | 
\end{textblock}
 | 
| 
913
 | 
   582  | 
\end{frame}
 | 
| 
959
 | 
   583  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   584  | 
  | 
| 
 | 
   585  | 
  | 
| 
 | 
   586  | 
  | 
| 
959
 | 
   587  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   588  | 
\begin{frame}
 | 
| 
 | 
   589  | 
\frametitle{Quiz 2}
 | 
| 
 | 
   590  | 
  | 
| 
 | 
   591  | 
\begin{textblock}{12}(2,2)
 | 
| 
959
 | 
   592  | 
\onslide<1->{
 | 
| 
913
 | 
   593  | 
  \begin{bubble}[9.5cm]\it
 | 
| 
959
 | 
   594  | 
  Quiz:\smallskip\\
  | 
| 
913
 | 
   595  | 
  \textbf{What is the Thompson Construction for \bl{$r^{\{n\}}$} ?}
 | 
| 
 | 
   596  | 
\end{bubble}}
 | 
| 
 | 
   597  | 
\end{textblock}
 | 
| 
 | 
   598  | 
  | 
| 
 | 
   599  | 
\only<2->{
 | 
| 
 | 
   600  | 
\begin{textblock}{12}(1,6)
 | 
| 
 | 
   601  | 
\begin{tabular}{ll}
 | 
| 
 | 
   602  | 
  Google's RE2 lib & $\Rightarrow$ \bl{$a^{\{1001\}}$} is too big\\
 | 
| 
 | 
   603  | 
  \onslide<3->{Rust Regex lib}    & \onslide<5->{$\Rightarrow$ \bl{$a^{\{1000\}\{100\}\{5\}}$} too big}\\
 | 
| 
 | 
   604  | 
                    & \onslide<6->{$\Rightarrow$ \bl{$a^{\{0\}\{4294967295\}}$} ?}
 | 
| 
 | 
   605  | 
\end{tabular}
 | 
| 
 | 
   606  | 
\end{textblock}}
 | 
| 
 | 
   607  | 
  | 
| 
 | 
   608  | 
\only<4>{
 | 
| 
959
 | 
   609  | 
\begin{textblock}{13.5}(1.5,4)
 | 
| 
 | 
   610  | 
  \begin{mybox3}{From Rust's Regex Description}\it
 | 
| 
913
 | 
   611  | 
    ``\ldots [the] syntax is similar to Perl-style regular expressions, but lacks a few features like look
  | 
| 
 | 
   612  | 
    around and backreferences. In exchange, all searches execute in linear time with respect
  | 
| 
 | 
   613  | 
    to the size of the regular expression and search text. \ldots''
  | 
| 
 | 
   614  | 
  \end{mybox3}
 | 
| 
 | 
   615  | 
\end{textblock}}
 | 
| 
 | 
   616  | 
  | 
| 
 | 
   617  | 
  | 
| 
959
 | 
   618  | 
\end{frame}
 | 
| 
876
 | 
   619  | 
  | 
| 
 | 
   620  | 
  | 
| 
 | 
   621  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   622  | 
\begin{frame}[t]
 | 
| 
 | 
   623  | 
\frametitle{Regular Expressions}
 | 
| 
876
 | 
   624  | 
  | 
| 
913
 | 
   625  | 
\begin{textblock}{6}(2,5)
 | 
| 
 | 
   626  | 
  \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
 | 
| 
 | 
   627  | 
  \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
 | 
| 
 | 
   628  | 
         & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} \\
 | 
| 
 | 
   629  | 
         & \bl{$\mid$} & \bl{$c, d, \ldots$}                         & characters\\
 | 
| 
 | 
   630  | 
         & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
 | 
| 
 | 
   631  | 
         & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
 | 
| 
 | 
   632  | 
         & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
 | 
| 
 | 
   633  | 
  \end{tabular}
 | 
| 
 | 
   634  | 
  \end{textblock}
 | 
| 
959
 | 
   635  | 
  | 
| 
876
 | 
   636  | 
\end{frame}
 | 
| 
959
 | 
   637  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   638  | 
  | 
| 
 | 
   639  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   640  | 
\begin{frame}[c]
 | 
| 
913
 | 
   641  | 
\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}
 | 
| 
 | 
   642  | 
  | 
| 
 | 
   643  | 
\large
  | 
| 
959
 | 
   644  | 
If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular
 | 
| 
913
 | 
   645  | 
expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip
 | 
| 
 | 
   646  | 
  | 
| 
 | 
   647  | 
\small
  | 
| 
 | 
   648  | 
\bl{$\der\,c\,r$} gives the answer, Brzozowski 1964\medskip\\
 | 
| 
 | 
   649  | 
(lost in the sands of time, re-appeared in 2009)
  | 
| 
876
 | 
   650  | 
\end{frame}
 | 
| 
 | 
   651  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   652  | 
  | 
| 
 | 
   653  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   654  | 
\begin{frame}[c]
 | 
| 
913
 | 
   655  | 
\frametitle{\begin{tabular}{l}The Derivative of a Rexp\end{tabular}}
 | 
| 
876
 | 
   656  | 
  | 
| 
913
 | 
   657  | 
\begin{center}
 | 
| 
 | 
   658  | 
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
 | 
| 
 | 
   659  | 
  \bl{$\der\, c\, (\ZERO)$}      & \bl{$\dn$} & \bl{$\ZERO$} & \\
 | 
| 
 | 
   660  | 
  \bl{$\der\, c\, (\ONE)$}           & \bl{$\dn$} & \bl{$\ZERO$} & \\
 | 
| 
 | 
   661  | 
  \bl{$\der\, c\, (d)$}                     & \bl{$\dn$} & \bl{if $c = d$ then $\ONE$ else $\ZERO$} & \\
 | 
| 
 | 
   662  | 
  \bl{$\der\, c\, (r_1 + r_2)$}        & \bl{$\dn$} & \bl{$\der\, c\, r_1 + \der\, c\, r_2$} & \\
 | 
| 
 | 
   663  | 
  \bl{$\der\, c\, (r_1 \cdot r_2)$}  & \bl{$\dn$}  & \bl{if $nullable (r_1)$}\\
 | 
| 
959
 | 
   664  | 
  & & \bl{then $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}\\
 | 
| 
913
 | 
   665  | 
  & & \bl{else $(\der\, c\, r_1) \cdot r_2$}\\
 | 
| 
 | 
   666  | 
  \bl{$\der\, c\, (r^*)$}  & \bl{$\dn$} & \bl{$(\der\,c\,r) \cdot (r^*)$} &\medskip\bigskip\\
 | 
| 
 | 
   667  | 
  \end{tabular}
 | 
| 
 | 
   668  | 
\end{center}
 | 
| 
876
 | 
   669  | 
  | 
| 
 | 
   670  | 
\end{frame}
 | 
| 
959
 | 
   671  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   672  | 
  | 
| 
 | 
   673  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   674  | 
\begin{frame}[t]
 | 
| 
 | 
   675  | 
\frametitle{\begin{tabular}{l}Brzozowski matcher\end{tabular}}
 | 
| 
876
 | 
   676  | 
  | 
| 
913
 | 
   677  | 
Does \bl{$r_1$} match \bl{"$abc$"}?
 | 
| 
 | 
   678  | 
\begin{center}
 | 
| 
 | 
   679  | 
\begin{tabular}{@{}rl@{}l@{}}
 | 
| 
 | 
   680  | 
Step 1: & build derivative of \bl{$a$} and \bl{$r_1$} & \bl{$(r_2 = \der\,a\,r_1)$}\smallskip\\
 | 
| 
 | 
   681  | 
Step 2: & build derivative of \bl{$b$} and \bl{$r_2$} & \bl{$(r_3 = \der\,b\,r_2)$}\smallskip\\
 | 
| 
 | 
   682  | 
Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\
 | 
| 
 | 
   683  | 
Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\
 | 
| 
 | 
   684  | 
        & test whether \bl{$r_4$} can recognise\\
 | 
| 
 | 
   685  | 
        & the empty string\medskip\\
  | 
| 
 | 
   686  | 
Output: & result of the test\\
  | 
| 
959
 | 
   687  | 
        & $\Rightarrow \bl{\textit{true}} \,\text{or}\,
 | 
| 
 | 
   688  | 
                       \bl{\textit{false}}$\\
 | 
| 
913
 | 
   689  | 
\end{tabular}
 | 
| 
 | 
   690  | 
\end{center}
 | 
| 
876
 | 
   691  | 
  | 
| 
913
 | 
   692  | 
  | 
| 
 | 
   693  | 
\end{frame}
 | 
| 
959
 | 
   694  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   695  | 
  | 
| 
 | 
   696  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   697  | 
\begin{frame}[c]
 | 
| 
 | 
   698  | 
\frametitle{\mbox{Nullable}}
 | 
| 
 | 
   699  | 
  | 
| 
876
 | 
   700  | 
  | 
| 
913
 | 
   701  | 
\ldots{}whether a regular expression can match the empty string:
 | 
| 
 | 
   702  | 
\begin{center}
 | 
| 
 | 
   703  | 
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
 | 
| 
 | 
   704  | 
\bl{$nullable(\ZERO)$}    & \bl{$\dn$} & \bl{\textit{false}}\\
 | 
| 
 | 
   705  | 
\bl{$nullable(\ONE)$}       & \bl{$\dn$} & \bl{\textit{true}}\\
 | 
| 
 | 
   706  | 
\bl{$nullable (c)$}             & \bl{$\dn$} & \bl{\textit{false}}\\
 | 
| 
959
 | 
   707  | 
\bl{$nullable (r_1 + r_2)$}     & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\
 | 
| 
913
 | 
   708  | 
\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\
 | 
| 
 | 
   709  | 
\bl{$nullable (r^*)$}           & \bl{$\dn$} & \bl{\textit{true}}\\
 | 
| 
 | 
   710  | 
\end{tabular}
 | 
| 
 | 
   711  | 
\end{center}
 | 
| 
 | 
   712  | 
  | 
| 
 | 
   713  | 
\end{frame}
 | 
| 
959
 | 
   714  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   715  | 
  | 
| 
 | 
   716  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   717  | 
\begin{frame}[t]
 | 
| 
 | 
   718  | 
\frametitle{\begin{tabular}{l}Extensions\end{tabular}}
 | 
| 
876
 | 
   719  | 
  | 
| 
913
 | 
   720  | 
\begin{center}
 | 
| 
 | 
   721  | 
\begin{tabular}{@ {\hspace{-4mm}}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
 | 
| 
 | 
   722  | 
  \bl{$\der\, c\, (r^{\{n\}})$}      & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
 | 
| 
 | 
   723  | 
  \bl{$\der\, c\, (r^{\{..n\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;\ZERO\;else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
 | 
| 
 | 
   724  | 
  \bl{$\der\, c\, (r^{\{n..\}})$}    & \bl{$\dn$} & \bl{$if\;n=0\;then\;(der\,c\,r)\cdot r^*$} \\
 | 
| 
 | 
   725  | 
                                    &            & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1\}}$} & \\
 | 
| 
 | 
   726  | 
  \bl{$\der\, c\, (r^{\{n..m\}})$}   & \bl{$\dn$} & \bl{$if\;n=0 \wedge m=0\;then\;\ZERO\;else$} & \\
 | 
| 
 | 
   727  | 
                                     &           & \bl{$if \;n=0 \wedge 0<m\; then\;(der\,c\,r)\cdot r^{\{..m-1\}}$} & \\
 | 
| 
 | 
   728  | 
                                     &           & \bl{$else\; (der\,c\,r)\cdot r^{\{n-1..m-1\}}$} & \\
 | 
| 
 | 
   729  | 
  \bl{$\der\, c\, (\neg{}r)$}            & \bl{$\dn$}  & \bl{$\neg(der\,c\,r)$}\\
 | 
| 
 | 
   730  | 
  \end{tabular}
 | 
| 
 | 
   731  | 
\end{center}
 | 
| 
 | 
   732  | 
  | 
| 
 | 
   733  | 
also works for lookarounds, various anchors, etc, but \textbf{not} for backreferences
 | 
| 
 | 
   734  | 
  | 
| 
 | 
   735  | 
\begin{textblock}{3}(11,13)
 | 
| 
959
 | 
   736  | 
\onslide<2>{
 | 
| 
913
 | 
   737  | 
  \begin{bubble}[3cm]
 | 
| 
 | 
   738  | 
  \bl{$a^{\{0\}\{4294967295\}}$}
 | 
| 
 | 
   739  | 
\end{bubble}}
 | 
| 
876
 | 
   740  | 
\end{textblock}
 | 
| 
 | 
   741  | 
  | 
| 
 | 
   742  | 
\end{frame}
 | 
| 
959
 | 
   743  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   744  | 
  | 
| 
 | 
   745  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
913
 | 
   746  | 
\begin{frame}[t]
 | 
| 
 | 
   747  | 
\frametitle{\mbox{Problems with Ders}}
 | 
| 
876
 | 
   748  | 
  | 
| 
913
 | 
   749  | 
\textcolor{blue}{
 | 
| 
876
 | 
   750  | 
\small
  | 
| 
913
 | 
   751  | 
\def\ll{\stackrel{der\,a}{\longrightarrow}}
 | 
| 
876
 | 
   752  | 
\begin{center}
 | 
| 
913
 | 
   753  | 
\begin{tabular}{@{\hspace{-5mm}}rll}
 | 
| 
 | 
   754  | 
$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
 | 
| 
 | 
   755  | 
& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
 | 
| 
 | 
   756  | 
& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
 | 
| 
 | 
   757  | 
& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
 | 
| 
 | 
   758  | 
& $\ll$ & \ldots\\ \hspace{15mm}
 | 
| 
876
 | 
   759  | 
\end{tabular}
 | 
| 
913
 | 
   760  | 
\end{center}}
 | 
| 
 | 
   761  | 
  | 
| 
959
 | 
   762  | 
\begin{textblock}{13.5}(1.5,12)
 | 
| 
913
 | 
   763  | 
(regular expressions of sizes 98, 169, 283, 468, 767, \ldots)
  | 
| 
 | 
   764  | 
\end{textblock}
 | 
| 
876
 | 
   765  | 
  | 
| 
 | 
   766  | 
\end{frame}
 | 
| 
959
 | 
   767  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   768  | 
  | 
| 
 | 
   769  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
 | 
   770  | 
\begin{frame}[c]
 | 
| 
913
 | 
   771  | 
\frametitle{\mbox{Conclusion}}
 | 
| 
876
 | 
   772  | 
  | 
| 
 | 
   773  | 
\begin{itemize}
 | 
| 
959
 | 
   774  | 
\item The beauty is that this only involves functional programs that can be conveniently reasoned about in theorem provers
  | 
| 
913
 | 
   775  | 
\item POSIX lexing can be done via an extension by Sulzmann and Lu, 2014 (our current work)\medskip\pause
  | 
| 
959
 | 
   776  | 
\item How surprising that one can still do new work on regular expressions.
  | 
| 
 | 
   777  | 
\end{itemize}
 | 
| 
876
 | 
   778  | 
  | 
| 
 | 
   779  | 
\end{frame}
 | 
| 
959
 | 
   780  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   781  | 
  | 
| 
959
 | 
   782  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   783  | 
  | 
| 
913
 | 
   784  | 
\begin{frame}<1-10>
 | 
| 
959
 | 
   785  | 
\end{frame}
 | 
| 
876
 | 
   786  | 
  | 
| 
 | 
   787  | 
  | 
| 
 | 
   788  | 
  | 
| 
959
 | 
   789  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  | 
| 
876
 | 
   790  | 
\end{document}
 | 
| 
 | 
   791  | 
  | 
| 
959
 | 
   792  | 
%%% Local Variables:
  | 
| 
876
 | 
   793  | 
%%% mode: latex
  | 
| 
 | 
   794  | 
%%% TeX-master: t
  | 
| 
959
 | 
   795  | 
%%% End:
  |