49   | 
    49   | 
    50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
    50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
    51 \begin{frame}[t] | 
    51 \begin{frame}[t] | 
    52 \frametitle{Why Study Compilers?} | 
    52 \frametitle{Why Study Compilers?} | 
    53   | 
    53   | 
    54 John Regehr {\small(LLVM compiler hacker)}\smallskip\\ | 
    54 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\ | 
    55   | 
    55   | 
    56 \begin{bubble}[10.5cm] | 
    56 \begin{bubble}[10.5cm] | 
    57   \bf ``\ldots{}It’s effectively a perpetual | 
    57   \bf ``\ldots{}It’s effectively a perpetual | 
    58   employment act for solid compiler hackers.''  | 
    58   employment act for solid compiler hackers.''  | 
    59 \end{bubble} | 
    59 \end{bubble} | 
   185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} | 
   185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} | 
   186 against $\underbrace{\texttt{a}...\texttt{a}}_n$ | 
   186 against $\underbrace{\texttt{a}...\texttt{a}}_n$ | 
   187 \end{frame}  | 
   187 \end{frame}  | 
   188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    | 
   188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    | 
   189   | 
   189   | 
         | 
   190 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
         | 
   191 \begin{frame}[c] | 
         | 
   192 \frametitle{Evil Regular Expressions} | 
         | 
   193   | 
         | 
   194 \begin{itemize} | 
         | 
   195 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip | 
         | 
   196 \item Evil regular expressions\medskip  | 
         | 
   197 \begin{itemize} | 
         | 
   198 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$} | 
         | 
   199 \item \bl{$(a^*)^*\cdot b$} | 
         | 
   200 \item \bl{$([a$\,-\,$z]^+)^*$} | 
         | 
   201 \item \bl{$(a + a \cdot a)^*$} | 
         | 
   202 \item \bl{$(a + a^?)^*$} | 
         | 
   203 \end{itemize} | 
         | 
   204   | 
         | 
   205 \item sometimes also called \alert{catastrophic backtracking} | 
         | 
   206 \item this is a problem for \alert{N}etwork \alert{I}ntrusion | 
         | 
   207   \alert{D}etection systems, StackExchange, Atom editor | 
         | 
   208 \item \url{https://vimeo.com/112065252}   | 
         | 
   209 \end{itemize} | 
         | 
   210   | 
         | 
   211 \end{frame} | 
         | 
   212 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
   190   | 
   213   | 
   191 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
   214 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
   192 \begin{frame}[c] | 
   215 \begin{frame}[c] | 
   193 \frametitle{The Goal of this Module} | 
   216 \frametitle{The Goal of this Module} | 
   194   | 
   217   | 
   197                       node/.style={ | 
   220                       node/.style={ | 
   198                       rectangle,rounded corners=3mm,  | 
   221                       rectangle,rounded corners=3mm,  | 
   199                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm,  | 
   222                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm,  | 
   200                       top color=white,bottom color=black!20}]  | 
   223                       top color=white,bottom color=black!20}]  | 
   201   | 
   224   | 
   202   \node at (3.05, 1.8) {\Large\bf Write a compiler}; | 
   225   \node at (3.05, 1.8) {\Large\bf write a compiler}; | 
   203   | 
   226   | 
   204   \node (0) at (-2.3,0) {};  | 
   227   \node (0) at (-2.3,0) {};   | 
         | 
   228   \node [above=5mm of 0]  | 
         | 
   229   {\makebox[0mm]{\footnotesize | 
         | 
   230       \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}};  | 
   205     | 
   231     | 
   206   \node (A) at (0,0)  [node] {}; | 
   232   \node (A) at (0,0)  [node] {}; | 
   207   \node [below right] at (A.north west) {lexer}; | 
   233   \node [below right] at (A.north west) {lexer}; | 
   208   | 
   234   | 
   209   \node (B) at (3,0)  [node] {}; | 
   235   \node (B) at (3,0)  [node] {}; | 
   210   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; | 
   236   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; | 
   211   | 
   237   | 
   212   \node (C) at (6,0)  [node] {}; | 
   238   \node (C) at (6,0)  [node] {}; | 
   213   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; | 
   239   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; | 
   214   | 
   240   | 
   215   \node (1) at (8.4,0) {};  | 
   241   \node (1) at (8.4,0) {}; | 
         | 
   242   \node [above=5mm of 1]  | 
         | 
   243   {\makebox[0mm]{\footnotesize | 
         | 
   244       \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}}; | 
   216   | 
   245   | 
   217   \draw [->,line width=4mm] (0) -- (A);   | 
   246   \draw [->,line width=4mm] (0) -- (A);   | 
   218   \draw [->,line width=4mm] (A) -- (B);   | 
   247   \draw [->,line width=4mm] (A) -- (B);   | 
   219   \draw [->,line width=4mm] (B) -- (C);   | 
   248   \draw [->,line width=4mm] (B) -- (C);   | 
   220   \draw [->,line width=4mm] (C) -- (1);   | 
   249   \draw [->,line width=4mm] (C) -- (1);   | 
   355 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
   384 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
   356 \begin{frame}[t] | 
   385 \begin{frame}[t] | 
   357 \frametitle{Familiar Regular Expr.} | 
   386 \frametitle{Familiar Regular Expr.} | 
   358 \small  | 
   387 \small  | 
   359 \begin{center} | 
   388 \begin{center} | 
   360 \texttt{[a-z0-9\_.-]+ @ [a-z0-9.-]+ . [a-z.]\{2,6\}} | 
   389 \texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}} | 
   361 \end{center}\smallskip | 
   390 \end{center}\smallskip | 
   362   | 
   391   | 
   363 \begin{center} | 
   392 \begin{center} | 
   364 \begin{tabular}{@{}lp{8.5cm}@{}} | 
   393 \begin{tabular}{@{}lp{8.5cm}@{}} | 
   365 \pcode{re*} & matches 0 or more times\\ | 
   394 \pcode{re*} & matches 0 or more times\\ | 
   368 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\ | 
   397 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\ | 
   369 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ | 
   398 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ | 
   370 \pcode{[...]} & matches any single character inside the brackets\\ | 
   399 \pcode{[...]} & matches any single character inside the brackets\\ | 
   371 \pcode{[^...]} & matches any single character not inside the  | 
   400 \pcode{[^...]} & matches any single character not inside the  | 
   372 brackets\\  | 
   401 brackets\\  | 
   373 \pcode{a-zA-Z} & character ranges\\ | 
   402 \pcode{a-z A-Z} & character ranges\\ | 
   374 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ | 
   403 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ | 
   375 \pcode{.} & matches every character except newline\\ | 
   404 \pcode{.} & matches every character except newline\\ | 
   376 \pcode{(re)}	& groups regular expressions and remembers  | 
   405 \pcode{(re)}	& groups regular expressions and remembers  | 
   377 the matched text  | 
   406 the matched text  | 
   378 \end{tabular} | 
   407 \end{tabular} |