|     49  |     49  | 
|     50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |     50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
|     51 \begin{frame}[t] |     51 \begin{frame}[t] | 
|     52 \frametitle{Why Study Compilers?} |     52 \frametitle{Why Study Compilers?} | 
|     53  |     53  | 
|     54 John Regehr {\small(LLVM compiler hacker)}\smallskip\\ |     54 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\ | 
|     55  |     55  | 
|     56 \begin{bubble}[10.5cm] |     56 \begin{bubble}[10.5cm] | 
|     57   \bf ``\ldots{}It’s effectively a perpetual |     57   \bf ``\ldots{}It’s effectively a perpetual | 
|     58   employment act for solid compiler hackers.'' |     58   employment act for solid compiler hackers.'' | 
|     59 \end{bubble} |     59 \end{bubble} | 
|    185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} |    185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} | 
|    186 against $\underbrace{\texttt{a}...\texttt{a}}_n$ |    186 against $\underbrace{\texttt{a}...\texttt{a}}_n$ | 
|    187 \end{frame}  |    187 \end{frame}  | 
|    188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   |    188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   | 
|    189  |    189  | 
|         |    190 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
|         |    191 \begin{frame}[c] | 
|         |    192 \frametitle{Evil Regular Expressions} | 
|         |    193  | 
|         |    194 \begin{itemize} | 
|         |    195 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip | 
|         |    196 \item Evil regular expressions\medskip | 
|         |    197 \begin{itemize} | 
|         |    198 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$} | 
|         |    199 \item \bl{$(a^*)^*\cdot b$} | 
|         |    200 \item \bl{$([a$\,-\,$z]^+)^*$} | 
|         |    201 \item \bl{$(a + a \cdot a)^*$} | 
|         |    202 \item \bl{$(a + a^?)^*$} | 
|         |    203 \end{itemize} | 
|         |    204  | 
|         |    205 \item sometimes also called \alert{catastrophic backtracking} | 
|         |    206 \item this is a problem for \alert{N}etwork \alert{I}ntrusion | 
|         |    207   \alert{D}etection systems, StackExchange, Atom editor | 
|         |    208 \item \url{https://vimeo.com/112065252}   | 
|         |    209 \end{itemize} | 
|         |    210  | 
|         |    211 \end{frame} | 
|         |    212 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
|    190  |    213  | 
|    191 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |    214 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
|    192 \begin{frame}[c] |    215 \begin{frame}[c] | 
|    193 \frametitle{The Goal of this Module} |    216 \frametitle{The Goal of this Module} | 
|    194  |    217  | 
|    197                       node/.style={ |    220                       node/.style={ | 
|    198                       rectangle,rounded corners=3mm, |    221                       rectangle,rounded corners=3mm, | 
|    199                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm, |    222                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm, | 
|    200                       top color=white,bottom color=black!20}] |    223                       top color=white,bottom color=black!20}] | 
|    201  |    224  | 
|    202   \node at (3.05, 1.8) {\Large\bf Write a compiler}; |    225   \node at (3.05, 1.8) {\Large\bf write a compiler}; | 
|    203  |    226  | 
|    204   \node (0) at (-2.3,0) {};  |    227   \node (0) at (-2.3,0) {};   | 
|         |    228   \node [above=5mm of 0] | 
|         |    229   {\makebox[0mm]{\footnotesize | 
|         |    230       \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}};  | 
|    205    |    231    | 
|    206   \node (A) at (0,0)  [node] {}; |    232   \node (A) at (0,0)  [node] {}; | 
|    207   \node [below right] at (A.north west) {lexer}; |    233   \node [below right] at (A.north west) {lexer}; | 
|    208  |    234  | 
|    209   \node (B) at (3,0)  [node] {}; |    235   \node (B) at (3,0)  [node] {}; | 
|    210   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; |    236   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; | 
|    211  |    237  | 
|    212   \node (C) at (6,0)  [node] {}; |    238   \node (C) at (6,0)  [node] {}; | 
|    213   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; |    239   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; | 
|    214  |    240  | 
|    215   \node (1) at (8.4,0) {};  |    241   \node (1) at (8.4,0) {}; | 
|         |    242   \node [above=5mm of 1] | 
|         |    243   {\makebox[0mm]{\footnotesize | 
|         |    244       \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}}; | 
|    216  |    245  | 
|    217   \draw [->,line width=4mm] (0) -- (A);  |    246   \draw [->,line width=4mm] (0) -- (A);  | 
|    218   \draw [->,line width=4mm] (A) -- (B);  |    247   \draw [->,line width=4mm] (A) -- (B);  | 
|    219   \draw [->,line width=4mm] (B) -- (C);  |    248   \draw [->,line width=4mm] (B) -- (C);  | 
|    220   \draw [->,line width=4mm] (C) -- (1);  |    249   \draw [->,line width=4mm] (C) -- (1);  | 
|    355 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |    384 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
|    356 \begin{frame}[t] |    385 \begin{frame}[t] | 
|    357 \frametitle{Familiar Regular Expr.} |    386 \frametitle{Familiar Regular Expr.} | 
|    358 \small |    387 \small | 
|    359 \begin{center} |    388 \begin{center} | 
|    360 \texttt{[a-z0-9\_.-]+ @ [a-z0-9.-]+ . [a-z.]\{2,6\}} |    389 \texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}} | 
|    361 \end{center}\smallskip |    390 \end{center}\smallskip | 
|    362  |    391  | 
|    363 \begin{center} |    392 \begin{center} | 
|    364 \begin{tabular}{@{}lp{8.5cm}@{}} |    393 \begin{tabular}{@{}lp{8.5cm}@{}} | 
|    365 \pcode{re*} & matches 0 or more times\\ |    394 \pcode{re*} & matches 0 or more times\\ | 
|    368 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\ |    397 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\ | 
|    369 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ |    398 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ | 
|    370 \pcode{[...]} & matches any single character inside the brackets\\ |    399 \pcode{[...]} & matches any single character inside the brackets\\ | 
|    371 \pcode{[^...]} & matches any single character not inside the  |    400 \pcode{[^...]} & matches any single character not inside the  | 
|    372 brackets\\ |    401 brackets\\ | 
|    373 \pcode{a-zA-Z} & character ranges\\ |    402 \pcode{a-z A-Z} & character ranges\\ | 
|    374 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ |    403 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ | 
|    375 \pcode{.} & matches every character except newline\\ |    404 \pcode{.} & matches every character except newline\\ | 
|    376 \pcode{(re)}	& groups regular expressions and remembers  |    405 \pcode{(re)}	& groups regular expressions and remembers  | 
|    377 the matched text |    406 the matched text | 
|    378 \end{tabular} |    407 \end{tabular} |