49 |
49 |
50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
51 \begin{frame}[t] |
51 \begin{frame}[t] |
52 \frametitle{Why Study Compilers?} |
52 \frametitle{Why Study Compilers?} |
53 |
53 |
54 John Regehr {\small(LLVM compiler hacker)}\smallskip\\ |
54 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\ |
55 |
55 |
56 \begin{bubble}[10.5cm] |
56 \begin{bubble}[10.5cm] |
57 \bf ``\ldots{}It’s effectively a perpetual |
57 \bf ``\ldots{}It’s effectively a perpetual |
58 employment act for solid compiler hackers.'' |
58 employment act for solid compiler hackers.'' |
59 \end{bubble} |
59 \end{bubble} |
185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} |
185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} |
186 against $\underbrace{\texttt{a}...\texttt{a}}_n$ |
186 against $\underbrace{\texttt{a}...\texttt{a}}_n$ |
187 \end{frame} |
187 \end{frame} |
188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
189 |
189 |
|
190 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
191 \begin{frame}[c] |
|
192 \frametitle{Evil Regular Expressions} |
|
193 |
|
194 \begin{itemize} |
|
195 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip |
|
196 \item Evil regular expressions\medskip |
|
197 \begin{itemize} |
|
198 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$} |
|
199 \item \bl{$(a^*)^*\cdot b$} |
|
200 \item \bl{$([a$\,-\,$z]^+)^*$} |
|
201 \item \bl{$(a + a \cdot a)^*$} |
|
202 \item \bl{$(a + a^?)^*$} |
|
203 \end{itemize} |
|
204 |
|
205 \item sometimes also called \alert{catastrophic backtracking} |
|
206 \item this is a problem for \alert{N}etwork \alert{I}ntrusion |
|
207 \alert{D}etection systems, StackExchange, Atom editor |
|
208 \item \url{https://vimeo.com/112065252} |
|
209 \end{itemize} |
|
210 |
|
211 \end{frame} |
|
212 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
190 |
213 |
191 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
214 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
192 \begin{frame}[c] |
215 \begin{frame}[c] |
193 \frametitle{The Goal of this Module} |
216 \frametitle{The Goal of this Module} |
194 |
217 |
197 node/.style={ |
220 node/.style={ |
198 rectangle,rounded corners=3mm, |
221 rectangle,rounded corners=3mm, |
199 very thick,draw=black!50,minimum height=18mm, minimum width=20mm, |
222 very thick,draw=black!50,minimum height=18mm, minimum width=20mm, |
200 top color=white,bottom color=black!20}] |
223 top color=white,bottom color=black!20}] |
201 |
224 |
202 \node at (3.05, 1.8) {\Large\bf Write a compiler}; |
225 \node at (3.05, 1.8) {\Large\bf write a compiler}; |
203 |
226 |
204 \node (0) at (-2.3,0) {}; |
227 \node (0) at (-2.3,0) {}; |
|
228 \node [above=5mm of 0] |
|
229 {\makebox[0mm]{\footnotesize |
|
230 \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; |
205 |
231 |
206 \node (A) at (0,0) [node] {}; |
232 \node (A) at (0,0) [node] {}; |
207 \node [below right] at (A.north west) {lexer}; |
233 \node [below right] at (A.north west) {lexer}; |
208 |
234 |
209 \node (B) at (3,0) [node] {}; |
235 \node (B) at (3,0) [node] {}; |
210 \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; |
236 \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; |
211 |
237 |
212 \node (C) at (6,0) [node] {}; |
238 \node (C) at (6,0) [node] {}; |
213 \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; |
239 \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; |
214 |
240 |
215 \node (1) at (8.4,0) {}; |
241 \node (1) at (8.4,0) {}; |
|
242 \node [above=5mm of 1] |
|
243 {\makebox[0mm]{\footnotesize |
|
244 \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}}; |
216 |
245 |
217 \draw [->,line width=4mm] (0) -- (A); |
246 \draw [->,line width=4mm] (0) -- (A); |
218 \draw [->,line width=4mm] (A) -- (B); |
247 \draw [->,line width=4mm] (A) -- (B); |
219 \draw [->,line width=4mm] (B) -- (C); |
248 \draw [->,line width=4mm] (B) -- (C); |
220 \draw [->,line width=4mm] (C) -- (1); |
249 \draw [->,line width=4mm] (C) -- (1); |
355 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
384 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
356 \begin{frame}[t] |
385 \begin{frame}[t] |
357 \frametitle{Familiar Regular Expr.} |
386 \frametitle{Familiar Regular Expr.} |
358 \small |
387 \small |
359 \begin{center} |
388 \begin{center} |
360 \texttt{[a-z0-9\_.-]+ @ [a-z0-9.-]+ . [a-z.]\{2,6\}} |
389 \texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}} |
361 \end{center}\smallskip |
390 \end{center}\smallskip |
362 |
391 |
363 \begin{center} |
392 \begin{center} |
364 \begin{tabular}{@{}lp{8.5cm}@{}} |
393 \begin{tabular}{@{}lp{8.5cm}@{}} |
365 \pcode{re*} & matches 0 or more times\\ |
394 \pcode{re*} & matches 0 or more times\\ |
368 \pcode{re\{n\}} & matches exactly \pcode{n} number of times\\ |
397 \pcode{re\{n\}} & matches exactly \pcode{n} number of times\\ |
369 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ |
398 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ |
370 \pcode{[...]} & matches any single character inside the brackets\\ |
399 \pcode{[...]} & matches any single character inside the brackets\\ |
371 \pcode{[^...]} & matches any single character not inside the |
400 \pcode{[^...]} & matches any single character not inside the |
372 brackets\\ |
401 brackets\\ |
373 \pcode{a-zA-Z} & character ranges\\ |
402 \pcode{a-z A-Z} & character ranges\\ |
374 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ |
403 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ |
375 \pcode{.} & matches every character except newline\\ |
404 \pcode{.} & matches every character except newline\\ |
376 \pcode{(re)} & groups regular expressions and remembers |
405 \pcode{(re)} & groups regular expressions and remembers |
377 the matched text |
406 the matched text |
378 \end{tabular} |
407 \end{tabular} |