slides/slides01.tex
changeset 560 99d2bb1f145c
parent 559 db5cb071644d
child 576 550186034b6e
equal deleted inserted replaced
559:db5cb071644d 560:99d2bb1f145c
    49 
    49 
    50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    50 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    51 \begin{frame}[t]
    51 \begin{frame}[t]
    52 \frametitle{Why Study Compilers?}
    52 \frametitle{Why Study Compilers?}
    53 
    53 
    54 John Regehr {\small(LLVM compiler hacker)}\smallskip\\
    54 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\
    55 
    55 
    56 \begin{bubble}[10.5cm]
    56 \begin{bubble}[10.5cm]
    57   \bf ``\ldots{}It’s effectively a perpetual
    57   \bf ``\ldots{}It’s effectively a perpetual
    58   employment act for solid compiler hackers.''
    58   employment act for solid compiler hackers.''
    59 \end{bubble}
    59 \end{bubble}
   185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b}
   185 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b}
   186 against $\underbrace{\texttt{a}...\texttt{a}}_n$
   186 against $\underbrace{\texttt{a}...\texttt{a}}_n$
   187 \end{frame} 
   187 \end{frame} 
   188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
   188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
   189 
   189 
       
   190 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   191 \begin{frame}[c]
       
   192 \frametitle{Evil Regular Expressions}
       
   193 
       
   194 \begin{itemize}
       
   195 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
       
   196 \item Evil regular expressions\medskip
       
   197 \begin{itemize}
       
   198 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}
       
   199 \item \bl{$(a^*)^*\cdot b$}
       
   200 \item \bl{$([a$\,-\,$z]^+)^*$}
       
   201 \item \bl{$(a + a \cdot a)^*$}
       
   202 \item \bl{$(a + a^?)^*$}
       
   203 \end{itemize}
       
   204 
       
   205 \item sometimes also called \alert{catastrophic backtracking}
       
   206 \item this is a problem for \alert{N}etwork \alert{I}ntrusion
       
   207   \alert{D}etection systems, StackExchange, Atom editor
       
   208 \item \url{https://vimeo.com/112065252}  
       
   209 \end{itemize}
       
   210 
       
   211 \end{frame}
       
   212 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   190 
   213 
   191 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   214 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   192 \begin{frame}[c]
   215 \begin{frame}[c]
   193 \frametitle{The Goal of this Module}
   216 \frametitle{The Goal of this Module}
   194 
   217 
   197                       node/.style={
   220                       node/.style={
   198                       rectangle,rounded corners=3mm,
   221                       rectangle,rounded corners=3mm,
   199                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
   222                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
   200                       top color=white,bottom color=black!20}]
   223                       top color=white,bottom color=black!20}]
   201 
   224 
   202   \node at (3.05, 1.8) {\Large\bf Write a compiler};
   225   \node at (3.05, 1.8) {\Large\bf write a compiler};
   203 
   226 
   204   \node (0) at (-2.3,0) {}; 
   227   \node (0) at (-2.3,0) {};  
       
   228   \node [above=5mm of 0]
       
   229   {\makebox[0mm]{\footnotesize
       
   230       \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; 
   205   
   231   
   206   \node (A) at (0,0)  [node] {};
   232   \node (A) at (0,0)  [node] {};
   207   \node [below right] at (A.north west) {lexer};
   233   \node [below right] at (A.north west) {lexer};
   208 
   234 
   209   \node (B) at (3,0)  [node] {};
   235   \node (B) at (3,0)  [node] {};
   210   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
   236   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
   211 
   237 
   212   \node (C) at (6,0)  [node] {};
   238   \node (C) at (6,0)  [node] {};
   213   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
   239   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
   214 
   240 
   215   \node (1) at (8.4,0) {}; 
   241   \node (1) at (8.4,0) {};
       
   242   \node [above=5mm of 1]
       
   243   {\makebox[0mm]{\footnotesize
       
   244       \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};
   216 
   245 
   217   \draw [->,line width=4mm] (0) -- (A); 
   246   \draw [->,line width=4mm] (0) -- (A); 
   218   \draw [->,line width=4mm] (A) -- (B); 
   247   \draw [->,line width=4mm] (A) -- (B); 
   219   \draw [->,line width=4mm] (B) -- (C); 
   248   \draw [->,line width=4mm] (B) -- (C); 
   220   \draw [->,line width=4mm] (C) -- (1); 
   249   \draw [->,line width=4mm] (C) -- (1); 
   355 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   384 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   356 \begin{frame}[t]
   385 \begin{frame}[t]
   357 \frametitle{Familiar Regular Expr.}
   386 \frametitle{Familiar Regular Expr.}
   358 \small
   387 \small
   359 \begin{center}
   388 \begin{center}
   360 \texttt{[a-z0-9\_.-]+ @ [a-z0-9.-]+ . [a-z.]\{2,6\}}
   389 \texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
   361 \end{center}\smallskip
   390 \end{center}\smallskip
   362 
   391 
   363 \begin{center}
   392 \begin{center}
   364 \begin{tabular}{@{}lp{8.5cm}@{}}
   393 \begin{tabular}{@{}lp{8.5cm}@{}}
   365 \pcode{re*} & matches 0 or more times\\
   394 \pcode{re*} & matches 0 or more times\\
   368 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\
   397 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\
   369 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
   398 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
   370 \pcode{[...]} & matches any single character inside the brackets\\
   399 \pcode{[...]} & matches any single character inside the brackets\\
   371 \pcode{[^...]} & matches any single character not inside the 
   400 \pcode{[^...]} & matches any single character not inside the 
   372 brackets\\
   401 brackets\\
   373 \pcode{a-zA-Z} & character ranges\\
   402 \pcode{a-z A-Z} & character ranges\\
   374 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
   403 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
   375 \pcode{.} & matches every character except newline\\
   404 \pcode{.} & matches every character except newline\\
   376 \pcode{(re)}	& groups regular expressions and remembers 
   405 \pcode{(re)}	& groups regular expressions and remembers 
   377 the matched text
   406 the matched text
   378 \end{tabular}
   407 \end{tabular}
   385 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   414 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   386 \begin{frame}[c]
   415 \begin{frame}[c]
   387 \frametitle{Today}
   416 \frametitle{Today}
   388 
   417 
   389 \begin{itemize}
   418 \begin{itemize}
   390 \item While the ultimate goal is to implement a small compiler 
   419 \item While the ultimate goal is to implement a small compiler for the JVM
   391 (a really small one for the JVM)\ldots\bigskip
   420   \ldots\bigskip
   392 \end{itemize}
   421 \end{itemize}
   393 
   422 
   394 Let's start with:
   423 Let's start with:
   395 
   424 
   396 \begin{itemize}
   425 \begin{itemize}