# HG changeset patch # User Christian Urban # Date 1412558213 -3600 # Node ID 0105257429f32467b1482e405b59f69f24885c00 # Parent 88416b7df38c56cf1d36660c79a6fe9709928d6b updated diff -r 88416b7df38c -r 0105257429f3 handouts/ho03.pdf Binary file handouts/ho03.pdf has changed diff -r 88416b7df38c -r 0105257429f3 handouts/ho03.tex --- a/handouts/ho03.tex Mon Oct 06 00:57:44 2014 +0100 +++ b/handouts/ho03.tex Mon Oct 06 02:16:53 2014 +0100 @@ -1,6 +1,6 @@ \documentclass{article} \usepackage{../style} - +\usepackage{../langs} \begin{document} @@ -23,12 +23,143 @@ free-riding script-kiddies who use this technology without knowing what are the underlying ideas. +For buffer overflow attacks to work, a number of innocent +design decisions, which are really benign on their own, need +to conspire against you. All these decisions were pretty much +taken at a time when there was no Internet: C was introduced +around 1973, the Internet TCP/IP protocol was standardised in +1982, by which time there were maybe 500 servers connected +worldwide (all users were well-behaved), and Intel's first 8086 +CPUs arrived around 1977. So none of the creators can +really be blamed, but as mentioned above we should already +be way beyond the point where buffer overflow attacks are +worth a thought. Unfortunately this is far from the truth. I +let you think about why. + +One such ``benign'' design decision is how the memory is laid +out into different regions for each process. -\bigskip -For buffer overflow attacks to work a number of innocent -design decisions, which are benign on their own, need to -conspire against you. One such design decision is how the -memory is laid out for each process. 
+\begin{center} + \begin{tikzpicture}[scale=0.7] + %\draw[step=1cm] (-3,-3) grid (3,3); + \draw[line width=1mm] (-2, -3) rectangle (2,3); + \draw[line width=1mm] (-2,1) -- (2,1); + \draw[line width=1mm] (-2,-1) -- (2,-1); + \draw (0,2) node {\large\tt text}; + \draw (0,0) node {\large\tt heap}; + \draw (0,-2) node {\large\tt stack}; + + \draw (-2.7,3) node[anchor=north east] {\tt\begin{tabular}{@{}l@{}}lower\\ address\end{tabular}}; + \draw (-2.7,-3) node[anchor=south east] {\tt\begin{tabular}{@{}l@{}}higher\\ address\end{tabular}}; + \draw[->, line width=1mm] (-2.5,3) -- (-2.5,-3); + + \draw (2.7,-2) node[anchor=west] {\tt grows}; + \draw (2.7,-3) node[anchor=south west] {\tt\footnotesize older}; + \draw (2.7,-1) node[anchor=north west] {\tt\footnotesize newer}; + \draw[|->, line width=1mm] (2.5,-3) -- (2.5,-1); + \end{tikzpicture} +\end{center} + +\noindent The text region contains the program code (usually +this region is read-only). The heap stores all data the +programmer explicitly allocates. For us the most interesting +region is the stack, which contains data mostly associated +with the ``control flow'' of the program. Notice that the stack +grows from higher addresses to lower addresses. That means +that older items on the stack will be stored behind newer +items. Let's look a bit closer at what happens with the stack. +Consider the following trivial C program. + +\lstinputlisting[language=C]{../progs/example1.c} + +\noindent The main function calls \code{foo} with three +arguments. Foo contains two (local) buffers. The interesting +point is: what will the stack look like after Line 3 has been +executed? 
The answer is as follows: + +\begin{center} + \begin{tikzpicture}[scale=0.65] + \draw[gray!20,fill=gray!20] (-5, 0) rectangle (-3,-1); + \draw[line width=1mm] (-5,-1.2) -- (-5,0.2); + \draw[line width=1mm] (-3,-1.2) -- (-3,0.2); + \draw (-4,-1) node[anchor=south] {\tt main}; + \draw[line width=1mm] (-5,0) -- (-3,0); + + \draw[gray!20,fill=gray!20] (3, 0) rectangle (5,-1); + \draw[line width=1mm] (3,-1.2) -- (3,0.2); + \draw[line width=1mm] (5,-1.2) -- (5,0.2); + \draw (4,-1) node[anchor=south] {\tt main}; + \draw[line width=1mm] (3,0) -- (5,0); + + %\draw[step=1cm] (-3,-1) grid (3,8); + \draw[gray!20,fill=gray!20] (-1, 0) rectangle (1,-1); + \draw[line width=1mm] (-1,-1.2) -- (-1,7.4); + \draw[line width=1mm] ( 1,-1.2) -- ( 1,7.4); + \draw (0,-1) node[anchor=south] {\tt main}; + \draw[line width=1mm] (-1,0) -- (1,0); + \draw (0,0) node[anchor=south] {\tt arg$_3$=3}; + \draw[line width=1mm] (-1,1) -- (1,1); + \draw (0,1) node[anchor=south] {\tt arg$_2$=2}; + \draw[line width=1mm] (-1,2) -- (1,2); + \draw (0,2) node[anchor=south] {\tt arg$_1$=1}; + \draw[line width=1mm] (-1,3) -- (1,3); + \draw (0,3.1) node[anchor=south] {\tt ret}; + \draw[line width=1mm] (-1,4) -- (1,4); + \draw (0,4) node[anchor=south] {\small\tt last sp}; + \draw[line width=1mm] (-1,5) -- (1,5); + \draw (0,5) node[anchor=south] {\tt buf$_1$}; + \draw[line width=1mm] (-1,6) -- (1,6); + \draw (0,6) node[anchor=south] {\tt buf$_2$}; + \draw[line width=1mm] (-1,7) -- (1,7); + + \draw[->,line width=0.5mm] (1,4.5) -- (1.8,4.5) -- (1.8, 0) -- (1.1,0); + \draw[->,line width=0.5mm] (1,3.5) -- (2.5,3.5); + \draw (2.6,3.1) node[anchor=south west] {\tt back to main()}; +\end{tikzpicture} +\end{center} + +\noindent On the left is the stack before \code{foo} is +called; on the right is the stack after \code{foo} finishes. +The function call to \code{foo} in Line 7 pushes the arguments +onto the stack in reverse order---shown in the middle. +Therefore first 3 then 2 and finally 1. 
Then it pushes the +return address onto the stack where execution should resume once +\code{foo} has finished. The last stack pointer (\code{sp}) is +needed in order to clean up the stack to the last level---in +fact there is no cleaning involved, but just the top of the +stack will be set back. The two buffers are also on the stack, +because they are local data within \code{foo}. + + +Another part of the ``conspiracy'' is that library functions +in C typically look as follows: + +\begin{center} +\lstinputlisting[language=C,numbers=none]{../progs/app5.c} +\end{center} + +\noindent This function copies data from a source \pcode{src} +to a destination \pcode{dst}. It copies the data until it +reaches a zero-byte (\code{"\\0"}). + +\bigskip\bigskip +\subsubsection*{A Crash-Course on GDB} + +\begin{itemize} +\item \texttt{(l)ist n} -- lists the source file from line +\texttt{n} +\item \texttt{disassemble fun-name} -- shows the assembly code of the function +\item \texttt{run} -- starts the program +\item \texttt{(b)reak line-number} -- sets a breakpoint +\item \texttt{(c)ontinue} -- continues execution until the next +breakpoint in a line number + +\item \texttt{x/nxw addr} -- prints out \texttt{n} words starting +from address \pcode{addr}; the address could be \code{$esp} +for looking at the content of the stack +\item \texttt{x/nxb addr} -- prints out \texttt{n} bytes +\end{itemize} + \bigskip\bigskip \noindent If you want to know more about buffer overflow attacks, the original Phrack article @@ -38,6 +169,16 @@ \begin{center} \url{http://phrack.org/issues/49/14.html} \end{center} + +\noindent This is an article from 1996 and some parts are +not up-to-date anymore. The article called +``Smashing the Stack in 2010'' + +\begin{center} +\url{http://www.mgraziano.info/docs/stsi2010.pdf} +\end{center} + +\noindent updates, as the name says, most information to 2010. 
\end{document} diff -r 88416b7df38c -r 0105257429f3 slides/slides03.pdf Binary file slides/slides03.pdf has changed diff -r 88416b7df38c -r 0105257429f3 slides/slides03.tex --- a/slides/slides03.tex Mon Oct 06 00:57:44 2014 +0100 +++ b/slides/slides03.tex Mon Oct 06 02:16:53 2014 +0100 @@ -524,19 +524,65 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] +\frametitle{The Stack} + +\begin{textblock}{7}(1,1) + \begin{tikzpicture}[scale=0.8] + %\draw[step=1cm] (-3,-1) grid (3,8); + \draw[gray!20,fill=gray!20] (-1, 0) rectangle (1,-1); + \draw[line width=1mm] (-1,-1.2) -- (-1,7.4); + \draw[line width=1mm] ( 1,-1.2) -- ( 1,7.4); + \draw (0,-1) node[anchor=south] {\tt main}; + \draw[line width=1mm] (-1,0) -- (1,0); + \draw (0,0) node[anchor=south] {\tt arg$_3$=3}; + \draw[line width=1mm] (-1,1) -- (1,1); + \draw (0,1) node[anchor=south] {\tt arg$_2$=2}; + \draw[line width=1mm] (-1,2) -- (1,2); + \draw (0,2) node[anchor=south] {\tt arg$_1$=1}; + \draw[line width=1mm] (-1,3) -- (1,3); + \draw (0,3.1) node[anchor=south] {\tt ret}; + \draw[line width=1mm] (-1,4) -- (1,4); + \draw (0,4) node[anchor=south] {\small\tt last sp}; + \draw[line width=1mm] (-1,5) -- (1,5); + \draw (0,5) node[anchor=south] {\tt buf$_1$}; + \draw[line width=1mm] (-1,6) -- (1,6); + \draw (0,6) node[anchor=south] {\tt buf$_2$}; + \draw[line width=1mm] (-1,7) -- (1,7); + \draw (2,6.1) node[anchor=south] {\code{$esp}}; + \draw[<-,line width=0.5mm] (1.1,7) -- (2.5,7); + + \draw[->,line width=0.5mm] (1,4.5) -- (1.8,4.5) -- (1.8, 0) -- (1.1,0); + \draw[->,line width=0.5mm] (1,3.5) -- (2.5,3.5); + \draw (2.6,3.1) node[anchor=south west] {\tt back to main()}; + + \draw[->,red,line width=2mm] (2.5,0.1) -- (4.2,0.1); +\end{tikzpicture} +\end{textblock} + +\begin{textblock}{7}(6.4,8) +\begin{bubble}[6.8cm] +\footnotesize +\lstinputlisting[language=C,xleftmargin=5mm]{../progs/example1.c} +\end{bubble} +\end{textblock} + + +\end{frame} 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] \frametitle{The Problem} \begin{itemize} \item The basic problem is that library routines in C look as follows: +\end{itemize} \begin{center} -\footnotesize\lstinputlisting[language=C]{../progs/app5.c} +\small\lstinputlisting[language=C,numbers=none]{../progs/app5.c} \end{center} -\item the resulting problems are often remotely exploitable -\item can be used to circumvents all access control\\ -(for grooming botnets for further attacks) -\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%