# HG changeset patch # User Christian Urban # Date 1447686977 0 # Node ID a65767fe5d71d2520afd921e96ef8d3a09423cff # Parent 43c0ed4737208d1353e9fcab6291ce1821e608be updated diff -r 43c0ed473720 -r a65767fe5d71 handouts/ho07.tex --- a/handouts/ho07.tex Sun Nov 15 21:31:31 2015 +0000 +++ b/handouts/ho07.tex Mon Nov 16 15:16:17 2015 +0000 @@ -1,11 +1,12 @@ \documentclass{article} \usepackage{../style} \usepackage{../langs} - +\usepackage{../grammar} +\usepackage{tikz-qtree} \begin{document} -\section*{Handout 7 (Compilation)} +\section*{Handout 7 (Compilation of the WHILE-Language)} The purpose of a compiler is to transform a program, a human can write, into code the machine can run as fast as possible. @@ -19,8 +20,142 @@ We will be generating code for the Java Virtual Machine. This is a stack-based virtual machine which will make it easy to -generate code for arithmetic expressions. Recall -that our +generate code for arithmetic expressions. For example for +generating code for the expression $1 + 2$ we need to issue +the following three instructions + +\begin{lstlisting}[numbers=none] +ldc 1 +ldc 2 +iadd +\end{lstlisting} + +\noindent The first instruction loads the constant $1$ on the +stack, the next one $2$, the third instruction add both +numbers together replacing the top elements of the stack with +the result $3$. We will throughout consider only integer +numbers and results, therefore we can use the instructions +\code{iadd}, \code{isub}, \code{imul}, \code{idiv} and so on. +The \code{i} stands for integer instructions (alternatives are +\code{d} for doubles, \code{l} for longs and \code{f} for +floats). + +Recall our grammar for arithmetic expressions: + + +\begin{plstx}[rhs style=, margin=3cm] : \meta{E} ::= \meta{T} $+$ \meta{E} + | \meta{T} $-$ \meta{E} + | \meta{T}\\ +: \meta{T} ::= \meta{F} $*$ \meta{T} + | \meta{F} $\backslash$ \meta{T} + | \meta{F}\\ +: \meta{F} ::= ( \meta{E} ) + | \meta{Id} + | \meta{Num}\\ \end{plstx} + + +\noindent where \meta{Id} stands for variables and +\meta{Num} for number. For the moment let us omit variables from +arithmetic expressions. Our parser will take this grammar and +produce abstract syntax trees, for +example for the expression $1 + ((2 * 3) + (4 - 3))$ it will +produce the following tree. + +\begin{center} +\begin{tikzpicture} \Tree [.$+$ [.$1$ ] [.$+$ [.$*$ $2$ $3$ ] [.$-$ $4$ $3$ ]]] \end{tikzpicture} +\end{center} + +\noindent +To generate code for this expression, we need to traverse this +tree in post-order fashion---this will produce code for a +stack-machine, like the JVM. Doing so gives the instructions + +\begin{lstlisting}[numbers=none] +ldc 1 +ldc 2 +ldc 3 +imul +ldc 4 +ldc 3 +isub +iadd +iadd +\end{lstlisting} + +\noindent If we ``run'' these instructions, the result $8$ +will be on top of the stack. This will be a convention we +always observe, namely that the results of arithmetic +expressions will always be on top of the stack. Note, that a +different bracketing, for example $(1 + (2 * 3)) + (4 - 3)$, +produces a different abstract syntax tree and thus potentially +also a different list of instructions. Generating code in this +fashion is rather simple: it can be done by the following +\textit{compile}-function: + +\begin{center} +\begin{tabular}{lcl} +$\textit{compile}(n)$ & $\dn$ & $\pcode{ldc}\; n$\\ +$\textit{compile}(a_1 + a_2)$ & $\dn$ & +$\textit{compile}(a_1) \;@\;\textit{compile}(a_2)\;@\; \pcode{iadd}$\\ +$\textit{compile}(a_1 - a_2)$ & $\dn$ & +$\textit{compile}(a_1) \;@\; \textit{compile}(a_2)\;@\; \pcode{isub}$\\ +$\textit{compile}(a_1 * a_2)$ & $\dn$ & +$\textit{compile}(a_1) \;@\; \textit{compile}(a_2)\;@\; \pcode{imul}$\\ +$\textit{compile}(a_1 \backslash a_2)$ & $\dn$ & +$\textit{compile}(a_1) \;@\; \textit{compile}(a_2)\;@\; \pcode{idiv}$\\ +\end{tabular} +\end{center} + +\noindent However, our arithmetic expressions can also contain +variables. We will represent them as \emph{local variables} in +the JVM. Essentially, local variables are an array or pointers +to the memory containing in our case only integers. Looking up +a variable can be done by the instruction + +\begin{lstlisting}[mathescape,numbers=none] +iload $index$ +\end{lstlisting} + +\noindent +which places the content of the local variable $index$ onto +thestack. Storing the top of the stack into a local variable +can be done by the instruction + +\begin{lstlisting}[mathescape,numbers=none] +istore $index$ +\end{lstlisting} + +\noindent Note that this also pops off the top of the stack. +One problem we have to overcome is that local variables are +addressed, not by identifiers, but by numbers (starting from +$0$). Therefore our compiler needs to maintain a kind of +environment (similar to the interpreter) where variables are +associated to numbers. This association needs to be unique: if +we muddle up the numbers, then we essentially confuse +variables and the result will usually be an erroneous result. +Therefore our \textit{compile}-function will take two +arguments: the abstract syntax tree and the environment, $E$, +that maps identifiers to index-numbers. + +\begin{center} +\begin{tabular}{lcl} +$\textit{compile}(n, E)$ & $\dn$ & $\pcode{ldc}\;n$\\ +$\textit{compile}(a_1 + a_2, E)$ & $\dn$ & +$\textit{compile}(a_1, E) \;@\;\textit{compile}(a_2, E)\;@\; \pcode{iadd}$\\ +$\textit{compile}(a_1 - a_2, E)$ & $\dn$ & +$\textit{compile}(a_1, E) \;@\; \textit{compile}(a_2, E)\;@\; \pcode{isub}$\\ +$\textit{compile}(a_1 * a_2, E)$ & $\dn$ & +$\textit{compile}(a_1, E) \;@\; \textit{compile}(a_2, E)\;@\; \pcode{imul}$\\ +$\textit{compile}(a_1 \backslash a_2, E)$ & $\dn$ & +$\textit{compile}(a_1, E) \;@\; \textit{compile}(a_2, E)\;@\; \pcode{idiv}$\\ +$\textit{compile}(x, E)$ & $\dn$ & $\pcode{iload}\;E(x)$\\ +\end{tabular} +\end{center} + +\noindent In the last line we generate the code for variables +where $E(x)$ stands for looking up to which index the variable +$x$ maps to. + \end{document} diff -r 43c0ed473720 -r a65767fe5d71 slides/slides05.tex --- a/slides/slides05.tex Sun Nov 15 21:31:31 2015 +0000 +++ b/slides/slides05.tex Mon Nov 16 15:16:17 2015 +0000 @@ -1038,7 +1038,7 @@ \frametitle{While-Language} \mbox{}\\[-23mm]\mbox{} -\bl{\begin{plstx}[rhs style=,one per line] : \meta{Stmt} ::= skip +\bl{\begin{plstx}[rhs style=,one per line]: \meta{Stmt} ::= skip | \meta{Id} := \meta{AExp} | if \meta{BExp} then \meta{Block} else \meta{Block} | while \meta{BExp} do \meta{Block}\\ @@ -1047,7 +1047,7 @@ : \meta{Block} ::= \{ \meta{Stmts} \} | \meta{Stmt}\\ : \meta{AExp} ::= \ldots\\ -: \meta{BExp} ::= \ldots\\ \end{plstx}} +: \meta{BExp} ::= \ldots\\\end{plstx}} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff -r 43c0ed473720 -r a65767fe5d71 slides/slides07.tex --- a/slides/slides07.tex Sun Nov 15 21:31:31 2015 +0000 +++ b/slides/slides07.tex Mon Nov 16 15:16:17 2015 +0000 @@ -281,9 +281,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} +\frametitle{Compiling AExps} \begin{center} \bl{\begin{tabular}{@{}lcl@{}} @@ -295,9 +294,9 @@ $\text{compile}(a_1 * a_2)$ & $\dn$\\ \multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\; \text{compile}(a_2)\;@\; \text{imul}$}\smallskip\\ \end{tabular}} -\end{center}\pause +\end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -325,9 +324,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[t] -\frametitle{\begin{tabular}{c}Variables\end{tabular}} +\frametitle{Variables} {\Large\bl{$x := 5 + y * 2$}}\bigskip\pause @@ -343,28 +341,27 @@ \bl{$\text{compile}(a, E)$} \end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} +\frametitle{Compiling AExps} \begin{center} \bl{\begin{tabular}{@{}lcl@{}} $\text{compile}(n, E)$ & $\dn$ & $\text{ldc}\;n$\\ $\text{compile}(a_1 + a_2, E)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\;\text{compile}(a_2. E)\;@\; \text{iadd}$}\smallskip\\ +\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\;\text{compile}(a_2, E)\;@\; \text{iadd}$}\smallskip\\ $\text{compile}(a_1 - a_2, E)$ & $\dn$\\ \multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\; \text{compile}(a_2, E)\;@\; \text{isub}$}\smallskip\\ $\text{compile}(a_1 * a_2, E)$ & $\dn$\\ \multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\; \text{compile}(a_2, E)\;@\; \text{imul}$}\bigskip\\ $\text{compile}(x, E)$ & $\dn$ & $\text{iload}\;E(x)$\\ \end{tabular}} -\end{center}\pause +\end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%