# HG changeset patch # User Christian Urban # Date 1608336838 0 # Node ID 7fd1f611c21d2f50c29e285e867125075771ed06 # Parent fd88a06561641b3cc17db2d6436b1b221c225186 updated diff -r fd88a0656164 -r 7fd1f611c21d cws/cw05.pdf Binary file cws/cw05.pdf has changed diff -r fd88a0656164 -r 7fd1f611c21d cws/cw05.tex --- a/cws/cw05.tex Thu Dec 17 17:43:40 2020 +0000 +++ b/cws/cw05.tex Sat Dec 19 00:13:58 2020 +0000 @@ -1,22 +1,36 @@ % !TEX program = xelatex \documentclass{article} \usepackage{../style} +\usepackage{../graphics} \usepackage{../langs} \begin{document} -\section*{Coursework 5} +\section*{Coursework 5\footnote{\today}} \noindent This coursework is worth 12\% and is due on \cwFIVE{} at -18:00. You are asked to implement a compiler targetting the LLVM-IR. -You can do the implementation in any programming +18:00. You are asked to implement a compiler targeting the LLVM-IR. +Be careful that this CW needs some material about the LLVM-IR +that has not been shown in the lectures and your own experiments +might be required. You can find information about the LLVM-IR at + +\begin{itemize} +\item \url{https://bit.ly/3rheZYr} +\item \url{https://llvm.org/docs/LangRef.html} +\end{itemize} + +\noindent +You can do the implementation of your compiler in any programming language you like, but you need to submit the source code with which -you answered the questions, otherwise a mark of 0\% will be -awarded. You should use the lexer from the previous coursework for the -parser. Please package everything(!) in a zip-file that creates a -directory with the name \texttt{YournameYourFamilyname} on my end. +you generated the LLVM-IR files, otherwise a mark of 0\% will be +awarded. You should use the lexer and parser from the previous +courseworks, but you need to make some modifications to them for the +`typed' fun-language. I will award up to 4\% if a lexer and parser are +implemented. At the end, please package everything(!) in a zip-file +that creates a directory with the name \texttt{YournameYourFamilyname} +on my end. \subsection*{Disclaimer\alert} @@ -27,11 +41,186 @@ CW~4. -\subsection*{Question 1} +\subsection*{Task} + +The goal is to lex and parse the Mandelbrot program shown in +Figure~\ref{mand} and generate corresponding code for the +LLVM-IR. Unfortunately the calculations for the Mandelbrot set require +floating point arithmetic and therefore we cannot be as simple-minded +about types as we have been so far (remember the LLVM-IR is a +fully-typed language and needs to know the exact types of each +expression). The idea is to deal appropriately with three types, +namely \texttt{Int}, \texttt{Double} and \texttt{Void} (they are +represented in the LLVM-IR as \texttt{i32}, \texttt{double} and +\texttt{void}). You need to extend the lexer and parser accordingly in +order to deal with type annotations. The Fun-language includes global +constants, such as + +\begin{lstlisting}[numbers=none] + val Ymin: Double = -1.3; + val Maxiters: Int = 1000; +\end{lstlisting} + +\noindent +where you want to assume that they are `normal' identifiers, just +starting with a capital letter---all other identifiers should have +lower-case letters. Function definitions can take arguments of +type \texttt{Int} or \texttt{Double}, and need to specify a return +type, which can be \texttt{Void}, for example + +\begin{lstlisting}[numbers=none] + def foo(n: Int, x: Double) : Double = ... + def bar() : Void = ... +\end{lstlisting} + +\noindent +The idea is to record all typing information that is given +in the program, but then delay any further typing inference to +after the CPS-translation. That means the parser should +generate ASTs given by the Scala dataypes: + +\begin{lstlisting}[numbers=none,language=Scala] +abstract class Exp +abstract class BExp +abstract class Decl + +case class Def(name: String, args: List[(String, String)], + ty: String, body: Exp) extends Decl +case class Main(e: Exp) extends Decl +case class Const(name: String, v: Int) extends Decl +case class FConst(name: String, x: Float) extends Decl + +case class Call(name: String, args: List[Exp]) extends Exp +case class If(a: BExp, e1: Exp, e2: Exp) extends Exp +case class Var(s: String) extends Exp +case class Num(i: Int) extends Exp // integer numbers +case class FNum(i: Float) extends Exp // floating numbers +case class Aop(o: String, a1: Exp, a2: Exp) extends Exp +case class Sequence(e1: Exp, e2: Exp) extends Exp +case class Bop(o: String, a1: Exp, a2: Exp) extends BExp +\end{lstlisting} + +\noindent +This datatype distinguishes whether the global constant is an integer +constant or floating constant. Also a function definition needs to +record the return type of the function, namely the argument +\texttt{ty} in \texttt{Def}, and the arguments consist of an pairs of +identifier names and types (\texttt{Int} or \texttt{Double}). The hard +part of the CW is to design the K-intermediate language and infer all +necessary types in order to generate LLVM-IR code. You can check +your LLVM-IR code by running it with the interpreter \texttt{lli}. + +\begin{figure}[t] +\lstinputlisting[language=Scala]{../progs/fun2/mand.fun} +\caption{The Mandelbrot program in the `typed' Fun-language.\label{mand}} +\end{figure} + +\begin{figure}[t] +\includegraphics[scale=0.35]{../progs/fun2/out.png} +\caption{Ascii output of the Mandelbrot program.\label{mand}} +\end{figure} + +\subsection*{LLVM-IR} + +There are some subtleties in the LLVM-IR you need to be aware of: + +\begin{itemize} +\item \textbf{Global constants}: While global constants such as + +\begin{lstlisting}[numbers=none] +val Max : Int = 10; +\end{lstlisting} -\subsection*{Question 2} +\noindent +can be easily defined in the LLVM-IR as follows + +\begin{lstlisting}[numbers=none] +@Max = global i32 10 +\end{lstlisting} + +\noindent +they cannot easily be referenced. If you want to use +this constant then you need to generate code such as + +\begin{lstlisting}[numbers=none] +%tmp_22 = load i32, i32* @Max +\end{lstlisting} + +\noindent +first, which treats \texttt{@Max} as an Integer-pointer (type +\texttt{i32*}) that needs to be loaded into a local variable, +here \texttt{\%tmp\_22}. + +\item \textbf{Void-Functions}: While integer and double functions + can easily be called and their results can be allocated to a + temporary variable: + + \begin{lstlisting}[numbers=none] + %tmp_23 = call i32 @sqr (i32 %n) + \end{lstlisting} + + void-functions cannot be allocated to a variable. They need to be + called just as + + \begin{lstlisting}[numbers=none] + call void @print_int (i32 %tmp_23) +\end{lstlisting} + +\item \textbf{Floating-Point Operations}: While integer operations + are specified in the LLVM-IR as -\subsection*{Question 3} + \begin{lstlisting}[numbers=none,language=Scala] + def compile_op(op: String) = op match { + case "+" => "add i32 " + case "*" => "mul i32 " + case "-" => "sub i32 " + case "==" => "icmp eq i32 " + case "<=" => "icmp sle i32 " // signed less or equal + case "<" => "icmp slt i32 " // signed less than + }\end{lstlisting} + + the corresponding operations on doubles are + + \begin{lstlisting}[numbers=none,language=Scala] + def compile_dop(op: String) = op match { + case "+" => "fadd double " + case "*" => "fmul double " + case "-" => "fsub double " + case "==" => "fcmp oeq double " + case "<=" => "fcmp ole double " + case "<" => "fcmp olt double " + }\end{lstlisting} + +\item \textbf{Typing}: In order to leave the CPS-translations + as is, it makes sense to defer the full type-inference to the + K-intermediate-language. For this it is good to define + the \texttt{KVar} constructor as + +\begin{lstlisting}[numbers=none,language=Scala] +case class KVar(s: String, ty: Ty = "UNDEF") extends KVal\end{lstlisting} + + where first a default type, for example \texttt{UNDEF}, is + given. Then you need to define two typing functions + + \begin{lstlisting}[numbers=none,language=Scala] + def typ_val(v: KVal, ts: TyEnv) = ??? + def typ_exp(a: KExp, ts: TyEnv) = ??? + \end{lstlisting} + + Both functions require a typing-environment that updates + the information about what type each variable, operation + and so on receives. Once the types are inferred, the + LLVM-IR code can be generated. Since we are dealing only + with simple first-order functions, nothing on the scale + as the `Hindley-Milner' typing-algorithm is needed. I suggest + to just look at what data is avaliable and generate all + missing information by simple means. + +\item \textbf{Build-In Functions}: The `prelude' comes + with several build-in functions: \texttt{new\_line()}, + \texttt{skip}, \texttt{print\_int(n)}, \texttt{print\_space()} + and \texttt{print\_star()}. +\end{itemize} \end{document} diff -r fd88a0656164 -r 7fd1f611c21d langs.sty --- a/langs.sty Thu Dec 17 17:43:40 2020 +0000 +++ b/langs.sty Sat Dec 19 00:13:58 2020 +0000 @@ -36,7 +36,7 @@ \AfterEndEnvironment{lstlisting}{\end{minipage}\par} \lstdefinelanguage{Scala}{ - morekeywords={abstract,case,catch,class,def,% + morekeywords={abstract,then,case,catch,class,def,% do,else,extends,false,final,finally,% for,if,implicit,import,match,mixin,% new,null,object,override,package,% diff -r fd88a0656164 -r 7fd1f611c21d progs/fun2/mand.fun --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun2/mand.fun Sat Dec 19 00:13:58 2020 +0000 @@ -0,0 +1,36 @@ +// Mandelbrot program + +val Ymin: Double = -1.3; +val Ymax: Double = 1.3; +val Ystep: Double = 0.05; //0.025; + +val Xmin: Double = -2.1; +val Xmax: Double = 1.1; +val Xstep: Double = 0.02; //0.01; + +val Maxiters: Int = 1000; + +def m_iter(m: Int, x: Double, y: Double, + zr: Double, zi: Double) : Void = { + if Maxiters <= m + then print_star() + else { + if 4.0 <= zi*zi+zr*zr then print_space() + else m_iter(m + 1, x, y, x+zr*zr-zi*zi, 2.0*zr*zi+y) + } +}; + +def x_iter(x: Double, y: Double) : Void = { + if x <= Xmax + then { m_iter(0, x, y, 0.0, 0.0) ; x_iter(x + Xstep, y) } + else skip() +}; + +def y_iter(y: Double) : Void = { + if y <= Ymax + then { x_iter(Xmin, y) ; new_line() ; y_iter(y + Ystep) } + else skip() +}; + + +y_iter(Ymin) \ No newline at end of file diff -r fd88a0656164 -r 7fd1f611c21d progs/fun2/sqr.fun --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun2/sqr.fun Sat Dec 19 00:13:58 2020 +0000 @@ -0,0 +1,12 @@ +val Max : Int = 10; + +def sqr(x: Int) : Int = x * x; + +def all(n: Int) : Void = { + if n <= Max + then print_int(sqr(n)) ; all(n + 1) + else skip() +}; + +all(0) + \ No newline at end of file diff -r fd88a0656164 -r 7fd1f611c21d progs/fun2/sqr.ll --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun2/sqr.ll Sat Dec 19 00:13:58 2020 +0000 @@ -0,0 +1,67 @@ + +declare i32 @printf(i8*, ...) + +@.str_nl = private constant [2 x i8] c"\0A\00" +@.str_star = private constant [2 x i8] c"*\00" +@.str_space = private constant [2 x i8] c" \00" + +define void @new_line() #0 { + %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0 + %1 = call i32 (i8*, ...) @printf(i8* %t0) + ret void +} + +define void @print_star() #0 { + %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0 + %1 = call i32 (i8*, ...) @printf(i8* %t0) + ret void +} + +define void @print_space() #0 { + %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0 + %1 = call i32 (i8*, ...) @printf(i8* %t0) + ret void +} + +define void @skip() #0 { + ret void +} + +@.str = private constant [4 x i8] c"%d\0A\00" + +define void @print_int(i32 %x) { + %t0 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0 + call i32 (i8*, ...) @printf(i8* %t0, i32 %x) + ret void +} + +; END OF BUILD-IN FUNCTIONS (prelude) +@Max = global i32 10 + +define i32 @sqr (i32 %x) { + %tmp_20 = mul i32 %x, %x + ret i32 %tmp_20 +} + +define void @all (i32 %n) { + %tmp_22 = load i32, i32* @Max + %tmp_21 = icmp sle i32 %n, %tmp_22 + br i1 %tmp_21, label %if_branch_28, label %else_branch_29 + +if_branch_28: + %tmp_23 = call i32 @sqr (i32 %n) + call void @print_int (i32 %tmp_23) + %tmp_25 = add i32 %n, 1 + call void @all (i32 %tmp_25) + ret void + +else_branch_29: + call void @skip () + ret void +} + +define i32 @main() { + call void @all (i32 0) + ret i32 0 +} +