updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Sat, 19 Dec 2020 00:13:58 +0000
changeset 820 7fd1f611c21d
parent 819 fd88a0656164
child 821 f914b9476dc7
updated
cws/cw05.pdf
cws/cw05.tex
langs.sty
progs/fun2/mand.fun
progs/fun2/sqr.fun
progs/fun2/sqr.ll
Binary file cws/cw05.pdf has changed
--- a/cws/cw05.tex	Thu Dec 17 17:43:40 2020 +0000
+++ b/cws/cw05.tex	Sat Dec 19 00:13:58 2020 +0000
@@ -1,22 +1,36 @@
 % !TEX program = xelatex
 \documentclass{article}
 \usepackage{../style}
+\usepackage{../graphics}
 \usepackage{../langs}
 
 \begin{document}
 
-\section*{Coursework 5}
+\section*{Coursework 5\footnote{\today}}
 
 
 
 \noindent This coursework is worth 12\% and is due on \cwFIVE{} at
-18:00. You are asked to implement a compiler targetting the LLVM-IR.
-You can do the implementation in any programming
+18:00. You are asked to implement a compiler targeting the LLVM-IR.
+Be careful that this CW needs some material about the LLVM-IR
+that has not been shown in the lectures and your own experiments
+might be required. You can find information about the LLVM-IR at
+
+\begin{itemize}
+\item \url{https://bit.ly/3rheZYr}
+\item \url{https://llvm.org/docs/LangRef.html}  
+\end{itemize}  
+
+\noindent
+You can do the implementation of your compiler in any programming
 language you like, but you need to submit the source code with which
-you answered the questions, otherwise a mark of 0\% will be
-awarded. You should use the lexer from the previous coursework for the
-parser.  Please package everything(!) in a zip-file that creates a
-directory with the name \texttt{YournameYourFamilyname} on my end.
+you generated the LLVM-IR files, otherwise a mark of 0\% will be
+awarded. You should use the lexer and parser from the previous
+courseworks, but you need to make some modifications to them for the
+`typed' fun-language. I will award up to 4\% if a lexer and parser are
+implemented. At the end, please package everything(!) in a zip-file
+that creates a directory with the name \texttt{YournameYourFamilyname}
+on my end.
 
 \subsection*{Disclaimer\alert}
 
@@ -27,11 +41,186 @@
 CW~4.
 
 
-\subsection*{Question 1}
+\subsection*{Task}
+
+The goal is to lex and parse the Mandelbrot program shown in
+Figure~\ref{mand} and generate corresponding code for the
+LLVM-IR. Unfortunately the calculations for the Mandelbrot set require
+floating point arithmetic and therefore we cannot be as simple-minded
+about types as we have been so far (remember the LLVM-IR is a
+fully-typed language and needs to know the exact types of each
+expression). The idea is to deal appropriately with three types,
+namely \texttt{Int}, \texttt{Double} and \texttt{Void} (they are
+represented in the LLVM-IR as \texttt{i32}, \texttt{double} and
+\texttt{void}). You need to extend the lexer and parser accordingly in
+order to deal with type annotations. The Fun-language includes global
+constants, such as
+
+\begin{lstlisting}[numbers=none]
+  val Ymin: Double = -1.3;
+  val Maxiters: Int = 1000;
+\end{lstlisting}
+
+\noindent
+where you want to assume that they are `normal' identifiers, just
+starting with a capital letter---all other identifiers should have
+lower-case letters. Function definitions can take arguments of
+type \texttt{Int} or \texttt{Double}, and need to specify a return
+type, which can be \texttt{Void}, for example
+
+\begin{lstlisting}[numbers=none]
+  def foo(n: Int, x: Double) : Double = ...
+  def bar() : Void = ...
+\end{lstlisting}
+
+\noindent
+The idea is to record all typing information that is given
+in the program, but then delay any further typing inference to
+after the CPS-translation. That means the parser should
+generate ASTs given by the Scala dataypes:
+
+\begin{lstlisting}[numbers=none,language=Scala]
+abstract class Exp 
+abstract class BExp  
+abstract class Decl 
+
+case class Def(name: String, args: List[(String, String)],
+               ty: String, body: Exp) extends Decl
+case class Main(e: Exp) extends Decl
+case class Const(name: String, v: Int) extends Decl
+case class FConst(name: String, x: Float) extends Decl
+
+case class Call(name: String, args: List[Exp]) extends Exp
+case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
+case class Var(s: String) extends Exp
+case class Num(i: Int) extends Exp    // integer numbers
+case class FNum(i: Float) extends Exp // floating numbers
+case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
+case class Sequence(e1: Exp, e2: Exp) extends Exp
+case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
+\end{lstlisting}
+
+\noindent
+This datatype distinguishes whether the global constant is an integer
+constant or floating constant. Also a function definition needs to
+record the return type of the function, namely the argument
+\texttt{ty} in \texttt{Def}, and the arguments consist of an pairs of
+identifier names and types (\texttt{Int} or \texttt{Double}). The hard
+part of the CW is to design the K-intermediate language and infer all
+necessary types in order to generate LLVM-IR code. You can check
+your LLVM-IR code by running it with the interpreter \texttt{lli}.
+
+\begin{figure}[t]
+\lstinputlisting[language=Scala]{../progs/fun2/mand.fun}
+\caption{The Mandelbrot program in the `typed' Fun-language.\label{mand}}
+\end{figure}
+
+\begin{figure}[t]
+\includegraphics[scale=0.35]{../progs/fun2/out.png}
+\caption{Ascii output of the Mandelbrot program.\label{mand}}
+\end{figure}
+
+\subsection*{LLVM-IR}
+
+There are some subtleties in the LLVM-IR you need to be aware of:
+
+\begin{itemize}
+\item \textbf{Global constants}: While global constants such as
+
+\begin{lstlisting}[numbers=none]  
+val Max : Int = 10;
+\end{lstlisting}
 
-\subsection*{Question 2}
+\noindent
+can be easily defined in the LLVM-IR as follows
+
+\begin{lstlisting}[numbers=none]  
+@Max = global i32 10
+\end{lstlisting}
+
+\noindent
+they cannot easily be referenced. If you want to use
+this constant then you need to generate code such as
+
+\begin{lstlisting}[numbers=none]  
+%tmp_22 = load i32, i32* @Max
+\end{lstlisting}
+
+\noindent
+first, which treats \texttt{@Max} as an Integer-pointer (type
+\texttt{i32*}) that needs to be loaded into a local variable,
+here \texttt{\%tmp\_22}.
+
+\item \textbf{Void-Functions}: While integer and double functions
+  can easily be called and their results can be allocated to a
+  temporary variable:
+
+  \begin{lstlisting}[numbers=none]  
+   %tmp_23 = call i32 @sqr (i32 %n)
+  \end{lstlisting}
+
+  void-functions cannot be allocated to a variable. They need to be
+  called just as
+
+  \begin{lstlisting}[numbers=none]  
+  call void @print_int (i32 %tmp_23)
+\end{lstlisting}
+
+\item \textbf{Floating-Point Operations}: While integer operations
+  are specified in the LLVM-IR as
 
-\subsection*{Question 3}
+  \begin{lstlisting}[numbers=none,language=Scala]
+  def compile_op(op: String) = op match {
+    case "+" => "add i32 "
+    case "*" => "mul i32 "
+    case "-" => "sub i32 "
+    case "==" => "icmp eq i32 "
+    case "<=" => "icmp sle i32 " // signed less or equal
+    case "<"  => "icmp slt i32 " // signed less than
+  }\end{lstlisting}
+
+  the corresponding operations on doubles are
+
+  \begin{lstlisting}[numbers=none,language=Scala]
+  def compile_dop(op: String) = op match {
+    case "+" => "fadd double "
+    case "*" => "fmul double "
+    case "-" => "fsub double "
+    case "==" => "fcmp oeq double "
+    case "<=" => "fcmp ole double "   
+    case "<"  => "fcmp olt double "   
+  }\end{lstlisting}
+
+\item \textbf{Typing}: In order to leave the CPS-translations
+  as is, it makes sense to defer the full type-inference to the
+  K-intermediate-language. For this it is good to define
+  the \texttt{KVar} constructor as
+
+\begin{lstlisting}[numbers=none,language=Scala]  
+case class KVar(s: String, ty: Ty = "UNDEF") extends KVal\end{lstlisting}
+
+  where first a default type, for example \texttt{UNDEF}, is
+  given. Then you need to define two typing functions
+
+  \begin{lstlisting}[numbers=none,language=Scala]  
+    def typ_val(v: KVal, ts: TyEnv) = ???
+    def typ_exp(a: KExp, ts: TyEnv) = ???
+  \end{lstlisting}
+
+  Both functions require a typing-environment that updates
+  the information about what type each variable, operation
+  and so on receives. Once the types are inferred, the
+  LLVM-IR code can be generated. Since we are dealing only
+  with simple first-order functions, nothing on the scale
+  as the `Hindley-Milner' typing-algorithm is needed. I suggest
+  to just look at what data is avaliable and generate all
+  missing information by simple means.
+
+\item \textbf{Build-In Functions}: The `prelude' comes
+  with several build-in functions: \texttt{new\_line()},
+  \texttt{skip}, \texttt{print\_int(n)}, \texttt{print\_space()}
+  and \texttt{print\_star()}.
+\end{itemize}  
 
 \end{document}
 
--- a/langs.sty	Thu Dec 17 17:43:40 2020 +0000
+++ b/langs.sty	Sat Dec 19 00:13:58 2020 +0000
@@ -36,7 +36,7 @@
 \AfterEndEnvironment{lstlisting}{\end{minipage}\par}
 
 \lstdefinelanguage{Scala}{
-  morekeywords={abstract,case,catch,class,def,%
+  morekeywords={abstract,then,case,catch,class,def,%
     do,else,extends,false,final,finally,%
     for,if,implicit,import,match,mixin,%
     new,null,object,override,package,%
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun2/mand.fun	Sat Dec 19 00:13:58 2020 +0000
@@ -0,0 +1,36 @@
+// Mandelbrot program
+
+val Ymin: Double = -1.3;
+val Ymax: Double =  1.3;
+val Ystep: Double = 0.05;  //0.025;
+
+val Xmin: Double = -2.1;
+val Xmax: Double =  1.1;
+val Xstep: Double = 0.02;  //0.01;
+
+val Maxiters: Int = 1000;
+
+def m_iter(m: Int, x: Double, y: Double,
+                   zr: Double, zi: Double) : Void = {
+  if Maxiters <= m
+  then print_star() 
+  else {
+    if 4.0 <= zi*zi+zr*zr then print_space() 
+    else m_iter(m + 1, x, y, x+zr*zr-zi*zi, 2.0*zr*zi+y) 
+  }
+};
+
+def x_iter(x: Double, y: Double) : Void = {
+  if x <= Xmax
+  then { m_iter(0, x, y, 0.0, 0.0) ; x_iter(x + Xstep, y) }
+  else skip()
+};
+
+def y_iter(y: Double) : Void = {
+  if y <= Ymax
+  then { x_iter(Xmin, y) ; new_line() ; y_iter(y + Ystep) }
+  else skip() 
+};    
+
+
+y_iter(Ymin)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun2/sqr.fun	Sat Dec 19 00:13:58 2020 +0000
@@ -0,0 +1,12 @@
+val Max : Int = 10;
+
+def sqr(x: Int) : Int = x * x;
+
+def all(n: Int) : Void = {
+  if n <= Max
+  then print_int(sqr(n)) ; all(n + 1)
+  else skip()
+};
+
+all(0)
+ 
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun2/sqr.ll	Sat Dec 19 00:13:58 2020 +0000
@@ -0,0 +1,67 @@
+
+declare i32 @printf(i8*, ...)
+
+@.str_nl = private constant [2 x i8] c"\0A\00"
+@.str_star = private constant [2 x i8] c"*\00"
+@.str_space = private constant [2 x i8] c" \00"
+
+define void @new_line() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
+  %1 = call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
+
+define void @print_star() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
+  %1 = call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
+
+define void @print_space() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
+  %1 = call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
+
+define void @skip() #0 {
+  ret void
+}
+
+@.str = private constant [4 x i8] c"%d\0A\00"
+
+define void @print_int(i32 %x) {
+   %t0 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0
+   call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
+   ret void
+}
+
+; END OF BUILD-IN FUNCTIONS (prelude)
+@Max = global i32 10
+
+define i32 @sqr (i32 %x) {
+   %tmp_20 = mul i32  %x, %x
+   ret i32 %tmp_20
+}
+
+define void @all (i32 %n) {
+   %tmp_22 = load i32, i32* @Max
+   %tmp_21 = icmp sle i32  %n, %tmp_22
+   br i1 %tmp_21, label %if_branch_28, label %else_branch_29
+
+if_branch_28:
+   %tmp_23 = call i32 @sqr (i32 %n)
+   call void @print_int (i32 %tmp_23)
+   %tmp_25 = add i32  %n, 1
+   call void @all (i32 %tmp_25)
+   ret void
+
+else_branch_29:
+   call void @skip ()
+   ret void
+}
+
+define i32 @main() {
+   call void @all (i32 0)
+   ret i32 0
+}
+