| author | Christian Urban <urbanc@in.tum.de> | 
| Sun, 27 Oct 2019 11:46:06 +0000 | |
| changeset 671 | 7457eabb8c76 | 
| parent 657 | 065a119cc511 | 
| child 678 | 6601ff1d9e0a | 
| permissions | -rw-r--r-- | 
| 654 | 1 | // A Small LLVM Compiler for a Simple Functional Language | 
| 644 | 2 | // (includes an external lexer and parser) | 
| 645 | 3 | // | 
| 4 | // call with | |
| 5 | // | |
| 654 | 6 | // scala fun_llvm.scala fact | 
| 645 | 7 | // | 
| 654 | 8 | // scala fun_llvm.scala defs | 
| 9 | // | |
| 655 | 10 | // this will generate a .ll file. You can interpret this file | 
| 11 | // using lli. | |
| 12 | // | |
| 13 | // The optimiser can be invoked as | |
| 14 | // | |
| 15 | // opt -O1 -S in_file.ll > out_file.ll | |
| 16 | // opt -O3 -S in_file.ll > out_file.ll | |
| 17 | // | |
| 18 | // The code produced for the various architectures can be obtained with | |
| 19 | // | |
| 20 | // llc -march=x86 -filetype=asm in_file.ll -o - | |
| 21 | // llc -march=arm -filetype=asm in_file.ll -o - | |
| 22 | // | |
| 23 | // Producing an executable can be achieved by | |
| 24 | // | |
| 25 | // llc -filetype=obj in_file.ll | |
| 26 | // gcc in_file.o -o a.out | |
| 27 | // ./a.out | |
| 28 | ||
| 645 | 29 | |
| 625 | 30 | |
| 649 | 31 | object Compiler {
 | 
| 220 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | |
| 645 | 33 | import java.io._ | 
| 34 | import scala.util._ | |
| 35 | import scala.sys.process._ | |
| 36 | ||
// Abstract syntax trees for the Fun language.
//
// All node types are Serializable: the compiler does not parse source
// text itself but reads an already serialised list of declarations
// (see deserialise / compile).
abstract class Exp extends Serializable    // expressions
abstract class BExp extends Serializable   // boolean expressions (conditions)
abstract class Decl extends Serializable   // top-level declarations

// declarations: a named function with parameters, or the main body
case class Def(name: String, args: List[String], body: Exp) extends Decl
case class Main(e: Exp) extends Decl

// expressions
case class Call(name: String, args: List[Exp]) extends Exp
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
case class Write(e: Exp) extends Exp
case class Var(s: String) extends Exp
case class Num(i: Int) extends Exp
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
case class Sequence(e1: Exp, e2: Exp) extends Exp

// boolean expressions: a comparison operator applied to two expressions
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
| 53 | ||
| 54 | ||
| 220 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// for generating new labels
//
// counter is shared by every caller of Fresh; it starts at -1 so that
// the first generated name carries the suffix _0
var counter = -1

// returns x followed by "_" and the next counter value,
// e.g. Fresh("tmp") gives "tmp_0", then "tmp_1", ...
def Fresh(x: String) = {
  counter = counter + 1
  s"${x}_${counter}"
}
| 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | |
// Abstract syntax trees for the K-language, the intermediate
// representation produced by the CPS translation.
//
// KVal: things that can appear on the right-hand side of a let
// KExp: control structure (let-chains, conditionals, return)
//
// Both hierarchies are sealed so that pattern matches over them are
// checked for exhaustiveness by the compiler.
sealed abstract class KExp
sealed abstract class KVal

case class KVar(s: String) extends KVal
case class KNum(i: Int) extends KVal
case class Kop(o: String, v1: KVal, v2: KVal) extends KVal
case class KCall(o: String, vrs: List[KVal]) extends KVal
case class KWrite(v: KVal) extends KVal

// conditional on the (boolean) variable x1
case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
  override def toString = s"KIf $x1\nIF\n$e1\nELSE\n$e2"
}
// binds x to the value e1 in the remaining expression e2
case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
  override def toString = s"let $x = $e1 in \n$e2"
}
case class KReturn(v: KVal) extends KExp
| 648 | 80 | |
| 654 | 81 | |
// CPS translation from Exps to KExps using a
// continuation k.
//
// e is the expression to translate; k receives the KVal that holds
// the result of e and builds the rest of the K-expression around it.
// Every compound sub-expression is bound to a fresh tmp variable by
// a KLet, so operands of Kop / KCall / KWrite are the values handed
// over by the continuations.
def CPS(e: Exp)(k: KVal => KExp) : KExp = e match {
  case Var(s) => k(KVar(s))
  case Num(i) => k(KNum(i))
  case Aop(o, e1, e2) => {
    // the name is generated before the operands are translated,
    // so an outer operation gets a smaller number than inner ones
    val z = Fresh("tmp")
    CPS(e1)(y1 =>
      CPS(e2)(y2 => KLet(z, Kop(o, y1, y2), k(KVar(z)))))
  }
  // Bop is the only BExp declared in this file; any other BExp
  // would raise a MatchError here
  case If(Bop(o, b1, b2), e1, e2) => {
    val z = Fresh("tmp")
    CPS(b1)(y1 =>
      CPS(b2)(y2 =>
        // the continuation k is applied in both branches
        KLet(z, Kop(o, y1, y2), KIf(z, CPS(e1)(k), CPS(e2)(k)))))
  }
  case Call(name, args) => {
    // translates the arguments left to right; the resulting values
    // are collected in reverse (constant-time prepend instead of a
    // list append per argument) and put back in order for the call
    def aux(args: List[Exp], rev_vs: List[KVal]) : KExp = args match {
      case Nil => {
        val z = Fresh("tmp")
        KLet(z, KCall(name, rev_vs.reverse), k(KVar(z)))
      }
      case e::es => CPS(e)(y => aux(es, y :: rev_vs))
    }
    aux(args, Nil)
  }
  // the value of e1 is dropped, but the lets generated for e1
  // still precede the code for e2
  case Sequence(e1, e2) =>
    CPS(e1)(_ => CPS(e2)(k))
  case Write(e) => {
    val z = Fresh("tmp")
    CPS(e)(y => KLet(z, KWrite(y), k(KVar(z))))
  }
}
| 115 | ||
// CPS translation with the initial continuation: the value of the
// whole expression is simply returned
def CPSi(e: Exp) = CPS(e)(v => KReturn(v))
| 117 | ||
// some testcases
//
// These run when the object is initialised. The results of CPSi are
// discarded, but every call still advances the label counter, so the
// number and order of the calls is kept as is.

// a * 3
val e1 = Aop("*", Var("a"), Num(3))
CPSi(e1)

// (a * 3) + 4
val e2 = Aop("+", e1, Num(4))
CPSi(e2)

// 2 + (a * 3)
val e3 = Aop("+", Num(2), e1)
CPSi(e3)

// (1 - 2) + (a * 3)
val e4 = Aop("+", Aop("-", Num(1), Num(2)), e1)
CPSi(e4)

// if 1 == 1 then 3 else 4
val e5 = If(Bop("==", Num(1), Num(1)), Num(3), Num(4))
CPSi(e5)

// nested conditional
val e6 = If(Bop("!=", Num(10), Num(10)), e5, Num(40))
CPSi(e6)

// foo(3)
val e7 = Call("foo", List(Num(3)))
CPSi(e7)

// foo(3, 4, 5 + 6)
val e8 = Call("foo", List(Num(3), Num(4), Aop("+", Num(5), Num(6))))
CPSi(e8)

// a * 3 ; b + 6
val e9 = Sequence(e1, Aop("+", Var("b"), Num(6)))
CPSi(e9)

// (1 + foo(a, 3)) * 4
val e = Aop("*", Aop("+", Num(1), Call("foo", List(Var("a"), Num(3)))), Num(4))
CPSi(e)
| 653 | 148 | |
| 648 | 149 | |
| 150 | ||
| 151 | ||
| 625 | 152 | // convenient string interpolations | 
| 153 | // for instructions, labels and methods | |
| 154 | import scala.language.implicitConversions | |
| 155 | import scala.language.reflectiveCalls | |
| 156 | ||
// Adds the interpolators i"..", l".." and m".." to string literals:
//   i  - an instruction: prefixed with a space, ended with a newline
//   l  - a label: ended with ":" and a newline
//   m  - a method header / footer line: ended with a newline
//
// An implicit class (instead of an anonymous structural type) gives
// statically dispatched methods rather than reflective calls; the
// generated conversion keeps the name sring_inters.
implicit class sring_inters(sc: StringContext) {
  def i(args: Any*): String = " " ++ sc.s(args:_*) ++ "\n"
  def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
  def m(args: Any*): String = sc.s(args:_*) ++ "\n"
}
| 162 | ||
// mathematical and boolean operations
//
// Maps an operator of the Fun language to the corresponding LLVM
// instruction prefix (including the i32 operand type and a trailing
// space). Operators not listed here raise a scala.MatchError.
def compile_op(op: String) : String = op match {
  case "+" => "add i32 "
  case "*" => "mul i32 "
  case "-" => "sub i32 "
  case "/" => "sdiv i32 "
  case "%" => "srem i32 "
  case "==" => "icmp eq i32 "
  case "!=" => "icmp ne i32 "   // not equal
  case "<=" => "icmp sle i32 "  // signed less or equal
  case "<" => "icmp slt i32 "   // signed less than
}
| 220 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 174 | |
// compile K values
//
// Produces the LLVM text for a value: a literal, a %-prefixed
// variable, an arithmetic / comparison instruction, or a call.
// All operands and results are typed i32.
def compile_val(v: KVal) : String = v match {
  case KNum(i) => s"$i"
  case KVar(s) => s"%$s"
  case Kop(op, x1, x2) =>
    s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
  case KCall(x1, args) => {
    // every argument is prefixed individually, so a call without
    // arguments yields "()" rather than a dangling "i32 "
    val actuals = args.map(arg => s"i32 ${compile_val(arg)}").mkString(", ")
    s"call i32 @$x1 ($actuals)"
  }
  case KWrite(x1) =>
    s"call i32 @printInt (i32 ${compile_val(x1)})"
}
| 648 | 185 | |
// compile K expressions
//
// Generates the LLVM instructions for a K-expression: a ret for
// KReturn, an assignment followed by the remaining code for KLet,
// and a conditional branch with two freshly labelled blocks for KIf.
def compile_exp(a: KExp) : String = a match {
  case KReturn(result) =>
    i"ret i32 ${compile_val(result)}"
  case KLet(target, rhs, rest) => {
    val binding = i"%$target = ${compile_val(rhs)}"
    binding ++ compile_exp(rest)
  }
  case KIf(cond, then_exp, else_exp) => {
    // both labels are generated before either branch is compiled
    val if_br = Fresh("if_br")
    val else_br = Fresh("else_br")
    val branch = i"br i1 %$cond, label %$if_br, label %$else_br"
    val then_block = l"\n$if_br" ++ compile_exp(then_exp)
    val else_block = l"\n$else_br" ++ compile_exp(else_exp)
    branch ++ then_block ++ else_block
  }
}
| 202 | ||
| 655 | 203 | |
// LLVM code placed in front of every generated program (see compile):
// the format string "%d\n", the declaration of the external printf,
// and the helper @printInt, which prints its i32 argument via printf
// and returns that argument. KWrite is compiled to a call of @printInt.
| 204 | val prelude = """ | |
| 205 | @.str = private constant [4 x i8] c"%d\0A\00" | |
| 206 | ||
| 207 | declare i32 @printf(i8*, ...) | |
| 220 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 208 | |
| 655 | 209 | define i32 @printInt(i32 %x) {
 | 
| 210 | %t0 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0 | |
| 211 | call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %x) | |
| 212 | ret i32 %x | |
| 213 | } | |
| 214 | ||
| 215 | """ | |
| 653 | 216 | |
| 220 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 217 | |
// compile function for declarations and main
//
// A Def becomes an LLVM function with i32 parameters and an i32
// result; Main becomes @main without parameters. In both cases the
// body is CPS-translated first and then compiled.
def compile_decl(d: Decl) : String = d match {
  case Def(name, args, body) => {
    // every parameter is prefixed individually, so a function
    // without parameters yields "()" rather than a dangling "i32 %"
    val params = args.map(arg => s"i32 %$arg").mkString(", ")
    m"define i32 @$name ($params) {" ++
    compile_exp(CPSi(body)) ++
    m"}\n"
  }
  case Main(body) => {
    m"define i32 @main() {" ++
    compile_exp(CPSi(body)) ++
    m"}\n"
  }
}
| 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 231 | |
| 626 | 232 | // main compiler functions | 
| 220 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 233 | |
| 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// evaluates code i times and returns the average
// duration of one evaluation in seconds
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  (1 to i).foreach(_ => code)   // code is by-name: re-evaluated on each round
  val elapsed = System.nanoTime() - start
  elapsed / (i * 1.0e9)
}
| 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 240 | |
// Reads one Java-serialised object from the file fname and casts it
// to T. Any failure (missing file, corrupt stream, ...) is returned
// as a Failure rather than thrown.
//
// The cast to T is unchecked: because of erasure a wrong element
// type only shows up when the value is used.
//
// NOTE(review): Java deserialisation must only be applied to trusted
// files; a crafted stream can run arbitrary code while being read.
def deserialise[T](fname: String) : Try[T] = {
  import scala.util.Using
  // both streams are registered separately, so the file stream is
  // closed even if opening the object stream on top of it fails
  Using.Manager { use =>
    val file_in = use(new FileInputStream(fname))
    val in = use(new ObjectInputStream(file_in))
    in.readObject.asInstanceOf[T]
  }
}
| 247 | ||
// Compiles the declarations stored in <fname>.prs to LLVM code:
// the prelude followed by the code of every declaration.
//
// If the file cannot be read, a warning is printed to stderr and
// only the prelude is returned (as before, but no longer silently).
def compile(fname: String) : String = {
  val ast = deserialise[List[Decl]](fname ++ ".prs") match {
    case Success(decls) => decls
    case Failure(ex) => {
      Console.err.println(s"warning: cannot read ${fname}.prs ($ex); no declarations compiled")
      Nil
    }
  }
  prelude ++ (ast.map(compile_decl).mkString)
}
| 252 | ||
// Compiles <fname>.prs and writes the LLVM code to <fname>.ll,
// replacing an existing file.
//
// Uses java.nio instead of scala.tools.nsc.io.File, so the compiler
// jar is not needed on the classpath at run time.
def compile_to_file(fname: String) : Unit = {
  val output = compile(fname)
  java.nio.file.Files.write(
    java.nio.file.Paths.get(s"${fname}.ll"),
    output.getBytes(java.nio.charset.StandardCharsets.UTF_8))
  ()
}
| 257 | ||
// Compiles <fname>.prs to <fname>.ll, turns it into the executable
// a.out with llc and gcc, and prints the average running time of
// two runs of the executable.
//
// The commands are given as argument lists rather than as a single
// string, so that a file name containing spaces is passed on as one
// argument. llc and gcc are run with !!, which throws if they exit
// with a non-zero code.
def compile_and_run(fname: String) : Unit = {
  compile_to_file(fname)
  Seq("llc", "-filetype=obj", s"${fname}.ll").!!
  Seq("gcc", s"${fname}.o", "-o", "a.out").!!
  println("Time: " + time_needed(2, Seq("./a.out").!))
}
| 
141041fc76b5
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 264 | |
| 626 | 265 | // some examples of .fun files | 
| 645 | 266 | //compile_to_file("fact")
 | 
| 267 | //compile_and_run("fact")
 | |
| 268 | //compile_and_run("defs")
 | |
| 269 | ||
| 644 | 270 | |
// Entry point: expects the name of the program (without the .prs
// extension) as first argument; prints a usage message and exits
// with code 1 if it is missing.
def main(args: Array[String]) : Unit = args.headOption match {
  //println(compile(args(0)))
  case Some(fname) => compile_and_run(fname)
  case None => {
    Console.err.println("usage: scala fun_llvm.scala <name>   (reads <name>.prs)")
    sys.exit(1)
  }
}
| 657 | 274 | } | 
| 644 | 275 | |
| 657 | 276 | |
| 277 | ||
| 278 | ||
| 279 | ||
| 280 | /* | |
| 281 | LLVM notes | |
| 282 | ||
| 283 | Registers are places for data inside the CPU. | |
| 284 | + up to 10 times faster access than to main memory | |
| 285 | - expensive; typically just 32 of them in a 32-bit CPU | |
| 286 | ||
| 287 | High-level view of x86 | |
| 288 | • Not a stack machine; no direct correspondence to operand stacks | |
| 289 | • Arithmetics, etc. is done with values in registers | |
| 290 | ||
| 291 | • Started as academic project at University of Illinois in 2002 | |
| 292 | • Now a large open source project with many contributors and a growing user base | |
| 293 | ||
| 294 | Static Single Assignment (SSA) form | |
| 295 | • Only one assignment in the program text to each variable | |
| 296 | • But dynamically, this assignment can be executed many times | |
| 297 | • Many stores to a memory location are allowed | |
| 298 | • Also, Φ (phi) instructions can be used, in the beginning of a basic block | |
| 299 | • Value is one of the arguments, depending on from which block control came to this block | |
| 300 | • Register allocation tries to keep these variables in same real register | |
| 301 | ||
| 302 | Why SSA form? | |
| 303 | Many code optimizations can be done more efficiently | |
| 304 | ||
| 305 | Function definition form | |
| 306 |  define t @name(t1 x1, t2 x2, ..., tn xn) {
 | |
| 307 | l1: block1 | |
| 308 | l2: block2 | |
| 309 | ... | |
| 310 | lm : blockm | |
| 311 | } | |
| 312 | ||
| 313 | ||
| 314 | ||
| 315 | ||
| 316 | */ |