progs/fun_llvm.scala
author Christian Urban <urbanc@in.tum.de>
Wed, 06 Nov 2019 23:17:05 +0000
changeset 683 c6c79d21f8a8
parent 679 8fc109f36b78
child 701 681c36b2af27
permissions -rw-r--r--
updated

// A Small LLVM Compiler for a Simple Functional Language
// (includes an external lexer and parser)
//
// call with 
//
//     scala fun_llvm.scala fact
//
//     scala fun_llvm.scala defs
//
// this will generate a .ll file. You can interpret this file
// using lli.
//
// The optimiser can be invoked as
//
//      opt -O1 -S in_file.ll > out_file.ll
//      opt -O3 -S in_file.ll > out_file.ll
//
// The code produced for the various architectures can be obtains with
//   
//   llc -march=x86 -filetype=asm in_file.ll -o -
//   llc -march=arm -filetype=asm in_file.ll -o -  
//
// Producing an executable can be achieved by
//
//    llc -filetype=obj in_file.ll
//    gcc in_file.o -o a.out
//    ./a.out



object Compiler {

import java.io._  
import scala.util._
import scala.sys.process._

// Abstract syntax trees for the Fun language
abstract class Exp extends Serializable 
abstract class BExp extends Serializable 
abstract class Decl extends Serializable 

case class Def(name: String, args: List[String], body: Exp) extends Decl
case class Main(e: Exp) extends Decl

case class Call(name: String, args: List[Exp]) extends Exp
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
case class Write(e: Exp) extends Exp
case class Var(s: String) extends Exp
case class Num(i: Int) extends Exp
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
case class Sequence(e1: Exp, e2: Exp) extends Exp
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp


// for generating new labels
var counter = -1

def Fresh(x: String) = {
  counter += 1
  x ++ "_" ++ counter.toString()
}

// Internal CPS language for FUN
abstract class KExp
abstract class KVal

case class KVar(s: String) extends KVal
case class KNum(i: Int) extends KVal
case class Kop(o: String, v1: KVal, v2: KVal) extends KVal
case class KCall(o: String, vrs: List[KVal]) extends KVal
case class KWrite(v: KVal) extends KVal

case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
  override def toString = s"KIf $x1\nIF\n$e1\nELSE\n$e2"
}
case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
  override def toString = s"let $x = $e1 in \n$e2" 
}
case class KReturn(v: KVal) extends KExp


// CPS translation from Exps to KExps using a
// continuation k.
def CPS(e: Exp)(k: KVal => KExp) : KExp = e match {
  case Var(s) => k(KVar(s)) 
  case Num(i) => k(KNum(i))
  case Aop(o, e1, e2) => {
    val z = Fresh("tmp")
    CPS(e1)(y1 => 
      CPS(e2)(y2 => KLet(z, Kop(o, y1, y2), k(KVar(z)))))
  }
  case If(Bop(o, b1, b2), e1, e2) => {
    val z = Fresh("tmp")
    CPS(b1)(y1 => 
      CPS(b2)(y2 => 
        KLet(z, Kop(o, y1, y2), KIf(z, CPS(e1)(k), CPS(e2)(k)))))
  }
  case Call(name, args) => {
    def aux(args: List[Exp], vs: List[KVal]) : KExp = args match {
      case Nil => {
          val z = Fresh("tmp")
          KLet(z, KCall(name, vs), k(KVar(z)))
      }
      case e::es => CPS(e)(y => aux(es, vs ::: List(y)))
    }
    aux(args, Nil)
  }
  case Sequence(e1, e2) => 
    CPS(e1)(_ => CPS(e2)(y2 => k(y2)))
  case Write(e) => {
    val z = Fresh("tmp")
    CPS(e)(y => KLet(z, KWrite(y), k(KVar(z))))
  }
}   

//initial continuation
def CPSi(e: Exp) = CPS(e)(KReturn)

// some testcases
val e1 = Aop("*", Var("a"), Num(3))
CPSi(e1)

val e2 = Aop("+", Aop("*", Var("a"), Num(3)), Num(4))
CPSi(e2)

val e3 = Aop("+", Num(2), Aop("*", Var("a"), Num(3)))
CPSi(e3)

val e4 = Aop("+", Aop("-", Num(1), Num(2)), Aop("*", Var("a"), Num(3)))
CPSi(e4)

val e5 = If(Bop("==", Num(1), Num(1)), Num(3), Num(4))
CPSi(e5)

val e6 = If(Bop("!=", Num(10), Num(10)), e5, Num(40))
CPSi(e6)

val e7 = Call("foo", List(Num(3)))
CPSi(e7)

val e8 = Call("foo", List(Num(3), Num(4), Aop("+", Num(5), Num(6))))
CPSi(e8)

val e9 = Sequence(Aop("*", Var("a"), Num(3)), Aop("+", Var("b"), Num(6)))
CPSi(e9)

val e = Aop("*", Aop("+", Num(1), Call("foo", List(Var("a"), Num(3)))), Num(4))
CPSi(e)




// convenient string interpolations 
// for instructions, labels and methods
import scala.language.implicitConversions
import scala.language.reflectiveCalls

implicit def sring_inters(sc: StringContext) = new {
    def i(args: Any*): String = "   " ++ sc.s(args:_*) ++ "\n"
    def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
    def m(args: Any*): String = sc.s(args:_*) ++ "\n"
}

// mathematical and boolean operations
def compile_op(op: String) = op match {
  case "+" => "add i32 "
  case "*" => "mul i32 "
  case "-" => "sub i32 "
  case "/" => "sdiv i32 "
  case "%" => "srem i32 "
  case "==" => "icmp eq i32 "
  case "<=" => "icmp sle i32 "    // signed less or equal
  case "<" => "icmp slt i32 "     // signed less than
}

def compile_val(v: KVal) : String = v match {
  case KNum(i) => s"$i"
  case KVar(s) => s"%$s"
  case Kop(op, x1, x2) => 
    s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
  case KCall(x1, args) => 
    s"call i32 @$x1 (${args.map(compile_val).mkString("i32 ", ", i32 ", "")})"
  case KWrite(x1) =>
    s"call i32 @printInt (i32 ${compile_val(x1)})"
}

// compile K expressions
def compile_exp(a: KExp) : String = a match {
  case KReturn(v) =>
    i"ret i32 ${compile_val(v)}"
  case KLet(x: String, v: KVal, e: KExp) => 
    i"%$x = ${compile_val(v)}" ++ compile_exp(e)
  case KIf(x, e1, e2) => {
    val if_br = Fresh("if_branch")
    val else_br = Fresh("else_branch")
    i"br i1 %$x, label %$if_br, label %$else_br" ++
    l"\n$if_br" ++
    compile_exp(e1) ++
    l"\n$else_br" ++ 
    compile_exp(e2)
  }
}


val prelude = """
@.str = private constant [4 x i8] c"%d\0A\00"

declare i32 @printf(i8*, ...)

define i32 @printInt(i32 %x) {
   %t0 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0
   call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
   ret i32 %x
}

"""


// compile function for declarations and main
def compile_decl(d: Decl) : String = d match {
  case Def(name, args, body) => { 
    m"define i32 @$name (${args.mkString("i32 %", ", i32 %", "")}) {" ++
    compile_exp(CPSi(body)) ++
    m"}\n"
  }
  case Main(body) => {
    m"define i32 @main() {" ++
    compile_exp(CPSi(body)) ++
    m"}\n"
  }
}

// main compiler functions

def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  for (j <- 1 to i) code
  val end = System.nanoTime()
  (end - start)/(i * 1.0e9)
}

def deserialise[T](fname: String) : Try[T] = {
  import scala.util.Using
  Using(new ObjectInputStream(new FileInputStream(fname))) {
    in => in.readObject.asInstanceOf[T]
  }
}

def compile(fname: String) : String = {
  val ast = deserialise[List[Decl]](fname ++ ".prs").getOrElse(Nil) 
  prelude ++ (ast.map(compile_decl).mkString)
}

def compile_to_file(fname: String) = {
  val output = compile(fname)
  scala.tools.nsc.io.File(s"${fname}.ll").writeAll(output)
}

def compile_and_run(fname: String) : Unit = {
  compile_to_file(fname)
  (s"llc -filetype=obj ${fname}.ll").!!
  (s"gcc ${fname}.o -o a.out").!!
  println("Time: " + time_needed(2, (s"./a.out").!))
}

// some examples of .fun files
//compile_to_file("fact")
//compile_and_run("fact")
//compile_and_run("defs")


def main(args: Array[String]) : Unit = 
   //println(compile(args(0)))
   compile_and_run(args(0))
}





/*
LLVM notes

Registers are places for data inside the CPU.
+ up to 10 times faster access than to main memory 
- expensive; typically just 32 of them in a 32-bit CPU

High-level view of x86
• Not a stack machine; no direct correspondence to operand stacks
• Arithmetics, etc. is done with values in registers

• Started as academic project at University of Illinois in 2002
• Now a large open source project with many contributors and a growing user base

Single Static Assignment (SSA) form
• Only one assignment in the program text to each variable
• But dynamically, this assignment can be executed many times
• Many stores to a memory location are allowed
• Also, Φ (phi) instructions can be used, in the beginning of a basic block
• Value is one of the arguments, depending on from which block control came to this block
• Register allocation tries to keep these variables in same real register

Why SSA form?
Many code optimizations can be done more efficiently

Function definition form
 define t @name(t1 x1, t2 x2, ..., tn xn) {
 l1: block1
 l2: block2
 ... 
 lm : blockm
 }




*/