solutions/cw5/fun_llvm.sc
changeset 903 2f86ebda3629
parent 894 02ef5c3abc51
child 920 7af2eea19646
--- a/solutions/cw5/fun_llvm.sc	Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_llvm.sc	Fri Dec 09 11:00:05 2022 +0000
@@ -1,50 +1,17 @@
-// A Small LLVM Compiler for a Simple Functional Language
-// (includes an external lexer and parser)
-//
-//
-// call with                 -- prints out llvm code
-//
-//     amm fun_llvm.sc main fact.fun
-//     amm fun_llvm.sc main defs.fun
-//
-// or                        -- writes llvm code to disk
-//
-//     amm fun_llvm.sc write fact.fun
-//     amm fun_llvm.sc write defs.fun
-//
-//       this will generate an .ll file. 
-//
-// or                       -- runs the generated llvm code via lli
-//
-//     amm fun_llvm.sc run fact.fun
-//     amm fun_llvm.sc run defs.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// Usage:
+//   amm compiler.sc XXX.fun   -- generates XXX.ll, XXX.o and the binary XXX
+//   ./XXX                     -- runs the generated binary
+// (replace XXX with the name of the FUN source file, without its extension)
 //
-//
-// You can interpret an .ll file using lli, for example
-//
-//      lli fact.ll
-//
-// The optimiser can be invoked as
-//
-//      opt -O1 -S in_file.ll > out_file.ll
-//      opt -O3 -S in_file.ll > out_file.ll
-//
-// The code produced for the various architectures can be obtain with
-//   
-//   llc -march=x86 -filetype=asm in_file.ll -o -
-//   llc -march=arm -filetype=asm in_file.ll -o -  
-//
-// Producing an executable can be achieved by
-//
-//    llc -filetype=obj in_file.ll
-//    gcc in_file.o -o a.out
-//    ./a.out
 
+// lexer + parser
 
 import $file.fun_tokens, fun_tokens._
 import $file.fun_parser, fun_parser._ 
 
-
 // for generating new labels
 var counter = -1
 
@@ -53,76 +20,51 @@
   x ++ "_" ++ counter.toString()
 }
 
+// typing: FUN types are plain strings; a TyEnv maps names to their types
+type Ty = String
+type TyEnv = Map[String, Ty]
+
+// initial typing environment: return types of the functions defined in the prelude
+val initialEnv = Map[String, Ty]("skip" -> "Void", "print_int" -> "Void", "print_char" -> "Void",
+                                "print_space" -> "Void", "print_star" -> "Void", "new_line" -> "Void")
+// FUN type name -> LLVM type name (applying it to any other name throws)
+val typeConversion = Map("Int" -> "i32", "Double" -> "double", "Void" -> "void")
+
 // Internal CPS language for FUN
 abstract class KExp
 abstract class KVal
 
-type Ty = String
-type TyEnv = Map[String, Ty]
-
 case class KVar(s: String, ty: Ty = "UNDEF") extends KVal
-case class KLoad(v: KVal) extends KVal
-case class KNum(i: Int) extends KVal
-case class KFNum(i: Double) extends KVal
-case class KChr(c: Int) extends KVal
+case class KConst(s: String, ty: Ty = "UNDEF") extends KVal
+case class KNum(i: Int) extends KVal  // known type
+case class KFNum(d: Float) extends KVal  // known type
+case class KChConst(c: Int) extends KVal  // known type
 case class Kop(o: String, v1: KVal, v2: KVal, ty: Ty = "UNDEF") extends KVal
 case class KCall(o: String, vrs: List[KVal], ty: Ty = "UNDEF") extends KVal
 
-case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
-  override def toString = s"KIf $x1\nIF\n$e1\nELSE\n$e2"
+case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
+  override def toString = s"LET $x = $e1 in \n$e2" 
 }
-case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
-  override def toString = s"let $x = $e1 in \n$e2" 
+case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
+  def pad(e: KExp) = e.toString.replaceAll("(?m)^", "  ")  // indent every line by two spaces
+  // pretty-printer: both branches are printed indented under THEN / ELSE
+  override def toString = 
+     s"IF $x1\nTHEN\n${pad(e1)}\nELSE\n${pad(e2)}"
 }
 case class KReturn(v: KVal) extends KExp
 
-// typing K values
-def typ_val(v: KVal, ts: TyEnv) : (KVal, Ty) = v match {
-  case KVar(s, _) => {
-    val ty = ts.getOrElse(s, "TUNDEF")
-    (KVar(s, ty), ty)  
-  }
-  case Kop(op, v1, v2, _) => {
-    val (tv1, ty1) = typ_val(v1, ts)
-    val (tv2, ty2) = typ_val(v2, ts)
-    if (ty1 == ty2) (Kop(op, tv1, tv2, ty1), ty1) else (Kop(op, tv1, tv2, "TMISMATCH"), "TMISMATCH") 
-  }
-  case KCall(fname, args, _) => {
-    val ty = ts.getOrElse(fname, "TCALLUNDEF" ++ fname)
-    (KCall(fname, args.map(typ_val(_, ts)._1), ty), ty)
-  }  
-  case KLoad(v) => {
-    val (tv, ty) = typ_val(v, ts)
-    (KLoad(tv), ty)
-  }
-  case KNum(i) => (KNum(i), "Int")
-  case KFNum(i) => (KFNum(i), "Double")
-  case KChr(c) => (KChr(c), "Int")
-}
-
-def typ_exp(a: KExp, ts: TyEnv) : KExp = a match {
-  case KReturn(v) => KReturn(typ_val(v, ts)._1)
-  case KLet(x: String, v: KVal, e: KExp) => {
-    val (tv, ty) = typ_val(v, ts)
-    KLet(x, tv, typ_exp(e, ts + (x -> ty)))
-  }
-  case KIf(b, e1, e2) => KIf(b, typ_exp(e1, ts), typ_exp(e2, ts))
-}
-
-
-
-
 // CPS translation from Exps to KExps using a
 // continuation k.
 def CPS(e: Exp)(k: KVal => KExp) : KExp = e match {
-  case Var(s) if (s.head.isUpper) => {
+  case Var(s) => {
+    if (s.head.isUpper) {  // if this variable is a global
       val z = Fresh("tmp")
-      KLet(z, KLoad(KVar(s)), k(KVar(z)))
+      KLet(z, KConst(s), k(KVar(z)))
+    } else k(KVar(s))
   }
-  case Var(s) => k(KVar(s))
   case Num(i) => k(KNum(i))
-  case ChConst(c) => k(KChr(c))
-  case FNum(i) => k(KFNum(i))
+  case FNum(d) => k(KFNum(d))
+  case ChConst(c) => k(KChConst(c))
   case Aop(o, e1, e2) => {
     val z = Fresh("tmp")
     CPS(e1)(y1 => 
@@ -146,77 +88,122 @@
   }
   case Sequence(e1, e2) => 
     CPS(e1)(_ => CPS(e2)(y2 => k(y2)))
-}   
+}
 
-//initial continuation
+// initial continuation
 def CPSi(e: Exp) = CPS(e)(KReturn)
 
-// some testcases
-val e1 = Aop("*", Var("a"), Num(3))
-CPSi(e1)
 
-val e2 = Aop("+", Aop("*", Var("a"), Num(3)), Num(4))
-CPSi(e2)
+// type of a K-value: fixed for literals, otherwise the annotation it carries
+def get_typ_val(v: KVal) : Ty = v match {
+  case _: KNum | _: KChConst => "Int"  // character constants count as Int
+  case _: KFNum => "Double"
+  case KVar(_, t) => t
+  case KConst(_, t) => t
+  case Kop(_, _, _, t) => t
+  case KCall(_, _, t) => t
+  // the annotation is returned as stored, including the default "UNDEF"
+}
 
-val e3 = Aop("+", Num(2), Aop("*", Var("a"), Num(3)))
-CPSi(e3)
-
-val e4 = Aop("+", Aop("-", Num(1), Num(2)), Aop("*", Var("a"), Num(3)))
-CPSi(e4)
-
-val e5 = If(Bop("==", Num(1), Num(1)), Num(3), Num(4))
-CPSi(e5)
+// annotate a K-value (and, recursively, its operands) with types from ts;
+// throws an Exception for unknown names and for operands of different types
+def typ_val(v: KVal, ts: TyEnv) : KVal = {
+  // type recorded for a name; an unknown name is a compile error
+  def lookup(name: String) : Ty =
+    ts.getOrElse(name, throw new Exception(s"Compile error: unknown type for $name"))
+
+  v match {
+    case KVar(name, _) => KVar(name, lookup(name))
+    case KConst(name, _) => KConst(name, lookup(name))
+    case Kop(o, v1, v2, _) => {
+      val tv1 = typ_val(v1, ts)
+      val tv2 = typ_val(v2, ts)
+      val t1 = get_typ_val(tv1)
+      val t2 = get_typ_val(tv2)
+      // Kop also carries arithmetic operators, so report the operator itself
+      if (t1 != t2)
+        throw new Exception(s"Compile error: operator $o applied to mismatched types $t1 and $t2")
+      Kop(o, tv1, tv2, t1)
+    }
+    case KCall(o, vrs, _) => {
+      // arguments are typed first, then the callee's return type is looked up
+      val new_vrs = vrs.map(vr => typ_val(vr, ts))
+      KCall(o, new_vrs, lookup(o))
+    }
+    case x => x  // no changes: KNum, KFNum, KChConst
+  }
+}
 
-val e6 = If(Bop("!=", Num(10), Num(10)), e5, Num(40))
-CPSi(e6)
+// thread the typing environment through a K-expression, annotating its values
+def typ_exp(a: KExp, ts: TyEnv) : KExp = a match {
+  case KReturn(v) => KReturn(typ_val(v, ts))
+  case KIf(cond, thenBr, elseBr) => KIf(cond, typ_exp(thenBr, ts), typ_exp(elseBr, ts))
+  case KLet(name, rhs, body) => {
+    val typedRhs = typ_val(rhs, ts)
+    // the body is typed with the binder at the type of its right-hand side
+    val bodyEnv = ts + (name -> get_typ_val(typedRhs))
+    KLet(name, typedRhs, typ_exp(body, bodyEnv))
+  }
+}
 
-val e7 = Call("foo", List(Num(3)))
-CPSi(e7)
+// prelude: LLVM IR for printf and the built-in functions, emitted before the compiled program
+val prelude = """
+declare i32 @printf(i8*, ...)
 
-val e8 = Call("foo", List(Aop("*", Num(3), Num(1)), Num(4), Aop("+", Num(5), Num(6))))
-CPSi(e8)
+@.str_nl = private constant [2 x i8] c"\0A\00"
+@.str_star = private constant [2 x i8] c"*\00"
+@.str_space = private constant [2 x i8] c" \00"
+@.str_int = private constant [3 x i8] c"%d\00"
+@.str_c = private constant [3 x i8] c"%c\00"
+
+define void @new_line() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
 
-val e9 = Sequence(Aop("*", Var("a"), Num(3)), Aop("+", Var("b"), Num(6)))
-CPSi(e9)
+define void @print_star() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
+
+define void @print_space() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
 
-val e = Aop("*", Aop("+", Num(1), Call("foo", List(Var("a"), Num(3)))), Num(4))
-CPSi(e)
+define void @print_int(i32 %x) {
+  %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
+  ret void
+}
 
+define void @print_char(i32 %x) {
+  %t0 = getelementptr [3 x i8], [3 x i8]* @.str_c, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0, i32 %x)
+  ret void
+}
 
+define void @skip() #0 {
+  ret void
+}
 
+; END OF BUILT-IN FUNCTIONS (prelude)
+"""
 
 // convenient string interpolations 
 // for instructions, labels and methods
 import scala.language.implicitConversions
 import scala.language.reflectiveCalls
 
-
-
-
-implicit def sring_inters(sc: StringContext) = new {
+implicit def string_inters(sc: StringContext) = new {  // i"": indented instruction, l"": label, m"": plain line
     def i(args: Any*): String = "   " ++ sc.s(args:_*) ++ "\n"
     def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
     def m(args: Any*): String = sc.s(args:_*) ++ "\n"
 }
 
-def get_ty(s: String) = s match {
-  case "Double" => "double"
-  case "Void" => "void"
-  case "Int" => "i32"
-  case "Bool" => "i2"
-  case _ => s
-}
-
-def compile_call_arg(a: KVal) = a match {
-  case KNum(i) => s"i32 $i"
-  case KFNum(i) => s"double $i"
-  case KChr(c) => s"i32 $c"
-  case KVar(s, ty) => s"${get_ty(ty)} %$s" 
-}
-
-def compile_arg(s: (String, String)) = s"${get_ty(s._2)} %${s._1}" 
-
-
 // mathematical and boolean operations
 def compile_op(op: String) = op match {
   case "+" => "add i32 "
@@ -225,48 +212,70 @@
   case "/" => "sdiv i32 "
   case "%" => "srem i32 "
   case "==" => "icmp eq i32 "
-  case "!=" => "icmp ne i32 "      // not equal 
-  case "<=" => "icmp sle i32 "     // signed less or equal
-  case "<"  => "icmp slt i32 "     // signed less than
+  case "!=" => "icmp ne i32 "
+  case "<=" => "icmp sle i32 "
+  case "<"  => "icmp slt i32 "
+  case ">=" => "icmp sge i32 "
+  case ">" => "icmp sgt i32 "
 }
 
 def compile_dop(op: String) = op match {
   case "+" => "fadd double "
   case "*" => "fmul double "
   case "-" => "fsub double "
+  case "/" => "fdiv double "
+  case "%" => "frem double "
   case "==" => "fcmp oeq double "
-  case "<=" => "fcmp ole double "   
-  case "<"  => "fcmp olt double "   
+  case "!=" => "fcmp one double "
+  case "<=" => "fcmp ole double "
+  case "<" => "fcmp olt double "
+  case ">=" => "fcmp oge double "  // doubles need fcmp; icmp accepts integer operands only
+  case ">" => "fcmp ogt double "   // ordered predicates, matching oeq/one/ole/olt above
+}
+
+def compile_args(vrs: List[KVal]) : List[String] = {
+  // one "<llvm type> <operand>" string per argument, in order
+  vrs.map(arg => s"${typeConversion(get_typ_val(arg))} ${compile_val(arg)}")
 }
 
 // compile K values
 def compile_val(v: KVal) : String = v match {
   case KNum(i) => s"$i"
-  case KFNum(i) => s"$i"
-  case KChr(c) => s"$c"
-  case KVar(s, ty) => s"%$s" 
-  case KLoad(KVar(s, ty)) => s"load ${get_ty(ty)}, ${get_ty(ty)}* @$s"
-  case Kop(op, x1, x2, ty) => ty match { 
-    case "Int" => s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
-    case "Double" => s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}"
-    case _ => Kop(op, x1, x2, ty).toString
+  case KFNum(x) => s"$x"
+  case KChConst(code) => s"$code"  // emitted as an integer
+  case KVar(name, _) => s"%$name"
+  case KConst(name, ty) => {
+    val llvmTy = typeConversion(ty)  // a global is read through a load
+    s"load $llvmTy, $llvmTy* @$name"
   }
-  case KCall(fname, args, ty) => 
-    s"call ${get_ty(ty)} @$fname (${args.map(compile_call_arg).mkString(", ")})"
+  case Kop(op, lhs, rhs, ty) => {
+    val instr = ty match {  // float or integer instruction, chosen by operand type
+      case "Double" => compile_dop(op)
+      case "Int" => compile_op(op)
+      case _ => throw new Exception("Compile error: unknown type for comparison")
+    }
+    s"$instr ${compile_val(lhs)}, ${compile_val(rhs)}"
+  }
+  case KCall(fname, args, ty) =>
+    s"call ${typeConversion(ty)} @$fname (${compile_args(args).mkString(", ")})"
 }
 
 // compile K expressions
 def compile_exp(a: KExp) : String = a match {
-  case KReturn(KVar("void", _)) =>
-    i"ret void"
-  case KReturn(KVar(x, ty)) =>
-    i"ret ${get_ty(ty)} %$x"
-  case KReturn(KNum(i)) =>
-    i"ret i32 $i"
-  case KLet(x: String, KCall(o: String, vrs: List[KVal], "Void"), e: KExp) => 
-    i"${compile_val(KCall(o: String, vrs: List[KVal], "Void"))}" ++ compile_exp(e)
-  case KLet(x: String, v: KVal, e: KExp) => 
-    i"%$x = ${compile_val(v)}" ++ compile_exp(e)
+  case KReturn(v) => {
+    val ty = get_typ_val(v)
+    if (ty == "Void") {
+      i"ret void"
+    } else {
+      i"ret ${typeConversion(ty)} ${compile_val(v)}"
+    }
+  }
+  case KLet(x: String, v: KVal, e: KExp) => {
+    val tv = get_typ_val(v)
+    if (tv == "Void") {
+      i"${compile_val(v)}" ++ compile_exp(e)
+    } else i"%$x = ${compile_val(v)}" ++ compile_exp(e)
+  }
   case KIf(x, e1, e2) => {
     val if_br = Fresh("if_branch")
     val else_br = Fresh("else_branch")
@@ -278,100 +287,50 @@
   }
 }
 
-
-val prelude = """
-declare i32 @printf(i8*, ...)
-
-@.str_nl = private constant [2 x i8] c"\0A\00"
-@.str_star = private constant [2 x i8] c"*\00"
-@.str_space = private constant [2 x i8] c" \00"
-
-define void @new_line() #0 {
-  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
-  %1 = call i32 (i8*, ...) @printf(i8* %t0)
-  ret void
-}
-
-define void @print_star() #0 {
-  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
-  %1 = call i32 (i8*, ...) @printf(i8* %t0)
-  ret void
-}
-
-define void @print_space() #0 {
-  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
-  %1 = call i32 (i8*, ...) @printf(i8* %t0)
-  ret void
-}
-
-define void @skip() #0 {
-  ret void
-}
-
-@.str_int = private constant [3 x i8] c"%d\00"
-
-define void @print_int(i32 %x) {
-   %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0
-   call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
-   ret void
-}
-
-@.str_char = private constant [3 x i8] c"%c\00"
-
-define void @print_char(i32 %x) {
-   %t0 = getelementptr [3 x i8], [3 x i8]* @.str_char, i32 0, i32 0
-   call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
-   ret void
-}
-
-; END OF BUILD-IN FUNCTIONS (prelude)
-
-"""
-
-def get_cont(ty: Ty) = ty match {
-  case "Int" =>    KReturn
-  case "Double" => KReturn
-  case "Void" =>   { (_: KVal) => KReturn(KVar("void", "Void")) }
-} 
-
-// compile function for declarations and main
-def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match {
-  case Def(name, args, ty, body) => { 
-    val ts2 = ts + (name -> ty)
-    val tkbody = typ_exp(CPS(body)(get_cont(ty)), ts2 ++ args.toMap)
-    (m"define ${get_ty(ty)} @$name (${args.map(compile_arg).mkString(",")}) {" ++
-     compile_exp(tkbody) ++
-     m"}\n", ts2)
-  }
-  case Main(body) => {
-    val tbody = typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts)
-    (m"define i32 @main() {" ++
-     compile_exp(tbody) ++
-     m"}\n", ts)
-  }
-  case Const(name, n) => {
-    (m"@$name = global i32 $n\n", ts + (name -> "Int"))
-  }
-  case FConst(name, x) => {
-    (m"@$name = global double $x\n", ts + (name -> "Double"))
+def compile_def_args(args: List[(String, String)], ts: TyEnv) : (List[String], TyEnv) = args match {
+  case Nil => (Nil, ts)
+  case (_, "Void")::_ => throw new Exception("Compile error: argument of type void is invalid")
+  case (name, ty)::rest => {
+    val (compiledRest, finalEnv) = compile_def_args(rest, ts + (name -> ty))  // later params see this one
+    (s"${typeConversion(ty)} %$name" :: compiledRest, finalEnv)
   }
 }
 
-def compile_prog(prog: List[Decl], ty: TyEnv) : String = prog match {
-  case Nil => ""
-  case d::ds => {
-    val (s2, ty2) = compile_decl(d, ty)
-    s2 ++ compile_prog(ds, ty2)
+def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match {
+  // global constants: one LLVM global each, recorded under their FUN type
+  case Const(name, value) => (m"@$name = global i32 $value\n", ts + (name -> "Int"))
+  case FConst(name, value) => (m"@$name = global double $value\n", ts + (name -> "Double"))
+  case Def(name, args, ty, body) => {
+    val globals = ts + (name -> ty)  // the function is visible in its own body
+    val (argList, env1) = compile_def_args(args, globals)
+    val header = m"define ${typeConversion(ty)} @$name (${argList.mkString(", ")}) {"
+    val code = compile_exp(typ_exp(CPSi(body), env1))
+    (header ++ code ++ m"}\n", globals)  // parameters are not kept in the result
+  }
+  case Main(body) => {
+    val env = ts + ("main" -> "Int")
+    val header = m"define i32 @main() {"
+    // whatever the body evaluates to, main returns 0
+    val code = compile_exp(typ_exp(CPS(body)(_ => KReturn(KNum(0))), env))
+    (header ++ code ++ m"}\n", env)
   }
 }
-// main compiler functions
-def compile(prog: List[Decl]) : String = 
-  prelude ++ compile_prog(prog, Map("new_line" -> "Void", "skip" -> "Void", 
-				    "print_star" -> "Void", "print_space" -> "Void",
-                                    "print_int" -> "Void", "print_char" -> "Void"))
 
+// compile the declarations in order; each one is typed in the environment
+// produced by the declarations before it
+def compile_block(prog: List[Decl], ts: TyEnv) : (String, TyEnv) = {
+  val start = ("", ts)
+  prog.foldLeft(start) { case ((code, env), decl) =>
+    val (declCode, nextEnv) = compile_decl(decl, env)
+    (code ++ declCode, nextEnv)
+  }
+}
 
-//import ammonite.ops._
+def fun_compile(prog: List[Decl]) : String = {
+  // start from the built-ins' types; the final environment is not needed
+  val (compiled, _) = compile_block(prog, initialEnv)
+  prelude ++ compiled
+}
 
 
 @main
@@ -379,8 +338,8 @@
     val path = os.pwd / fname
     val file = fname.stripSuffix("." ++ path.ext)
     val tks = tokenise(os.read(path))
-    val ast = parse_tks(tks)
-    val code = compile(ast)
+    val ast = parse_tks(tks).head
+    val code = fun_compile(ast)
     println(code)
 }
 
@@ -389,8 +348,8 @@
     val path = os.pwd / fname
     val file = fname.stripSuffix("." ++ path.ext)
     val tks = tokenise(os.read(path))
-    val ast = parse_tks(tks)
-    val code = compile(ast)
+    val ast = parse_tks(tks).head
+    val code = fun_compile(ast)
     //println(code)
     os.write.over(os.pwd / (file ++ ".ll"), code)
 }
@@ -407,6 +366,3 @@
 }
 
 
-
-
-