progs/while-compiler-arrays/compile_bfc.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Sat, 03 Oct 2020 00:51:47 +0100
changeset 769 f9686b22db7e
parent 757 ea0be0662be0
permissions -rw-r--r--
updated

// Some more interesting testcases for the 
// WHILE compiler with arrays, it includes: 
//
//  - a small parser for WHILE programs
//
//  - transpiles BF programs into WHILE programs
//    and then compiles and runs them
//
//  - the power-example is the Mandelbrot set:
//       
//       * 65k for the transpiled WHILE program 
//       * parsing uses around 30 secs using fastparse
//       * the jasmin assembly file is 236k
//       * the resulting Java program takes about 20 secs 
//
// Call with (X being 0,1,..,4)
//
//  amm compile_bfc.sc all
//  amm compile_bfc.sc bfcX


// load the compiler
import $file.compile_arrays
import compile_arrays._ 

// Evaluates `code` i times in total and returns a pair consisting of
// the average wall-clock time per evaluation (in seconds) and the
// value produced by the final evaluation.
def time_needed[T](i: Int, code: => T) = {
  val started = System.nanoTime()
  // the first i - 1 evaluations only contribute to the timing
  (2 to i).foreach(_ => code)
  val last = code
  val finished = System.nanoTime()
  ((finished - started) / (i * 1.0e9), last)
}

// Jasmin prologue used for BF programs. It replaces the usual write
// library-function, because in BF integers are written out as
// characters: the write method below loads its int argument, converts
// it with i2c and prints it via PrintStream.print(C)V.
// The placeholder XXX is substituted by the class name in compile.
// The text ends inside the main method, where the compiled code follows.
val beginning = """
.class public XXX.XXX
.super java/lang/Object

.method public static write(I)V 
    .limit locals 1 
    .limit stack 2 
    getstatic java/lang/System/out Ljava/io/PrintStream; 
    iload 0
    i2c                                           ; Int => Char
    invokevirtual java/io/PrintStream/print(C)V   ; println(I)V => print(C)V    
    return 
.end method

.method public static main([Ljava/lang/String;)V
   .limit locals 200
   .limit stack 200

; COMPILED CODE STARTS   

"""


// Modified main compilation function for blocks: the instructions
// generated for the block are placed between the BF-specific prologue
// (beginning) and the epilogue (ending); afterwards every occurrence
// of the placeholder XXX is replaced by the class name.
def compile(bl: Block, class_name: String) : String = {
  val body = compile_block(bl, Map())._1
  val template = beginning ++ body ++ ending
  template.replace("XXX", class_name)
}

// automating the above
import ammonite.ops._

// Compiles the block and (over)writes the result to the file
// <class_name>.j in the current working directory.
def compile_to_file(bl: Block, class_name: String) : Unit = {
  val target = pwd / s"$class_name.j"
  write.over(target, compile(bl, class_name))
}

// Generates the .j file for the block, assembles it by calling
// jasmin.jar, runs the resulting class with java and prints the
// captured output; both external steps are timed and reported.
def compile_and_run(bl: Block, class_name: String) : Unit = {
  // the two external steps; each one is evaluated inside time_needed
  def assemble() =
    os.proc("java", "-jar", "jasmin.jar", s"$class_name.j").call()
  def execute() =
    os.proc("java", s"${class_name}/${class_name}").call().out.text()

  println(s"Start of compilation")
  compile_to_file(bl, class_name)
  println(s"generated $class_name.j file")

  val (jasmin_time, _) = time_needed(1, assemble())
  println(s"generated $class_name.class file (in $jasmin_time secs).")

  val (running_time, output) = time_needed(1, execute())
  println(output)
  println(s"done (in $running_time secs).")
}


//=====================================
// Grammar Rules for WHILE with arrays
//=====================================

import fastparse._
import MultiLineWhitespace._

// Character classes used by the lexical rules below:
//   lowercase - a single character in a-z
//   uppercase - a single character in A-Z
//   letter    - either of the two above
//   digit     - a single character in 0-9
def lowercase [_ : P] = P( CharIn("a-z") )
def uppercase[_ : P]  = P( CharIn("A-Z") )
def letter[_ : P]     = P( lowercase | uppercase )
def digit [_ : P]     = P( CharIn("0-9") )

// A number literal: one or more adjacent digits, converted to an Int.
// repX is the whitespace-insensitive repetition; with the plain rep
// the implicit MultiLineWhitespace would be skipped between digits,
// so that "1 2" would be captured as a single token and toInt would
// throw a NumberFormatException.
def Number[_ : P]: P[Int] =  P( digit.repX(1) ).!.map(_.toInt)
// An identifier: a letter followed by letters, digits or underscores.
// The whitespace-insensitive operators ~~ and repX are needed here,
// because with ~ and rep the implicit MultiLineWhitespace is skipped
// between the characters, so that for example "y then z" would be
// captured as one single identifier.
def Ident[_ : P]: P[String] = P( letter ~~ (letter | digit | "_").repX ).!

// arithmetic expressions
// Sums and differences: a first term followed by any number of
// ("+" | "-") term pairs. The pairs are folded from the left, so that
// "-" is left-associative: a - b - c is parsed as (a - b) - c and not
// as a - (b - c). Parsing the first term only once also avoids
// re-parsing it for every alternative.
def AExp[_ : P]: P[AExp] = 
  P( Te ~ (CharIn("+\\-").! ~ Te).rep ).map{ case (first, rest) =>
    rest.foldLeft(first){ case (acc, (op, term)) => Aop(op, acc, term) } }
// Products: a factor, optionally followed by "*" and another product
// (a * b * c therefore nests to the right).
def Te[_ : P]: P[AExp] = 
  P(  P(Fa ~ "*" ~ Te).map{ case (factor, product) => Aop("*", factor, product) }
    | Fa )
// Factors, tried in this order:
//   - a parenthesised arithmetic expression
//   - an array reference  id[aexp]  (built with Ref)
//   - a number literal              (built with Num)
//   - a plain variable              (built with Var)
// The array reference comes before the plain variable, so that the
// identifier in front of "[" is not accepted as a variable on its own.
def Fa[_ : P]: P[AExp] = 
  P( "(" ~ AExp ~ ")" 
     | P (Ident ~ "[" ~ AExp ~ "]").map{Ref.tupled}
     | P(Number).map{Num} 
     | P(Ident).map{Var} )

// boolean expressions
// Boolean expressions: comparisons between two arithmetic
// expressions (=, !=, <, >), the constants true and false, and
// parenthesised boolean expressions. The alternatives are tried
// in the order given.
def BExp[_ : P]: P[BExp] = 
  P(  P(AExp ~ "=" ~ AExp).map{ case (x, z) => Bop("=", x, z)} 
    | P(AExp ~ "!=" ~ AExp).map{ case (x, z) => Bop("!=", x, z)}  
    | P(AExp ~ "<" ~ AExp).map{ case (x, z) => Bop("<", x, z)}  
    // x > z is represented as z < x (operands swapped), no ">" Bop is built
    | P(AExp ~ ">" ~ AExp).map{ case (x, z) => Bop("<", z, x)}  
    | P("true").map{ _ => True} 
    | P("false").map{ _ => False} 
    | "(" ~ BExp ~ ")" )

// statements and blocks
// A single statement; the alternatives are tried in the order given:
//   skip
//   id := aexp                            (Assign)
//   id[aexp] := aexp                      (AssignA)
//   if bexp then block else block         (If)
//   while bexp do block                   (While)
//   new(id[number])                       (ArrayDef)
//   write(id)                             (Write)
// Note that "new(", "write(" and "])" are single string literals, so
// no whitespace is accepted inside them.
def Stmt[_ : P]: P[Stmt] =
  P(  P("skip").map( _ => Skip) 
    | P(Ident ~ ":=" ~ AExp).map{Assign.tupled} 
    | P(Ident ~ "[" ~ AExp ~ "]" ~ ":=" ~ AExp).map{AssignA.tupled} 
    | P("if" ~ BExp ~ "then" ~ Block ~ "else" ~ Block).map{If.tupled} 
    | P("while" ~ BExp ~ "do" ~ Block).map{While.tupled} 
    | P("new(" ~ Ident ~ "[" ~ Number ~ "])").map{ArrayDef.tupled} 
    | P("write(" ~ Ident ~ ")").map{Write} ) 

// A non-empty sequence of statements separated by ";", collected
// into a list in source order.
def Stmts[_ : P]: P[Block] =
  P(  P(Stmt ~ ";" ~ Stmts).map{ case (first, remaining) => first :: remaining }
    | P(Stmt).map{ only => only :: Nil } )

// A block is either a statement sequence enclosed in curly braces
// or a single statement, which is turned into a one-element list.
def Block[_ : P]: P[Block] =
  P(  "{" ~ Stmts ~ "}"
    | P(Stmt).map{ single => single :: Nil } )

// some test cases for the parser

//println(fastparse.parse("(1 + (2 + 5)) + 4", AExp(_)).get)
//println(fastparse.parse("1 + 2 + 3 + 45", AExp(_)))
//println(fastparse.parse("1 + 2 * 3", AExp(_)))
//println(fastparse.parse("x + 2 * 3", AExp(_)))
//println(fastparse.parse("x2 := 5 + a", Stmts(_)))
//println(fastparse.parse("x2 := 5 + a[3+a]", Stmts(_)))
//println(fastparse.parse("a[x2+3] := 5 + a[3+a]", Stmts(_)))
//println(fastparse.parse("{x := 5; y := 8}", Block(_)))
//println(fastparse.parse("if (false) then {x := 5} else {x := 10}", Block(_)))

// A WHILE program given as source text; it serves as input for the
// (commented-out) parser test below. The loop counts n down from 10
// and the program finally writes the variable result.
val fib = 
 """n := 10;
    minus1 := 0;
    minus2 := 1;
    temp:=0;
    while (n > 0) do {
      temp := minus2; 
      minus2 := minus1 + minus2;
      minus1 := temp;
      n := n - 1};
    result := minus2;
    write(result)
   """

//println(fastparse.parse(fib, Stmts(_)).get.value)



//======================================
// BF transpiler into WHILE with arrays
//======================================

// Simple translation of a single BF instruction into WHILE code.
// Every character that is not a (supported) BF instruction is
// translated into the empty string; the input instruction ',' is
// not supported.
def instr(c: Char) : String = c match {
  case '>' => "ptr := ptr + 1;"
  case '<' => "ptr := ptr - 1;"
  case '+' => "mem[ptr] := mem[ptr] + 1;"
  case '-' => "mem[ptr] := mem[ptr] - 1;"
  // the grammar only accepts the form write(x), not "write x"
  case '.' => "x := mem[ptr]; write(x);"
  //case ',' => "XXX" // "ptr = getchar();\n"
  case '['  => "while (mem[ptr] != 0) do {"
  // the skip closes the loop body, so that no ";" ends up before "}"
  case ']'  => "skip};"
  case _ => ""
}

// Translates a BF program character by character (using instr) and
// concatenates the generated WHILE fragments.
def instrs(prog: String) : String = {
  val out = new StringBuilder
  prog.foreach(c => out ++= instr(c))
  out.toString
}


// Note: Unfortunately, the transpiled mandelbrot.bf program 
// is so large that it exceeds the size limit the JVM imposes 
// on a single method (at most 64K bytes of bytecode).
// Therefore some optimisations are first applied to 
// BF programs before WHILE programs are created. The
// optimisations are 
//  
//  - replacing BF-loops of the form [-] with a new 0-instruction 
//  - combining single increment/decrement instructions
//
// The size of the resulting .j-file is 270K.


// Run-length encodes the characters in cs on top of the accumulator
// acc. The most recent run is at the head of the result, that is the
// runs come out in reverse order.
def splice(cs: List[Char], acc: List[(Char, Int)]) : List[(Char, Int)] =
  cs.foldLeft(acc) {
    // same character as the current run: extend that run
    case ((d, n) :: older, c) if c == d => (c, n + 1) :: older
    // otherwise (including the empty accumulator): start a new run
    case (runs, c) => (c, 1) :: runs
  }

// Run-length encoding of a string, with the runs in source order
// (splice produces them in reverse).
def spl(s: String) = {
  val reversed_runs = splice(s.toList, Nil)
  reversed_runs.reverse
}

// Translates a run of n identical BF instructions c into WHILE code.
// The character '0' is not a BF instruction; it is the marker for the
// "set the current cell to zero" instruction introduced by instrs2.
// Characters that are not instructions give the empty string.
def instr2(c: Char, n: Int) : String = c match {
  case '>' => s"ptr := ptr + $n;"
  case '<' => s"ptr := ptr - $n;"
  // zeroing a cell several times is the same as zeroing it once
  case '0' => "mem[ptr] := 0;"
  case '+' => s"mem[ptr] := mem[ptr] + $n;"
  case '-' => s"mem[ptr] := mem[ptr] - $n;"
  // a run of n dots has to print the cell n times (for example the
  // "ll" in Hello World); the cell needs to be loaded only once
  case '.' => "x := mem[ptr]; " + "write(x);" * n
  case '['  => "while (mem[ptr] != 0) do {" * n 
  case ']'  => "skip};" * n
  case _ => ""
}

// Optimising translation of a BF program: loops of the form [-] are
// replaced by the zero-marker '0', the program is split into runs of
// identical characters and each run is translated by instr2.
def instrs2(prog: String) : String = {
  val commands = "<>+-.[]"
  // Blank out everything that is not a BF instruction first: otherwise
  // a literal '0' in the comment text of a BF program could not be
  // told apart from the zero-marker and would be compiled into
  // mem[ptr] := 0. A blank is used (instead of deleting the character)
  // so that the runs of instructions stay exactly as they are.
  val cleaned = prog.map(c => if (commands.indexOf(c) >= 0) c else ' ')
  spl(cleaned.replace("[-]", "0")).map{ case (c, n) => instr2(c, n) }.mkString
}

// Wraps the translated BF program into a complete WHILE program:
// the "header" allocates the memory array and positions the pointer,
// and a final skip follows the ";" of the last translated instruction.
def bf_str(prog: String) : String = {
  val header = List("new(mem[30000]);", "ptr := 15000;")
  val parts = header ::: List(instrs2(prog), "skip")
  parts.mkString
}


// Complete pipeline for a BF program: transpile it into a WHILE
// program (text), parse this text with the Stmts rule (timed), and
// then compile and run the resulting block under the given name.
def bf_run(prog: String, name: String) = {
  println(s"BF pre-processing of $name")
  val bf_string = bf_str(prog)
  println(s"BF parsing (program length ${bf_string.length} characters)")

  // evaluated (and timed) inside time_needed
  def parse_program() = fastparse.parse(bf_string, Stmts(_)).get.value
  val (time, bf_prog) = time_needed(1, parse_program())

  println(s"BF generated WHILE program (needed $time secs for parsing)")
  compile_and_run(bf_prog, name)
}

// A benchmark program (counts down from 'Z' to 'A'); the BF source
// is read from benchmark.bf in the current working directory.
@doc(" Benchmark 'Z' to 'A'.")
@main
def bfc0() = {
  val source = read(pwd / "benchmark.bf")
  bf_run(source, "bench")
}


// The BF source is read from sierpinski.bf in the current working directory.
@doc(" Sierpinski triangle.")
@main
def bfc1() = {
  val source = read(pwd / "sierpinski.bf")
  bf_run(source, "sier")
}

// Hello World: the BF source text that is run by bfc2.
// The line break and the indentation are part of the string.
val bf2 = """++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]
      >>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."""

// Runs the inline BF program bf2 under the class name "hello".
@doc(" Hello world.")
@main
def bfc2() = bf_run(prog = bf2, name = "hello")

// Fibonacci: the BF source text that is run by bfc3.
// The line breaks and the indentation are part of the string.
val bf3 = """+++++++++++
      >+>>>>++++++++++++++++++++++++++++++++++++++++++++
      >++++++++++++++++++++++++++++++++<<<<<<[>[>>>>>>+>
      +<<<<<<<-]>>>>>>>[<<<<<<<+>>>>>>>-]<[>++++++++++[-
      <-[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]>[<<[>>>+<<<
      -]>>[-]]<<]>>>[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]
      >[<<+>>[-]]<<<<<<<]>>>>>[+++++++++++++++++++++++++
      +++++++++++++++++++++++.[-]]++++++++++<[->-<]>++++
      ++++++++++++++++++++++++++++++++++++++++++++.[-]<<
      <<<<<<<<<<[>>>+>+<<<<-]>>>>[<<<<+>>>>-]<-[>>.>.<<<
      [-]]<<[>>+>+<<<-]>>>[<<<+>>>-]<<[<+>-]>[<+>-]<<<-]
      [-]++++++++++."""

// Runs the inline BF program bf3 under the class name "fibs".
@doc(" Fibonacci numbers.")
@main
def bfc3() = bf_run(prog = bf3, name = "fibs")

// Mandelbrot Set
//----------------
//
// Note: Parsing of the generated WHILE program (around 60K in size)
// takes approximately 10 minutes to parse with our parser combinators,
// and approximately 30 seconds with Ammonite's fastparse.

// The BF source is read from mandelbrot.bf in the current working directory.
@doc(" Mandelbrot set.")
@main
def bfc4() = {
  val source = read(pwd / "mandelbrot.bf")
  bf_run(source, "mandelbrot")
}


// this unfortunately hits the capacity of the JVM, even with optimisations
//@doc(" Collatz series up to 30.")
//@main
//def bfc5() = bf_run(read(pwd / "collatz.bf"), "coll")


//
// Runs the benchmarks bfc0 to bfc4 one after the other.
@doc(" All benchmarks.")
@main
def all() = {
  bfc0()
  bfc1()
  bfc2()
  bfc3()
  bfc4()
}