progs/compile_arr.scala
changeset 707 2fcd7c2da729
parent 696 3a5a7908517f
child 708 4980f421b3b0
--- a/progs/compile_arr.scala	Fri Jan 24 20:51:19 2020 +0000
+++ b/progs/compile_arr.scala	Sun Jan 26 14:16:30 2020 +0000
@@ -1,6 +1,16 @@
 // A Small Compiler for the WHILE Language
+//
 //  - includes arrays and a small parser for
-//    translated BF programs
+//    WHILE programs
+//
+//  - transpiles BF programs into WHILE programs
+//    and compiles and runs them
+//
+// Call with
+//
+// scala compiler_arr.scala
+
+
 
 // the abstract syntax trees
 abstract class Stmt
@@ -10,11 +20,11 @@
 
 // statements
 case object Skip extends Stmt
-case class Array(s: String, n: Int) extends Stmt
+case class ArrayDef(s: String, n: Int) extends Stmt
 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
 case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-case class AssignA(s: String, a1: AExp, a2: AExp) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt             // var := exp
+case class AssignA(s: String, a1: AExp, a2: AExp) extends Stmt // arr[exp1] := exp2
 case class Write(s: String) extends Stmt
 case class Read(s: String) extends Stmt
 
@@ -46,40 +56,6 @@
     return 
 .end method
 
-.method public static read()I 
-    .limit locals 10 
-    .limit stack 10
-
-    ldc 0 
-    istore 1  ; this will hold our final integer 
-Label1: 
-    getstatic java/lang/System/in Ljava/io/InputStream; 
-    invokevirtual java/io/InputStream/read()I 
-    istore 2 
-    iload 2 
-    ldc 10   ; the newline delimiter 
-    isub 
-    ifeq Label2 
-    iload 2 
-    ldc 32   ; the space delimiter 
-    isub 
-    ifeq Label2
-
-    iload 2 
-    ldc 48   ; we have our digit in ASCII, have to subtract it from 48 
-    isub 
-    ldc 10 
-    iload 1 
-    imul 
-    iadd 
-    istore 1 
-    goto Label1 
-Label2: 
-    ;when we come here we have our integer computed in local variable 1 
-    iload 1 
-    ireturn 
-.end method
-
 .method public static main([Ljava/lang/String;)V
    .limit locals 200
    .limit stack 200
@@ -126,7 +102,7 @@
 // arithmetic expression compilation
 def compile_aexp(a: AExp, env : Env) : String = a match {
   case Num(i) => i"ldc $i"
-  case Var(s) => i"iload ${env(s)} \t\t; $s"
+  case Var(s) => i"iload ${env(s)}"
   case Aop(op, a1, a2) => 
     compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ compile_op(op)
   case Ref(s, a) =>
@@ -135,7 +111,7 @@
 
 // boolean expression compilation
 def compile_bexp(b: BExp, env : Env, jmp: String) : String = b match {
-    case True => ""
+  case True => ""
   case False => i"goto $jmp"
   case Bop("==", a1, a2) => 
     compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ i"if_icmpne $jmp"
@@ -182,7 +158,7 @@
     (i"invokestatic XXX/XXX/read()I" ++ 
      i"istore $index \t\t; $x", env + (x -> index))
   }
-  case Array(s: String, n: Int) => {
+  case ArrayDef(s: String, n: Int) => {
     val index = if (env.isDefinedAt(s)) throw new Exception("array def error") else 
                     env.keys.size
     (i"ldc $n" ++
@@ -199,7 +175,7 @@
   } 
 }
 
-// compilation of a block (i.e. list of instructions)
+// compilation of a block (i.e. list of statements)
 def compile_block(bl: Block, env: Env) : (String, Env) = bl match {
   case Nil => ("", env)
   case s::bl => {
@@ -212,8 +188,8 @@
 
 // main compilation function for blocks
 def compile(bl: Block, class_name: String) : String = {
-  val instructions = compile_block(bl, Map.empty)._1
-  (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+  val instructions = compile_block(bl, Map())._1
+  (beginning ++ instructions ++ ending).replaceAllLiterally("XXX", class_name)
 }
 
 
@@ -231,11 +207,11 @@
                Assign("n",Aop("-",Var("n"),Num(1))))), //  n := n - 1 };
        Write("minus1"))                 //  write minus1
 
-// prints out the JVM-assembly program
-
+// prints out the JVM-assembly instructions for fib
+//
 // println(compile(fib_test, "fib"))
-
-// can be assembled with 
+//
+// can be assembled by hand with 
 //
 //    java -jar jvm/jasmin-2.4/jasmin.jar fib.j
 //
@@ -245,41 +221,33 @@
 
 import scala.util._
 import scala.sys.process._
-import scala.io
-
-def compile_tofile(bl: Block, class_name: String) = {
-  val output = compile(bl, class_name)
-  val fw = new java.io.FileWriter(class_name + ".j") 
-  fw.write(output) 
-  fw.close()
-}
-
-def compile_all(bl: Block, class_name: String) : Unit = {
-  compile_tofile(bl, class_name)
-  println("compiled ")
-  val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
-  println("assembled ")
-}
 
 def time_needed[T](i: Int, code: => T) = {
   val start = System.nanoTime()
-  for (j <- 1 to i) code
+  for (j <- 2 to i) code
+  val result = code
   val end = System.nanoTime()
-  (end - start)/(i * 1.0e9)
+  ((end - start) / (i * 1.0e9), result)
+}
+
+def compile_to_file(bl: Block, class_name: String) : Unit = 
+  Using(new java.io.FileWriter(class_name + ".j")) {
+    _.write(compile(bl, class_name))
+  }
+
+def compile_and_run(bl: Block, class_name: String) : Unit = {
+  println(s"Start compilation of $class_name")
+  compile_to_file(bl, class_name)
+  println("generated .j file")
+  (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!!
+  println("generated .class file ")
+  println("Time: " + time_needed(1, (s"java ${class_name}/${class_name}").!)._1)
 }
 
 
-def compile_run(bl: Block, class_name: String) : Unit = {
-  println("Start compilation")
-  compile_all(bl, class_name)
-  println("Time: " + time_needed(1, ("java " + class_name + "/" + class_name).!))
-}
-
-
-
 val arr_test = 
-  List(Array("a", 10),
-       Array("b", 2),
+  List(ArrayDef("a", 10),
+       ArrayDef("b", 2),
        AssignA("a", Num(0), Num(10)),
        Assign("x", Ref("a", Num(0))),
        Write("x"),
@@ -288,7 +256,7 @@
        Write("x"))       
 
 
-//compile_run(arr_test, "a")
+//compile_and_run(arr_test, "a")
 
 //====================
 // Parser Combinators
@@ -297,6 +265,9 @@
 import scala.language.implicitConversions
 import scala.language.reflectiveCalls
 
+// more convenience for the semantic actions later on
+case class ~[+A, +B](_1: A, _2: B)
+
 type IsSeq[A] = A => Seq[_]
 
 abstract class Parser[I : IsSeq, T] {
@@ -306,10 +277,10 @@
     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
 }
 
-class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
+class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
   def parse(sb: I) = 
     for ((head1, tail1) <- p.parse(sb); 
-         (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
+         (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
 }
 
 class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
@@ -357,47 +328,48 @@
 
 
 
-// Grammar Rules
+// Grammar Rules for WHILE with arrays
 
 lazy val AExp: Parser[String, AExp] = 
-  (Te ~ "+" ~ AExp) ==> { case ((x, _), z) => Aop("+", x, z):AExp } |
-  (Te ~ "-" ~ AExp) ==> { case ((x, _), z) => Aop("-", x, z):AExp } | Te
+  (Te ~ "+" ~ AExp) ==> { case x ~ _ ~ z => Aop("+", x, z):AExp } |
+  (Te ~ "-" ~ AExp) ==> { case x ~ _ ~ z => Aop("-", x, z):AExp } | Te
 lazy val Te: Parser[String, AExp] = 
-  (Fa ~ "*" ~ Te) ==> { case ((x, _), z) => Aop("*", x, z):AExp } | Fa
+  (Fa ~ "*" ~ Te) ==> { case x ~ _ ~ z => Aop("*", x, z):AExp } | Fa
 lazy val Fa: Parser[String, AExp] = 
-   ("(" ~ AExp ~ ")") ==> { case ((_, y), _) => y } | 
-   (IdParser ~ "[" ~ AExp ~ "]") ==> { case (((x, _), y), _) => Ref(x, y) } |
+   ("(" ~ AExp ~ ")") ==> { case _ ~ y ~ _ => y } | 
+   (IdParser ~ "[" ~ AExp ~ "]") ==> { case x ~ _ ~ y ~ _ => Ref(x, y) } |
    IdParser ==> Var | 
    NumParser ==> Num
 
 // boolean expressions
 lazy val BExp: Parser[String, BExp] = 
-   (AExp ~ "=" ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z):BExp } | 
-   (AExp ~ "!=" ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z):BExp } | 
-   (AExp ~ "<" ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z):BExp } | 
-   (AExp ~ ">" ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x):BExp } | 
+   (AExp ~ "=" ~ AExp) ==> { case x ~ y ~ z => Bop("=", x, z):BExp } | 
+   (AExp ~ "!=" ~ AExp) ==> { case x ~ y ~ z => Bop("!=", x, z):BExp } | 
+   (AExp ~ "<" ~ AExp) ==> { case x ~ y ~ z => Bop("<", x, z):BExp } | 
+   (AExp ~ ">" ~ AExp) ==> { case x ~ y ~ z => Bop("<", z, x):BExp } | 
    ("true" ==> ((_) => True:BExp )) | 
    ("false" ==> ((_) => False:BExp )) |
-   ("(" ~ BExp ~ ")") ==> { case ((x, y), z) => y}
+   ("(" ~ BExp ~ ")") ==> { case x ~ y ~ z => y}
 
 lazy val Stmt: Parser[String, Stmt] =
    ("skip" ==> (_ => Skip: Stmt)) |
-   (IdParser ~ ":=" ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } |
+   (IdParser ~ ":=" ~ AExp) ==> { case x ~ y ~z => Assign(x, z): Stmt } |
    (IdParser ~ "[" ~ AExp ~ "]" ~ ":=" ~ AExp) ==> { 
-     case (((((x, y), z), v), w), u) => AssignA(x, z, u): Stmt } |
+     case x ~ _ ~ z ~ _ ~ _ ~ u => AssignA(x, z, u): Stmt } |
    ("if" ~ BExp ~ "then" ~ Block ~ "else" ~ Block) ==>
-    { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } |
-   ("while" ~ BExp ~ "do" ~ Block) ==> { case (((x, y), z), w) => While(y, w) } |
-   ("new" ~ IdParser ~ "[" ~ NumParser ~ "]") ==> { case ((((x, y), z), u), v) => Array(y, u) } |
-   ("write" ~ IdParser) ==> { case (_, y) => Write(y) } 
+    { case _ ~ y ~ _ ~ u ~ _ ~w => If(y, u, w): Stmt } |
+   ("while" ~ BExp ~ "do" ~ Block) ==> { case _ ~ y ~ _ ~ w => While(y, w) } |
+   ("new" ~ IdParser ~ "[" ~ NumParser ~ "]") ==> { 
+    case _ ~ y ~ _ ~ u ~ _ => ArrayDef(y, u) } |
+   ("write" ~ IdParser) ==> { case _ ~ y => Write(y) } 
  
 lazy val Stmts: Parser[String, Block] =
-  (Stmt ~ ";" ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } |
+  (Stmt ~ ";" ~ Stmts) ==> { case x ~ _ ~ z => x :: z : Block } |
   (Stmt ==> ((s) => List(s) : Block)) 
 
 
 lazy val Block: Parser[String, Block] =
-   ("{" ~ Stmts ~ "}") ==> { case ((x, y), z) => y} | 
+   ("{" ~ Stmts ~ "}") ==> { case _ ~ y ~ _ => y} | 
    (Stmt ==> (s => List(s)))
 
 //Stmts.parse_all("x2:=5+a")
@@ -423,12 +395,13 @@
    """.replaceAll("\\s+", "")
 
 val fib_prog = Stmts.parse_all(fib).toList
-//compile_run(fib_prog.head, "fib")
-
+//compile_and_run(fib_prog.head, "fib")
 
-// BF 
+//======================================
+// BF transpiler into WHILE with arrays
+//======================================
 
-// simple instructions
+// simple BF instructions translation
 def instr(c: Char) : String = c match {
   case '>' => "ptr := ptr + 1;"
   case '<' => "ptr := ptr - 1;"
@@ -445,12 +418,17 @@
   prog.toList.map(instr).mkString
 
 
-// the mandelbrot.bf program is so large that
-// it does not fit into the limitations of what the
-// JVM imposes on methods.
+// Note: the mandelbrot.bf program is so large that
+// it does not fit inside the limitations of what the
+// JVM imposes on methods (only 64K of instructions).
+// Therefore some optimisations are first applied to 
+// BF programs before WHILE programs are created. The
+// optimisations are 
+//  
+//  - replacing BF-loops of the form [-] 
+//  - combining single increment/decrement instructions
 //
-// below it just about fits by optimising [-] loops
-// and combining instructions
+// The size of the resulting .j-file is 270K.
 
 def splice(cs: List[Char], acc: List[(Char, Int)]) : List[(Char, Int)] = (cs, acc) match {
   case (Nil, acc) => acc
@@ -485,13 +463,12 @@
 }
 
 def bf_run(prog: String, name: String) = {
-  println("BF processing start")
+  println(s"BF pre-processing of $name")
   val bf_string = bf_str(prog).replaceAll("\\s", "")
-  
-  println(s"BF parsing start (string length ${bf_string.length})")
-  val bf_prog = Stmts.parse_all(bf_string).toList.head
-  println(s"BF Compile start ${bf_string.toList.length} characters")
-  compile_run(Array("field", 30000) :: bf_prog, name)
+  println(s"BF parsing (program length ${bf_string.length} characters)")
+  val (time, bf_prog) = time_needed(1, Stmts.parse_all(bf_string).toList.head)
+  println(s"BF generated WHILE program (needed $time for parsing)")
+  compile_and_run(ArrayDef("field", 30000) :: bf_prog, name)
 }
 
 // a benchmark program (counts down from 'Z' to 'A')
@@ -502,18 +479,18 @@
 
 bf_run(bf0, "bench")
 
-
-
+// Sierpinski triangle
 val bf1 = """++++++++[>+>++++<<-]>++>>+<[-[>>+<<-]+>>]>+[-<<<[
       ->[+[-]+>++>>>-<<]<[<]>>++++++[<<+++++>>-]+<<++.[-]<<
       ]>.>+[>>]>+]"""
 
 bf_run(bf1, "sier")
 
+// Helo World!
 bf_run("""++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++
        ..+++.>>.<-.<.+++.------.--------.>>+.>++.""", "hello")
 
-
+// Fibonacci 
 val bf2 = """+++++++++++
       >+>>>>++++++++++++++++++++++++++++++++++++++++++++
       >++++++++++++++++++++++++++++++++<<<<<<[>[>>>>>>+>
@@ -528,9 +505,13 @@
 
 bf_run(bf2, "fibs")
 
-
+// Mandelbrot Set
+//----------------
+//
+// Note: Parsing of the generated WHILE program (around 60K in size)
+// takes approximately 10 minutes
 
-bf_run("""      A mandelbrot set fractal viewer in brainf*** written by Erik Bosman
+bf_run("""A mandelbrot set fractal viewer in brainf*** written by Erik Bosman
 +++++++++++++[->++>>>+++++>++>+<<<<<<]>>>>>++++++>--->>>>>>>>>>+++++++++++++++[[
 >>>>>>>>>]+[<<<<<<<<<]>>>>>>>>>-]+[>>>>>>>>[-]>]<<<<<<<<<[<<<<<<<<<]>>>>>>>>[-]+
 <<<<<<<+++++[-[->>>>>>>>>+<<<<<<<<<]>>>>>>>>>]>>>>>>>+>>>>>>>>>>>>>>>>>>>>>>>>>>