1 // A Small Compiler for the WHILE Language |
1 // A Small Compiler for the WHILE Language |
|
2 // |
2 // - includes arrays and a small parser for |
3 // - includes arrays and a small parser for |
3 // translated BF programs |
4 // WHILE programs |
|
5 // |
|
6 // - transpiles BF programs into WHILE programs |
|
7 // and compiles and runs them |
|
8 // |
|
9 // Call with |
|
10 // |
|
11 // scala compiler_arr.scala |
|
12 |
|
13 |
4 |
14 |
5 // the abstract syntax trees |
15 // the abstract syntax trees |
6 abstract class Stmt |
16 abstract class Stmt |
7 abstract class AExp |
17 abstract class AExp |
8 abstract class BExp |
18 abstract class BExp |
9 type Block = List[Stmt] |
19 type Block = List[Stmt] |
10 |
20 |
11 // statements |
21 // statements |
12 case object Skip extends Stmt |
22 case object Skip extends Stmt |
13 case class Array(s: String, n: Int) extends Stmt |
23 case class ArrayDef(s: String, n: Int) extends Stmt |
14 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt |
24 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt |
15 case class While(b: BExp, bl: Block) extends Stmt |
25 case class While(b: BExp, bl: Block) extends Stmt |
16 case class Assign(s: String, a: AExp) extends Stmt |
26 case class Assign(s: String, a: AExp) extends Stmt // var := exp |
17 case class AssignA(s: String, a1: AExp, a2: AExp) extends Stmt |
27 case class AssignA(s: String, a1: AExp, a2: AExp) extends Stmt // arr[exp1] := exp2 |
18 case class Write(s: String) extends Stmt |
28 case class Write(s: String) extends Stmt |
19 case class Read(s: String) extends Stmt |
29 case class Read(s: String) extends Stmt |
20 |
30 |
21 // arithmetic expressions |
31 // arithmetic expressions |
22 case class Var(s: String) extends AExp |
32 case class Var(s: String) extends AExp |
124 } |
100 } |
125 |
101 |
126 // arithmetic expression compilation |
102 // arithmetic expression compilation |
127 def compile_aexp(a: AExp, env : Env) : String = a match { |
103 def compile_aexp(a: AExp, env : Env) : String = a match { |
128 case Num(i) => i"ldc $i" |
104 case Num(i) => i"ldc $i" |
129 case Var(s) => i"iload ${env(s)} \t\t; $s" |
105 case Var(s) => i"iload ${env(s)}" |
130 case Aop(op, a1, a2) => |
106 case Aop(op, a1, a2) => |
131 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ compile_op(op) |
107 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ compile_op(op) |
132 case Ref(s, a) => |
108 case Ref(s, a) => |
133 i"aload ${env(s)}" ++ compile_aexp(a, env) ++ i"iaload" |
109 i"aload ${env(s)}" ++ compile_aexp(a, env) ++ i"iaload" |
134 } |
110 } |
135 |
111 |
136 // boolean expression compilation |
112 // boolean expression compilation |
137 def compile_bexp(b: BExp, env : Env, jmp: String) : String = b match { |
113 def compile_bexp(b: BExp, env : Env, jmp: String) : String = b match { |
138 case True => "" |
114 case True => "" |
139 case False => i"goto $jmp" |
115 case False => i"goto $jmp" |
140 case Bop("==", a1, a2) => |
116 case Bop("==", a1, a2) => |
141 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ i"if_icmpne $jmp" |
117 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ i"if_icmpne $jmp" |
142 case Bop("!=", a1, a2) => |
118 case Bop("!=", a1, a2) => |
143 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ i"if_icmpeq $jmp" |
119 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ i"if_icmpeq $jmp" |
229 // minus2 := minus1 + minus2; |
205 // minus2 := minus1 + minus2; |
230 Assign("minus1",Var("temp")), // minus1 := temp; |
206 Assign("minus1",Var("temp")), // minus1 := temp; |
231 Assign("n",Aop("-",Var("n"),Num(1))))), // n := n - 1 }; |
207 Assign("n",Aop("-",Var("n"),Num(1))))), // n := n - 1 }; |
232 Write("minus1")) // write minus1 |
208 Write("minus1")) // write minus1 |
233 |
209 |
234 // prints out the JVM-assembly program |
210 // prints out the JVM-assembly instructions for fib |
235 |
211 // |
236 // println(compile(fib_test, "fib")) |
212 // println(compile(fib_test, "fib")) |
237 |
213 // |
238 // can be assembled with |
214 // can be assembled by hand with |
239 // |
215 // |
240 // java -jar jvm/jasmin-2.4/jasmin.jar fib.j |
216 // java -jar jvm/jasmin-2.4/jasmin.jar fib.j |
241 // |
217 // |
242 // and started with |
218 // and started with |
243 // |
219 // |
244 // java fib/fib |
220 // java fib/fib |
245 |
221 |
246 import scala.util._ |
222 import scala.util._ |
247 import scala.sys.process._ |
223 import scala.sys.process._ |
248 import scala.io |
|
249 |
|
250 def compile_tofile(bl: Block, class_name: String) = { |
|
251 val output = compile(bl, class_name) |
|
252 val fw = new java.io.FileWriter(class_name + ".j") |
|
253 fw.write(output) |
|
254 fw.close() |
|
255 } |
|
256 |
|
257 def compile_all(bl: Block, class_name: String) : Unit = { |
|
258 compile_tofile(bl, class_name) |
|
259 println("compiled ") |
|
260 val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! |
|
261 println("assembled ") |
|
262 } |
|
263 |
224 |
264 def time_needed[T](i: Int, code: => T) = { |
225 def time_needed[T](i: Int, code: => T) = { |
265 val start = System.nanoTime() |
226 val start = System.nanoTime() |
266 for (j <- 1 to i) code |
227 for (j <- 2 to i) code |
|
228 val result = code |
267 val end = System.nanoTime() |
229 val end = System.nanoTime() |
268 (end - start)/(i * 1.0e9) |
230 ((end - start) / (i * 1.0e9), result) |
269 } |
231 } |
270 |
232 |
271 |
233 def compile_to_file(bl: Block, class_name: String) : Unit = |
272 def compile_run(bl: Block, class_name: String) : Unit = { |
234 Using(new java.io.FileWriter(class_name + ".j")) { |
273 println("Start compilation") |
235 _.write(compile(bl, class_name)) |
274 compile_all(bl, class_name) |
236 } |
275 println("Time: " + time_needed(1, ("java " + class_name + "/" + class_name).!)) |
237 |
276 } |
238 def compile_and_run(bl: Block, class_name: String) : Unit = { |
277 |
239 println(s"Start compilation of $class_name") |
|
240 compile_to_file(bl, class_name) |
|
241 println("generated .j file") |
|
242 (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!! |
|
243 println("generated .class file ") |
|
244 println("Time: " + time_needed(1, (s"java ${class_name}/${class_name}").!)._1) |
|
245 } |
278 |
246 |
279 |
247 |
280 val arr_test = |
248 val arr_test = |
281 List(Array("a", 10), |
249 List(ArrayDef("a", 10), |
282 Array("b", 2), |
250 ArrayDef("b", 2), |
283 AssignA("a", Num(0), Num(10)), |
251 AssignA("a", Num(0), Num(10)), |
284 Assign("x", Ref("a", Num(0))), |
252 Assign("x", Ref("a", Num(0))), |
285 Write("x"), |
253 Write("x"), |
286 AssignA("b", Num(1), Num(5)), |
254 AssignA("b", Num(1), Num(5)), |
287 Assign("x", Ref("b", Num(1))), |
255 Assign("x", Ref("b", Num(1))), |
288 Write("x")) |
256 Write("x")) |
289 |
257 |
290 |
258 |
291 //compile_run(arr_test, "a") |
259 //compile_and_run(arr_test, "a") |
292 |
260 |
293 //==================== |
261 //==================== |
294 // Parser Combinators |
262 // Parser Combinators |
295 //==================== |
263 //==================== |
296 |
264 |
297 import scala.language.implicitConversions |
265 import scala.language.implicitConversions |
298 import scala.language.reflectiveCalls |
266 import scala.language.reflectiveCalls |
299 |
267 |
|
268 // more convenience for the semantic actions later on |
|
269 case class ~[+A, +B](_1: A, _2: B) |
|
270 |
300 type IsSeq[A] = A => Seq[_] |
271 type IsSeq[A] = A => Seq[_] |
301 |
272 |
302 abstract class Parser[I : IsSeq, T] { |
273 abstract class Parser[I : IsSeq, T] { |
303 def parse(ts: I): Set[(T, I)] |
274 def parse(ts: I): Set[(T, I)] |
304 |
275 |
305 def parse_all(ts: I) : Set[T] = |
276 def parse_all(ts: I) : Set[T] = |
306 for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head |
277 for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head |
307 } |
278 } |
308 |
279 |
309 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { |
280 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] { |
310 def parse(sb: I) = |
281 def parse(sb: I) = |
311 for ((head1, tail1) <- p.parse(sb); |
282 for ((head1, tail1) <- p.parse(sb); |
312 (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) |
283 (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) |
313 } |
284 } |
314 |
285 |
315 class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { |
286 class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { |
316 def parse(sb: I) = p.parse(sb) ++ q.parse(sb) |
287 def parse(sb: I) = p.parse(sb) ++ q.parse(sb) |
317 } |
288 } |
355 val NumParser = RegexParser("[0-9]+".r) ==> (s => s.toInt : Int) |
326 val NumParser = RegexParser("[0-9]+".r) ==> (s => s.toInt : Int) |
356 val IdParser = RegexParser("[a-z][a-z,0-9]*".r) |
327 val IdParser = RegexParser("[a-z][a-z,0-9]*".r) |
357 |
328 |
358 |
329 |
359 |
330 |
360 // Grammar Rules |
331 // Grammar Rules for WHILE with arrays |
361 |
332 |
362 lazy val AExp: Parser[String, AExp] = |
333 lazy val AExp: Parser[String, AExp] = |
363 (Te ~ "+" ~ AExp) ==> { case ((x, _), z) => Aop("+", x, z):AExp } | |
334 (Te ~ "+" ~ AExp) ==> { case x ~ _ ~ z => Aop("+", x, z):AExp } | |
364 (Te ~ "-" ~ AExp) ==> { case ((x, _), z) => Aop("-", x, z):AExp } | Te |
335 (Te ~ "-" ~ AExp) ==> { case x ~ _ ~ z => Aop("-", x, z):AExp } | Te |
365 lazy val Te: Parser[String, AExp] = |
336 lazy val Te: Parser[String, AExp] = |
366 (Fa ~ "*" ~ Te) ==> { case ((x, _), z) => Aop("*", x, z):AExp } | Fa |
337 (Fa ~ "*" ~ Te) ==> { case x ~ _ ~ z => Aop("*", x, z):AExp } | Fa |
367 lazy val Fa: Parser[String, AExp] = |
338 lazy val Fa: Parser[String, AExp] = |
368 ("(" ~ AExp ~ ")") ==> { case ((_, y), _) => y } | |
339 ("(" ~ AExp ~ ")") ==> { case _ ~ y ~ _ => y } | |
369 (IdParser ~ "[" ~ AExp ~ "]") ==> { case (((x, _), y), _) => Ref(x, y) } | |
340 (IdParser ~ "[" ~ AExp ~ "]") ==> { case x ~ _ ~ y ~ _ => Ref(x, y) } | |
370 IdParser ==> Var | |
341 IdParser ==> Var | |
371 NumParser ==> Num |
342 NumParser ==> Num |
372 |
343 |
373 // boolean expressions |
344 // boolean expressions |
374 lazy val BExp: Parser[String, BExp] = |
345 lazy val BExp: Parser[String, BExp] = |
375 (AExp ~ "=" ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z):BExp } | |
346 (AExp ~ "=" ~ AExp) ==> { case x ~ y ~ z => Bop("=", x, z):BExp } | |
376 (AExp ~ "!=" ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z):BExp } | |
347 (AExp ~ "!=" ~ AExp) ==> { case x ~ y ~ z => Bop("!=", x, z):BExp } | |
377 (AExp ~ "<" ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z):BExp } | |
348 (AExp ~ "<" ~ AExp) ==> { case x ~ y ~ z => Bop("<", x, z):BExp } | |
378 (AExp ~ ">" ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x):BExp } | |
349 (AExp ~ ">" ~ AExp) ==> { case x ~ y ~ z => Bop("<", z, x):BExp } | |
379 ("true" ==> ((_) => True:BExp )) | |
350 ("true" ==> ((_) => True:BExp )) | |
380 ("false" ==> ((_) => False:BExp )) | |
351 ("false" ==> ((_) => False:BExp )) | |
381 ("(" ~ BExp ~ ")") ==> { case ((x, y), z) => y} |
352 ("(" ~ BExp ~ ")") ==> { case x ~ y ~ z => y} |
382 |
353 |
383 lazy val Stmt: Parser[String, Stmt] = |
354 lazy val Stmt: Parser[String, Stmt] = |
384 ("skip" ==> (_ => Skip: Stmt)) | |
355 ("skip" ==> (_ => Skip: Stmt)) | |
385 (IdParser ~ ":=" ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } | |
356 (IdParser ~ ":=" ~ AExp) ==> { case x ~ y ~z => Assign(x, z): Stmt } | |
386 (IdParser ~ "[" ~ AExp ~ "]" ~ ":=" ~ AExp) ==> { |
357 (IdParser ~ "[" ~ AExp ~ "]" ~ ":=" ~ AExp) ==> { |
387 case (((((x, y), z), v), w), u) => AssignA(x, z, u): Stmt } | |
358 case x ~ _ ~ z ~ _ ~ _ ~ u => AssignA(x, z, u): Stmt } | |
388 ("if" ~ BExp ~ "then" ~ Block ~ "else" ~ Block) ==> |
359 ("if" ~ BExp ~ "then" ~ Block ~ "else" ~ Block) ==> |
389 { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } | |
360 { case _ ~ y ~ _ ~ u ~ _ ~w => If(y, u, w): Stmt } | |
390 ("while" ~ BExp ~ "do" ~ Block) ==> { case (((x, y), z), w) => While(y, w) } | |
361 ("while" ~ BExp ~ "do" ~ Block) ==> { case _ ~ y ~ _ ~ w => While(y, w) } | |
391 ("new" ~ IdParser ~ "[" ~ NumParser ~ "]") ==> { case ((((x, y), z), u), v) => Array(y, u) } | |
362 ("new" ~ IdParser ~ "[" ~ NumParser ~ "]") ==> { |
392 ("write" ~ IdParser) ==> { case (_, y) => Write(y) } |
363 case _ ~ y ~ _ ~ u ~ _ => ArrayDef(y, u) } | |
|
364 ("write" ~ IdParser) ==> { case _ ~ y => Write(y) } |
393 |
365 |
394 lazy val Stmts: Parser[String, Block] = |
366 lazy val Stmts: Parser[String, Block] = |
395 (Stmt ~ ";" ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } | |
367 (Stmt ~ ";" ~ Stmts) ==> { case x ~ _ ~ z => x :: z : Block } | |
396 (Stmt ==> ((s) => List(s) : Block)) |
368 (Stmt ==> ((s) => List(s) : Block)) |
397 |
369 |
398 |
370 |
399 lazy val Block: Parser[String, Block] = |
371 lazy val Block: Parser[String, Block] = |
400 ("{" ~ Stmts ~ "}") ==> { case ((x, y), z) => y} | |
372 ("{" ~ Stmts ~ "}") ==> { case _ ~ y ~ _ => y} | |
401 (Stmt ==> (s => List(s))) |
373 (Stmt ==> (s => List(s))) |
402 |
374 |
403 //Stmts.parse_all("x2:=5+a") |
375 //Stmts.parse_all("x2:=5+a") |
404 //Stmts.parse_all("x2:=5+a[3+a]") |
376 //Stmts.parse_all("x2:=5+a[3+a]") |
405 //Stmts.parse_all("a[x2+3]:=5+a[3+a]") |
377 //Stmts.parse_all("a[x2+3]:=5+a[3+a]") |
443 |
416 |
444 def instrs(prog: String) : String = |
417 def instrs(prog: String) : String = |
445 prog.toList.map(instr).mkString |
418 prog.toList.map(instr).mkString |
446 |
419 |
447 |
420 |
448 // the mandelbrot.bf program is so large that |
421 // Note: the mandelbrot.bf program is so large that |
449 // it does not fit into the limitations of what the |
422 // it does not fit inside the limitations of what the |
450 // JVM imposes on methods. |
423 // JVM imposes on methods (only 64K of instructions). |
451 // |
424 // Therefore some optimisations are first applied to |
452 // below it just about fits by optimising [-] loops |
425 // BF programs before WHILE programs are created. The |
453 // and combining instructions |
426 // optimisations are |
|
427 // |
|
428 // - replacing BF-loops of the form [-] |
|
429 // - combining single increment/decrement instructions |
|
430 // |
|
431 // The size of the resulting .j-file is 270K. |
454 |
432 |
455 def splice(cs: List[Char], acc: List[(Char, Int)]) : List[(Char, Int)] = (cs, acc) match { |
433 def splice(cs: List[Char], acc: List[(Char, Int)]) : List[(Char, Int)] = (cs, acc) match { |
456 case (Nil, acc) => acc |
434 case (Nil, acc) => acc |
457 case (c :: cs, Nil) => splice(cs, List((c, 1))) |
435 case (c :: cs, Nil) => splice(cs, List((c, 1))) |
458 case (c :: cs, (d, n) :: acc) => |
436 case (c :: cs, (d, n) :: acc) => |
483 instrs2(prog) ++ |
461 instrs2(prog) ++ |
484 "skip" |
462 "skip" |
485 } |
463 } |
486 |
464 |
487 def bf_run(prog: String, name: String) = { |
465 def bf_run(prog: String, name: String) = { |
488 println("BF processing start") |
466 println(s"BF pre-processing of $name") |
489 val bf_string = bf_str(prog).replaceAll("\\s", "") |
467 val bf_string = bf_str(prog).replaceAll("\\s", "") |
490 |
468 println(s"BF parsing (program length ${bf_string.length} characters)") |
491 println(s"BF parsing start (string length ${bf_string.length})") |
469 val (time, bf_prog) = time_needed(1, Stmts.parse_all(bf_string).toList.head) |
492 val bf_prog = Stmts.parse_all(bf_string).toList.head |
470 println(s"BF generated WHILE program (needed $time for parsing)") |
493 println(s"BF Compile start ${bf_string.toList.length} characters") |
471 compile_and_run(ArrayDef("field", 30000) :: bf_prog, name) |
494 compile_run(Array("field", 30000) :: bf_prog, name) |
|
495 } |
472 } |
496 |
473 |
497 // a benchmark program (counts down from 'Z' to 'A') |
474 // a benchmark program (counts down from 'Z' to 'A') |
498 val bf0 = """>++[<+++++++++++++>-]<[[>+>+<<-]>[<+>-]++++++++ |
475 val bf0 = """>++[<+++++++++++++>-]<[[>+>+<<-]>[<+>-]++++++++ |
499 [>++++++++<-]>.[-]<<>++++++++++[>++++++++++[>++ |
476 [>++++++++<-]>.[-]<<>++++++++++[>++++++++++[>++ |
500 ++++++++[>++++++++++[>++++++++++[>++++++++++[>+ |
477 ++++++++[>++++++++++[>++++++++++[>++++++++++[>+ |
501 +++++++++[-]<-]<-]<-]<-]<-]<-]<-]++++++++++.""" |
478 +++++++++[-]<-]<-]<-]<-]<-]<-]<-]++++++++++.""" |
502 |
479 |
503 bf_run(bf0, "bench") |
480 bf_run(bf0, "bench") |
504 |
481 |
505 |
482 // Sierpinski triangle |
506 |
|
507 val bf1 = """++++++++[>+>++++<<-]>++>>+<[-[>>+<<-]+>>]>+[-<<<[ |
483 val bf1 = """++++++++[>+>++++<<-]>++>>+<[-[>>+<<-]+>>]>+[-<<<[ |
508 ->[+[-]+>++>>>-<<]<[<]>>++++++[<<+++++>>-]+<<++.[-]<< |
484 ->[+[-]+>++>>>-<<]<[<]>>++++++[<<+++++>>-]+<<++.[-]<< |
509 ]>.>+[>>]>+]""" |
485 ]>.>+[>>]>+]""" |
510 |
486 |
511 bf_run(bf1, "sier") |
487 bf_run(bf1, "sier") |
512 |
488 |
|
489 // Helo World! |
513 bf_run("""++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++ |
490 bf_run("""++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++ |
514 ..+++.>>.<-.<.+++.------.--------.>>+.>++.""", "hello") |
491 ..+++.>>.<-.<.+++.------.--------.>>+.>++.""", "hello") |
515 |
492 |
516 |
493 // Fibonacci |
517 val bf2 = """+++++++++++ |
494 val bf2 = """+++++++++++ |
518 >+>>>>++++++++++++++++++++++++++++++++++++++++++++ |
495 >+>>>>++++++++++++++++++++++++++++++++++++++++++++ |
519 >++++++++++++++++++++++++++++++++<<<<<<[>[>>>>>>+> |
496 >++++++++++++++++++++++++++++++++<<<<<<[>[>>>>>>+> |
520 +<<<<<<<-]>>>>>>>[<<<<<<<+>>>>>>>-]<[>++++++++++[- |
497 +<<<<<<<-]>>>>>>>[<<<<<<<+>>>>>>>-]<[>++++++++++[- |
521 <-[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]>[<<[>>>+<<< |
498 <-[>>+>+<<<-]>>>[<<<+>>>-]+<[>[-]<[-]]>[<<[>>>+<<< |