1 // A Small Compiler for a Simple Functional Language |
1 // A Small Compiler for a Simple Functional Language |
2 // (includes a lexer and a parser) |
2 // (includes a lexer and a parser) |
3 |
3 |
4 import scala.language.implicitConversions |
4 import scala.language.implicitConversions |
5 import scala.language.reflectiveCalls |
5 import scala.language.reflectiveCalls |
6 import scala.util._ |
|
7 import scala.annotation.tailrec |
|
8 import scala.sys.process._ |
|
9 |
|
10 def fromFile(name: String) : String = |
|
11 io.Source.fromFile(name).mkString |
|
12 |
|
13 |
6 |
14 abstract class Rexp |
7 abstract class Rexp |
15 case object ZERO extends Rexp |
8 case object ZERO extends Rexp |
16 case object ONE extends Rexp |
9 case object ONE extends Rexp |
17 case class CHAR(c: Char) extends Rexp |
10 case class CHAR(c: Char) extends Rexp |
18 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
19 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
20 case class STAR(r: Rexp) extends Rexp |
13 case class STAR(r: Rexp) extends Rexp |
21 case class RECD(x: String, r: Rexp) extends Rexp |
14 case class RECD(x: String, r: Rexp) extends Rexp |
22 |
15 |
23 abstract class Val |
16 abstract class Val |
24 case object Empty extends Val |
17 case object Empty extends Val |
25 case class Chr(c: Char) extends Val |
18 case class Chr(c: Char) extends Val |
26 case class Sequ(v1: Val, v2: Val) extends Val |
19 case class Sequ(v1: Val, v2: Val) extends Val |
27 case class Left(v: Val) extends Val |
20 case class Left(v: Val) extends Val |
99 case Rec(x, v) => (x, flatten(v))::env(v) |
92 case Rec(x, v) => (x, flatten(v))::env(v) |
100 } |
93 } |
101 |
94 |
102 // The Injection Part of the lexer |
95 // The Injection Part of the lexer |
103 |
96 |
104 // calculates a value for how a nullable regex |
|
105 // matches the empty string |
|
106 def mkeps(r: Rexp) : Val = r match { |
97 def mkeps(r: Rexp) : Val = r match { |
107 case ONE => Empty |
98 case ONE => Empty |
108 case ALT(r1, r2) => |
99 case ALT(r1, r2) => |
109 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
100 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
110 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
101 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
111 case STAR(r) => Stars(Nil) |
102 case STAR(r) => Stars(Nil) |
112 case RECD(x, r) => Rec(x, mkeps(r)) |
103 case RECD(x, r) => Rec(x, mkeps(r)) |
113 } |
104 } |
114 |
105 |
115 // injects back a character into a value |
|
116 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
106 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
117 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
107 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
118 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
108 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
119 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
109 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
120 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
110 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
121 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
111 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
122 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
112 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
123 case (CHAR(d), Empty) => Chr(c) |
113 case (CHAR(d), Empty) => Chr(c) |
124 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
114 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
125 } |
115 case _ => { println ("Injection error") ; sys.exit(-1) } |
126 |
116 } |
127 |
117 |
128 // some "rectification" functions for simplification |
118 // some "rectification" functions for simplification |
129 def F_ID(v: Val): Val = v |
119 def F_ID(v: Val): Val = v |
130 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) |
120 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) |
131 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) |
121 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) |
249 |
236 |
250 def tokenise(s: String) : List[Token] = |
237 def tokenise(s: String) : List[Token] = |
251 lexing_simp(WHILE_REGS, s).collect(token) |
238 lexing_simp(WHILE_REGS, s).collect(token) |
252 |
239 |
253 |
240 |
254 // Parser - Abstract syntax trees |
241 |
|
242 // Parser combinators |
|
243 abstract class Parser[I, T](implicit ev: I => Seq[_]) { |
|
244 def parse(ts: I): Set[(T, I)] |
|
245 |
|
246 def parse_all(ts: I) : Set[T] = |
|
247 for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head |
|
248 |
|
249 def parse_single(ts: I) : T = parse_all(ts).toList match { |
|
250 case List(t) => t |
|
251 case _ => { println ("Parse Error\n") ; sys.exit(-1) } |
|
252 } |
|
253 } |
|
254 |
|
255 class SeqParser[I, T, S](p: => Parser[I, T], |
|
256 q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] { |
|
257 def parse(sb: I) = |
|
258 for ((head1, tail1) <- p.parse(sb); |
|
259 (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) |
|
260 } |
|
261 |
|
262 class AltParser[I, T](p: => Parser[I, T], |
|
263 q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { |
|
264 def parse(sb: I) = p.parse(sb) ++ q.parse(sb) |
|
265 } |
|
266 |
|
267 class FunParser[I, T, S](p: => Parser[I, T], |
|
268 f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { |
|
269 def parse(sb: I) = |
|
270 for ((head, tail) <- p.parse(sb)) yield (f(head), tail) |
|
271 } |
|
272 |
|
273 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { |
|
274 def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) |
|
275 def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) |
|
276 def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |
|
277 } |
|
278 |
|
279 def ListParser[I, T, S](p: => Parser[I, T], |
|
280 q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { |
|
281 (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } || |
|
282 (p ==> ((s) => List(s))) |
|
283 } |
|
284 |
|
285 case class TokParser(tok: Token) extends Parser[List[Token], Token] { |
|
286 def parse(ts: List[Token]) = ts match { |
|
287 case t::ts if (t == tok) => Set((t, ts)) |
|
288 case _ => Set () |
|
289 } |
|
290 } |
|
291 |
|
292 implicit def token2tparser(t: Token) = TokParser(t) |
|
293 |
|
294 implicit def TokOps(t: Token) = new { |
|
295 def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) |
|
296 def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) |
|
297 def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) |
|
298 } |
|
299 |
|
300 case object NumParser extends Parser[List[Token], Int] { |
|
301 def parse(ts: List[Token]) = ts match { |
|
302 case T_NUM(n)::ts => Set((n, ts)) |
|
303 case _ => Set () |
|
304 } |
|
305 } |
|
306 |
|
307 case object IdParser extends Parser[List[Token], String] { |
|
308 def parse(ts: List[Token]) = ts match { |
|
309 case T_ID(s)::ts => Set((s, ts)) |
|
310 case _ => Set () |
|
311 } |
|
312 } |
|
313 |
|
314 |
|
315 |
|
316 // Abstract syntax trees for Fun |
255 abstract class Exp |
317 abstract class Exp |
256 abstract class BExp |
318 abstract class BExp |
257 abstract class Decl |
319 abstract class Decl |
258 |
320 |
259 case class Def(name: String, args: List[String], body: Exp) extends Decl |
321 case class Def(name: String, args: List[String], body: Exp) extends Decl |
264 case class Write(e: Exp) extends Exp |
326 case class Write(e: Exp) extends Exp |
265 case class Var(s: String) extends Exp |
327 case class Var(s: String) extends Exp |
266 case class Num(i: Int) extends Exp |
328 case class Num(i: Int) extends Exp |
267 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp |
329 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp |
268 case class Sequence(e1: Exp, e2: Exp) extends Exp |
330 case class Sequence(e1: Exp, e2: Exp) extends Exp |
269 |
|
270 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp |
331 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp |
271 |
332 |
272 // calculating the maximal needed stack size |
333 |
273 def max_stack_exp(e: Exp): Int = e match { |
334 |
274 case Call(_, args) => args.map(max_stack_exp).sum |
335 // Grammar Rules for Fun |
275 case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max) |
|
276 case Write(e) => max_stack_exp(e) + 1 |
|
277 case Var(_) => 1 |
|
278 case Num(_) => 1 |
|
279 case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2) |
|
280 case Sequence(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max |
|
281 } |
|
282 def max_stack_bexp(e: BExp): Int = e match { |
|
283 case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2) |
|
284 } |
|
285 |
|
286 // Parser combinators |
|
287 abstract class Parser[I, T](implicit ev: I => Seq[_]) { |
|
288 def parse(ts: I): Set[(T, I)] |
|
289 |
|
290 def parse_all(ts: I) : Set[T] = |
|
291 for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head |
|
292 |
|
293 def parse_single(ts: I) : T = parse_all(ts).toList match { |
|
294 case List(t) => t |
|
295 case _ => { println ("Parse Error\n") ; sys.exit(-1) } |
|
296 } |
|
297 } |
|
298 |
|
299 class SeqParser[I, T, S](p: => Parser[I, T], |
|
300 q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] { |
|
301 def parse(sb: I) = |
|
302 for ((head1, tail1) <- p.parse(sb); |
|
303 (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) |
|
304 } |
|
305 |
|
306 class AltParser[I, T](p: => Parser[I, T], |
|
307 q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { |
|
308 def parse(sb: I) = p.parse(sb) ++ q.parse(sb) |
|
309 } |
|
310 |
|
311 class FunParser[I, T, S](p: => Parser[I, T], |
|
312 f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { |
|
313 def parse(sb: I) = |
|
314 for ((head, tail) <- p.parse(sb)) yield (f(head), tail) |
|
315 } |
|
316 |
|
317 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { |
|
318 def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) |
|
319 def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) |
|
320 def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |
|
321 } |
|
322 |
|
323 def ListParser[I, T, S](p: => Parser[I, T], |
|
324 q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { |
|
325 (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } || |
|
326 (p ==> ((s) => List(s))) |
|
327 } |
|
328 |
|
329 case class TokParser(tok: Token) extends Parser[List[Token], Token] { |
|
330 def parse(ts: List[Token]) = ts match { |
|
331 case t::ts if (t == tok) => Set((t, ts)) |
|
332 case _ => Set () |
|
333 } |
|
334 } |
|
335 |
|
336 implicit def token2tparser(t: Token) = TokParser(t) |
|
337 |
|
338 implicit def TokOps(t: Token) = new { |
|
339 def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) |
|
340 def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) |
|
341 def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) |
|
342 } |
|
343 |
|
344 case object NumParser extends Parser[List[Token], Int] { |
|
345 def parse(ts: List[Token]) = ts match { |
|
346 case T_NUM(n)::ts => Set((n, ts)) |
|
347 case _ => Set () |
|
348 } |
|
349 } |
|
350 |
|
351 case object IdParser extends Parser[List[Token], String] { |
|
352 def parse(ts: List[Token]) = ts match { |
|
353 case T_ID(s)::ts => Set((s, ts)) |
|
354 case _ => Set () |
|
355 } |
|
356 } |
|
357 |
|
358 |
|
359 // Grammar Rules |
|
360 |
336 |
361 // arithmetic expressions |
337 // arithmetic expressions |
362 lazy val Exp: Parser[List[Token], Exp] = |
338 lazy val Exp: Parser[List[Token], Exp] = |
363 (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> |
339 (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> |
364 { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } || |
340 { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } || |
512 i"return" ++ |
503 i"return" ++ |
513 m".end method\n" |
504 m".end method\n" |
514 } |
505 } |
515 } |
506 } |
516 |
507 |
517 |
508 // main compiler functions |
518 def compile(class_name: String, input: String) : String = { |
|
519 val tks = tokenise(input) |
|
520 println(Prog.parse_single(tks).mkString("\n")) |
|
521 val ast = Prog.parse_single(tks) |
|
522 val instructions = ast.map(compile_decl).mkString |
|
523 (library + instructions).replaceAllLiterally("XXX", class_name) |
|
524 } |
|
525 |
|
526 |
|
527 def compile_file(file_name: String) = { |
|
528 val class_name = file_name.split('.')(0) |
|
529 val output = compile(class_name, fromFile(file_name)) |
|
530 val fw = new java.io.FileWriter(class_name + ".j") |
|
531 fw.write(output) |
|
532 fw.close() |
|
533 } |
|
534 |
509 |
535 def time_needed[T](i: Int, code: => T) = { |
510 def time_needed[T](i: Int, code: => T) = { |
536 val start = System.nanoTime() |
511 val start = System.nanoTime() |
537 for (j <- 1 to i) code |
512 for (j <- 1 to i) code |
538 val end = System.nanoTime() |
513 val end = System.nanoTime() |
539 (end - start)/(i * 1.0e9) |
514 (end - start)/(i * 1.0e9) |
540 } |
515 } |
541 |
516 |
542 def compile_run(file_name: String) : Unit = { |
517 def compile(class_name: String, input: String) : String = { |
543 val class_name = file_name.split('.')(0) |
518 val tks = tokenise(input) |
544 compile_file(file_name) |
519 val ast = Prog.parse_single(tks) |
545 val test = (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!! |
520 val instructions = ast.map(compile_decl).mkString |
|
521 (library + instructions).replaceAllLiterally("XXX", class_name) |
|
522 } |
|
523 |
|
524 def compile_file(class_name: String) = { |
|
525 val input = io.Source.fromFile(s"${class_name}.fun").mkString |
|
526 val output = compile(class_name, input) |
|
527 scala.tools.nsc.io.File(s"${class_name}.j").writeAll(output) |
|
528 } |
|
529 |
|
530 import scala.sys.process._ |
|
531 |
|
532 def compile_run(class_name: String) : Unit = { |
|
533 compile_file(class_name) |
|
534 (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!! |
546 println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!)) |
535 println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!)) |
547 } |
536 } |
548 |
537 |
549 |
538 |
550 // some examples |
539 // some examples of .fun files |
551 compile_file("fact.rec") |
540 //compile_file("fact") |
552 compile_run("defs.rec") |
541 //compile_run("defs") |
553 compile_run("fact.rec") |
542 compile_run("fact") |