1 // A Small Compiler for a Simple Functional Language  | 
     1 // A Small Compiler for a Simple Functional Language  | 
     2 // (includes a lexer and a parser)  | 
     2 // (includes a lexer and a parser)  | 
     3   | 
     3   | 
     4 import scala.language.implicitConversions      | 
     4 import scala.language.implicitConversions      | 
     5 import scala.language.reflectiveCalls   | 
     5 import scala.language.reflectiveCalls   | 
     6 import scala.util._  | 
         | 
     7 import scala.annotation.tailrec  | 
         | 
     8 import scala.sys.process._  | 
         | 
     9   | 
         | 
    10 def fromFile(name: String) : String =   | 
         | 
    11   io.Source.fromFile(name).mkString  | 
         | 
    12   | 
         | 
    13   | 
     6   | 
    14 abstract class Rexp   | 
     7 abstract class Rexp   | 
    15 case object ZERO extends Rexp  | 
     8 case object ZERO extends Rexp  | 
    16 case object ONE extends Rexp  | 
     9 case object ONE extends Rexp  | 
    17 case class CHAR(c: Char) extends Rexp  | 
    10 case class CHAR(c: Char) extends Rexp  | 
    18 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   | 
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   | 
    19 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   | 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   | 
    20 case class STAR(r: Rexp) extends Rexp   | 
    13 case class STAR(r: Rexp) extends Rexp   | 
    21 case class RECD(x: String, r: Rexp) extends Rexp  | 
    14 case class RECD(x: String, r: Rexp) extends Rexp  | 
    22      | 
    15     | 
    23 abstract class Val  | 
    16 abstract class Val  | 
    24 case object Empty extends Val  | 
    17 case object Empty extends Val  | 
    25 case class Chr(c: Char) extends Val  | 
    18 case class Chr(c: Char) extends Val  | 
    26 case class Sequ(v1: Val, v2: Val) extends Val  | 
    19 case class Sequ(v1: Val, v2: Val) extends Val  | 
    27 case class Left(v: Val) extends Val  | 
    20 case class Left(v: Val) extends Val  | 
    99   case Rec(x, v) => (x, flatten(v))::env(v)  | 
    92   case Rec(x, v) => (x, flatten(v))::env(v)  | 
   100 }  | 
    93 }  | 
   101   | 
    94   | 
   102 // The Injection Part of the lexer  | 
    95 // The Injection Part of the lexer  | 
   103   | 
    96   | 
   104 // calculates a value for how a nullable regex   | 
         | 
   105 // matches the empty string   | 
         | 
   106 def mkeps(r: Rexp) : Val = r match { | 
    97 def mkeps(r: Rexp) : Val = r match { | 
   107   case ONE => Empty  | 
    98   case ONE => Empty  | 
   108   case ALT(r1, r2) =>   | 
    99   case ALT(r1, r2) =>   | 
   109     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))  | 
   100     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))  | 
   110   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))  | 
   101   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))  | 
   111   case STAR(r) => Stars(Nil)  | 
   102   case STAR(r) => Stars(Nil)  | 
   112   case RECD(x, r) => Rec(x, mkeps(r))  | 
   103   case RECD(x, r) => Rec(x, mkeps(r))  | 
   113 }  | 
   104 }  | 
   114   | 
   105   | 
   115 // injects back a character into a value  | 
         | 
   116 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { | 
   106 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { | 
   117   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)  | 
   107   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)  | 
   118   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)  | 
   108   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)  | 
   119   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)  | 
   109   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)  | 
   120   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))  | 
   110   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))  | 
   121   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))  | 
   111   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))  | 
   122   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))  | 
   112   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))  | 
   123   case (CHAR(d), Empty) => Chr(c)   | 
   113   case (CHAR(d), Empty) => Chr(c)   | 
   124   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))  | 
   114   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))  | 
   125 }  | 
   115   case _ => { println ("Injection error") ; sys.exit(-1) }  | 
   126   | 
   116 }  | 
   127   | 
   117   | 
   128 // some "rectification" functions for simplification  | 
   118 // some "rectification" functions for simplification  | 
   129 def F_ID(v: Val): Val = v  | 
   119 def F_ID(v: Val): Val = v  | 
   130 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))  | 
   120 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))  | 
   131 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))  | 
   121 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))  | 
   249   | 
   236   | 
   250 def tokenise(s: String) : List[Token] =   | 
   237 def tokenise(s: String) : List[Token] =   | 
   251   lexing_simp(WHILE_REGS, s).collect(token)  | 
   238   lexing_simp(WHILE_REGS, s).collect(token)  | 
   252   | 
   239   | 
   253   | 
   240   | 
   254 // Parser - Abstract syntax trees  | 
   241   | 
         | 
   242 // Parser combinators  | 
         | 
   243 abstract class Parser[I, T](implicit ev: I => Seq[_]) { | 
         | 
   244   def parse(ts: I): Set[(T, I)]  | 
         | 
   245   | 
         | 
   246   def parse_all(ts: I) : Set[T] =  | 
         | 
   247     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head  | 
         | 
   248   | 
         | 
   249   def parse_single(ts: I) : T = parse_all(ts).toList match { | 
         | 
   250     case List(t) => t  | 
         | 
   251     case _ => { println ("Parse Error\n") ; sys.exit(-1) } | 
         | 
   252   }  | 
         | 
   253 }  | 
         | 
   254   | 
         | 
   255 class SeqParser[I, T, S](p: => Parser[I, T],   | 
         | 
   256                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] { | 
         | 
   257   def parse(sb: I) =   | 
         | 
   258     for ((head1, tail1) <- p.parse(sb);   | 
         | 
   259          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)  | 
         | 
   260 }  | 
         | 
   261   | 
         | 
   262 class AltParser[I, T](p: => Parser[I, T],   | 
         | 
   263                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { | 
         | 
   264   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)     | 
         | 
   265 }  | 
         | 
   266   | 
         | 
   267 class FunParser[I, T, S](p: => Parser[I, T],   | 
         | 
   268                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { | 
         | 
   269   def parse(sb: I) =   | 
         | 
   270     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)  | 
         | 
   271 }  | 
         | 
   272   | 
         | 
   273 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { | 
         | 
   274   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)  | 
         | 
   275   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)  | 
         | 
   276   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)  | 
         | 
   277 }  | 
         | 
   278   | 
         | 
   279 def ListParser[I, T, S](p: => Parser[I, T],   | 
         | 
   280                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { | 
         | 
   281   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } || | 
         | 
   282   (p ==> ((s) => List(s)))  | 
         | 
   283 }  | 
         | 
   284   | 
         | 
   285 case class TokParser(tok: Token) extends Parser[List[Token], Token] { | 
         | 
   286   def parse(ts: List[Token]) = ts match { | 
         | 
   287     case t::ts if (t == tok) => Set((t, ts))   | 
         | 
   288     case _ => Set ()  | 
         | 
   289   }  | 
         | 
   290 }  | 
         | 
   291   | 
         | 
   292 implicit def token2tparser(t: Token) = TokParser(t)  | 
         | 
   293   | 
         | 
   294 implicit def TokOps(t: Token) = new { | 
         | 
   295   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)  | 
         | 
   296   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)  | 
         | 
   297   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)  | 
         | 
   298 }  | 
         | 
   299   | 
         | 
   300 case object NumParser extends Parser[List[Token], Int] { | 
         | 
   301   def parse(ts: List[Token]) = ts match { | 
         | 
   302     case T_NUM(n)::ts => Set((n, ts))   | 
         | 
   303     case _ => Set ()  | 
         | 
   304   }  | 
         | 
   305 }  | 
         | 
   306   | 
         | 
   307 case object IdParser extends Parser[List[Token], String] { | 
         | 
   308   def parse(ts: List[Token]) = ts match { | 
         | 
   309     case T_ID(s)::ts => Set((s, ts))   | 
         | 
   310     case _ => Set ()  | 
         | 
   311   }  | 
         | 
   312 }  | 
         | 
   313   | 
         | 
   314   | 
         | 
   315   | 
         | 
   316 // Abstract syntax trees for Fun  | 
   255 abstract class Exp  | 
   317 abstract class Exp  | 
   256 abstract class BExp   | 
   318 abstract class BExp   | 
   257 abstract class Decl  | 
   319 abstract class Decl  | 
   258   | 
   320   | 
   259 case class Def(name: String, args: List[String], body: Exp) extends Decl  | 
   321 case class Def(name: String, args: List[String], body: Exp) extends Decl  | 
   264 case class Write(e: Exp) extends Exp  | 
   326 case class Write(e: Exp) extends Exp  | 
   265 case class Var(s: String) extends Exp  | 
   327 case class Var(s: String) extends Exp  | 
   266 case class Num(i: Int) extends Exp  | 
   328 case class Num(i: Int) extends Exp  | 
   267 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp  | 
   329 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp  | 
   268 case class Sequence(e1: Exp, e2: Exp) extends Exp  | 
   330 case class Sequence(e1: Exp, e2: Exp) extends Exp  | 
   269   | 
         | 
   270 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp  | 
   331 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp  | 
   271   | 
   332   | 
   272 // calculating the maximal needed stack size  | 
   333   | 
   273 def max_stack_exp(e: Exp): Int = e match { | 
   334   | 
   274   case Call(_, args) => args.map(max_stack_exp).sum  | 
   335 // Grammar Rules for Fun  | 
   275   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)  | 
         | 
   276   case Write(e) => max_stack_exp(e) + 1  | 
         | 
   277   case Var(_) => 1  | 
         | 
   278   case Num(_) => 1  | 
         | 
   279   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)  | 
         | 
   280   case Sequence(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max  | 
         | 
   281 }  | 
         | 
   282 def max_stack_bexp(e: BExp): Int = e match { | 
         | 
   283   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)  | 
         | 
   284 }  | 
         | 
   285   | 
         | 
   286 // Parser combinators  | 
         | 
   287 abstract class Parser[I, T](implicit ev: I => Seq[_]) { | 
         | 
   288   def parse(ts: I): Set[(T, I)]  | 
         | 
   289   | 
         | 
   290   def parse_all(ts: I) : Set[T] =  | 
         | 
   291     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head  | 
         | 
   292   | 
         | 
   293   def parse_single(ts: I) : T = parse_all(ts).toList match { | 
         | 
   294     case List(t) => t  | 
         | 
   295     case _ => { println ("Parse Error\n") ; sys.exit(-1) } | 
         | 
   296   }  | 
         | 
   297 }  | 
         | 
   298   | 
         | 
   299 class SeqParser[I, T, S](p: => Parser[I, T],   | 
         | 
   300                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] { | 
         | 
   301   def parse(sb: I) =   | 
         | 
   302     for ((head1, tail1) <- p.parse(sb);   | 
         | 
   303          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)  | 
         | 
   304 }  | 
         | 
   305   | 
         | 
   306 class AltParser[I, T](p: => Parser[I, T],   | 
         | 
   307                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { | 
         | 
   308   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)     | 
         | 
   309 }  | 
         | 
   310   | 
         | 
   311 class FunParser[I, T, S](p: => Parser[I, T],   | 
         | 
   312                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { | 
         | 
   313   def parse(sb: I) =   | 
         | 
   314     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)  | 
         | 
   315 }  | 
         | 
   316   | 
         | 
   317 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { | 
         | 
   318   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)  | 
         | 
   319   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)  | 
         | 
   320   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)  | 
         | 
   321 }  | 
         | 
   322   | 
         | 
   323 def ListParser[I, T, S](p: => Parser[I, T],   | 
         | 
   324                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { | 
         | 
   325   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } || | 
         | 
   326   (p ==> ((s) => List(s)))  | 
         | 
   327 }  | 
         | 
   328   | 
         | 
   329 case class TokParser(tok: Token) extends Parser[List[Token], Token] { | 
         | 
   330   def parse(ts: List[Token]) = ts match { | 
         | 
   331     case t::ts if (t == tok) => Set((t, ts))   | 
         | 
   332     case _ => Set ()  | 
         | 
   333   }  | 
         | 
   334 }  | 
         | 
   335   | 
         | 
   336 implicit def token2tparser(t: Token) = TokParser(t)  | 
         | 
   337   | 
         | 
   338 implicit def TokOps(t: Token) = new { | 
         | 
   339   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)  | 
         | 
   340   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)  | 
         | 
   341   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)  | 
         | 
   342 }  | 
         | 
   343   | 
         | 
   344 case object NumParser extends Parser[List[Token], Int] { | 
         | 
   345   def parse(ts: List[Token]) = ts match { | 
         | 
   346     case T_NUM(n)::ts => Set((n, ts))   | 
         | 
   347     case _ => Set ()  | 
         | 
   348   }  | 
         | 
   349 }  | 
         | 
   350   | 
         | 
   351 case object IdParser extends Parser[List[Token], String] { | 
         | 
   352   def parse(ts: List[Token]) = ts match { | 
         | 
   353     case T_ID(s)::ts => Set((s, ts))   | 
         | 
   354     case _ => Set ()  | 
         | 
   355   }  | 
         | 
   356 }  | 
         | 
   357   | 
         | 
   358   | 
         | 
   359 // Grammar Rules  | 
         | 
   360   | 
   336   | 
   361 // arithmetic expressions  | 
   337 // arithmetic expressions  | 
   362 lazy val Exp: Parser[List[Token], Exp] =   | 
   338 lazy val Exp: Parser[List[Token], Exp] =   | 
   363   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> | 
   339   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> | 
   364     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } || | 
   340     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } || | 
   512     i"return" ++  | 
   503     i"return" ++  | 
   513     m".end method\n"  | 
   504     m".end method\n"  | 
   514   }  | 
   505   }  | 
   515 }  | 
   506 }  | 
   516   | 
   507   | 
   517   | 
   508 // main compiler functions  | 
   518 def compile(class_name: String, input: String) : String = { | 
         | 
   519   val tks = tokenise(input)  | 
         | 
   520   println(Prog.parse_single(tks).mkString("\n")) | 
         | 
   521   val ast = Prog.parse_single(tks)  | 
         | 
   522   val instructions = ast.map(compile_decl).mkString  | 
         | 
   523   (library + instructions).replaceAllLiterally("XXX", class_name) | 
         | 
   524 }  | 
         | 
   525   | 
         | 
   526   | 
         | 
   527 def compile_file(file_name: String) = { | 
         | 
   528   val class_name = file_name.split('.')(0) | 
         | 
   529   val output = compile(class_name, fromFile(file_name))  | 
         | 
   530   val fw = new java.io.FileWriter(class_name + ".j")   | 
         | 
   531   fw.write(output)   | 
         | 
   532   fw.close()  | 
         | 
   533 }  | 
         | 
   534   | 
   509   | 
   535 def time_needed[T](i: Int, code: => T) = { | 
   510 def time_needed[T](i: Int, code: => T) = { | 
   536   val start = System.nanoTime()  | 
   511   val start = System.nanoTime()  | 
   537   for (j <- 1 to i) code  | 
   512   for (j <- 1 to i) code  | 
   538   val end = System.nanoTime()  | 
   513   val end = System.nanoTime()  | 
   539   (end - start)/(i * 1.0e9)  | 
   514   (end - start)/(i * 1.0e9)  | 
   540 }  | 
   515 }  | 
   541   | 
   516   | 
   542 def compile_run(file_name: String) : Unit = { | 
   517 def compile(class_name: String, input: String) : String = { | 
   543   val class_name = file_name.split('.')(0) | 
   518   val tks = tokenise(input)  | 
   544   compile_file(file_name)  | 
   519   val ast = Prog.parse_single(tks)  | 
   545   val test = (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!! | 
   520   val instructions = ast.map(compile_decl).mkString  | 
         | 
   521   (library + instructions).replaceAllLiterally("XXX", class_name) | 
         | 
   522 }  | 
         | 
   523   | 
         | 
   524 def compile_file(class_name: String) = { | 
         | 
   525   val input = io.Source.fromFile(s"${class_name}.fun").mkString | 
         | 
   526   val output = compile(class_name, input)  | 
         | 
   527   scala.tools.nsc.io.File(s"${class_name}.j").writeAll(output) | 
         | 
   528 }  | 
         | 
   529   | 
         | 
   530 import scala.sys.process._  | 
         | 
   531   | 
         | 
   532 def compile_run(class_name: String) : Unit = { | 
         | 
   533   compile_file(class_name)  | 
         | 
   534   (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!! | 
   546   println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!)) | 
   535   println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!)) | 
   547 }  | 
   536 }  | 
   548   | 
   537   | 
   549   | 
   538   | 
   550 // some examples  | 
   539 // some examples of .fun files  | 
   551 compile_file("fact.rec") | 
   540 //compile_file("fact") | 
   552 compile_run("defs.rec") | 
   541 //compile_run("defs") | 
   553 compile_run("fact.rec") | 
   542 compile_run("fact") |