progs/fun.scala
changeset 626 2d91b2107656
parent 625 6709fa87410b
child 628 8067d0a8ba04
equal deleted inserted replaced
625:6709fa87410b 626:2d91b2107656
     1 // A Small Compiler for a Simple Functional Language
     1 // A Small Compiler for a Simple Functional Language
     2 // (includes a lexer and a parser)
     2 // (includes a lexer and a parser)
     3 
     3 
     4 import scala.language.implicitConversions    
     4 import scala.language.implicitConversions    
     5 import scala.language.reflectiveCalls 
     5 import scala.language.reflectiveCalls 
     6 import scala.util._
       
     7 import scala.annotation.tailrec
       
     8 import scala.sys.process._
       
     9 
       
    10 def fromFile(name: String) : String = 
       
    11   io.Source.fromFile(name).mkString
       
    12 
       
    13 
     6 
    14 abstract class Rexp 
     7 abstract class Rexp 
    15 case object ZERO extends Rexp
     8 case object ZERO extends Rexp
    16 case object ONE extends Rexp
     9 case object ONE extends Rexp
    17 case class CHAR(c: Char) extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    18 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    19 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    20 case class STAR(r: Rexp) extends Rexp 
    13 case class STAR(r: Rexp) extends Rexp 
    21 case class RECD(x: String, r: Rexp) extends Rexp
    14 case class RECD(x: String, r: Rexp) extends Rexp
    22    
    15   
    23 abstract class Val
    16 abstract class Val
    24 case object Empty extends Val
    17 case object Empty extends Val
    25 case class Chr(c: Char) extends Val
    18 case class Chr(c: Char) extends Val
    26 case class Sequ(v1: Val, v2: Val) extends Val
    19 case class Sequ(v1: Val, v2: Val) extends Val
    27 case class Left(v: Val) extends Val
    20 case class Left(v: Val) extends Val
    99   case Rec(x, v) => (x, flatten(v))::env(v)
    92   case Rec(x, v) => (x, flatten(v))::env(v)
   100 }
    93 }
   101 
    94 
   102 // The Injection Part of the lexer
    95 // The Injection Part of the lexer
   103 
    96 
   104 // calculates a value for how a nullable regex 
       
   105 // matches the empty string 
       
   106 def mkeps(r: Rexp) : Val = r match {
    97 def mkeps(r: Rexp) : Val = r match {
   107   case ONE => Empty
    98   case ONE => Empty
   108   case ALT(r1, r2) => 
    99   case ALT(r1, r2) => 
   109     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   100     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   110   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   101   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   111   case STAR(r) => Stars(Nil)
   102   case STAR(r) => Stars(Nil)
   112   case RECD(x, r) => Rec(x, mkeps(r))
   103   case RECD(x, r) => Rec(x, mkeps(r))
   113 }
   104 }
   114 
   105 
   115 // injects back a character into a value
       
   116 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   106 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   117   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   107   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   118   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   108   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   119   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   109   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   120   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   110   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   121   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   111   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   122   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   112   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   123   case (CHAR(d), Empty) => Chr(c) 
   113   case (CHAR(d), Empty) => Chr(c) 
   124   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   114   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   125 }
   115   case _ => { println ("Injection error") ; sys.exit(-1) } 
   126 
   116 }
   127 
   117 
   128 // some "rectification" functions for simplification
   118 // some "rectification" functions for simplification
   129 def F_ID(v: Val): Val = v
   119 def F_ID(v: Val): Val = v
   130 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
   120 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
   131 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
   121 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
   143 def F_RECD(f: Val => Val) = (v:Val) => v match {
   133 def F_RECD(f: Val => Val) = (v:Val) => v match {
   144   case Rec(x, v) => Rec(x, f(v))
   134   case Rec(x, v) => Rec(x, f(v))
   145 }
   135 }
   146 def F_ERROR(v: Val): Val = throw new Exception("error")
   136 def F_ERROR(v: Val): Val = throw new Exception("error")
   147 
   137 
   148 // simplification of regular expressions returns now also 
       
   149 // an rectification function; no simplification under STAR 
       
   150 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   138 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   151   case ALT(r1, r2) => {
   139   case ALT(r1, r2) => {
   152     val (r1s, f1s) = simp(r1)
   140     val (r1s, f1s) = simp(r1)
   153     val (r2s, f2s) = simp(r2)
   141     val (r2s, f2s) = simp(r2)
   154     (r1s, r2s) match {
   142     (r1s, r2s) match {
   208 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "\"" | "=" | "," | "(" | ")"
   196 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "\"" | "=" | "," | "(" | ")"
   209 val ALL2 = ALL | "\n"
   197 val ALL2 = ALL | "\n"
   210 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   198 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   211 
   199 
   212 
   200 
   213 
       
   214 val WHILE_REGS = (("k" $ KEYWORD) | 
   201 val WHILE_REGS = (("k" $ KEYWORD) | 
   215                   ("i" $ ID) | 
   202                   ("i" $ ID) | 
   216                   ("o" $ OP) | 
   203                   ("o" $ OP) | 
   217                   ("n" $ NUM) | 
   204                   ("n" $ NUM) | 
   218                   ("s" $ SEMI) | 
   205                   ("s" $ SEMI) | 
   221                   ("pr" $ RPAREN) |
   208                   ("pr" $ RPAREN) |
   222                   ("w" $ (WHITESPACE | COMMENT))).%
   209                   ("w" $ (WHITESPACE | COMMENT))).%
   223 
   210 
   224 
   211 
   225 
   212 
   226 // The tokens for the While language
   213 // The tokens for the Fun language
   227 
   214 
   228 abstract class Token
   215 abstract class Token
   229 case object T_SEMI extends Token
   216 case object T_SEMI extends Token
   230 case object T_COMMA extends Token
   217 case object T_COMMA extends Token
   231 case object T_LPAREN extends Token
   218 case object T_LPAREN extends Token
   249 
   236 
   250 def tokenise(s: String) : List[Token] = 
   237 def tokenise(s: String) : List[Token] = 
   251   lexing_simp(WHILE_REGS, s).collect(token)
   238   lexing_simp(WHILE_REGS, s).collect(token)
   252 
   239 
   253 
   240 
   254 // Parser - Abstract syntax trees
   241 
       
   242 // Parser combinators
       
   243 abstract class Parser[I, T](implicit ev: I => Seq[_]) {
       
   244   def parse(ts: I): Set[(T, I)]
       
   245 
       
   246   def parse_all(ts: I) : Set[T] =
       
   247     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
       
   248 
       
   249   def parse_single(ts: I) : T = parse_all(ts).toList match {
       
   250     case List(t) => t
       
   251     case _ => { println ("Parse Error\n") ; sys.exit(-1) }
       
   252   }
       
   253 }
       
   254 
       
   255 class SeqParser[I, T, S](p: => Parser[I, T], 
       
   256                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] {
       
   257   def parse(sb: I) = 
       
   258     for ((head1, tail1) <- p.parse(sb); 
       
   259          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
       
   260 }
       
   261 
       
   262 class AltParser[I, T](p: => Parser[I, T], 
       
   263                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
       
   264   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
   265 }
       
   266 
       
   267 class FunParser[I, T, S](p: => Parser[I, T], 
       
   268                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
       
   269   def parse(sb: I) = 
       
   270     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
   271 }
       
   272 
       
   273 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
       
   274   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
   275   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
   276   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
   277 }
       
   278 
       
   279 def ListParser[I, T, S](p: => Parser[I, T], 
       
   280                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
       
   281   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } ||
       
   282   (p ==> ((s) => List(s)))
       
   283 }
       
   284 
       
   285 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
       
   286   def parse(ts: List[Token]) = ts match {
       
   287     case t::ts if (t == tok) => Set((t, ts)) 
       
   288     case _ => Set ()
       
   289   }
       
   290 }
       
   291 
       
   292 implicit def token2tparser(t: Token) = TokParser(t)
       
   293 
       
   294 implicit def TokOps(t: Token) = new {
       
   295   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
       
   296   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
       
   297   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
       
   298 }
       
   299 
       
   300 case object NumParser extends Parser[List[Token], Int] {
       
   301   def parse(ts: List[Token]) = ts match {
       
   302     case T_NUM(n)::ts => Set((n, ts)) 
       
   303     case _ => Set ()
       
   304   }
       
   305 }
       
   306 
       
   307 case object IdParser extends Parser[List[Token], String] {
       
   308   def parse(ts: List[Token]) = ts match {
       
   309     case T_ID(s)::ts => Set((s, ts)) 
       
   310     case _ => Set ()
       
   311   }
       
   312 }
       
   313 
       
   314 
       
   315 
       
   316 // Abstract syntax trees for Fun
   255 abstract class Exp
   317 abstract class Exp
   256 abstract class BExp 
   318 abstract class BExp 
   257 abstract class Decl
   319 abstract class Decl
   258 
   320 
   259 case class Def(name: String, args: List[String], body: Exp) extends Decl
   321 case class Def(name: String, args: List[String], body: Exp) extends Decl
   264 case class Write(e: Exp) extends Exp
   326 case class Write(e: Exp) extends Exp
   265 case class Var(s: String) extends Exp
   327 case class Var(s: String) extends Exp
   266 case class Num(i: Int) extends Exp
   328 case class Num(i: Int) extends Exp
   267 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
   329 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
   268 case class Sequence(e1: Exp, e2: Exp) extends Exp
   330 case class Sequence(e1: Exp, e2: Exp) extends Exp
   269 
       
   270 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
   331 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
   271 
   332 
   272 // calculating the maximal needed stack size
   333 
   273 def max_stack_exp(e: Exp): Int = e match {
   334 
   274   case Call(_, args) => args.map(max_stack_exp).sum
   335 // Grammar Rules for Fun
   275   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)
       
   276   case Write(e) => max_stack_exp(e) + 1
       
   277   case Var(_) => 1
       
   278   case Num(_) => 1
       
   279   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   280   case Sequence(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max
       
   281 }
       
   282 def max_stack_bexp(e: BExp): Int = e match {
       
   283   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   284 }
       
   285 
       
   286 // Parser combinators
       
   287 abstract class Parser[I, T](implicit ev: I => Seq[_]) {
       
   288   def parse(ts: I): Set[(T, I)]
       
   289 
       
   290   def parse_all(ts: I) : Set[T] =
       
   291     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
       
   292 
       
   293   def parse_single(ts: I) : T = parse_all(ts).toList match {
       
   294     case List(t) => t
       
   295     case _ => { println ("Parse Error\n") ; sys.exit(-1) }
       
   296   }
       
   297 }
       
   298 
       
   299 class SeqParser[I, T, S](p: => Parser[I, T], 
       
   300                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] {
       
   301   def parse(sb: I) = 
       
   302     for ((head1, tail1) <- p.parse(sb); 
       
   303          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
       
   304 }
       
   305 
       
   306 class AltParser[I, T](p: => Parser[I, T], 
       
   307                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
       
   308   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
   309 }
       
   310 
       
   311 class FunParser[I, T, S](p: => Parser[I, T], 
       
   312                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
       
   313   def parse(sb: I) = 
       
   314     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
   315 }
       
   316 
       
   317 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
       
   318   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
   319   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
   320   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
   321 }
       
   322 
       
   323 def ListParser[I, T, S](p: => Parser[I, T], 
       
   324                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
       
   325   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } ||
       
   326   (p ==> ((s) => List(s)))
       
   327 }
       
   328 
       
   329 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
       
   330   def parse(ts: List[Token]) = ts match {
       
   331     case t::ts if (t == tok) => Set((t, ts)) 
       
   332     case _ => Set ()
       
   333   }
       
   334 }
       
   335 
       
   336 implicit def token2tparser(t: Token) = TokParser(t)
       
   337 
       
   338 implicit def TokOps(t: Token) = new {
       
   339   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
       
   340   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
       
   341   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
       
   342 }
       
   343 
       
   344 case object NumParser extends Parser[List[Token], Int] {
       
   345   def parse(ts: List[Token]) = ts match {
       
   346     case T_NUM(n)::ts => Set((n, ts)) 
       
   347     case _ => Set ()
       
   348   }
       
   349 }
       
   350 
       
   351 case object IdParser extends Parser[List[Token], String] {
       
   352   def parse(ts: List[Token]) = ts match {
       
   353     case T_ID(s)::ts => Set((s, ts)) 
       
   354     case _ => Set ()
       
   355   }
       
   356 }
       
   357 
       
   358 
       
   359 // Grammar Rules
       
   360 
   336 
   361 // arithmetic expressions
   337 // arithmetic expressions
   362 lazy val Exp: Parser[List[Token], Exp] = 
   338 lazy val Exp: Parser[List[Token], Exp] = 
   363   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
   339   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
   364     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } ||
   340     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } ||
   420         return 
   396         return 
   421 .end method
   397 .end method
   422 
   398 
   423 """
   399 """
   424 
   400 
       
   401 // calculating the maximal needed stack size
       
   402 def max_stack_exp(e: Exp): Int = e match {
       
   403   case Call(_, args) => args.map(max_stack_exp).sum
       
   404   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)
       
   405   case Write(e) => max_stack_exp(e) + 1
       
   406   case Var(_) => 1
       
   407   case Num(_) => 1
       
   408   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   409   case Sequence(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max
       
   410 }
       
   411 def max_stack_bexp(e: BExp): Int = e match {
       
   412   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   413 }
       
   414 
       
   415 
   425 // for generating new labels
   416 // for generating new labels
   426 var counter = -1
   417 var counter = -1
   427 
   418 
   428 def Fresh(x: String) = {
   419 def Fresh(x: String) = {
   429   counter += 1
   420   counter += 1
   512     i"return" ++
   503     i"return" ++
   513     m".end method\n"
   504     m".end method\n"
   514   }
   505   }
   515 }
   506 }
   516 
   507 
   517 
   508 // main compiler functions
   518 def compile(class_name: String, input: String) : String = {
       
   519   val tks = tokenise(input)
       
   520   println(Prog.parse_single(tks).mkString("\n"))
       
   521   val ast = Prog.parse_single(tks)
       
   522   val instructions = ast.map(compile_decl).mkString
       
   523   (library + instructions).replaceAllLiterally("XXX", class_name)
       
   524 }
       
   525 
       
   526 
       
   527 def compile_file(file_name: String) = {
       
   528   val class_name = file_name.split('.')(0)
       
   529   val output = compile(class_name, fromFile(file_name))
       
   530   val fw = new java.io.FileWriter(class_name + ".j") 
       
   531   fw.write(output) 
       
   532   fw.close()
       
   533 }
       
   534 
   509 
   535 def time_needed[T](i: Int, code: => T) = {
   510 def time_needed[T](i: Int, code: => T) = {
   536   val start = System.nanoTime()
   511   val start = System.nanoTime()
   537   for (j <- 1 to i) code
   512   for (j <- 1 to i) code
   538   val end = System.nanoTime()
   513   val end = System.nanoTime()
   539   (end - start)/(i * 1.0e9)
   514   (end - start)/(i * 1.0e9)
   540 }
   515 }
   541 
   516 
   542 def compile_run(file_name: String) : Unit = {
   517 def compile(class_name: String, input: String) : String = {
   543   val class_name = file_name.split('.')(0)
   518   val tks = tokenise(input)
   544   compile_file(file_name)
   519   val ast = Prog.parse_single(tks)
   545   val test = (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!!
   520   val instructions = ast.map(compile_decl).mkString
       
   521   (library + instructions).replaceAllLiterally("XXX", class_name)
       
   522 }
       
   523 
       
   524 def compile_file(class_name: String) = {
       
   525   val input = io.Source.fromFile(s"${class_name}.fun").mkString
       
   526   val output = compile(class_name, input)
       
   527   scala.tools.nsc.io.File(s"${class_name}.j").writeAll(output)
       
   528 }
       
   529 
       
   530 import scala.sys.process._
       
   531 
       
   532 def compile_run(class_name: String) : Unit = {
       
   533   compile_file(class_name)
       
   534   (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!!
   546   println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!))
   535   println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!))
   547 }
   536 }
   548 
   537 
   549 
   538 
   550 // some examples
   539 // some examples of .fun files
   551 compile_file("fact.rec")
   540 //compile_file("fact")
   552 compile_run("defs.rec")
   541 //compile_run("defs")
   553 compile_run("fact.rec")
   542 compile_run("fact")