progs/fun.scala
changeset 625 6709fa87410b
parent 380 1e88390e81aa
child 626 2d91b2107656
equal deleted inserted replaced
624:8d0af38389bc 625:6709fa87410b
       
     1 // A Small Compiler for a Simple Functional Language
       
     2 // (includes a lexer and a parser)
       
     3 
     1 import scala.language.implicitConversions    
     4 import scala.language.implicitConversions    
     2 import scala.language.reflectiveCalls 
     5 import scala.language.reflectiveCalls 
     3 import scala.util._
     6 import scala.util._
     4 import scala.annotation.tailrec
     7 import scala.annotation.tailrec
     5 import scala.sys.process._
     8 import scala.sys.process._
     6 
     9 
     7 def fromFile(name: String) : String = 
    10 def fromFile(name: String) : String = 
     8   io.Source.fromFile(name).mkString
    11   io.Source.fromFile(name).mkString
     9 
    12 
       
    13 
    10 abstract class Rexp 
    14 abstract class Rexp 
    11 case object NULL extends Rexp
    15 case object ZERO extends Rexp
    12 case object EMPTY extends Rexp
    16 case object ONE extends Rexp
    13 case class CHAR(c: Char) extends Rexp
    17 case class CHAR(c: Char) extends Rexp
    14 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    18 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    15 case class RANGE(cs: List[Char]) extends Rexp 
       
    16 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    19 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    17 case class PLUS(r: Rexp) extends Rexp 
       
    18 case class STAR(r: Rexp) extends Rexp 
    20 case class STAR(r: Rexp) extends Rexp 
    19 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    21 case class RECD(x: String, r: Rexp) extends Rexp
    20 case class NUPTOM(r: Rexp, n: Int, m: Int) extends Rexp
    22    
    21 
    23 abstract class Val
    22 object RANGE {
    24 case object Empty extends Val
    23   def apply(s: String) : RANGE = RANGE(s.toList)
    25 case class Chr(c: Char) extends Val
    24 }
    26 case class Sequ(v1: Val, v2: Val) extends Val
    25 def NMTIMES(r: Rexp, n: Int, m: Int) = {
    27 case class Left(v: Val) extends Val
    26   if (m < n) throw new IllegalArgumentException("the number m cannot be smaller than n.")
    28 case class Right(v: Val) extends Val
    27   else NUPTOM(r, n, m - n)
    29 case class Stars(vs: List[Val]) extends Val
    28 }
    30 case class Rec(x: String, v: Val) extends Val
    29 
    31    
    30 case class NOT(r: Rexp) extends Rexp 
       
    31 case class OPT(r: Rexp) extends Rexp 
       
    32 
       
    33 // some convenience for typing in regular expressions
    32 // some convenience for typing in regular expressions
    34 def charlist2rexp(s : List[Char]) : Rexp = s match {
    33 def charlist2rexp(s : List[Char]): Rexp = s match {
    35   case Nil => EMPTY
    34   case Nil => ONE
    36   case c::Nil => CHAR(c)
    35   case c::Nil => CHAR(c)
    37   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    36   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    38 }
    37 }
    39 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
    38 implicit def string2rexp(s : String) : Rexp = 
    40 
    39   charlist2rexp(s.toList)
    41 implicit def RexpOps (r: Rexp) = new {
    40 
       
    41 implicit def RexpOps(r: Rexp) = new {
    42   def | (s: Rexp) = ALT(r, s)
    42   def | (s: Rexp) = ALT(r, s)
    43   def % = STAR(r)
    43   def % = STAR(r)
    44   def ~ (s: Rexp) = SEQ(r, s)
    44   def ~ (s: Rexp) = SEQ(r, s)
    45 }
    45 }
    46 
    46 
    47 implicit def stringOps (s: String) = new {
    47 implicit def stringOps(s: String) = new {
    48   def | (r: Rexp) = ALT(s, r)
    48   def | (r: Rexp) = ALT(s, r)
    49   def | (r: String) = ALT(s, r)
    49   def | (r: String) = ALT(s, r)
    50   def % = STAR(s)
    50   def % = STAR(s)
    51   def ~ (r: Rexp) = SEQ(s, r)
    51   def ~ (r: Rexp) = SEQ(s, r)
    52   def ~ (r: String) = SEQ(s, r)
    52   def ~ (r: String) = SEQ(s, r)
    53 }
    53   def $ (r: Rexp) = RECD(s, r)
    54 
    54 }
    55 
    55 
    56 // nullable function: tests whether the regular 
       
    57 // expression can recognise the empty string
       
    58 def nullable (r: Rexp) : Boolean = r match {
    56 def nullable (r: Rexp) : Boolean = r match {
    59   case NULL => false
    57   case ZERO => false
    60   case EMPTY => true
    58   case ONE => true
    61   case CHAR(_) => false
    59   case CHAR(_) => false
    62   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    60   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    63   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    61   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    64   case STAR(_) => true
    62   case STAR(_) => true
    65   case PLUS(r) => nullable(r)
    63   case RECD(_, r1) => nullable(r1)
    66   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    64 }
    67   case NUPTOM(r, i, j) => if (i == 0) true else nullable(r)
    65 
    68   case RANGE(_) => false
       
    69   case NOT(r) => !(nullable(r))
       
    70   case OPT(_) => true
       
    71 }
       
    72 
       
    73 // derivative of a regular expression w.r.t. a character
       
    74 def der (c: Char, r: Rexp) : Rexp = r match {
    66 def der (c: Char, r: Rexp) : Rexp = r match {
    75   case NULL => NULL
    67   case ZERO => ZERO
    76   case EMPTY => NULL
    68   case ONE => ZERO
    77   case CHAR(d) => if (c == d) EMPTY else NULL
    69   case CHAR(d) => if (c == d) ONE else ZERO
    78   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    70   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    79   case SEQ(r1, r2) => 
    71   case SEQ(r1, r2) => 
    80     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    72     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    81     else SEQ(der(c, r1), r2)
    73     else SEQ(der(c, r1), r2)
    82   case STAR(r) => SEQ(der(c, r), STAR(r))
    74   case STAR(r) => SEQ(der(c, r), STAR(r))
    83   case PLUS(r) => SEQ(der(c, r), STAR(r))
    75   case RECD(_, r1) => der(c, r1)
    84   case NTIMES(r, i) => 
    76 }
    85     if (i == 0) NULL else der(c, SEQ(r, NTIMES(r, i - 1)))
    77 
    86   case NUPTOM(r, i, j) =>
    78 
    87     if (i == 0 && j == 0) NULL else 
    79 // extracts a string from value
    88     if (i == 0) ALT(der(c, NTIMES(r, j)), der(c, NUPTOM(r, 0, j - 1)))
    80 def flatten(v: Val) : String = v match {
    89     else der(c, SEQ(r, NUPTOM(r, i - 1, j)))
    81   case Empty => ""
    90   case RANGE(cs) => if (cs contains c) EMPTY else NULL
    82   case Chr(c) => c.toString
    91   case NOT(r) => NOT(der (c, r))
    83   case Left(v) => flatten(v)
    92   case OPT(r) => der(c, r)
    84   case Right(v) => flatten(v)
    93 }
    85   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
    94 
    86   case Stars(vs) => vs.map(flatten).mkString
    95 def zeroable (r: Rexp) : Boolean = r match {
    87   case Rec(_, v) => flatten(v)
    96   case NULL => true
    88 }
    97   case EMPTY => false
    89 
    98   case CHAR(_) => false
    90 // extracts an environment from a value;
    99   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
    91 // used for tokenise a string
   100   case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
    92 def env(v: Val) : List[(String, String)] = v match {
   101   case STAR(_) => false
    93   case Empty => Nil
   102   case PLUS(r) => zeroable(r)
    94   case Chr(c) => Nil
   103   case NTIMES(r, i) => if (i == 0) false else zeroable(r)
    95   case Left(v) => env(v)
   104   case NUPTOM(r, i, j) => if (i == 0) false else zeroable(r)
    96   case Right(v) => env(v)
   105   case RANGE(_) => false
    97   case Sequ(v1, v2) => env(v1) ::: env(v2)
   106   case NOT(r) => !(zeroable(r))     // bug: incorrect definition for NOT
    98   case Stars(vs) => vs.flatMap(env)
   107   case OPT(_) => false
    99   case Rec(x, v) => (x, flatten(v))::env(v)
   108 }
   100 }
   109 
   101 
   110 // derivative w.r.t. a string (iterates der)
   102 // The Injection Part of the lexer
   111 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   103 
   112   case Nil => r
   104 // calculates a value for how a nullable regex 
   113   case c::s => ders(s, der(c, r))
   105 // matches the empty string 
   114 }
   106 def mkeps(r: Rexp) : Val = r match {
   115 
   107   case ONE => Empty
   116 
   108   case ALT(r1, r2) => 
   117 // regular expressions for the While language
   109     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   118 val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".toList)
   110   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   119 val DIGIT = RANGE("0123456789".toList)
   111   case STAR(r) => Stars(Nil)
       
   112   case RECD(x, r) => Rec(x, mkeps(r))
       
   113 }
       
   114 
       
   115 // injects back a character into a value
       
   116 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   117   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   118   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   119   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   120   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   121   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   122   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   123   case (CHAR(d), Empty) => Chr(c) 
       
   124   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   125 }
       
   126 
       
   127 
       
   128 // some "rectification" functions for simplification
       
   129 def F_ID(v: Val): Val = v
       
   130 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
       
   131 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
       
   132 def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   133   case Right(v) => Right(f2(v))
       
   134   case Left(v) => Left(f1(v))
       
   135 }
       
   136 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   137   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
       
   138 }
       
   139 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
       
   140   (v:Val) => Sequ(f1(Empty), f2(v))
       
   141 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
       
   142   (v:Val) => Sequ(f1(v), f2(Empty))
       
   143 def F_RECD(f: Val => Val) = (v:Val) => v match {
       
   144   case Rec(x, v) => Rec(x, f(v))
       
   145 }
       
   146 def F_ERROR(v: Val): Val = throw new Exception("error")
       
   147 
       
   148 // simplification of regular expressions returns now also 
       
   149 // an rectification function; no simplification under STAR 
       
   150 def simp(r: Rexp): (Rexp, Val => Val) = r match {
       
   151   case ALT(r1, r2) => {
       
   152     val (r1s, f1s) = simp(r1)
       
   153     val (r2s, f2s) = simp(r2)
       
   154     (r1s, r2s) match {
       
   155       case (ZERO, _) => (r2s, F_RIGHT(f2s))
       
   156       case (_, ZERO) => (r1s, F_LEFT(f1s))
       
   157       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
       
   158                 else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
       
   159     }
       
   160   }
       
   161   case SEQ(r1, r2) => {
       
   162     val (r1s, f1s) = simp(r1)
       
   163     val (r2s, f2s) = simp(r2)
       
   164     (r1s, r2s) match {
       
   165       case (ZERO, _) => (ZERO, F_ERROR)
       
   166       case (_, ZERO) => (ZERO, F_ERROR)
       
   167       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
       
   168       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
       
   169       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
       
   170     }
       
   171   }
       
   172   case RECD(x, r1) => {
       
   173     val (r1s, f1s) = simp(r1)
       
   174     (RECD(x, r1s), F_RECD(f1s))
       
   175   }
       
   176   case r => (r, F_ID)
       
   177 }
       
   178 
       
   179 // lexing functions including simplification
       
   180 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   181   case Nil => if (nullable(r)) mkeps(r) else { println ("Lexing Error") ; sys.exit(-1) } 
       
   182   case c::cs => {
       
   183     val (r_simp, f_simp) = simp(der(c, r))
       
   184     inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   185   }
       
   186 }
       
   187 
       
   188 def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
       
   189 
       
   190 
       
   191 // The Lexing Rules for the Fun Language
       
   192 
       
   193 def PLUS(r: Rexp) = r ~ r.%
       
   194 
       
   195 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | 
       
   196           "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | 
       
   197           "w" | "x" | "y" | "z" | "T" | "_"
       
   198 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   120 val ID = SYM ~ (SYM | DIGIT).% 
   199 val ID = SYM ~ (SYM | DIGIT).% 
   121 val NUM = PLUS(DIGIT)
   200 val NUM = PLUS(DIGIT)
   122 val KEYWORD : Rexp = "if" | "then" | "else" | "write" | "def"
   201 val KEYWORD : Rexp = "if" | "then" | "else" | "write" | "def"
   123 val SEMI: Rexp = ";"
   202 val SEMI: Rexp = ";"
   124 val COMMA: Rexp = ","
   203 val OP: Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
   125 val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<=" | "=>" | "<" | ">" | "%" | "=" | "/"
       
   126 val WHITESPACE = PLUS(" " | "\n" | "\t")
   204 val WHITESPACE = PLUS(" " | "\n" | "\t")
   127 val RPAREN: Rexp = ")"
   205 val RPAREN: Rexp = ")"
   128 val LPAREN: Rexp = "("
   206 val LPAREN: Rexp = "("
       
   207 val COMMA: Rexp = ","
   129 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "\"" | "=" | "," | "(" | ")"
   208 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "\"" | "=" | "," | "(" | ")"
   130 val ALL2 = ALL | "\n"
   209 val ALL2 = ALL | "\n"
   131 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   210 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   132 
   211 
   133 
   212 
   134 // token for While language
   213 
       
   214 val WHILE_REGS = (("k" $ KEYWORD) | 
       
   215                   ("i" $ ID) | 
       
   216                   ("o" $ OP) | 
       
   217                   ("n" $ NUM) | 
       
   218                   ("s" $ SEMI) | 
       
   219                   ("c" $ COMMA) |
       
   220                   ("pl" $ LPAREN) |
       
   221                   ("pr" $ RPAREN) |
       
   222                   ("w" $ (WHITESPACE | COMMENT))).%
       
   223 
       
   224 
       
   225 
       
   226 // The tokens for the While language
       
   227 
   135 abstract class Token
   228 abstract class Token
   136 case object T_WHITESPACE extends Token
       
   137 case object T_SEMI extends Token
   229 case object T_SEMI extends Token
   138 case object T_COMMA extends Token
   230 case object T_COMMA extends Token
   139 case object T_LPAREN extends Token
   231 case object T_LPAREN extends Token
   140 case object T_RPAREN extends Token
   232 case object T_RPAREN extends Token
   141 case object T_COMMENT extends Token
       
   142 case class T_ID(s: String) extends Token
   233 case class T_ID(s: String) extends Token
   143 case class T_OP(s: String) extends Token
   234 case class T_OP(s: String) extends Token
   144 case class T_NUM(s: String) extends Token
   235 case class T_NUM(n: Int) extends Token
   145 case class T_KWD(s: String) extends Token
   236 case class T_KWD(s: String) extends Token
   146 case class T_ERR(s: String) extends Token // special error token
   237 
   147 
   238 val token : PartialFunction[(String, String), Token] = {
   148 
   239   case ("k", s) => T_KWD(s)
   149 type TokenFun = String => Token
   240   case ("i", s) => T_ID(s)
   150 type LexRules = List[(Rexp, TokenFun)]
   241   case ("o", s) => T_OP(s)
   151 val While_lexing_rules: LexRules = 
   242   case ("n", s) => T_NUM(s.toInt)
   152   List((KEYWORD, (s) => T_KWD(s)),
   243   case ("s", _) => T_SEMI
   153        (ID, (s) => T_ID(s)),
   244   case ("c", _) => T_COMMA
   154        (COMMENT, (s) => T_COMMENT),
   245   case ("pl", _) => T_LPAREN
   155        (OP, (s) => T_OP(s)),
   246   case ("pr", _) => T_RPAREN
   156        (NUM, (s) => T_NUM(s)),
   247 }
   157        (SEMI, (s) => T_SEMI),
   248 
   158        (COMMA, (s) => T_COMMA),
   249 
   159        (LPAREN, (s) => T_LPAREN),
   250 def tokenise(s: String) : List[Token] = 
   160        (RPAREN, (s) => T_RPAREN),
   251   lexing_simp(WHILE_REGS, s).collect(token)
   161        (WHITESPACE, (s) => T_WHITESPACE))
       
   162 
       
   163 
       
   164 // calculates derivatives until all of them are zeroable
       
   165 @tailrec
       
   166 def munch(s: List[Char], 
       
   167           pos: Int, 
       
   168           rs: LexRules, 
       
   169           last: Option[(Int, TokenFun)]): Option[(Int, TokenFun)] = {
       
   170   rs match {
       
   171   case Nil => last
       
   172   case rs if (s.length <= pos) => last
       
   173   case rs => {
       
   174     val ders = rs.map({case (r, tf) => (der(s(pos), r), tf)})
       
   175     val rs_nzero = ders.filterNot({case (r, _) => zeroable(r)})
       
   176     val rs_nulls = ders.filter({case (r, _) => nullable(r)})
       
   177     val new_last = if (rs_nulls != Nil) Some((pos, rs_nulls.head._2)) else last
       
   178     munch(s, 1 + pos, rs_nzero, new_last)
       
   179   }
       
   180 }}
       
   181 
       
   182 // iterates the munching function and returns a Token list
       
   183 def tokenize(s: String, rs: LexRules) : List[Token] = munch(s.toList, 0, rs, None) match {
       
   184   case None if (s == "") => Nil
       
   185   case None => List(T_ERR(s"Lexing error: $s"))
       
   186   case Some((n, tf)) => {
       
   187     val (head, tail) = s.splitAt(n + 1)
       
   188     tf(head)::tokenize(tail, rs)
       
   189   }
       
   190 }
       
   191 
       
   192 def tokenizer(s:String) : List[Token] = 
       
   193   tokenize(s, While_lexing_rules).filter {
       
   194     case T_ERR(s) => { println(s); sys.exit(-1) }
       
   195     case T_WHITESPACE => false
       
   196     case T_COMMENT => false
       
   197     case _ => true
       
   198   } 
       
   199 
       
   200 
   252 
   201 
   253 
   202 // Parser - Abstract syntax trees
   254 // Parser - Abstract syntax trees
   203 abstract class Exp
   255 abstract class Exp
   204 abstract class BExp 
   256 abstract class BExp 
   211 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
   263 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
   212 case class Write(e: Exp) extends Exp
   264 case class Write(e: Exp) extends Exp
   213 case class Var(s: String) extends Exp
   265 case class Var(s: String) extends Exp
   214 case class Num(i: Int) extends Exp
   266 case class Num(i: Int) extends Exp
   215 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
   267 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
   216 case class Sequ(e1: Exp, e2: Exp) extends Exp
   268 case class Sequence(e1: Exp, e2: Exp) extends Exp
   217 
   269 
   218 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
   270 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
   219 
   271 
   220 // calculating the maximal needed stack size
   272 // calculating the maximal needed stack size
   221 def max_stack_exp(e: Exp): Int = e match {
   273 def max_stack_exp(e: Exp): Int = e match {
   223   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)
   275   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)
   224   case Write(e) => max_stack_exp(e) + 1
   276   case Write(e) => max_stack_exp(e) + 1
   225   case Var(_) => 1
   277   case Var(_) => 1
   226   case Num(_) => 1
   278   case Num(_) => 1
   227   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
   279   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
   228   case Sequ(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max
   280   case Sequence(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max
   229 }
   281 }
   230 def max_stack_bexp(e: BExp): Int = e match {
   282 def max_stack_bexp(e: BExp): Int = e match {
   231   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
   283   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
   232 }
   284 }
   233 
   285 
   234 // Parser combinators
   286 // Parser combinators
   235 abstract class Parser[I <% Seq[_], T] {
   287 abstract class Parser[I, T](implicit ev: I => Seq[_]) {
   236   def parse(ts: I): Set[(T, I)]
   288   def parse(ts: I): Set[(T, I)]
   237 
   289 
   238   def parse_all(ts: I) : Set[T] =
   290   def parse_all(ts: I) : Set[T] =
   239     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
   291     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
   240 
   292 
   241   def parse_single(ts: I) : T = parse_all(ts).toList match {
   293   def parse_single(ts: I) : T = parse_all(ts).toList match {
   242     case List(t) => t
   294     case List(t) => t
   243     case _ => { println ("Parse Error") ; sys.exit(-1) }
   295     case _ => { println ("Parse Error\n") ; sys.exit(-1) }
   244   }
   296   }
   245 }
   297 }
   246 
   298 
   247 class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
   299 class SeqParser[I, T, S](p: => Parser[I, T], 
       
   300                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] {
   248   def parse(sb: I) = 
   301   def parse(sb: I) = 
   249     for ((head1, tail1) <- p.parse(sb); 
   302     for ((head1, tail1) <- p.parse(sb); 
   250          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
   303          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
   251 }
   304 }
   252 
   305 
   253 class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
   306 class AltParser[I, T](p: => Parser[I, T], 
       
   307                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
   254   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
   308   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
   255 }
   309 }
   256 
   310 
   257 class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
   311 class FunParser[I, T, S](p: => Parser[I, T], 
       
   312                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
   258   def parse(sb: I) = 
   313   def parse(sb: I) = 
   259     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
   314     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
   315 }
       
   316 
       
   317 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
       
   318   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
   319   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
   320   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
   321 }
       
   322 
       
   323 def ListParser[I, T, S](p: => Parser[I, T], 
       
   324                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
       
   325   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } ||
       
   326   (p ==> ((s) => List(s)))
   260 }
   327 }
   261 
   328 
   262 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
   329 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
   263   def parse(ts: List[Token]) = ts match {
   330   def parse(ts: List[Token]) = ts match {
   264     case t::ts if (t == tok) => Set((t, ts)) 
   331     case t::ts if (t == tok) => Set((t, ts)) 
   266   }
   333   }
   267 }
   334 }
   268 
   335 
   269 implicit def token2tparser(t: Token) = TokParser(t)
   336 implicit def token2tparser(t: Token) = TokParser(t)
   270 
   337 
       
   338 implicit def TokOps(t: Token) = new {
       
   339   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
       
   340   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
       
   341   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
       
   342 }
       
   343 
   271 case object NumParser extends Parser[List[Token], Int] {
   344 case object NumParser extends Parser[List[Token], Int] {
   272   def parse(ts: List[Token]) = ts match {
   345   def parse(ts: List[Token]) = ts match {
   273     case T_NUM(s)::ts => Set((s.toInt, ts)) 
   346     case T_NUM(n)::ts => Set((n, ts)) 
   274     case _ => Set ()
   347     case _ => Set ()
   275   }
   348   }
   276 }
   349 }
   277 
   350 
   278 case object IdParser extends Parser[List[Token], String] {
   351 case object IdParser extends Parser[List[Token], String] {
   281     case _ => Set ()
   354     case _ => Set ()
   282   }
   355   }
   283 }
   356 }
   284 
   357 
   285 
   358 
   286 implicit def ParserOps[I<% Seq[_], T](p: Parser[I, T]) = new {
   359 // Grammar Rules
   287   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
   360 
   288   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
   361 // arithmetic expressions
   289   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
   290 }
       
   291 implicit def TokOps(t: Token) = new {
       
   292   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
       
   293   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
       
   294   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
       
   295 }
       
   296 
       
   297 def ListParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]): Parser[I, List[T]] = {
       
   298   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } ||
       
   299   (p ==> ((s) => List(s)))
       
   300 }
       
   301 
       
   302 
       
   303 // expressions
       
   304 lazy val Exp: Parser[List[Token], Exp] = 
   362 lazy val Exp: Parser[List[Token], Exp] = 
   305   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
   363   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
   306     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } ||
   364     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } ||
   307   (M ~ T_SEMI ~ Exp) ==> { case ((x, y), z) => Sequ(x, z): Exp } || M
   365   (M ~ T_SEMI ~ Exp) ==> { case ((x, y), z) => Sequence(x, z): Exp } || M
   308 lazy val M: Parser[List[Token], Exp] =
   366 lazy val M: Parser[List[Token], Exp] =
   309   (T_KWD("write") ~ L) ==> { case (x, y) => Write(y): Exp } || L
   367   (T_KWD("write") ~ L) ==> { case (x, y) => Write(y): Exp } || L
   310 lazy val L: Parser[List[Token], Exp] = 
   368 lazy val L: Parser[List[Token], Exp] = 
   311   (T ~ T_OP("+") ~ Exp) ==> { case ((x, y), z) => Aop("+", x, z): Exp } ||
   369   (T ~ T_OP("+") ~ Exp) ==> { case ((x, y), z) => Aop("+", x, z): Exp } ||
   312   (T ~ T_OP("-") ~ Exp) ==> { case ((x, y), z) => Aop("-", x, z): Exp } || T  
   370   (T ~ T_OP("-") ~ Exp) ==> { case ((x, y), z) => Aop("-", x, z): Exp } || T  
   370 def Fresh(x: String) = {
   428 def Fresh(x: String) = {
   371   counter += 1
   429   counter += 1
   372   x ++ "_" ++ counter.toString()
   430   x ++ "_" ++ counter.toString()
   373 }
   431 }
   374 
   432 
   375 type Mem = Map[String, Int]
   433 // convenient string interpolations 
   376 type Instrs = List[String]
   434 // for instructions, labels and methods
   377 
   435 import scala.language.implicitConversions
   378 def compile_exp(a: Exp, env : Mem) : Instrs = a match {
   436 import scala.language.reflectiveCalls
   379   case Num(i) => List("ldc " + i.toString + "\n")
   437 
   380   case Var(s) => List("iload " + env(s).toString + "\n")
   438 implicit def sring_inters(sc: StringContext) = new {
   381   case Aop("+", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("iadd\n")
   439     def i(args: Any*): String = "   " ++ sc.s(args:_*) ++ "\n"
   382   case Aop("-", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("isub\n")
   440     def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
   383   case Aop("*", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("imul\n")
   441     def m(args: Any*): String = sc.s(args:_*) ++ "\n"
   384   case Aop("/", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("idiv\n")
   442 }
   385   case Aop("%", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("irem\n")
   443 
       
   444 
       
   445 type Env = Map[String, Int]
       
   446 
       
   447 // compile expressions
       
   448 def compile_exp(a: Exp, env : Env) : String = a match {
       
   449   case Num(i) => i"ldc $i"
       
   450   case Var(s) => i"iload ${env(s)}"
       
   451   case Aop("+", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"iadd"
       
   452   case Aop("-", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"isub"
       
   453   case Aop("*", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"imul"
       
   454   case Aop("/", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"idiv"
       
   455   case Aop("%", a1, a2) => compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"irem"
   386   case If(b, a1, a2) => {
   456   case If(b, a1, a2) => {
   387     val if_else = Fresh("If_else")
   457     val if_else = Fresh("If_else")
   388     val if_end = Fresh("If_end")
   458     val if_end = Fresh("If_end")
   389     compile_bexp(b, env, if_else) ++
   459     compile_bexp(b, env, if_else) ++
   390     compile_exp(a1, env) ++
   460     compile_exp(a1, env) ++
   391     List("goto " + if_end + "\n") ++
   461     i"goto $if_end" ++
   392     List("\n" + if_else + ":\n\n") ++
   462     l"$if_else" ++
   393     compile_exp(a2, env) ++
   463     compile_exp(a2, env) ++
   394     List("\n" + if_end + ":\n\n")
   464     l"$if_end"
   395   }
   465   }
   396   case Call(n, args) => {
   466   case Call(name, args) => {
   397     val is = "I" * args.length
   467     val is = "I" * args.length
   398     args.flatMap(a => compile_exp(a, env)) ++
   468     args.map(a => compile_exp(a, env)).mkString ++
   399     List ("invokestatic XXX/XXX/" + n + "(" + is + ")I\n")
   469     i"invokestatic XXX/XXX/$name($is)I"
   400   }
   470   }
   401   case Sequ(a1, a2) => {
   471   case Sequence(a1, a2) => {
   402     compile_exp(a1, env) ++ List("pop\n") ++ compile_exp(a2, env)
   472     compile_exp(a1, env) ++ i"pop" ++ compile_exp(a2, env)
   403   }
   473   }
   404   case Write(a1) => {
   474   case Write(a1) => {
   405     compile_exp(a1, env) ++
   475     compile_exp(a1, env) ++
   406     List("dup\n",
   476     i"dup" ++
   407          "invokestatic XXX/XXX/write(I)V\n")
   477     i"invokestatic XXX/XXX/write(I)V"
   408   }
   478   }
   409 }
   479 }
   410 
   480 
   411 def compile_bexp(b: BExp, env : Mem, jmp: String) : Instrs = b match {
   481 // compile boolean expressions
       
   482 def compile_bexp(b: BExp, env : Env, jmp: String) : String = b match {
   412   case Bop("==", a1, a2) => 
   483   case Bop("==", a1, a2) => 
   413     compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("if_icmpne " + jmp + "\n")
   484     compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"if_icmpne $jmp"
   414   case Bop("!=", a1, a2) => 
   485   case Bop("!=", a1, a2) => 
   415     compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
   486     compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"if_icmpeq $jmp"
   416   case Bop("<", a1, a2) => 
   487   case Bop("<", a1, a2) => 
   417     compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("if_icmpge " + jmp + "\n")
   488     compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"if_icmpge $jmp"
   418   case Bop("<=", a1, a2) => 
   489   case Bop("<=", a1, a2) => 
   419     compile_exp(a1, env) ++ compile_exp(a2, env) ++ List("if_icmpgt " + jmp + "\n")
   490     compile_exp(a1, env) ++ compile_exp(a2, env) ++ i"if_icmpgt $jmp"
   420 }
   491 }
   421 
   492 
   422 def compile_decl(d: Decl) : Instrs = d match {
   493 // compile function for declarations and main
       
   494 def compile_decl(d: Decl) : String = d match {
   423   case Def(name, args, a) => { 
   495   case Def(name, args, a) => { 
   424     val env = args.zipWithIndex.toMap
   496     val env = args.zipWithIndex.toMap
   425     val is = "I" * args.length
   497     val is = "I" * args.length
   426     List(".method public static " + name + "(" + is + ")I \n",
   498     m".method public static $name($is)I" ++
   427          ".limit locals " + args.length.toString + "\n",
   499     m".limit locals ${args.length.toString}" ++
   428          ".limit stack " + (1 + max_stack_exp(a)).toString + "\n",
   500     m".limit stack ${1 + max_stack_exp(a)}" ++
   429          name + "_Start:\n") ++   
   501     l"${name}_Start" ++   
   430     compile_exp(a, env) ++
   502     compile_exp(a, env) ++
   431     List("ireturn\n",
   503     i"ireturn" ++
   432          ".end method \n\n")
   504     m".end method\n"
   433   }
   505   }
   434   case Main(a) => {
   506   case Main(a) => {
   435     List(".method public static main([Ljava/lang/String;)V\n",
   507     m".method public static main([Ljava/lang/String;)V" ++
   436          ".limit locals 200\n",
   508     m".limit locals 200" ++
   437          ".limit stack 200\n") ++
   509     m".limit stack 200" ++
   438     compile_exp(a, Map()) ++
   510     compile_exp(a, Map()) ++
   439     List("invokestatic XXX/XXX/write(I)V\n",
   511     i"invokestatic XXX/XXX/write(I)V" ++
   440          "return\n",
   512     i"return" ++
   441          ".end method\n")
   513     m".end method\n"
   442   }
   514   }
   443 }
   515 }
       
   516 
   444 
   517 
   445 def compile(class_name: String, input: String) : String = {
   518 def compile(class_name: String, input: String) : String = {
   446   val tks = tokenizer(input)
   519   val tks = tokenise(input)
   447   println(Prog.parse_single(tks).mkString("\n"))
   520   println(Prog.parse_single(tks).mkString("\n"))
   448   val ast = Prog.parse_single(tks)
   521   val ast = Prog.parse_single(tks)
   449   val instructions = ast.flatMap(compile_decl).mkString
   522   val instructions = ast.map(compile_decl).mkString
   450   (library + instructions).replaceAllLiterally("XXX", class_name)
   523   (library + instructions).replaceAllLiterally("XXX", class_name)
   451 }
   524 }
   452 
       
   453 
       
   454 
   525 
   455 
   526 
   456 def compile_file(file_name: String) = {
   527 def compile_file(file_name: String) = {
   457   val class_name = file_name.split('.')(0)
   528   val class_name = file_name.split('.')(0)
   458   val output = compile(class_name, fromFile(file_name))
   529   val output = compile(class_name, fromFile(file_name))
   469 }
   540 }
   470 
   541 
   471 def compile_run(file_name: String) : Unit = {
   542 def compile_run(file_name: String) : Unit = {
   472   val class_name = file_name.split('.')(0)
   543   val class_name = file_name.split('.')(0)
   473   compile_file(file_name)
   544   compile_file(file_name)
   474   val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
   545   val test = (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!!
   475   println("Time: " + time_needed(2, ("java " + class_name + "/" + class_name).!))
   546   println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!))
   476 }
   547 }
   477 
   548 
   478 
   549 
   479 //examples
   550 // some examples
   480 compile_file("fact.rec")
   551 compile_file("fact.rec")
   481 //compile_run("defs.rec")
   552 compile_run("defs.rec")
   482 //compile_run("fact.rec")
   553 compile_run("fact.rec")